Repository: dbpedia/lookup
Branch: master
Commit: c13d8fcf6cc1
Files: 30
Total size: 86.2 KB

Directory structure:
gitextract_g106cma5/

├── .gitattributes
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── docker/
│   ├── Dockerfile_2015-10
│   └── README.md
├── pom.xml
├── run
├── scripts/
│   └── index.sh
└── src/
    ├── main/
    │   ├── resources/
    │   │   └── logback.xml
    │   └── scala/
    │       └── org/
    │           └── dbpedia/
    │               └── lookup/
    │                   ├── entities/
    │                   │   ├── Entities.scala
    │                   │   └── EntitiesSerialization.scala
    │                   ├── inputformat/
    │                   │   ├── DBpediaNTriplesInputFormat.scala
    │                   │   ├── InputFormat.scala
    │                   │   ├── PignlprocTSVInputFormat.scala
    │                   │   └── WikiStatsExtractor.scala
    │                   ├── lucene/
    │                   │   ├── Indexer.scala
    │                   │   ├── LuceneConfig.scala
    │                   │   └── Searcher.scala
    │                   ├── server/
    │                   │   ├── LookupResource.scala
    │                   │   └── Server.scala
    │                   └── util/
    │                       └── Logging.scala
    └── test/
        ├── resources/
        │   ├── data.nt
        │   ├── logback-test.xml
        │   └── redirects.nt
        └── scala/
            └── org/
                └── dbpedia/
                    └── lookup/
                        ├── IntegrationTest.scala
                        ├── TestUtils.scala
                        └── entities/
                            ├── EntitiesSerializationTest.scala
                            └── EntitiesTest.scala

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitattributes
================================================
# normalize all text
# when committed they are stored with LF,
# on checkout they are converted to the OS's native line endings
# https://help.github.com/articles/dealing-with-line-endings

*.java text
*.scala text
*.sh text

*.xml text
*.json text
*.nt text

*.md text


================================================
FILE: .gitignore
================================================
.idea

syntax: glob
*.iml
*.ipr
*.iws
target/*


================================================
FILE: .travis.yml
================================================
language: java
jdk:
  - oraclejdk8


# Workaround for https://github.com/travis-ci/travis-ci/issues/5227
# Buffer overflow in Java_java_net_Inet4AddressImpl_getLocalHostName
#
# The steps below truncate the machine's hostname to at most 63 characters
# and append that truncated name to the 127.0.0.1 entry in /etc/hosts.
# NOTE(review): the sed step reads /etc/hosts while `tee` rewrites the same
# file in one pipeline; this is kept exactly as in the upstream workaround.
before_install:
  - cat /etc/hosts # optionally check the content *before*
  # Rename the host to the first 63 characters of its current name.
  - sudo hostname "$(hostname | cut -c1-63)"
  # Append the truncated hostname to the line starting with 127.0.0.1.
  - sed -e "s/^\\(127\\.0\\.0\\.1.*\\)/\\1 $(hostname | cut -c1-63)/" /etc/hosts | sudo tee /etc/hosts
  - cat /etc/hosts # optionally check the content *after*


branches:
  only:
    - master
install: /bin/true
script: "mvn clean install"
notifications:
  email:
    recipients:
      - dbpedia-developers@lists.sourceforge.net
    on_success: change 
    on_failure: change


================================================
FILE: LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: README.md
================================================
# IMPORTANT NOTE:
There is a newer and DBpedia Databus compatible version of the DBpedia Lookup here: https://github.com/dbpedia/dbpedia-lookup. The discussion concerning the transition to the new service can be found here: https://forum.dbpedia.org/t/new-dbpedia-lookup-application/607


# DBpedia Lookup

[![Build Status](https://travis-ci.org/dbpedia/lookup.svg?branch=master)](https://travis-ci.org/dbpedia/lookup)

DBpedia Lookup is a web service that can be used to look up DBpedia URIs by related keywords. Related means that either the label of a resource matches, or an anchor text that was frequently used in Wikipedia to refer to a specific resource matches (for example the resource http://dbpedia.org/resource/United_States can be looked up by the string "USA"). The results are ranked by the number of inlinks pointing from other Wikipedia pages at a result page.


## Web APIs

Two APIs are offered: Keyword Search and Prefix Search. A hosted version of the Lookup service is available on the DBpedia server infrastructure.

### Keyword Search

The Keyword Search API can be used to find related DBpedia resources for a given string. The string may consist of a single or multiple words.

Example: Places that have the related keyword "berlin"

http://lookup.dbpedia.org/api/search/KeywordSearch?QueryClass=place&QueryString=berlin

### Prefix Search (i.e. Autocomplete)

The Prefix Search API can be used to implement autocomplete input boxes. For a given partial keyword like *berl* the API returns URIs of related DBpedia resources like http://dbpedia.org/resource/Berlin.

Example: Top five resources for which a keyword starts with "berl"

http://lookup.dbpedia.org/api/search/PrefixSearch?QueryClass=&MaxHits=5&QueryString=berl

### Parameters

The query parameters accepted by the endpoints are

* `QueryString`: a string for which a DBpedia URI should be found.
* `QueryClass`: a DBpedia class from the Ontology that the results should have (for owl#Thing and untyped resources, leave this parameter empty).
* `MaxHits`: the maximum number of returned results (default: 5)

### JSON support

By default all data is returned as XML. The service also returns JSON for any request that includes the `Accept: application/json` header.

## Running a local mirror of the webservice

### Clone and build DBpedia Lookup

    git clone https://github.com/dbpedia/lookup.git
    cd lookup
    mvn clean install

### Download and configure the index

You can get our indexes from [HERE](http://downloads.dbpedia-spotlight.org/dbpedia_lookup/)

### Run the server

    
    ./run Server [PATH TO THE INDEX]/[VERSION]/

E.g.:

    ./run Server /opt/dbpedia-lookup/2015-04

**Note: The index file must be decompressed**
    
#### Available versions: 
    
* current - from Latest DBpedia Dump (2015-10)

    
#### Available languages (i18n work in progress): 
    
* en - English
    
    
The server should now be running at http://localhost:1111

## Rebuilding the index

Rebuilding an index is usually not required. If you only intend to run a local mirror of the service, you can download a prebuilt index as outlined above.

To re-build the index you will require

* DBpedia datasets
* [Wikistatsextractor output](http://downloads.dbpedia-spotlight.org) - [wikistatsextractor](https://github.com/jodaiber/wikistatsextractor) is a drop-in replacement of [pignlproc](https://github.com/dbpedia-spotlight/pignlproc)
* Unix


### Get the following DBpedia datasets
from http://downloads.dbpedia.org/2015-10/core-i18n/en/

* redirects\_en.nt (or .ttl)
* short\_abstracts\_en.nt (or .ttl)
* instance\_types\_en.nt (or .ttl)
* article\_categories\_en.nt (or .ttl)

from http://downloads.dbpedia.org/2015-10/core

* instance_types_en.ttl
* instance_types_sdtyped_dbo_en.ttl
* instance_types_transitive_en.ttl

### Concatenate all data and sort by URI

This is necessary because indexing in sorted order is significantly faster.

      cat instance_types_en.nt (or .ttl)  \
          short_abstracts_en.nt (or .ttl) \
          article_categories_en.nt (or .ttl) \
          instance_types_en.ttl  \
          instance_types_sdtyped_dbo_en.ttl \
          instance_types_transitive_en.ttl | sort >all_dbpedia_data.nt (or .ttl)

### Get the dataset redirects\_en.nt (or .ttl)

Redirects are not indexed, but they are excluded as targets of lookup.

### Run Indexer

The indexer has to be run twice:

1. with the DBpedia data 

        ./run Indexer lookup_index_dir redirects_en.nt (or .ttl) all_dbpedia_data.nt (or .ttl)

2. with the wikistatsextractor data

        ./run Indexer lookup_index_dir redirects_en.nt (or .ttl) pairCounts

## Support and feedback

The best way to get support or give feedback on the Lookup project is via the [DBpedia discussion mailing list](https://lists.sourceforge.net/lists/listinfo/dbpedia-discussion). More technical queries about the code base should be directed to the [DBpedia developers mailing list](https://lists.sourceforge.net/lists/listinfo/dbpedia-developers).

The [DBpedia wiki](http://wiki.dbpedia.org/lookup/) also has useful information on the project.

## Maintainers

* Kunal Jha [@Kunal-Jha](https://github.com/Kunal-Jha)
* Sandro Coelho [@sandroacoelho](https://github.com/sandroacoelho)
* Pablo Mendes [@pablomendes](https://github.com/pablomendes) (less active)
* Max Jakob [@maxjakob](https://github.com/maxjakob) (less active)
* Matt Haynes [@matth](https://github.com/matth) (less active)


================================================
FILE: docker/Dockerfile_2015-10
================================================
# Image that bundles the DBpedia Lookup server jar together with the
# prebuilt 2015-10 index under /opt/lookup.
FROM java:8

MAINTAINER  DBpedia Team <dbpedia-developers@lists.sourceforge.net>

# wget is what the download step below actually uses; install it explicitly
# instead of relying on the base image to provide it.
RUN apt-get update && apt-get install -y \
    curl \
    wget

# Location (host + path, without scheme or trailing slash) and file name of
# the index archive.
ENV INDEX_URL downloads.dbpedia-spotlight.org/dbpedia_lookup/models
ENV INDEX_FILENAME 2015-10.tar.gz

# Location (host + path, without scheme or trailing slash) and file name of
# the server jar. No trailing slash here: the download URL below already
# inserts one between the two parts.
ENV LOOKUP_JAR dbpedia-lookup-3.1-jar-with-dependencies.jar
ENV LOOKUP_URL downloads.dbpedia-spotlight.org/dbpedia_lookup

# Download the jar and the index archive into /opt/lookup, unpack the
# archive there and delete it afterwards to keep the layer small.
RUN mkdir -p /opt/lookup && \
    cd /opt/lookup && \
    wget "http://$LOOKUP_URL/$LOOKUP_JAR" -O $LOOKUP_JAR  && \
    wget "http://$INDEX_URL/$INDEX_FILENAME" -O $INDEX_FILENAME  && \
    tar xvf $INDEX_FILENAME   && \
    rm  $INDEX_FILENAME

# Port the Lookup server is documented to listen on.
EXPOSE 1111


================================================
FILE: docker/README.md
================================================
## Supported tags and respective Dockerfile links
* latest (DBpedia dump 2015-10)

## How to run

* English    - ``docker run -p 1111:1111 -it dbpedia/lookup java -jar /opt/lookup/dbpedia-lookup-3.1-jar-with-dependencies.jar /opt/lookup/2015-10/``


And then try 

```
http://localhost:1111/api/search/PrefixSearch?QueryClass=&MaxHits=5&QueryString=berl
```

If you are using Docker Compose you can do the same with this minimal Compose file:

```yml
version: '2'
services:
  lookup:
    container_name: lookup
    image: dbpedia/lookup
    ports:
     - "1111:1111"
    command: java -jar /opt/lookup/dbpedia-lookup-3.1-jar-with-dependencies.jar /opt/lookup/2015-10/
```

## Supported Docker versions
This image is officially supported on Docker version 1.9.1.

Please see the [Docker installation documentation](https://docs.docker.com/installation/) for details on how to upgrade your Docker daemon.


## Issues
If you have any problems with or questions about this image, please contact us through a [GitHub issue](http://github.com/dbpedia/lookup/issues).


## Contributing

First of all, thank you for helping! :) .

Please see [DBpedia Contribute Guide](https://github.com/dbpedia/lookup/wiki/Contributing) for details on how to contribute


================================================
FILE: pom.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.dbpedia.lookup</groupId>
    <artifactId>dbpedia-lookup</artifactId>
    <packaging>jar</packaging>
    <version>3.1</version>
    <name>DBpedia Lookup Service</name>


    <description>
        DBpedia Lookup is a web service that can be used to look up DBpedia URIs by related keywords
    </description>

    <url>http://www.dbpedia.org</url>

    <licenses>
        <license>
            <name>Apache License, Version 2.0</name>
            <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
            <distribution>repo</distribution>
        </license>
    </licenses>

    <developers>
        <developer>
            <name>DBpedia Team</name>
            <email>dbpedia-developers@lists.sourceforge.net</email>
            <organization>DBpedia</organization>
            <organizationUrl>http://www.dbpedia.org</organizationUrl>
        </developer>
    </developers>

    <!-- Source-control coordinates of this project (dbpedia/lookup).
         All three entries reference the same repository. -->
    <scm>
        <connection>scm:git:git@github.com:dbpedia/lookup.git</connection>
        <developerConnection>scm:git:git@github.com:dbpedia/lookup.git</developerConnection>
        <url>git@github.com:dbpedia/lookup.git</url>
    </scm>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <maven.scala.plugin.version>3.2.2</maven.scala.plugin.version>
        <maven.surefire.plugin.version>2.12.4</maven.surefire.plugin.version>
        <scala.compiler.version.revision>2.11.7</scala.compiler.version.revision>
        <scala.compiler.version>2.11</scala.compiler.version>
        <scala.test.version>2.2.6</scala.test.version>
        <maven.scala.test.plugin.version>1.0</maven.scala.test.plugin.version>
        <dbpedia.extraction.version>4.0</dbpedia.extraction.version>
        <lucene.core.version>3.6.2</lucene.core.version>
        <jersey.server.version>1.19.1</jersey.server.version>
        <commons.compress.version>1.0</commons.compress.version>
        <lift.json.version>2.6.2</lift.json.version>
        <logback.classic.version>1.0.9</logback.classic.version>
        <yars.nxparser.version>1.2.3</yars.nxparser.version>
        <aksw.keyname>AKSW.GPG</aksw.keyname>
    </properties>

    <build>
        <plugins>

            <plugin>
                <artifactId>maven-resources-plugin</artifactId>
                <version>2.6</version>
                <executions>
                    <execution>
                        <id>process-test-resources</id>
                        <phase>process-test-resources</phase>
                        <goals>
                            <goal>testResources</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>copy-resources</id>
                        <phase>compile</phase>
                        <goals>
                            <goal>resources</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>${maven.scala.plugin.version}</version>

                <executions>
                    <execution>
                        <id>compile</id>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                        <phase>compile</phase>
                    </execution>
                    <execution>
                        <id>test-compile</id>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                        <phase>test-compile</phase>
                    </execution>
                    <execution>
                        <id>attach-docs-sources</id>
                        <goals>
                            <goal>add-source</goal>
                            <goal>doc-jar</goal>
                        </goals>
                   </execution>
                </executions>

                <configuration>
                    <args>
                        <arg>-deprecation</arg>
                    </args>
                    <launchers>
                        <launcher>
                            <id>Server</id>
                            <mainClass>org.dbpedia.lookup.server.Server</mainClass>
                            <jvmArgs>
                                <jvmArg>-Xmx512m</jvmArg>
                                <jvmArg>-Dlogback.configurationFile=logback.xml</jvmArg>
                                <jvmArg>-Dhttp.port=1111</jvmArg>
                            </jvmArgs>
                        </launcher>
                        <launcher>
                            <id>Indexer</id>
                            <mainClass>org.dbpedia.lookup.lucene.Indexer</mainClass>
                            <jvmArgs>
                                <jvmArg>-Xms5g</jvmArg>
                                <jvmArg>-Xmx5g</jvmArg>
                                <jvmArg>-Dlogback.configurationFile=logback.xml</jvmArg>
                            </jvmArgs>
                        </launcher>
                    </launchers>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.scalatest</groupId>
                <artifactId>scalatest-maven-plugin</artifactId>
                <version>${maven.scala.test.plugin.version}</version>
                <executions>
                    <execution>
                        <id>test</id>
                        <goals>
                            <goal>test</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                            <mainClass>org.dbpedia.lookup.server.Server</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.sonatype.plugins</groupId>
                <artifactId>nexus-staging-maven-plugin</artifactId>
                <extensions>true</extensions>
                <configuration>
                    <serverId>ossrh</serverId>
                    <nexusUrl>https://oss.sonatype.org/</nexusUrl>
                    <autoReleaseAfterClose>true</autoReleaseAfterClose>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-source-plugin</artifactId>
                <executions>
                    <execution>
                        <id>attach-sources</id>
                        <goals>
                            <goal>jar</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

        </plugins>
    </build>

    <!-- Project dependencies. All versions are taken from properties defined elsewhere in this POM. -->
    <dependencies>
        <!-- Scala compiler, declared with scope "provided" -->
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-compiler</artifactId>
            <version>${scala.compiler.version.revision}</version>
            <scope>provided</scope>
        </dependency>

        <!-- Scala standard library; uses the same version property as the compiler above -->
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.compiler.version.revision}</version>
        </dependency>

        <!-- DBpedia extraction framework core (the sources import org.dbpedia.extraction.util.WikiUtil from it) -->
        <dependency>
            <groupId>org.dbpedia.extraction</groupId>
            <artifactId>core</artifactId>
            <version>${dbpedia.extraction.version}</version>
        </dependency>

        <!-- Lucene: index writing and searching -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.core.version}</version>
        </dependency>

        <!-- Jersey (JAX-RS) for the web service -->
        <dependency>
            <groupId>com.sun.jersey</groupId>
            <artifactId>jersey-bundle</artifactId>
            <version>${jersey.server.version}</version>
        </dependency>

        <!-- commons-compress: used to read .bz2 input files -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-compress</artifactId>
            <version>${commons.compress.version}</version>
        </dependency>

        <!-- lift-json: JSON serialization of results; artifact name carries the Scala binary version -->
        <dependency>
            <groupId>net.liftweb</groupId>
            <artifactId>lift-json_${scala.compiler.version}</artifactId>
            <version>${lift.json.version}</version>
        </dependency>

        <!-- Logging backend -->
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-classic</artifactId>
            <version>${logback.classic.version}</version>
        </dependency>

        <!-- NxParser: parser used for the N-Triples style input files -->
        <dependency>
            <groupId>org.semanticweb.yars</groupId>
            <artifactId>nxparser</artifactId>
            <version>${yars.nxparser.version}</version>
        </dependency>

        <!-- ScalaTest, test scope only -->
        <dependency>
            <groupId>org.scalatest</groupId>
            <artifactId>scalatest_${scala.compiler.version}</artifactId>
            <version>${scala.test.version}</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <!-- Additional repositories consulted when resolving dependencies -->
    <repositories>

       <repository>
            <id>spotlight-releases-repository</id>
            <url>https://github.com/dbpedia-spotlight/maven-repo/raw/master/releases</url>
        </repository>

        <repository>
            <id>sonatype-oss-public</id>
            <name>Sonatype OSS Public Maven repo</name>
            <url>https://oss.sonatype.org/content/groups/public</url>
        </repository>

    </repositories>

    <!-- Additional repository consulted when resolving build plugins -->
    <pluginRepositories>
        <pluginRepository>
            <id>sonatype-oss-public</id>
            <name>Sonatype OSS Public Maven repo</name>
            <url>https://oss.sonatype.org/content/groups/public</url>
        </pluginRepository>
    </pluginRepositories>

    <profiles>
        <!-- "release" profile: signs artifacts and configures the release plugin -->
        <profile>
            <id>release</id>
            <build>
                <plugins>
                    <!-- GPG-signs the artifacts during the "verify" phase using the key named AKSW -->
                    <plugin>
                        <groupId>org.apache.maven.plugins</groupId>
                        <artifactId>maven-gpg-plugin</artifactId>
                        <version>1.6</version>
                        <executions>
                            <execution>
                                <id>sign-artifacts</id>
                                <phase>verify</phase>
                                <goals>
                                    <goal>sign</goal>
                                </goals>
                                <configuration>
                                    <keyname>AKSW</keyname>
                                    <passphraseServerId>${aksw.keyname}</passphraseServerId>
                                </configuration>
                            </execution>
                        </executions>
                    </plugin>
                    <!-- Release plugin: runs "deploy" with this profile and tags releases as v<version> -->
                    <plugin>
                        <groupId>org.apache.maven.plugins</groupId>
                        <artifactId>maven-release-plugin</artifactId>
                        <version>2.5.2</version>
                        <configuration>
                            <goals>deploy</goals>
                            <autoVersionSubmodules>true</autoVersionSubmodules>
                            <useReleaseProfile>false</useReleaseProfile>
                            <releaseProfiles>release</releaseProfiles>
                            <tagNameFormat>v@{project.version}</tagNameFormat>
                            <scmCommentPrefix>[maven-release-plugin] [ci build-cli] [ci build-webdemo] -</scmCommentPrefix>
                        </configuration>
                    </plugin>
                </plugins>
            </build>
        </profile>
    </profiles>

</project>


================================================
FILE: run
================================================
#!/bin/bash

# Shortcut for mvn scala:run -Dlauncher=... -DaddArgs=...
# Example:
# lookup> ./run LAUNCHER ARG1 ARG2 ARG3
# is equivalent to
# lookup> mvn scala:run "-Dlauncher=LAUNCHER" "-DaddArgs=ARG1|ARG2|ARG3"

if [ "$#" -lt 1 ]; then
  echo "Usage: $0 LAUNCHER [ARG...]" >&2
  exit 1
fi

LAUNCHER="$1"

# Join the remaining arguments with '|'. "${@:3}" is quoted so that an
# argument containing whitespace or glob characters stays one argument.
ADD_ARGS="$2"
for ARG in "${@:3}"
do
  ADD_ARGS="$ADD_ARGS|$ARG"
done

mvn scala:run "-Dlauncher=$LAUNCHER" "-DaddArgs=$ADD_ARGS"


================================================
FILE: scripts/index.sh
================================================
#!/bin/bash

# Command-line arguments: $1 is the DBpedia version (a path segment of the
# download URL), $2 the language code used in the dump file names.
# Both are required: ${N:?msg} makes the script stop with msg when one is
# missing, instead of continuing with malformed URLs and paths.
readonly DBPEDIA_VERSION=${1:?"usage: $0 DBPEDIA_VERSION LANGUAGE"}
readonly LANG_i18n=${2:?"usage: $0 DBPEDIA_VERSION LANGUAGE"}
readonly DBPEDIA_DOWNLOADS="http://downloads.dbpedia.org/$DBPEDIA_VERSION/core-i18n"
# Working directory for downloaded data and the generated index
readonly DBPEDIA_ROOT=~/lookup
readonly DBPEDIA_DATA="$DBPEDIA_ROOT/dbpedia_data/$DBPEDIA_VERSION"
# Index location, relative to DBPEDIA_ROOT
readonly DBPEDIA_INDEX="dbpedia-lookup-index/$LANG_i18n/$DBPEDIA_VERSION"
# Base names of the dump files to download
readonly ALL_FILES=(redirects short_abstracts instance_types article_categories)

#+------------------------------------------------------------------------------------------------------------------------------+
#| Functions                                                                                                                    |
#+------------------------------------------------------------------------------------------------------------------------------+

# Error_exit function by William Shotts. http://stackoverflow.com/questions/64786/error-handling-in-bash
# Prints "<program>: <message>" to stderr and exits with status 1.
#   $1  message to print (defaults to "Unknown Error")
# PROGNAME is used as the program name when it is set; otherwise the name is
# derived from $0, so the message never starts with a bare ": ".
function error_exit
{
    echo -e "${PROGNAME:-$(basename -- "$0")}: ${1:-"Unknown Error"}" 1>&2
    exit 1
}

# The function used to create all the directories needed.
#   $1  path of the directory to create (missing parents are created too)
# If something already exists at that path nothing is created and a notice is
# printed instead. The path is quoted so that names with spaces work.
function create_dir()
{
    if [ -e "$1" ]; then
        echo -e "$1 already exists. Skipping creating this directory!"
    else
        mkdir -p -- "$1"
    fi
}

# A helper function to download files from a given path.
# Three-argument form:
#   $1  path (URL) to download from, without the file name
#   $2  file name
#   $3  directory in which to save the file
# Four-argument form: the same three values shifted by one, preceded by an
# extra argument that is handed to wget as-is
# (NOTE(review): presumably additional wget options - confirm with callers).
# The file is first probed with `wget --spider`; if the probe fails the
# script is aborted through error_exit.
function download_file()
{
    # Only downloads if there is no current file or there is a newer version
    echo "$#"
    case "$#" in
        "3")
            if wget -q --spider "$1/$2"; then
                wget -N "$1/$2" --directory-prefix="$3"
            else
                # The file can't be found. We can extract a substring with the file name and show it to the user
                error_exit "ERROR: The file '$2' cannot be found for download.\n"
            fi
            ;;
        "4")
            # $1 is intentionally left unquoted so that it can expand to
            # several wget arguments (or to none when it is empty).
            # shellcheck disable=SC2086
            if wget -q --spider $1 "$2/$3"; then
                # shellcheck disable=SC2086
                wget -N $1 "$2/$3" --directory-prefix="$4"
            else
                # The file can't be found. We can extract a substring with the file name and show it to the user
                error_exit "ERROR: The file '$3' cannot be found for download.\n"
            fi
            ;;
        *)
            error_exit "ERROR: Incorrect number of parameters!"
            ;;
    esac
    echo -e "done!\n"
}

#-----------------------------------------------------------------------------------------------------------------------------+
create_dir "$DBPEDIA_DATA"
create_dir "$DBPEDIA_ROOT/$DBPEDIA_INDEX"

# Download every required dump for the chosen language
for i in "${ALL_FILES[@]}"
do
    download_file "$DBPEDIA_DOWNLOADS/$LANG_i18n" "${i}_$LANG_i18n.ttl.bz2" "$DBPEDIA_DATA/$LANG_i18n"
done

# Decompress each dump to an .nt file; -c writes to stdout, so the archive is kept
for i in "${ALL_FILES[@]}"
do
    bunzip2 -dc "$DBPEDIA_DATA/$LANG_i18n/${i}_$LANG_i18n.ttl.bz2" > "$DBPEDIA_DATA/$LANG_i18n/${i}_$LANG_i18n.nt"
done

# Everything except the redirects goes into one combined data file
cat "$DBPEDIA_DATA/$LANG_i18n/short_abstracts_$LANG_i18n.nt" \
    "$DBPEDIA_DATA/$LANG_i18n/instance_types_$LANG_i18n.nt" \
    "$DBPEDIA_DATA/$LANG_i18n/article_categories_$LANG_i18n.nt" \
    > "$DBPEDIA_DATA/$LANG_i18n/all_dbpedia_data.nt"

# Fetch and build the indexer. An existing checkout is reused; a failing
# cd or build stops the script so later steps never run in the wrong place.
if [ ! -d lookup ]; then
    git clone https://github.com/dbpedia/lookup.git || error_exit "ERROR: Cloning the lookup repository failed."
fi
cd lookup || error_exit "ERROR: Cannot change into the lookup checkout."
mvn clean install || error_exit "ERROR: Building lookup failed."

./run Indexer "$DBPEDIA_ROOT/$DBPEDIA_INDEX" "$DBPEDIA_DATA/$LANG_i18n/redirects_$LANG_i18n.nt" "$DBPEDIA_DATA/$LANG_i18n/all_dbpedia_data.nt"

# Package the index; the archive contains paths relative to DBPEDIA_ROOT
cd "$DBPEDIA_ROOT" || error_exit "ERROR: Cannot change into $DBPEDIA_ROOT."
tar -zcvf "${LANG_i18n}_$DBPEDIA_VERSION.tar.gz" "$DBPEDIA_INDEX"


================================================
FILE: src/main/resources/logback.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<configuration>

    <!-- Console appender: each event is written as "LEVEL - message" -->
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>%-5level - %msg%n</pattern>
            <!-- Disabled alternative pattern with timestamp, thread and logger name: -->
            <!-- <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern> -->
        </encoder>
    </appender>

    <!-- Root logger: level "info", output goes to the STDOUT appender -->
    <root level="info">
        <appender-ref ref="STDOUT" />
    </root>
</configuration>


================================================
FILE: src/main/scala/org/dbpedia/lookup/entities/Entities.scala
================================================
package org.dbpedia.lookup.entities

import org.dbpedia.extraction.util.WikiUtil._

/** Something that is identified by a URI string. */
trait Uri   { val uri   : String }
/** Something that has a label string. */
trait Label { val label : String }

/** A redirect, represented only by its URI. */
case class Redirect(uri: String) extends Uri

/** A template, represented only by its URI. */
case class Template(uri: String) extends Uri

/**
 * A category. The label is the URI with the DBpedia category namespace
 * removed, passed through `wikiDecode`.
 */
case class Category(uri: String) extends Uri with Label {
    val label: String = {
        val localName = uri.replace("http://dbpedia.org/resource/Category:", "")
        wikiDecode(localName)
    }
}

/**
 * An ontology class. URIs ending in "owl#Thing" get the fixed label
 * "owl#Thing"; for any other URI the DBpedia ontology and schema.org
 * namespaces are removed, the rest is wiki-decoded, and the camel-case name
 * is split into lower-case words (a space is inserted before each A-Z).
 */
case class OntologyClass(uri: String) extends Uri with Label {

    val label: String =
        if (uri.endsWith("owl#Thing")) {
            "owl#Thing"
        } else {
            val localName = uri
                .replace("http://dbpedia.org/ontology/", "")
                .replace("http://schema.org/", "")
            wikiDecode(localName).replaceAll("([A-Z])", " $1").trim.toLowerCase
        }
}

/**
 * One lookup result.
 *
 * @param uri         URI of the result
 * @param description description text
 * @param classes     ontology classes of the result
 * @param categories  categories of the result
 * @param templates   templates of the result
 * @param redirects   redirects of the result
 * @param refCount    reference count of the result
 */
case class Result(
    uri: String,
    description: String,
    classes: Set[OntologyClass],
    categories: Set[Category],
    templates: Set[Template],
    redirects: Set[Redirect],
    refCount: Int
) extends Uri with Label {
    /** The URI without the DBpedia resource namespace, wiki-decoded. */
    val label: String = {
        val localName = uri.replace("http://dbpedia.org/resource/", "")
        wikiDecode(localName)
    }
}


================================================
FILE: src/main/scala/org/dbpedia/lookup/entities/EntitiesSerialization.scala
================================================
package org.dbpedia.lookup.entities

import scala.xml._
import net.liftweb.json._

/** Turns a collection of results into a string representation. */
trait ResultSerializer {
  /**
   * @param results the results to serialize
   * @return the serialized form of `results`
   */
  def prettyPrint(results: Traversable[Result]) : String
}

/** Serializes results as pretty-printed JSON under a top-level "results" key. */
class ResultJsonSerializer extends ResultSerializer {

  def prettyPrint(results: Traversable[Result]) : String = {

    import net.liftweb.json.JsonDSL._

    // JSON object for a single result
    def toJson(r: Result) =
      ("uri" -> r.uri) ~
      ("label" -> r.label) ~
      ("description" -> r.description) ~
      ("refCount" -> r.refCount) ~
      ("classes" -> r.classes.map(cls => ("uri" -> cls.uri) ~ ("label" -> cls.label))) ~
      ("categories" -> r.categories.map(cat => ("uri" -> cat.uri) ~ ("label" -> cat.label))) ~
      ("templates" -> r.templates.map(tpl => ("uri" -> tpl.uri))) ~
      ("redirects" -> r.redirects.map(red => ("uri" -> red.uri)))

    val document = ("results" -> results.map(r => toJson(r)))

    pretty(render(document))
  }

}

/**
 * Serializes results as XML: an `ArrayOfResult` root element in the
 * http://lookup.dbpedia.org/ namespace with one `Result` child per result.
 */
class ResultXmlSerializer extends ResultSerializer {

  /**
   * @return an XML declaration line followed by the serialized document.
   *         The document is emitted via `toString()`; the PrettyPrinter
   *         variant below is commented out.
   */
  def prettyPrint(results: Traversable[Result]) : String = {
    val xml = serialize(results)
  /*  val printer = new scala.xml.PrettyPrinter(120, 4)
    "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + printer.format(xml)*/
    "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + xml.toString()
  }

  /** Wraps all serialized results in the `ArrayOfResult` root element. */
  def serialize(results : Traversable[Result]) : Node = {
    <ArrayOfResult xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                   xmlns:xsd="http://www.w3.org/2001/XMLSchema"
                   xmlns="http://lookup.dbpedia.org/">
      { results.map(r => serialize(r)) }
    </ArrayOfResult>
  }

  /** Serializes a single result as a `Result` element. */
  def serialize(result : Result) : Node = {
    <Result>
      <Label>{result.label}</Label>
      <URI>{result.uri}</URI>
      <Description>{result.description}</Description>
      <Classes>{ urisWithLabels(result.classes, "Class") }</Classes>
      <Categories>{ urisWithLabels(result.categories, "Category") }</Categories>
      <Templates>{ uris(result.templates, "Template") }</Templates>
      <Redirects>{ uris(result.redirects, "Redirect") }</Redirects>
      <Refcount>{ result.refCount }</Refcount>
    </Result>
  }

  // One element named `nodeName` per item, each with a Label and a URI child.
  private def urisWithLabels[A <: Uri with Label](items: Set[A], nodeName: String) = {
    items.map(item => new Elem(null, nodeName, Null, TopScope, <Label>{item.label}</Label>, <URI>{item.uri}</URI>))
  }

  // One element named `nodeName` per item, each with only a URI child.
  private def uris[A <: Uri](items: Set[A], nodeName: String) = {
    items.map(item => new Elem(null, nodeName, Null, TopScope, <URI>{item.uri}</URI>))
  }

}


================================================
FILE: src/main/scala/org/dbpedia/lookup/inputformat/DBpediaNTriplesInputFormat.scala
================================================
package org.dbpedia.lookup.inputformat

import org.semanticweb.yars.nx.parser.NxParser
import java.io.InputStream
import org.dbpedia.lookup.lucene.LuceneConfig

/**
 * Iterates over a DBpedia N-Triples data set and emits a
 * (uri, field, value) tuple for every triple whose predicate is mapped to a
 * Lucene field in `predicate2field`. Triples whose subject is contained in
 * `redirects` are skipped.
 *
 * The underlying parser is consumed while traversing, so an instance can be
 * traversed only once.
 */
class DBpediaNTriplesInputFormat(val dataSet: InputStream, val redirects: scala.collection.Set[String]) extends InputFormat {

    private val parser = new NxParser(dataSet)

    val predicate2field = Map(
        "http://lexvo.org/ontology#label" -> LuceneConfig.Fields.SURFACE_FORM_KEYWORD,   // no DBpedia dataset, has to be created
        "http://dbpedia.org/property/refCount" -> LuceneConfig.Fields.REFCOUNT,  // no DBpedia dataset, has to be created
        "http://dbpedia.org/ontology/abstract" -> LuceneConfig.Fields.DESCRIPTION,
        "http://www.w3.org/2000/01/rdf-schema#comment" -> LuceneConfig.Fields.DESCRIPTION,
        "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" -> LuceneConfig.Fields.CLASS,
        "http://purl.org/dc/terms/subject" -> LuceneConfig.Fields.CATEGORY,
        "http://dbpedia.org/property/wikiPageUsesTemplate" -> LuceneConfig.Fields.TEMPLATE,  // not really necessary
        "http://dbpedia.org/ontology/wikiPageRedirects" -> LuceneConfig.Fields.REDIRECT      // not really necessary
    )

    override def foreach[U](f: ((String,String,String)) => U): Unit = {
        while (parser.hasNext) {
            val nodes = parser.next
            val subject = nodes(0).toString
            val predicate = nodes(1).toString
            val value = nodes(2).toString

            // Only mapped predicates are emitted, and only for non-redirect subjects.
            for (field <- predicate2field.get(predicate) if !redirects.contains(subject)) {
                if (field == LuceneConfig.Fields.REDIRECT) {
                    f( (value, field, subject) )   // make it a "hasRedirect" relation
                } else {
                    f( (subject, field, value) )
                }
            }
        }
    }

}

================================================
FILE: src/main/scala/org/dbpedia/lookup/inputformat/InputFormat.scala
================================================
package org.dbpedia.lookup.inputformat

/**
 * A source of index data: a traversable of string triples.
 * The implementations in this package emit them as (uri, field, value).
 */
trait InputFormat extends Traversable[(String, String, String)] {

}


================================================
FILE: src/main/scala/org/dbpedia/lookup/inputformat/PignlprocTSVInputFormat.scala
================================================
package org.dbpedia.lookup.inputformat

import java.io.InputStream
import org.dbpedia.lookup.lucene.LuceneConfig
import io.Source
import org.dbpedia.extraction.util.WikiUtil

/**
 * Iterates over a pignlproc nerd-stats result (tab-separated values).
 *
 * For each line a REFCOUNT data point is emitted for the URI, plus a
 * SURFACE_FORM_KEYWORD data point when P(surface form | URI) is greater than
 * the threshold. Lines with fewer columns than the configured field indices
 * require are skipped rather than failing with an
 * ArrayIndexOutOfBoundsException.
 *
 * The input is consumed while traversing, so an instance can be traversed
 * only once.
 *
 * @param dataSet              the TSV input, read as UTF-8
 * @param pSfGivenUriThreshold a surface form is emitted only above this value
 * @param uriField             column index of the URI (without namespace)
 * @param sfField              column index of the surface form
 * @param pSfGivenUriField     column index of P(surface form | URI)
 * @param refCountField        column index of the reference count
 */

class PignlprocTSVInputFormat(dataSet: InputStream, pSfGivenUriThreshold: Double, uriField: Int=0, sfField: Int=1, pSfGivenUriField: Int=3, refCountField: Int=6)
    extends InputFormat {

    val DBPEDIA_RESOURCE_NAMESPACE = "http://dbpedia.org/resource/"

    private val it = Source.fromInputStream(dataSet, "utf-8").getLines()

    // Number of columns a line needs so that every configured field index is valid.
    private val requiredColumns = Seq(uriField, sfField, pSfGivenUriField, refCountField).max + 1

    override def foreach[U](f: ((String,String,String)) => U): Unit = {

        while(it.hasNext) {
            val elements = it.next().split("\t")

            if (elements.length >= requiredColumns) {
                val uri = DBPEDIA_RESOURCE_NAMESPACE + WikiUtil.wikiEncode(elements(uriField))
                val sf = elements(sfField)
                val pSfGivenUri = elements(pSfGivenUriField)
                val uriCount = elements(refCountField)

                if (pSfGivenUri.toDouble > pSfGivenUriThreshold) {
                    f( (uri, LuceneConfig.Fields.SURFACE_FORM_KEYWORD, sf) )
                }
                f( (uri, LuceneConfig.Fields.REFCOUNT, uriCount) )
            }
        }

    }

}

================================================
FILE: src/main/scala/org/dbpedia/lookup/inputformat/WikiStatsExtractor.scala
================================================
package org.dbpedia.lookup.inputformat

import java.io.InputStream

import org.dbpedia.extraction.util.WikiUtil
import org.dbpedia.lookup.lucene.LuceneConfig

import scala.io.Source

/**
 * Reads tab-separated lines of the form surface form, URI, count and emits,
 * for every line with at least three columns, a SURFACE_FORM_KEYWORD and a
 * REFCOUNT data point for the wiki-encoded URI. Shorter lines are ignored.
 * `pSfGivenUriThreshold` is accepted but not used by this class.
 */
class WikiStatsExtractor(dataSet: InputStream, pSfGivenUriThreshold: Double) extends InputFormat {

  private val lines = Source.fromInputStream(dataSet, "utf-8").getLines()

  override def foreach[U](f: ((String,String,String)) => U): Unit = {
    for (line <- lines) {
      line.split("\t") match {
        case Array(surfaceForm, rawUri, count, _*) =>
          val uri = WikiUtil.wikiEncode(rawUri)
          f((uri, LuceneConfig.Fields.SURFACE_FORM_KEYWORD, surfaceForm))
          f((uri, LuceneConfig.Fields.REFCOUNT, count))
        case _ => // fewer than three columns: skip the line
      }
    }
  }
}


================================================
FILE: src/main/scala/org/dbpedia/lookup/lucene/Indexer.scala
================================================
package org.dbpedia.lookup.lucene

import org.apache.lucene.store.FSDirectory
import org.apache.lucene.document.{Field, Document}
import org.apache.lucene.index.{IndexReader, Term, IndexWriter}
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream
import java.io.{FileInputStream, InputStream, File}
import org.semanticweb.yars.nx.parser.NxParser
import org.dbpedia.extraction.util.WikiUtil
import org.dbpedia.lookup.inputformat.{WikiStatsExtractor, InputFormat, DBpediaNTriplesInputFormat, PignlprocTSVInputFormat}
import org.apache.lucene.search.{IndexSearcher, TermQuery}
import org.dbpedia.lookup.util.Logging

/**
 * Indexes the lookup data to a Lucene directory.
 *
 * Data points are collected in memory per URI and written to the index in
 * batches of `LuceneConfig.commitAfterDataPointsNum`. When a URI already has
 * a document in the index, the new values are added to that document.
 *
 * A Lucene reader only sees the index as it was when the reader was opened,
 * so the reader/searcher pair is reopened after every commit. Without this,
 * documents written by earlier batches (or earlier input files) would not be
 * found and would be replaced instead of extended.
 */
class Indexer(val indexDir: File) extends Logging {

    import scala.collection.mutable

    private val directory = FSDirectory.open(indexDir)
    private val indexWriter = new IndexWriter(directory, LuceneConfig.indexWriterConfig)
    // Commit right away so that an index exists for the reader to open.
    indexWriter.commit()
    private var indexReader = IndexReader.open(directory)
    private var indexSearcher = new IndexSearcher(indexReader)
    logger.info("Directory "+indexDir+" opened for indexing")

    /**
     * Index a data file for the lookup service.
     *
     * @param dataTraversable source of (uri, field, value) data points
     */
    def index(dataTraversable: InputFormat): Unit = {
        var count = 0
        val collector = mutable.HashMap[String, mutable.HashMap[String, mutable.HashSet[String]]]()

        dataTraversable.foreach { case (uri: String, field: String, value: String) =>
            collector
                .getOrElseUpdate(uri, mutable.HashMap[String, mutable.HashSet[String]]())
                .getOrElseUpdate(field, mutable.HashSet[String]())
                .add(value)

            count += 1
            if (count % 100000 == 0) {
                logger.info(count+" data points read")
            }
            if (count % LuceneConfig.commitAfterDataPointsNum == 0) {
                updateIndex(collector)
                collector.clear()
            }
        }
        updateIndex(collector)
        logger.info(count+" data points indexed. Done")

        //TODO remove?
        logger.info("Optimizing")
        indexWriter.optimize()
        logger.info("Done optimizing")
    }

    /** Closes the searcher, the reader and the writer. */
    def close(): Unit = {
        try {
            indexSearcher.close()
            indexReader.close()
        } finally {
            // the writer must be closed even if closing the reader fails
            indexWriter.close()
        }
        logger.info("Closed index "+indexDir)
    }

    /** Writes all collected URIs to the index, commits, and refreshes the searcher. */
    private def updateIndex(collector: mutable.HashMap[String, mutable.HashMap[String, mutable.HashSet[String]]]): Unit = {
        logger.info("Updating")
        for ((uri, fields) <- collector) {
            updateDataForUri(uri, fields)
        }
        logger.info("Committing")
        indexWriter.commit()
        refreshSearcher()
    }

    /** Reopens reader and searcher so that the changes just committed become visible. */
    private def refreshSearcher(): Unit = {
        indexSearcher.close()
        indexReader.close()
        indexReader = IndexReader.open(directory)
        indexSearcher = new IndexSearcher(indexReader)
    }

    /**
     * Adds the collected field values to the document of `currentUri`,
     * creating the document (with URI, label and prefix fields) if the index
     * has none yet.
     *
     * @throws IllegalStateException if more than one document matches the URI
     */
    private def updateDataForUri(currentUri: String, fieldCollector: mutable.HashMap[String, mutable.HashSet[String]]): Unit = {

        val uriTerm = new Term(LuceneConfig.Fields.URI, currentUri)
        // asking for two hits is enough to detect duplicates
        val hits = indexSearcher.search(new TermQuery(uriTerm), 2)

        val doc = hits.scoreDocs.length match {
            case 1 => indexSearcher.doc(hits.scoreDocs(0).doc)
            case 0 => newDocument(uriTerm)
            case _ => throw new IllegalStateException("Given Term matches more than 1 document in the index.")
        }

        indexWriter.updateDocument(uriTerm, getUpdatedDocument(doc, fieldCollector))
    }

    /** Creates a document holding the URI plus the surface form fields derived from the URI's label. */
    private def newDocument(uriTerm: Term): Document = {
        val d = new Document
        d.add(new Field(LuceneConfig.Fields.URI, uriTerm.text, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO))

        val label = WikiUtil.wikiDecode(uriTerm.text.replace("http://dbpedia.org/resource/", ""))
        d.add(new Field(LuceneConfig.Fields.SURFACE_FORM_KEYWORD, label, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO))

        val prefixTerm = LuceneConfig.PrefixSearchPseudoAnalyzer.analyze(label)
        d.add(new Field(LuceneConfig.Fields.SURFACE_FORM_PREFIX, prefixTerm, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO))

        d
    }

    /**
     * Adds every collected value to `doc`. Surface forms are added as analyzed
     * keyword field and, once per distinct prefix term, as prefix field; all
     * other fields are added not analyzed.
     */
    private def getUpdatedDocument(doc: Document, fields: scala.collection.Map[String, scala.collection.Set[String]]): Document = {
        for ((field, valueSet) <- fields) {
            val addedPrefixTerms = new mutable.HashSet[String]()
            for (value <- valueSet) {
                if (field == LuceneConfig.Fields.SURFACE_FORM_KEYWORD) {
                    doc.add(new Field(LuceneConfig.Fields.SURFACE_FORM_KEYWORD, value, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO))

                    val prefixTerm = LuceneConfig.PrefixSearchPseudoAnalyzer.analyze(value)
                    // HashSet.add returns true only for a prefix term not seen before
                    if (addedPrefixTerms.add(prefixTerm)) {
                        doc.add(new Field(LuceneConfig.Fields.SURFACE_FORM_PREFIX, prefixTerm, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO))
                    }
                }
                else {
                    doc.add(new Field(field, value, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO))
                }
            }
        }

        doc
    }

}


/**
 * Command line entry point for indexing.
 *
 * Arguments: index directory, redirects file, then any number of data files.
 * Data files are handled by name: names containing .nt/.nq/.ttl are parsed as
 * N-Triples, names containing .tsv as pignlproc TSV, names containing
 * "pairCounts" as wiki stats; a name ending in .bz2 is decompressed on the fly.
 */
object Indexer extends Logging {

    private val pSfGivenUriThreshold = 0.001

    /**
     * Index data to a directory.
     *
     * @throws IllegalArgumentException if fewer than two arguments are given
     *                                  or a data file has an unknown type
     */
    def main(args: Array[String]): Unit = {
        require(args.length >= 2, "usage: Indexer <index dir> <redirects file> [<data file> ...]")

        val indexDir = new File(args(0))
        val redirectsFile = new File(args(1))
        val data = args.drop(2)

        // Parsed at most once, and only if an N-Triples style file is indexed.
        lazy val redirectUris = getRedirectUris(redirectsFile)

        val indexer = new Indexer(indexDir)
        try {
            for (fileName <- data) {
                val fileIn: InputStream = new FileInputStream(fileName)
                try {
                    val in: InputStream =
                        if (fileName.endsWith(".bz2")) new BZip2CompressorInputStream(fileIn) else fileIn

                    logger.info("Indexing "+fileName)
                    indexer.index(getDataInput(fileName, in, redirectUris))
                    logger.info("Done Indexing "+fileName)
                } finally {
                    fileIn.close()
                }
            }
        } finally {
            indexer.close()
        }
    }

    /**
     * Chooses the input format from the file name.
     * `redirects` is by-name so that it is evaluated only for N-Triples input.
     */
    private def getDataInput(fileName: String, inputStream: InputStream, redirects: => scala.collection.Set[String]): InputFormat = {
        if (fileName.contains(".nt") || fileName.contains(".nq") || fileName.contains(".ttl")) {
            logger.debug("using DBpediaNTriplesInputFormat")
            new DBpediaNTriplesInputFormat(inputStream, redirects)
        }
        else if (fileName.contains(".tsv")) {
            logger.debug("using PignlprocTSVInputFormat")
            val refCountField = if (fileName.contains("_alx")) 7 else 6
            new PignlprocTSVInputFormat(inputStream, pSfGivenUriThreshold, refCountField=refCountField)
        }
        else if (fileName.contains("pairCounts")) {
            new WikiStatsExtractor(inputStream, pSfGivenUriThreshold)
        }
        else {
            throw new IllegalArgumentException("only know how to handle file types .nt, .nq, .ttl, .tsv and pairCounts files")
        }
    }

    /**
     * Reads the subjects of all triples in the redirects file.
     *
     * @throws Exception if a triple uses a predicate other than wikiPageRedirects
     */
    private def getRedirectUris(redirectsFile: File): scala.collection.Set[String] = {
        val reds = new scala.collection.mutable.HashSet[String]()
        logger.info("Reading redirects from "+redirectsFile)
        val in = new FileInputStream(redirectsFile)
        try {
            val parser = new NxParser(in)
            while (parser.hasNext) {
                val triple = parser.next
                if (triple(1).toString != "http://dbpedia.org/ontology/wikiPageRedirects") {
                    throw new Exception("predicate must be http://dbpedia.org/ontology/wikiPageRedirects; got "+triple(1).toString)
                }
                reds.add(triple(0).toString)
            }
        } finally {
            in.close()
        }
        logger.info("Done reading redirects")
        reds
    }

}


================================================
FILE: src/main/scala/org/dbpedia/lookup/lucene/LuceneConfig.scala
================================================
package org.dbpedia.lookup.lucene

import org.apache.lucene.index.IndexWriterConfig
import org.apache.lucene.util.Version
import org.apache.lucene.analysis._
import java.io.{Reader, File}
import standard.{StandardFilter, StandardAnalyzer}
import org.apache.lucene.queryParser.QueryParser
import org.dbpedia.lookup.util.Logging

/**
 * Lucene configuration data shared by indexing and searching:
 * Lucene version, analyzer, index writer configuration, field names and the
 * pseudo analyzer used for prefix search.
 */

object LuceneConfig extends Logging {

    // default_index_path is not used any more

    // Overwrite existing directories when indexing (must be true if target directory does not exist)
    val overwriteExisting = true

    // number of data points to read in memory before updating the index
    val commitAfterDataPointsNum = 1500000

    // Lucene Version
    val version = Version.LUCENE_36

    // Analyzer for KeywordSearch
    val analyzer = new StandardAnalyzer(version, StopAnalyzer.ENGLISH_STOP_WORDS_SET)

    // index writer configuration
    val indexWriterConfig = new IndexWriterConfig(version, analyzer)

    //HACK!: Analyzer for PrefixSearch. The result is converted back to a string and indexed/search NOT_ANALYZED!
    object PrefixSearchPseudoAnalyzer {
        private val prefixSearchQueryParser = new QueryParser(version, Fields.SURFACE_FORM_KEYWORD, analyzer)

        /**
         * Runs `keyword` through the keyword analyzer by parsing it as a quoted
         * phrase query, then turns the parsed query back into a plain
         * lower-case string (field prefix and surrounding quotes removed).
         *
         * The query parser is not thread safe and this object is a shared
         * singleton, so parsing is done while holding the parser's lock.
         */
        def analyze(keyword: String): String = {
            val parsed = prefixSearchQueryParser.synchronized {
                prefixSearchQueryParser.parse('"' + QueryParser.escape(keyword) + '"')
            }
            parsed.toString.replace(Fields.SURFACE_FORM_KEYWORD+":", "")
                           .replaceFirst("^\"", "")
                           .replaceFirst("\"$", "")
                           .toLowerCase
        }
    }

    /** Names of the fields of an index document. */
    object Fields {
        val URI = "URI"
        val SURFACE_FORM_KEYWORD = "SURFACE_FORM_KEYWORD"
        val SURFACE_FORM_PREFIX = "SURFACE_FORM_PREFIX"
        val REFCOUNT = "REFCOUNT"

        val DESCRIPTION = "DESCRIPTION"
        val CLASS = "CLASS"
        val CATEGORY = "CATEGORY"
        val TEMPLATE = "TEMPLATE"
        val REDIRECT = "REDIRECT"
    }

}


================================================
FILE: src/main/scala/org/dbpedia/lookup/lucene/Searcher.scala
================================================
package org.dbpedia.lookup.lucene

import java.io.File
import org.apache.lucene.store.FSDirectory
import org.apache.lucene.search._
import org.apache.lucene.index.{Term, IndexReader}
import org.dbpedia.extraction.util.WikiUtil
import org.dbpedia.lookup.entities._
import org.apache.lucene.queryParser.QueryParser

/**
 * Queries the Lucene index for the best URIs given a surface form.
 * Results are ordered by descending REFCOUNT.
 */

class Searcher(val indexDir: File) {

    private val indexReader = IndexReader.open(FSDirectory.open(indexDir))
    private val indexSearcher = new IndexSearcher(indexReader)
    private val sort = new Sort(new SortField(LuceneConfig.Fields.REFCOUNT, SortField.INT, true))
    private val queryParser = new QueryParser(LuceneConfig.version, LuceneConfig.Fields.SURFACE_FORM_KEYWORD, LuceneConfig.analyzer)
    private val ontologyPrefix = "http://dbpedia.org/ontology/"

    /**
     * Phrase search on the analyzed surface forms.
     *
     * @param keyword       search phrase; null or empty yields no results
     * @param ontologyClass optional class restriction (full URI, "dbpedia:" or
     *                      "dbpedia-owl:" prefixed name, or a label)
     * @param maxResults    maximum number of results; values <= 0 yield no results
     */
    def keywordSearch(keyword: String, ontologyClass: String="", maxResults: Int=5): List[Result] =
        lookup(keyword, ontologyClass, maxResults, prefixQuery = false)

    /**
     * Prefix search on the surface form prefix field.
     * Parameters as for [[keywordSearch]].
     */
    def prefixSearch(keyword: String, ontologyClass: String="", maxResults: Int=5): List[Result] =
        lookup(keyword, ontologyClass, maxResults, prefixQuery = true)

    def close(): Unit = {
        indexSearcher.close()
        indexReader.close()
    }


    // Lucene rejects a hit count <= 0 with an IllegalArgumentException, so
    // such requests are answered with an empty list, like an empty keyword.
    private def lookup(keyword: String, ontologyClass: String, maxResults: Int, prefixQuery: Boolean): List[Result] =
        if (keyword == null || keyword.isEmpty || maxResults <= 0) {
            List.empty
        } else {
            search(getQuery(keyword, ontologyClass, prefixQuery), maxResults)
        }

    private def search(query: Query, maxResults: Int): List[Result] = {
        indexSearcher.search(query, null, maxResults, sort).scoreDocs.toList.map(getResult)
    }

    /** Builds the keyword or prefix query, restricted to the ontology class if one is given. */
    private def getQuery(keyword: String, ontologyClass: String, prefixQuery: Boolean = false): Query = {
        val bq = new BooleanQuery

        val decodedKeyword = WikiUtil.wikiDecode(keyword)

        val surfaceFormQuery: Query =
            if (prefixQuery) {
                val pseudoAnalyzedKeyword = LuceneConfig.PrefixSearchPseudoAnalyzer.analyze(decodedKeyword)
                new PrefixQuery(new Term(LuceneConfig.Fields.SURFACE_FORM_PREFIX, pseudoAnalyzedKeyword))
            }
            else {
                val escapedKeyword = QueryParser.escape(decodedKeyword)
                synchronized { // query parser is not thread safe!
                    queryParser.parse('"' + escapedKeyword + '"')  //quotes keep word order
                }
            }
        bq.add(surfaceFormQuery, BooleanClause.Occur.MUST)

        getOntologyClassQuery(ontologyClass).foreach(q => bq.add(q, BooleanClause.Occur.MUST))

        bq
    }

    /**
     * Translates the requested class into a term query on the CLASS field.
     * The input is trimmed once up front so that every check sees the same
     * value. No restriction (None) is returned for null, blank, "thing" and
     * anything ending in "#thing" (case insensitive).
     */
    private def getOntologyClassQuery(ontologyClass: String): Option[Query] = {
        val requested = if (ontologyClass == null) "" else ontologyClass.trim
        val lower = requested.toLowerCase

        if (requested.isEmpty || lower == "thing" || lower.endsWith("#thing")) {
            None
        }
        else {
            val classUri =
                //is full class URI
                if (requested.startsWith(ontologyPrefix)) {
                    requested
                }
                //abbreviated namespace prefix
                else if (requested.startsWith("dbpedia:") || requested.startsWith("dbpedia-owl:")) {
                    ontologyPrefix + requested.replace("dbpedia:", "").replace("dbpedia-owl:", "")
                }
                //label given: make camel case and attach namespace
                else {
                    ontologyPrefix + requested.split(" ").map(_.capitalize).mkString("")
                }
            Some(new TermQuery(new Term(LuceneConfig.Fields.CLASS, classUri)))
        }
    }

    /** Loads the document of a hit and converts its stored fields into a Result. */
    private def getResult(scoreDoc: ScoreDoc): Result = {
        val doc = indexReader.document(scoreDoc.doc)

        // all stored values of a field, converted with `make`; empty if the field is absent
        def values[A](field: String)(make: String => A): Set[A] =
            doc.getValues(field) match {
                case null  => Set.empty[A]
                case found => found.toList.map(make).toSet[A]
            }

        val uri: String = doc.get(LuceneConfig.Fields.URI)
        val description: String = doc.get(LuceneConfig.Fields.DESCRIPTION)
        val ontologyClasses = values(LuceneConfig.Fields.CLASS)(u => new OntologyClass(u))
        val categories = values(LuceneConfig.Fields.CATEGORY)(u => new Category(u))
        val templates = values(LuceneConfig.Fields.TEMPLATE)(u => new Template(u))
        val redirects = values(LuceneConfig.Fields.REDIRECT)(u => new Redirect(u))
        val refCount: Int = doc.get(LuceneConfig.Fields.REFCOUNT) match {
            case null => 0
            case count: String => count.toInt
        }

        new Result(uri, description, ontologyClasses, categories, templates, redirects, refCount)
    }

}


================================================
FILE: src/main/scala/org/dbpedia/lookup/server/LookupResource.scala
================================================
package org.dbpedia.lookup.server

import javax.ws.rs._
import javax.ws.rs.core.Context
import core.Response
import org.dbpedia.lookup.entities._
import org.dbpedia.lookup.lucene.Searcher
import org.dbpedia.lookup.util.Logging

/**
 * Controller for DBpedia Lookup web service.
 *
 * Exposes keyword and prefix search below /api/search (a legacy ".asmx" suffix on
 * that path segment is optional). Results are rendered as JSON when the Accept
 * header mentions application/json and as XML otherwise.
 */
@Path("/api/search{ext:(.asmx)?}")
@Produces(Array("application/xml", "application/json"))
class LookupResource extends Logging {

    @Context
    var searcher : Searcher = _

    @DefaultValue("") @HeaderParam("accept")
    var accept   : String   = _

    @DefaultValue("") @QueryParam("QueryString")
    var query    : String   = _

    @DefaultValue("") @QueryParam("QueryClass")
    var ontologyClass : String = _

    @DefaultValue("5") @QueryParam("MaxHits")
    var maxHits : Int = _

    /** Runs a keyword search for the request's QueryString, QueryClass and MaxHits. */
    @GET
    @Path("/KeywordSearch")
    def keywordSearch : Response = {
        val results = searcher.keywordSearch(query, ontologyClass, maxHits)
        logger.info("KeywordSearch found "+results.length+": MaxHits="+maxHits.toString+" QueryClass="+ontologyClass+" QueryString="+query)
        ok(results)
    }

    /** Runs a prefix search for the request's QueryString, QueryClass and MaxHits. */
    @GET
    @Path("/PrefixSearch")
    def prefixSearch : Response = {
        val results = searcher.prefixSearch(query, ontologyClass, maxHits)
        logger.info("PrefixSearch found "+results.length+": MaxHits="+maxHits.toString+" QueryClass="+ontologyClass+" QueryString="+query)
        ok(results)
    }

    /**
     * Builds the 200 response for a result list.
     *
     * The Content-Type is set explicitly from the same decision that picks the
     * serializer, so the declared media type always matches the body. Also sets
     * the header needed to enable CORS.
     */
    private def ok(results: List[Result]): Response = {
        val mediaType = if (wantsJson) "application/json" else "application/xml"
        Response.ok(serialize(results), mediaType)
            .header("Access-Control-Allow-Origin", "*")
            .build()
    }

    /**
     * True when the Accept header mentions application/json. Media type names are
     * case-insensitive, hence the locale-independent lower-casing before the check.
     */
    private def wantsJson: Boolean =
        accept.toLowerCase(java.util.Locale.ROOT) contains "application/json"

    /** Renders the results with the serializer matching the requested format. */
    private def serialize(results: List[Result]): String = {
        val serializer = if (wantsJson) new ResultJsonSerializer else new ResultXmlSerializer
        serializer.prettyPrint(results)
    }

}


================================================
FILE: src/main/scala/org/dbpedia/lookup/server/Server.scala
================================================
package org.dbpedia.lookup.server

import com.sun.jersey.api.container.httpserver.HttpServerFactory
import com.sun.jersey.api.core.ClassNamesResourceConfig
import com.sun.jersey.spi.inject.SingletonTypeInjectableProvider
import javax.ws.rs.core.Context
import java.net.URI
import org.dbpedia.lookup.lucene.Searcher
import java.io.File
import org.dbpedia.lookup.util.Logging

/**
 * Created by IntelliJ IDEA.
 * User: Max
 * Date: 17.01.11
 * Time: 13:48
 * DBpedia Lookup Server
 */

/**
 * Jersey singleton provider that makes the given Searcher instance available
 * for injection into fields annotated with @Context and typed as Searcher.
 */
class SearcherProvider(searcher: Searcher)
    extends SingletonTypeInjectableProvider[Context, Searcher](classOf[Searcher], searcher)

/**
 * HTTP server that serves [[LookupResource]] on the given port, answering
 * queries with the given searcher.
 *
 * The underlying server is created on construction but only begins running once
 * start() is called.
 */
class Server(port: Int, searcher: Searcher) {

    // Resource configuration: the lookup controller plus the provider that
    // injects this server's searcher into it.
    val resources = {
        val resourceConfig = new ClassNamesResourceConfig(classOf[LookupResource])
        resourceConfig.getSingletons.add(new SearcherProvider(searcher))
        resourceConfig
    }

    val serverUri = new URI("http://localhost:" + port.toString + "/")
    val server    = HttpServerFactory.create(serverUri, resources)

    /** Starts the underlying HTTP server. */
    def start(): Unit = server.start()

    /** Stops the underlying HTTP server, passing a delay argument of 0. */
    def stop(): Unit = server.stop(0)

}

/**
 * Command line entry point of the lookup server.
 *
 * Usage: Server <index directory>
 *
 * The port is taken from the "http.port" system property and defaults to 1111.
 * The process keeps running until the JVM is asked to shut down, at which point
 * the HTTP server is stopped.
 */
object Server extends Logging {

    // Set to false by the shutdown hook so that main() leaves its wait loop.
    @volatile private var running = true

    def main(args : Array[String]): Unit = {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 1) {
            System.err.println("Usage: Server <index directory>")
            System.exit(1)
        }

        val indexDir = new File(args(0))

        val port   = System.getProperty("http.port", "1111").toInt
        val server = new Server(port, new Searcher(indexDir))

        server.start()

        // Without this hook nothing ever clears `running`, so the stop call below
        // would be unreachable. The hook waits (bounded) for the main thread so the
        // HTTP server is stopped before the JVM halts.
        val mainThread = Thread.currentThread
        Runtime.getRuntime.addShutdownHook(new Thread() {
            override def run(): Unit = {
                running = false
                mainThread.join(5000)
            }
        })

        val baseUri = server.serverUri.toString

        logger.info("Server started in " + System.getProperty("user.dir") + " listening on " + baseUri)

        while(running) {
            Thread.sleep(100)
        }

        //Stop the HTTP server
        server.stop()
    }

}


================================================
FILE: src/main/scala/org/dbpedia/lookup/util/Logging.scala
================================================
package org.dbpedia.lookup.util

import org.slf4j.LoggerFactory

/**
 * Mixin that provides an SLF4J logger to the class or object extending it.
 */
trait Logging {
    // The logger is named after the runtime class of the concrete instance.
    protected val logger = LoggerFactory.getLogger(getClass.getName)
}


================================================
FILE: src/test/resources/data.nt
================================================
<http://dbpedia.org/resource/Berlin> <http://www.w3.org/2000/01/rdf-schema#comment> "Berlin is the capital city of Germany and one of the 16 states of Germany. With a population of 3.5\u00A0million people, Berlin is Germany's largest city and is the second most populous city proper and the eighth most populous urban area in the European Union. Located in northeastern Germany, it is the center of the Berlin-Brandenburg Metropolitan Region, which has 5.9\u00A0million residents from over 190 nations. Located in the European Plains, Berlin is influenced by a temperate seasonal climate."@en .
<http://dbpedia.org/resource/Berlin> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/City> .
<http://dbpedia.org/resource/Berlin> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/City> .
<http://dbpedia.org/resource/Berlin> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Settlement> .
<http://dbpedia.org/resource/Berlin> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/PopulatedPlace> .
<http://dbpedia.org/resource/Berlin> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Place> .
<http://dbpedia.org/resource/Berlin> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Place> .
<http://dbpedia.org/resource/Berlin> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Thing> .
<http://dbpedia.org/resource/Berlin> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Berlin> .
<http://dbpedia.org/resource/Berlin> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Capitals_in_Europe> .
<http://dbpedia.org/resource/Berlin> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:City-states> .
<http://dbpedia.org/resource/Berlin> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:European_Capitals_of_Culture> .
<http://dbpedia.org/resource/Berlin> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:German_state_capitals> .
<http://dbpedia.org/resource/Berlin> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Host_cities_of_the_Summer_Olympic_Games> .
<http://dbpedia.org/resource/Berlin> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Members_of_the_Hanseatic_League> .
<http://dbpedia.org/resource/Berlin> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Populated_places_established_in_the_13th_century> .
<http://dbpedia.org/resource/Berlin> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:States_and_territories_established_in_1237> .
<http://dbpedia.org/resource/Berlin> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:States_of_Germany> .
<http://dbpedia.org/resource/Berlin> <http://lexvo.org/ontology#label> "Berlin"@en .
<http://dbpedia.org/resource/Berlin> <http://lexvo.org/ontology#label> "capital of Germany"@en .
<http://dbpedia.org/resource/Berlin> <http://dbpedia.org/property/refCount> "100"^^<http://www.w3.org/2001/XMLSchema#integer> .
<http://dbpedia.org/resource/Beirut> <http://www.w3.org/2000/01/rdf-schema#comment> "Beirut is the capital and largest city of Lebanon. As there has been no recent population census, the exact population is unknown; estimates in 2007 ranged from slightly less than 1\u00A0million to slightly more than 2\u00A0million. Located on a peninsula at the midpoint of Lebanon's Mediterranean coast, it serves as the country's largest and main seaport. The Beirut metropolitan area consists of the city and its suburbs."@en .
<http://dbpedia.org/resource/Beirut> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Settlement> .
<http://dbpedia.org/resource/Beirut> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/PopulatedPlace> .
<http://dbpedia.org/resource/Beirut> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Place> .
<http://dbpedia.org/resource/Beirut> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Place> .
<http://dbpedia.org/resource/Beirut> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Thing> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Beirut> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Amarna_letters_locations> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Ancient_cities> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Ancient_Greek_sites_in_Lebanon> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Ancient_mints> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Archaeological_sites_in_Lebanon> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Capitals_in_Asia> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Populated_places_in_the_Beirut_Governorate> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Populated_coastal_places_in_Lebanon> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Crusades> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Hellenistic_colonies> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Mediterranean> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Mediterranean_port_cities_and_towns_in_Lebanon> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Phoenician_cities> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Phoenician_sites_in_Lebanon> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Roman_colonies> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Roman_sites_in_Lebanon> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Populated_places_established_in_the_3rd_millennium_BC> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Paleolithic> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Neolithic_settlements> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Fertile_Crescent> .
<http://dbpedia.org/resource/Beirut> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Ancient_Levant> .
<http://dbpedia.org/resource/Beirut> <http://lexvo.org/ontology#label> "Beirut"@en .
<http://dbpedia.org/resource/Beirut> <http://lexvo.org/ontology#label> "largest city of Lebanon"@en .
<http://dbpedia.org/resource/Beirut> <http://dbpedia.org/property/refCount> "99"^^<http://www.w3.org/2001/XMLSchema#integer> .
<http://dbpedia.org/resource/Beirut_(band)> <http://www.w3.org/2000/01/rdf-schema#comment> "Beirut is an American band which was originally the solo musical project of Santa Fe native Zachary Francis Condon, and later expanded into a band. The band's first performances were in New York, in May 2006, to support the release of their debut album, Gulag Orkestar. Beirut's music combines elements of indie-rock and world music."@en .
<http://dbpedia.org/resource/Beirut_(band)> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Band> .
<http://dbpedia.org/resource/Beirut_(band)> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Organisation> .
<http://dbpedia.org/resource/Beirut_(band)> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Organization> .
<http://dbpedia.org/resource/Beirut_(band)> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Agent> .
<http://dbpedia.org/resource/Beirut_(band)> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Thing> .
<http://dbpedia.org/resource/Beirut_(band)> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/MusicGroup> .
<http://dbpedia.org/resource/Beirut_(band)> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:American_indie_rock_groups> .
<http://dbpedia.org/resource/Beirut_(band)> <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:Musical_groups_from_New_Mexico> .
<http://dbpedia.org/resource/Beirut_(band)> <http://lexvo.org/ontology#label> "Beirut"@en .
<http://dbpedia.org/resource/Beirut_(band)> <http://lexvo.org/ontology#label> "American band"@en .
<http://dbpedia.org/resource/Beirut_(band)> <http://dbpedia.org/property/refCount> "98"^^<http://www.w3.org/2001/XMLSchema#integer> .


================================================
FILE: src/test/resources/logback-test.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
    <root>
        <level value="OFF"/>
    </root>
</configuration>


================================================
FILE: src/test/resources/redirects.nt
================================================
<http://dbpedia.org/resource/CityBerlin> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/Berlin,_Germany> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/Capital_of_East_Germany> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/DEBER> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/Berullin> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/Berulin> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/Bereullin> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/UN/LOCODE:DEBER> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/Berlin-Zentrum> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/Berlib> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/Land_Berlin> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/Berlin.de> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/Berlin-ru.net> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/Berlin-turkish.com> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/Berlin-china.net> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/Berlin_(Germany)> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/Federal_State_of_Berlin> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/City_of_Berlin> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/Historical_sites_in_berlin> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Berlin> .
<http://dbpedia.org/resource/Bayrut> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beirut,_Lebanon> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beroth> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Capital_of_Lebanon> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beyrouth> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beyreuth> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beyrut> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Bayr%C3%BBt> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beyrout> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/West_Beirut> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Bairut> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Berytus> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beiruit> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Pigeon's_Rock> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/BEIRUT> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Bayr%C5%ABt> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Biruta> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beirut_District> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beiruth> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Galerie_Semaan> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Bkirki> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/%D8%A8%D9%8A%D8%B1%D9%88%D8%AA> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/West_Beirut_lebanon> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beirut_west> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/East_Beirut> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/East_beirut> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Laodicea_in_Phoenicia> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beiro%C3%BBt> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Asia/Beirut> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Rmeil> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beirut_I> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beirut_II> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beirut_III> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beirut_IV> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beirut_V> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beirut_VI> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Beirut_VII> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Minet_el_Hosn> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Um_el_Khatib> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Furn_esh_Shebbak> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Plateau_Tabet> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Furn_esh_Shebbak_(river_banks)> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Rivoli_Cinema> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Byblos_Cinema> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut> .
<http://dbpedia.org/resource/Zach_Condon> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut_(band)> .
<http://dbpedia.org/resource/Beirut_band> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut_(band)> .
<http://dbpedia.org/resource/Beirut_(musician)> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut_(band)> .
<http://dbpedia.org/resource/Beirutando> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut_(band)> .
<http://dbpedia.org/resource/Zach_condon> <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/Beirut_(band)> .


================================================
FILE: src/test/scala/org/dbpedia/lookup/IntegrationTest.scala
================================================
package org.dbpedia.lookup

import org.dbpedia.lookup.lucene._
import org.dbpedia.lookup.server._
import com.sun.jersey.api.client._
import net.liftweb.json._
import scala.xml._

import org.scalatest.FunSuite
import org.scalatest.BeforeAndAfterAll

/**
 * Full stack test
 *
 * 1. build index
 * 2. Start server
 * 3. Interrogate server via Jersey client over HTTP
 * 4. Kill server, delete index
 */
class IntegrationTest extends FunSuite with BeforeAndAfterAll {

  val tmpDir = TestUtils.tempDirectory
  val port   = TestUtils.tempPort

  var server : Server = _

  /** Builds the index from the test fixtures and starts a server on top of it. */
  override def beforeAll(): Unit = {
    Indexer.main(Array(tmpDir.toString, "src/test/resources/redirects.nt", "src/test/resources/data.nt"))
    server = new Server(port, new Searcher(tmpDir))
    server.start()
  }

  /** Stops the server (if it was ever started) and removes the index directory. */
  override def afterAll(): Unit = {
    try {
      // server stays null when beforeAll failed before assigning it
      if (server != null) server.stop()
    } finally {
      deleteRecursively(tmpDir)
    }
  }

  /**
   * Deletes a file or a whole directory tree. A plain File.delete cannot remove
   * the index directory because it is not empty once the index has been written.
   */
  private def deleteRecursively(file: java.io.File): Unit = {
    // listFiles yields null for anything that is not a listable directory
    Option(file.listFiles).foreach(children => children.foreach(child => deleteRecursively(child)))
    file.delete()
  }

  /** Issues a GET for the given path against the test server with the given Accept header. */
  def get(path: String, accepts: String = "application/xml") = {
    val client   = new Client
    val resource = client.resource("http://localhost:" + port.toString + path)
    resource.accept(accepts).get(classOf[ClientResponse])
  }

  test("KeywordSearch works") {
    val body   = get("/api/search/KeywordSearch?QueryString=Beirut").getEntity(classOf[String])
    val xml    = XML.loadString(body)
    assert((xml \ "Result" \ "Label").head.text == "Beirut")
    assert((xml \ "Result" \ "Label").tail.head.text == "Beirut (band)")
    assert((xml \ "Result").size == 2)
  }

  test("PrefixSearch works") {
    val body   = get("/api/search/PrefixSearch?QueryString=berl").getEntity(classOf[String])
    val xml    = XML.loadString(body)
    assert((xml \ "Result" \ "Label").head.text == "Berlin")
    assert((xml \ "Result").size == 1)
  }

  test("QueryClass works") {
    val body   = get("/api/search/KeywordSearch?QueryClass=place&QueryString=Beirut").getEntity(classOf[String])
    val xml    = XML.loadString(body)
    assert((xml \ "Result" \ "Label").head.text == "Beirut")
    assert((xml \ "Result").size == 1)
  }

  test("MaxHits works") {
    val body = get("/api/search/KeywordSearch?MaxHits=1&QueryString=beirut").getEntity(classOf[String])
    val xml  = XML.loadString(body)
    assert((xml \ "Result").size == 1)
  }

  test("legacy .asmx in url is optional") {
    assert(get("/api/search.asmx/KeywordSearch").getStatus == 200)
    assert(get("/api/search/KeywordSearch").getStatus == 200)
  }

  test("json results are returned when correct accepts header given") {
    val response = get("/api/search/KeywordSearch", "application/json")
    assert(response.getType.toString == "application/json")
    assert(parse(response.getEntity(classOf[String])) \\ "results" == JArray(List()))
  }

}


================================================
FILE: src/test/scala/org/dbpedia/lookup/TestUtils.scala
================================================
package org.dbpedia.lookup

import org.dbpedia.lookup.lucene._
import org.dbpedia.lookup.server._
import java.io.File

/**
 * Helpers that hand out scratch resources (a directory, a port) to tests.
 */
object TestUtils {

  /**
   * Creates a fresh, empty directory in the default temporary-file location.
   * The caller owns the directory and is responsible for removing it.
   *
   * @throws java.io.IOException if the directory could not be created
   */
  def tempDirectory : File = {
    // createTempFile reserves a unique name; the file is then swapped for a directory.
    val file = File.createTempFile("lookup", "")
    if (!file.delete() || !file.mkdir()) {
      throw new java.io.IOException("Could not create temporary directory " + file.getAbsolutePath)
    }
    file
  }

  /**
   * Returns a port that was free at the time of the call.
   *
   * The probing socket is closed before returning; otherwise it would keep the
   * port bound and the caller could not listen on it.
   */
  def tempPort : Int = {
    val socket = new java.net.ServerSocket(0)
    try {
      socket.getLocalPort
    } finally {
      socket.close()
    }
  }

}


================================================
FILE: src/test/scala/org/dbpedia/lookup/entities/EntitiesSerializationTest.scala
================================================
package org.dbpedia.lookup.entities

import org.scalatest.FunSuite
import net.liftweb.json._

/**
 * Shared fixtures for the serializer tests: one entity of each kind and a
 * Result that combines them.
 */
trait SerializationTest extends FunSuite {

  val template = new Template("http://en.wikipedia.org/wiki/Template:Infobox")

  val redirect = new Redirect("http://en.wikipedia.org/wiki/A_page")

  val klass    = new OntologyClass("http://dbpedia.org/ontology/City")

  val category = new Category("http://dbpedia.org/resource/Category:Berlin")

  // The description contains the characters & < > -- presumably to exercise
  // escaping in the serializers (TODO confirm).
  val result   = new Result(
     "http://dbpedia.org/resource/Berlin",
     "Berlin is the capital city of Germany & <> ...",
     Set(klass),
     Set(category),
     Set(template),
     Set(redirect),
     100
   )

}

/**
 * Round-trips results through the JSON serializer and lift-json deserialization.
 */
class EntitiesJsonSerializationTest extends SerializationTest {

  val serializer = new ResultJsonSerializer

  test("a list of result entities should serialize to json correctly") {
    implicit val formats = net.liftweb.json.DefaultFormats

    // Serialize two copies of the fixture, then read the document back.
    val rendered = serializer.prettyPrint(List(result, result))
    val parsed   = Serialization.read[Map[String, List[Result]]](rendered)
    val restored = parsed("results")

    assert(restored.size == 2)
    assert(restored.head == result)
  }

}

/**
 * Checks the XML produced by the XML serializer for a single result and for a list.
 */
class EntitiesXmlSerializationTest extends SerializationTest {

  val serializer = new ResultXmlSerializer

  test("a list of result entities should serialize to xml correctly") {
    val document    = serializer.serialize(List(result, result))
    val resultNodes = document \ "Result"
    assert(resultNodes.size == 2)
  }

  test("the result entity should serialize to XML correctly") {

    val node = serializer.serialize(result)

    // scalar fields
    assert((node \ "Label").text == result.label)
    assert((node \ "URI").text == result.uri)
    assert((node \ "Description").text == result.description)
    assert((node \ "Refcount").text == result.refCount.toString)

    // nested entity collections (the fixture holds exactly one of each)
    val classNode = node \ "Classes" \ "Class"
    assert((classNode \ "URI").text == result.classes.head.uri)
    assert((classNode \ "Label").text == result.classes.head.label)

    val categoryNode = node \ "Categories" \ "Category"
    assert((categoryNode \ "URI").text == result.categories.head.uri)
    assert((categoryNode \ "Label").text == result.categories.head.label)

    val templateNode = node \ "Templates" \ "Template"
    assert((templateNode \ "URI").text == result.templates.head.uri)

    val redirectNode = node \ "Redirects" \ "Redirect"
    assert((redirectNode \ "URI").text == result.redirects.head.uri)

  }

}


================================================
FILE: src/test/scala/org/dbpedia/lookup/entities/EntitiesTest.scala
================================================
package org.dbpedia.lookup.entities

import org.scalatest.FunSuite

/**
 * Verifies the labels that the entity classes derive from their URIs.
 */
class EntitiesTest extends FunSuite {

  test("category entity has correct label for uri") {
    val berlinCategory = new Category("http://dbpedia.org/resource/Category:Berlin")
    assert(berlinCategory.label == "Berlin", "category label incorrect")
  }

  test("class entity has correct label for uri") {
    val cityClass = new OntologyClass("http://dbpedia.org/ontology/City")
    assert(cityClass.label == "city", "class label incorrect")
  }

  test("class entity has correct label for owl#Thing") {
    val thingClass = new OntologyClass("http://www.w3.org/2002/07/owl#Thing")
    assert(thingClass.label == "owl#Thing", "class label incorrect")
  }

  test("result entity has correct label for uri") {
    // Only the URI matters here; every collection is left empty.
    val berlin = new Result(
      "http://dbpedia.org/resource/Berlin",
      "Some description ...",
      Set.empty[OntologyClass],
      Set.empty[Category],
      Set.empty[Template],
      Set.empty[Redirect],
      100
    )
    assert(berlin.label == "Berlin", "result label incorrect")
  }

}