/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.text.nlp.namedentity;

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Wrapper around a lucene index constructor.
 * <p>
 * An {@link IndexWriter} is opened on the supplied {@link Directory} at
 * construction time using a {@link StandardAnalyzer}. Documents can be added
 * until {@link #finalise()} is called; after that, further additions are
 * silently ignored.
 *
 * @author Laurence Willmore (lgw1e10@ecs.soton.ac.uk)
 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
 *
 */
public class QuickIndexer {

	private final Directory index;
	/** The open writer, or <code>null</code> if it could not be opened. */
	private final IndexWriter writer;
	/** The reason the writer could not be opened, or <code>null</code> if it was. */
	private final IOException openFailure;
	private boolean finalised = false;
	private final StandardAnalyzer analyzer;

	/**
	 * Open an {@link IndexWriter} on the given directory.
	 * <p>
	 * This constructor never throws if the writer cannot be opened (for example
	 * because of a {@link CorruptIndexException} or a
	 * {@link LockObtainFailedException}). The stack trace is printed, the
	 * failure is remembered, and it is reported as an {@link IOException} by
	 * the first method that needs the writer.
	 *
	 * @param index
	 *            construct a lucene index in this directory
	 */
	public QuickIndexer(Directory index) {
		this.index = index;
		analyzer = new StandardAnalyzer(Version.LUCENE_40);
		final IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);

		IndexWriter opened = null;
		IOException failure = null;
		try {
			opened = new IndexWriter(index, config);
		} catch (final IOException e) {
			// CorruptIndexException and LockObtainFailedException are both
			// IOExceptions, so a single handler covers all three cases.
			// The trace is still printed, as it always has been, but the
			// exception is also kept so later calls can report the real cause.
			e.printStackTrace();
			failure = e;
		}
		this.writer = opened;
		this.openFailure = failure;
	}

	/**
	 * Add a document to the index. Does nothing once {@link #finalise()} has
	 * been called.
	 *
	 * @param entry
	 *            document to index
	 * @throws CorruptIndexException
	 *             propagated from {@link IndexWriter#addDocument(Iterable)}
	 * @throws IOException
	 *             propagated from the writer, or thrown because the writer
	 *             could not be opened at construction time
	 */
	public void addDocument(Document entry) throws CorruptIndexException, IOException {
		if (finalised) {
			return;
		}
		requireWriter().addDocument(entry);
	}

	/**
	 * Construct a document from names, values and types and add it to the
	 * index. Field <code>i</code> of the document is built from
	 * <code>names[i]</code>, <code>values[i]</code> and <code>type[i]</code>.
	 * Does nothing once {@link #finalise()} has been called.
	 *
	 * @param names
	 *            the field names; its length determines the number of fields
	 * @param values
	 *            the field values; must have at least as many entries as
	 *            <code>names</code> (extra entries are ignored)
	 * @param type
	 *            the field types; must have at least as many entries as
	 *            <code>names</code> (extra entries are ignored)
	 * @throws IllegalArgumentException
	 *             if <code>values</code> or <code>type</code> is shorter than
	 *             <code>names</code>
	 * @throws CorruptIndexException
	 *             propagated from {@link IndexWriter#addDocument(Iterable)}
	 * @throws IOException
	 *             propagated from the writer, or thrown because the writer
	 *             could not be opened at construction time
	 */
	public void addDocumentFromFields(String[] names, String[] values, FieldType[] type) throws CorruptIndexException,
			IOException
	{
		if (finalised) {
			return;
		}

		// Fail before any field is built instead of part-way through the loop
		// with an ArrayIndexOutOfBoundsException.
		if (values.length < names.length || type.length < names.length) {
			throw new IllegalArgumentException("Expected at least " + names.length
					+ " values and types but got " + values.length + " values and " + type.length + " types");
		}

		final Document doc = new Document();
		for (int i = 0; i < names.length; i++) {
			doc.add(new Field(names[i], values[i], type[i]));
		}
		requireWriter().addDocument(doc);
	}

	/**
	 * Call {@link IndexWriter#close()} and stop accepting documents. Calling
	 * this method again after a successful call has no effect.
	 *
	 * @throws CorruptIndexException
	 *             propagated from {@link IndexWriter#close()}
	 * @throws IOException
	 *             propagated from the writer, or thrown because the writer
	 *             could not be opened at construction time
	 */
	public void finalise() throws CorruptIndexException, IOException {
		if (finalised) {
			return;
		}
		requireWriter().close();
		// Only set once close() has returned normally, so a failed close
		// leaves the indexer in its previous state.
		finalised = true;
	}

	/**
	 * @return the underlying {@link Directory}
	 */
	public Directory getIndex() {
		return index;
	}

	/**
	 * @return the underlying {@link StandardAnalyzer}
	 */
	public StandardAnalyzer getAnalyzer() {
		return analyzer;
	}

	/**
	 * Return the writer, or report why it is unavailable.
	 *
	 * @return the open {@link IndexWriter}
	 * @throws IOException
	 *             if the writer could not be opened at construction time; the
	 *             original exception is attached as the cause
	 */
	private IndexWriter requireWriter() throws IOException {
		if (writer == null) {
			throw new IOException("The index writer could not be opened", openFailure);
		}
		return writer;
	}

}