Solr's Japanese text analysis performs morphological analysis by default. The code below shows how to run that analysis from Java (SolrJ) and read the per-token results. In the Solr admin console, you can get the same result by enabling Verbose Output on the Analysis page.
package hello.solr;
import java.util.ArrayList;
import java.util.HashMap;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.DocumentAnalysisRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
/**
 * Minimal SolrJ example: sends one document to a Solr core through a
 * {@link DocumentAnalysisRequest} and prints, to standard error, the
 * index-time analysis result of a single field, followed by the text, type,
 * part of speech and reading of every token reported under the
 * {@code JapaneseTokenizer} entry.
 */
public class HelloAnalysisJapaneseSimple {

    /** Name under which the tokenizer's token list is looked up in the analysis result. */
    private static final String TOKENIZER_KEY = "org.apache.lucene.analysis.ja.JapaneseTokenizer";

    /** Token attribute key for the part of speech. */
    private static final String POS_KEY = "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute#partOfSpeech";

    /** Token attribute key for the reading. */
    private static final String READING_KEY = "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#reading";

    /**
     * Runs the analysis against {@code http://localhost:8983/solr/core_nlp}
     * and prints the result.
     *
     * @param args unused
     * @throws Exception if the request to Solr fails, or (as
     *                   {@link IllegalStateException}) if the response does
     *                   not contain the expected analysis sections
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    static public void main(String[] args) throws Exception {
        String fieldName = "field_text_ja";
        String coreName = "core_nlp";
        // Japanese input: the analyzed field is a Japanese text field, and the
        // sample output of this program lists Japanese tokens for this sentence.
        String text = "こんにちは。今日はいい天気ですね。私は日産自動車の社員です。";

        // Document with an id and the field whose analysis we want to see.
        HashMap<String, SolrInputField> fields = new HashMap<String, SolrInputField>();
        SolrInputDocument doc = new SolrInputDocument(fields);
        doc.setField("id", "0");
        doc.setField(fieldName, text);

        // Request
        DocumentAnalysisRequest request = new DocumentAnalysisRequest();
        request.addDocument(doc);

        String solrLocation = "http://localhost:8983/solr/" + coreName;

        // The client is Closeable; try-with-resources releases it even when
        // the request or the response handling throws.
        try (SolrClient client = new HttpSolrClient.Builder(solrLocation).build()) {
            NamedList<Object> response = client.request(request);

            // Walk down: "analysis" -> first entry -> fieldName -> "index" -> first entry.
            NamedList<Object> analysis = (NamedList<Object>) response.get("analysis");
            if (analysis == null || analysis.size() == 0) {
                throw new IllegalStateException("No 'analysis' section in response from " + solrLocation);
            }
            SimpleOrderedMap docAnalysis = (SimpleOrderedMap) analysis.getVal(0);
            SimpleOrderedMap fieldAnalysis = (SimpleOrderedMap) docAnalysis.get(fieldName);
            if (fieldAnalysis == null) {
                throw new IllegalStateException("No analysis returned for field '" + fieldName + "'");
            }
            SimpleOrderedMap index = (SimpleOrderedMap) fieldAnalysis.get("index");
            if (index == null || index.size() == 0) {
                throw new IllegalStateException("No 'index' analysis returned for field '" + fieldName + "'");
            }
            NamedList nlpResult = (NamedList) index.getVal(0);

            // Dump every name/value pair of the analysis result as-is.
            System.err.println("Tokenizer,Filter ---");
            for (int n = 0; n < nlpResult.size(); n++) {
                System.err.println(nlpResult.getName(n) + "=" + nlpResult.getVal(n));
            }

            // Token list of the tokenizer entry; absent if the field's
            // analysis result has no entry under TOKENIZER_KEY.
            ArrayList wordListPOS = (ArrayList) nlpResult.get(TOKENIZER_KEY);
            if (wordListPOS == null) {
                return;
            }
            for (int n = 0; n < wordListPOS.size(); n++) {
                SimpleOrderedMap wordPOS = (SimpleOrderedMap) wordListPOS.get(n);
                if (n == 0) {
                    // Print all attribute names once, using the first token as the sample.
                    System.err.println("<names>");
                    for (int m = 0; m < wordPOS.size(); m++) {
                        System.err.println(wordPOS.getName(m) + "=" + wordPOS.getVal(m));
                    }
                    System.err.println("</names>");
                }
                System.err.println(
                        "text='" + wordPOS.get("text") + "'"
                        + ",type='" + wordPOS.get("type") + "'"
                        + ",partOfSpeech='" + wordPOS.get(POS_KEY) + "'"
                        + ",reading='" + wordPOS.get(READING_KEY) + "'");
            }
        }
    }
}
<names>
text=こんにちは
raw_bytes=[e3 81 93 e3 82 93 e3 81 ab e3 81 a1 e3 81 af]
start=0
end=5
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1
type=word
org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute#termFrequency=1
org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute#baseForm=null
org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute#partOfSpeech=感動詞
org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute#partOfSpeech (en)=interjection
org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#reading=コンニチハ
org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#reading (en)=konnichiha
org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#pronunciation=コンニチワ
org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#pronunciation (en)=konnichiwa
org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute#inflectionType=null
org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute#inflectionType (en)=null
org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute#inflectionForm=null
org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute#inflectionForm (en)=null
position=1
positionHistory=[1]
</names>
text='こんにちは',type='word',partOfSpeech='感動詞',reading='コンニチハ'
text='今日',type='word',partOfSpeech='名詞-副詞可能',reading='キョウ'
text='は',type='word',partOfSpeech='助詞-係助詞',reading='ハ'
text='いい',type='word',partOfSpeech='形容詞-自立',reading='イイ'
text='天気',type='word',partOfSpeech='名詞-一般',reading='テンキ'
text='です',type='word',partOfSpeech='助動詞',reading='デス'
text='ね',type='word',partOfSpeech='助詞-終助詞',reading='ネ'
text='私',type='word',partOfSpeech='名詞-代名詞-一般',reading='ワタシ'
text='は',type='word',partOfSpeech='助詞-係助詞',reading='ハ'
text='日産',type='word',partOfSpeech='名詞-固有名詞-組織',reading='ニッサン'
text='日産自動車',type='word',partOfSpeech='名詞-固有名詞-組織',reading='ニッサンジドウシャ'
text='自動車',type='word',partOfSpeech='名詞-一般',reading='ジドウシャ'
text='の',type='word',partOfSpeech='助詞-連体化',reading='ノ'
text='社員',type='word',partOfSpeech='名詞-一般',reading='シャイン'
text='です',type='word',partOfSpeech='助動詞',reading='デス'
Recommended Posts