PanGu4Lucene 是 Lucene.Net + PanGu 的应用示例。
// Open an IndexWriter on indexDir that analyzes text with the PanGu analyzer.
// The last constructor argument is passed as true.
// NOTE(review): in Lucene.Net 2.x this is the "create" flag (start a new index) — confirm for the version in use.
PanGuAnalyzer analyzer = new PanGuAnalyzer();
IndexWriter writer = new IndexWriter(indexDir, analyzer, true);

// Optimize the index, then close the writer.
writer.Optimize();
writer.Close();
/// <summary>
/// Builds a Lucene document from one news item and adds it through <c>writer</c>.
/// </summary>
/// <param name="indexDir">Index directory. Not read by this method's body.</param>
/// <param name="url">Value of the "url" field (stored, Field.Index.NO).</param>
/// <param name="title">Value of the "title" field (stored, Field.Index.TOKENIZED).</param>
/// <param name="time">Written to the "time" field as a "yyyyMMdd" string (stored, Field.Index.UN_TOKENIZED).</param>
/// <param name="content">Value of the "contents" field (stored, Field.Index.TOKENIZED).</param>
/// <returns>The value of <c>writer.DocCount()</c> after the document has been added.</returns>
public static int IndexString(String indexDir, string url, string title, DateTime time, string content)
{
    // `writer` is not declared in this method; it must come from the enclosing scope.
    // The writer is neither optimized nor closed here.
    Document newsDoc = new Document();

    newsDoc.Add(new Field("url", url, Field.Store.YES, Field.Index.NO));
    newsDoc.Add(new Field("title", title, Field.Store.YES, Field.Index.TOKENIZED));

    // NOTE(review): no format provider is passed, so the current culture's calendar is used;
    // confirm that whoever parses this field back uses the same culture.
    string day = time.ToString("yyyyMMdd");
    newsDoc.Add(new Field("time", day, Field.Store.YES, Field.Index.UN_TOKENIZED));

    newsDoc.Add(new Field("contents", content, Field.Store.YES, Field.Index.TOKENIZED));

    writer.AddDocument(newsDoc);
    return writer.DocCount();
}
/// <summary>
/// Segments <paramref name="keywords"/> with the given tokenizer and renders every
/// non-null word as <c>word^boost.0</c>, where boost is 3 raised to the word's Rank
/// (truncated to an integer). Entries are separated by a single space.
/// </summary>
/// <param name="keywords">Text to segment.</param>
/// <param name="ktTokenizer">Tokenizer whose SegmentToWordInfos method performs the segmentation.</param>
/// <returns>The space-separated boosted terms, with leading and trailing whitespace trimmed.</returns>
static public string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
{
    ICollection<WordInfo> segments = ktTokenizer.SegmentToWordInfos(keywords);
    StringBuilder queryText = new StringBuilder();

    foreach (WordInfo info in segments)
    {
        if (info != null)
        {
            int boost = (int)Math.Pow(3, info.Rank);
            queryText.AppendFormat("{0}^{1}.0 ", info.Word, boost);
        }
    }

    return queryText.ToString().Trim();
}
/// <summary>
/// Searches the index for <paramref name="q"/> in the "contents" and "title" fields
/// and returns one page of results, each with a highlighted abstract and title.
/// </summary>
/// <param name="indexDir">Index location handed to the IndexSearcher constructor.</param>
/// <param name="q">Raw query text. It is segmented and boosted by GetKeyWordsSplitBySpace before being parsed; the raw text is used for highlighting.</param>
/// <param name="pageLen">Maximum number of records to return.</param>
/// <param name="pageNo">1-based page number. Values that would yield a negative start offset are treated as the first page.</param>
/// <param name="recCount">Receives the total number of hits for the query.</param>
/// <returns>
/// The records of the requested page. If building a record throws, the message is written
/// to the console and the (possibly partially filled) record is still added to the list.
/// </returns>
public static List<TNews> Search(String indexDir, String q, int pageLen, int pageNo, out int recCount)
{
    string keywords = q;
    List<TNews> result = new List<TNews>();

    IndexSearcher search = new IndexSearcher(indexDir);
    try
    {
        q = GetKeyWordsSplitBySpace(q, new PanGuTokenizer());

        // The same boosted query text is parsed once per searched field.
        QueryParser queryParser = new QueryParser("contents", new PanGuAnalyzer(true));
        Query query = queryParser.Parse(q);
        QueryParser titleQueryParser = new QueryParser("title", new PanGuAnalyzer(true));
        Query titleQuery = titleQueryParser.Parse(q);

        // A document qualifies when either the contents or the title clause matches.
        BooleanQuery bq = new BooleanQuery();
        bq.Add(query, BooleanClause.Occur.SHOULD);
        bq.Add(titleQuery, BooleanClause.Occur.SHOULD);

        Hits hits = search.Search(bq);
        recCount = hits.Length();

        // Clamp the start offset: a page number below 1 would otherwise produce a negative
        // index, making every hits.Doc(i) call fail and filling the page with blank records.
        int i = Math.Max(0, (pageNo - 1) * pageLen);
        while (i < recCount && result.Count < pageLen)
        {
            TNews news = null;
            try
            {
                news = new TNews();
                news.Title = hits.Doc(i).Get("title");
                news.Content = hits.Doc(i).Get("contents");
                news.Url = hits.Doc(i).Get("url");
                String strTime = hits.Doc(i).Get("time");
                news.Time = DateTime.ParseExact(strTime, "yyyyMMdd", null);

                PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter =
                    new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");
                PanGu.HighLight.Highlighter highlighter =
                    new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());
                highlighter.FragmentSize = 50;

                // Highlighting uses the raw user text, not the boosted query string.
                news.Abstract = highlighter.GetBestFragment(keywords, news.Content);
                news.TitleHighLighter = highlighter.GetBestFragment(keywords, news.Title);
                if (string.IsNullOrEmpty(news.TitleHighLighter))
                {
                    // Fall back to the plain title when the highlighter returns nothing.
                    news.TitleHighLighter = news.Title;
                }
            }
            catch (Exception e)
            {
                // Best effort: report the failure and keep paging.
                Console.WriteLine(e.Message);
            }
            finally
            {
                result.Add(news);
                i++;
            }
        }
    }
    finally
    {
        // Close the searcher even when parsing or searching throws, so it is not leaked.
        search.Close();
    }

    return result;
}
}
1. 下载 News.xml，下载地址：
http://pangusegment.codeplex.com/Release/ProjectReleases.aspx?ReleaseId=31482
2. 进入目录 Bin，运行 PanGu.Lucene.ImportTool.exe，点击“创建索引”按钮，并导入 News.xml
3. 运行网站