Lucence.Net学习+盘古分词
生活随笔
收集整理的這篇文章主要介紹了
Lucence.Net学习+盘古分词
小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
創建索引庫
?
//讀取文件,存儲到索引庫
?
/// <summary>
/// Creates the Lucene index (or appends to it when it already exists) by reading
/// text files and storing each one as a document with a "number" and a "body" field.
/// </summary>
/// <remarks>
/// The body published with the article was a verbatim copy of the <c>List</c> search
/// action (it referenced an undefined <c>str</c> and returned <c>View()</c> from a
/// <c>string</c> method), so it could neither compile nor create an index.
/// This version is a reconstruction built from the field names and writer settings
/// used by the other snippets in the article.
/// </remarks>
/// <returns>A short status message stating how many files were indexed.</returns>
public string CreateDatebase()
{
    // Resolve the index folder relative to the application base directory.
    var indexPath = AppDomain.CurrentDomain.BaseDirectory + ConfigurationManager.AppSettings["IndexDateBase"];

    // NOTE(review): the article never shows where the input files live. The
    // "IndexSourceFiles" setting and the "*.txt" filter are assumptions — confirm
    // and point them at the real folder before using this.
    var sourcePath = AppDomain.CurrentDomain.BaseDirectory + ConfigurationManager.AppSettings["IndexSourceFiles"];

    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    try
    {
        // Create a fresh index only when none exists yet; otherwise append to it.
        bool indexExists = IndexReader.IndexExists(directory);
        var writer = new IndexWriter(directory, new PanGuAnalyzer(), !indexExists, IndexWriter.MaxFieldLength.UNLIMITED);
        int indexed = 0;
        try
        {
            // System.IO.Directory is fully qualified to avoid clashing with Lucene's Directory type.
            foreach (string file in System.IO.Directory.GetFiles(sourcePath, "*.txt"))
            {
                var document = new Document();
                // "number" carries the file name (without extension); "body" carries the file text.
                document.Add(new Field("number", Path.GetFileNameWithoutExtension(file), Field.Store.YES, Field.Index.ANALYZED));
                document.Add(new Field("body", File.ReadAllText(file), Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(document);
                indexed++;
            }
        }
        finally
        {
            // Always close the writer so pending documents are flushed and the handle is released.
            writer.Close();
        }

        return string.Format("Indexed {0} file(s).", indexed);
    }
    finally
    {
        directory.Close();
    }
}
?
進行搜索
/// <summary>
/// Searches the "body" field of the index for the words obtained by segmenting
/// <paramref name="str"/> with the PanGu analyzer, and hands the stored "body"
/// text of every hit to the view through <c>ViewBag.List</c>.
/// </summary>
/// <param name="str">The text typed by the user; null or empty produces an empty result list.</param>
/// <returns>The default view for this action.</returns>
public ActionResult List(string str)
{
    var list = new List<string>();

    // Nothing to search for: skip opening the index instead of failing on a null reader input.
    if (string.IsNullOrEmpty(str))
    {
        ViewBag.List = list;
        return View();
    }

    // Resolve the index folder relative to the application base directory.
    var indexPath = AppDomain.CurrentDomain.BaseDirectory + ConfigurationManager.AppSettings["IndexDateBase"];
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try
    {
        // Read-only reader: this action never modifies the index.
        reader = IndexReader.Open(directory, true);
        searcher = new IndexSearcher(reader);

        // Build the search condition: one phrase term per segmented word.
        var query = new PhraseQuery();
        foreach (string word in GetPanGuAnalyzer(str))
        {
            query.Add(new Term("body", word));
        }

        // Maximum distance allowed between the words that make up the phrase.
        query.SetSlop(5);

        // Run the query; the collector is created with a capacity of 1000 hits.
        TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
        searcher.Search(query, null, collector);

        // TopDocs(start, count): here everything from the first hit onward is requested.
        // For paging, e.g. TopDocs(300, 20) returns hits 300 through 320.
        ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
        foreach (ScoreDoc hit in docs)
        {
            // hit.doc is the id Lucene assigned internally; load the stored document by that id.
            Document doc = searcher.Doc(hit.doc);
            list.Add(doc.Get("body"));
        }
    }
    finally
    {
        // Release index handles even when the search throws.
        if (searcher != null)
        {
            searcher.Close();
        }
        if (reader != null)
        {
            reader.Close();
        }
        directory.Close();
    }

    ViewBag.List = list;
    return View();
}
/// <summary>
/// Segments the given string into words with the PanGu analyzer.
/// </summary>
/// <param name="txt">The text to segment; null or empty yields an empty list.</param>
/// <returns>The term text of every token, in the order the analyzer produced them.</returns>
public static List<string> GetPanGuAnalyzer(string txt)
{
    List<string> list = new List<string>();

    // StringReader rejects null, so answer trivially for missing input.
    if (string.IsNullOrEmpty(txt))
    {
        return list;
    }

    Analyzer analyzer = new PanGuAnalyzer();
    // The field name is irrelevant here, hence the empty string.
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(txt));
    try
    {
        Lucene.Net.Analysis.Token token;
        // Next() returns null once the stream is exhausted.
        while ((token = tokenStream.Next()) != null)
        {
            list.Add(token.TermText());
        }
    }
    finally
    {
        tokenStream.Close();
    }

    return list;
}
IndexReader 刪除數據
// Delete documents through an IndexReader.

// Get the index directory.
var directory = LucenceHelp.GetDirectory();

// Deleting needs a writable reader, so request readOnly = false explicitly
// rather than relying on the default of the single-argument overload.
IndexReader reader = IndexReader.Open(directory, false);
try
{
    // Remove every document whose "number" field contains the term "0".
    reader.DeleteDocuments(new Term("number", "0"));
    // The original also called reader.IsOptimized() here and discarded the result;
    // that call had no effect and has been dropped.
}
finally
{
    // Closing the reader commits the deletion and releases the index handle.
    reader.Close();
}
IndexWriter 刪除數據
?
// Delete documents through an IndexWriter.

// Get the index directory.
var directory = LucenceHelp.GetDirectory();

// create = false: open the existing index instead of overwriting it.
var writer = new IndexWriter(directory, new PanGuAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
try
{
    // Deletes the documents whose "body" field contains the keyword held in `name`.
    // To delete the file named 13 instead, use: new Term("number", "13").
    var term = new Term("body", name);
    writer.DeleteDocuments(term);
    writer.Optimize();
}
finally
{
    // Close even on failure so the writer does not stay open on the index.
    writer.Close();
}
修改索引
// Modify an indexed document: replace the document whose "number" is "0".

Analyzer analyzer = new PanGuAnalyzer();

// Get the index directory.
var directory = LucenceHelp.GetDirectory();

// Build the replacement document.
Document document = new Document();
// NOTE(review): "number" is used as the lookup key but is indexed ANALYZED, as in the
// original; this only matches if the analyzer leaves "0" as a single token — confirm.
document.Add(new Field("number", "0", Field.Store.YES, Field.Index.ANALYZED));
document.Add(new Field("body", "如家快捷酒店-0", Field.Store.YES, Field.Index.ANALYZED));

// create = false: open the existing index instead of overwriting it.
IndexWriter writer = new IndexWriter(directory, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
try
{
    // UpdateDocument deletes every document containing the term and then adds the new
    // one as a single atomic operation, replacing the original two-step
    // IndexReader delete followed by IndexWriter add.
    writer.UpdateDocument(new Term("number", "0"), document);
}
finally
{
    // Close even on failure so the writer does not stay open on the index.
    writer.Close();
}
?
要添加的文件
?
?
?
轉載於:https://www.cnblogs.com/xiaoyaodijun/p/4138189.html
總結
以上是生活随笔為你收集整理的Lucence.Net学习+盘古分词的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: unicode ascii
- 下一篇: myeclipse 注册码生成代码(6.