package com.mei.nlp;
import java.io.IOException;
import java.util.List;
import com.google.common.base.Optional;
import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import com.optimaize.langdetect.text.CommonTextObjectFactories;
import com.optimaize.langdetect.text.TextObject;
import com.optimaize.langdetect.text.TextObjectFactory;
public class LanguageDetectionTest
{
public static void main(String[] args) throws IOException
{
//加载所有内置语种
List languageProfiles = new LanguageProfileReader().readAllBuiltIn();
// 创建识别器
LanguageDetector languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()).withProfiles(languageProfiles).build();
// 创建文本对象工厂
TextObjectFactory textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
// 识别
TextObject textObject = textObjectFactory.forText(“[喵萌奶茶屋][繁體][480P][PoRO]転生剣奴の子作り闘技場(ハーレムコロッセオ) ヘタレ爆乳皇女・マルシュタール~お漏らし鎧の折檻”);
Optional lang = languageDetector.detect(textObject);
if (!lang.isPresent())
{
System.out.println(“语种识别失败,可能文本太短或混合了多国语言”);
return;
}
System.out.println(lang.get().toString());
}
}