package slothLib.NLP;

import java.util.regex.Pattern;
/// <summary>
/// Morpheme filter that keeps only morphemes whose part of speech (POS)
/// matches an include pattern and does not match an exclude pattern.
/// </summary>
public class PosFilter extends AbstractMorphemeFilter
{
	// Compiled POS patterns. A null pattern means "no constraint" on that side:
	// a null include pattern accepts every POS, a null exclude pattern rejects none.
	private final Pattern regexInclude;
	private final Pattern regexExclude;
	
	
	/// <summary>
	/// Constructor.
	/// Keeps morphemes whose POS matches one of includePos and none of excludePos.
	/// Each array is joined with "|" into a single regular expression, so the
	/// entries are interpreted as regex fragments (they are not escaped).
	/// A null or empty array places no constraint on that side.
	/// </summary>
	/// <param name="includePos">Array of POS prefixes to extract (e.g. "名詞"); may be null or empty</param>
	/// <param name="excludePos">Array of POS prefixes to reject (e.g. "名詞-固有名詞"); may be null or empty</param>
	public PosFilter(String[] includePos, String[] excludePos)
	{
		this(joinAlternatives(includePos), joinAlternatives(excludePos));
	}
	
	/// <summary>
	/// Constructor.
	/// Keeps morphemes whose POS matches one of includePos.
	/// </summary>
	/// <param name="includePos">Array of POS prefixes to extract (e.g. "名詞"); may be null or empty</param>
	public PosFilter(String[] includePos)
	{ 
		this(includePos, null);
	}
	
	/// <summary>
	/// Constructor.
	/// Keeps morphemes whose POS matches includePosRegex and does not match excludePosRegex.
	/// Each expression is anchored at the start of the POS string and may be
	/// followed by arbitrary trailing text, i.e. it acts as a prefix match.
	/// A null or empty expression places no constraint on that side.
	/// </summary>
	/// <param name="includePosRegex">Regular expression of POS to extract (e.g. "(名詞|動詞)")</param>
	/// <param name="excludePosRegex">Regular expression of POS to reject (e.g. "名詞-(代名詞|固有名詞)")</param>
	public PosFilter(String includePosRegex, String excludePosRegex)
	{
		this.regexInclude = compilePrefixPattern(includePosRegex);
		this.regexExclude = compilePrefixPattern(excludePosRegex);
	}
	
	/// <summary>
	/// Constructor.
	/// Keeps morphemes whose POS matches includePosRegex.
	/// </summary>
	/// <param name="includePosRegex">Regular expression of POS to extract (e.g. "(名詞|動詞)")</param>
	public PosFilter(String includePosRegex)
	{
		this(includePosRegex, null);
	}
	
	/// <summary>
	/// Joins the given entries with "|" so they form a regex alternation.
	/// </summary>
	/// <param name="posList">Entries to join; may be null</param>
	/// <returns>null for a null array, "" for an empty array, otherwise the joined string</returns>
	private static String joinAlternatives(String[] posList)
	{
		if (posList == null)
		{
			return null;
		}
		
		StringBuilder joined = new StringBuilder();
		for (int i = 0; i < posList.length; i++)
		{
			// Emit the separator before every entry but the first; unlike
			// "append then delete the trailing separator", this is safe for
			// an empty array.
			if (i > 0)
			{
				joined.append("|");
			}
			joined.append(posList[i]);
		}
		return joined.toString();
	}
	
	/// <summary>
	/// Compiles a POS expression into a pattern that matches any POS string
	/// starting with that expression.
	/// </summary>
	/// <param name="posRegex">Regular expression; may be null or empty</param>
	/// <returns>The compiled pattern, or null when posRegex is null or empty</returns>
	private static Pattern compilePrefixPattern(String posRegex)
	{
		if (posRegex == null || posRegex.length() == 0)
		{
			return null;
		}
		return Pattern.compile("^(" + posRegex + ").*?$");
	}
	
	
	/// <summary>
	/// Passes through only morphemes with an accepted part of speech.
	/// </summary>
	/// <param name="morpheme">Morpheme to filter; its POS is read via getPOS()</param>
	/// <returns>
	/// The same morpheme when its POS is accepted; null when the POS is
	/// null or empty, fails the include pattern, or matches the exclude pattern
	/// </returns>
	public IMorpheme doFilter(IMorpheme morpheme)
	{
		String pos = morpheme.getPOS();
		
		// A morpheme without POS information can never be selected.
		if (pos == null || pos.length() == 0)
		{
			return null;
		}
		
		// Include check: skipped when no include pattern was configured.
		if (this.regexInclude != null && !this.regexInclude.matcher(pos).matches())
		{
			return null;
		}
		
		// Exclude check: skipped when no exclude pattern was configured.
		if (this.regexExclude != null && this.regexExclude.matcher(pos).matches())
		{
			return null;
		}
		
		return morpheme;
	}
	
}
