﻿using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;

namespace SlothLib.NLP
{
    /// <summary>
    /// Holds the result of a TreeTagger run: the raw output text and the
    /// morphemes parsed from it.
    /// </summary>
    public class TreeTaggerResult : IMorphologicalAnalyzerResult
    {
        /// <summary>
        /// Regular expression matching one line of output: three tab-separated fields.
        /// The groups are greedy, so if a line contains more than two tabs the
        /// last two tabs are the ones treated as field separators.
        /// </summary>
        private static readonly Regex regexResultLine = new Regex(@"^(.*)\t(.*)\t(.*)$", RegexOptions.Compiled | RegexOptions.Singleline);

        /// <summary>
        /// Line terminators accepted when splitting the raw output into lines.
        /// "\r\n" is listed first so that a Windows line ending is consumed as a
        /// single delimiter.
        /// </summary>
        private static readonly string[] lineSeparators = new string[] { "\r\n", "\n", "\r" };

        /// <summary>
        /// Morphemes parsed from the raw output, in output order.
        /// </summary>
        private readonly List<Morpheme> morphemeList;

        /// <summary>
        /// The raw analysis result exactly as passed to the constructor.
        /// </summary>
        private readonly string rawResult;

        // Filters used to implement GetOriginalArray(), GetPOSArray() and GetRawArray().
        private static readonly RemainOriginalFilter remainOriginalFilter = new RemainOriginalFilter();
        private static readonly RemainPosFilter remainPosFilter = new RemainPosFilter();
        private static readonly RemainRawFilter remainRawFilter = new RemainRawFilter();

        /// <summary>
        /// Parses the raw TreeTagger output into a list of morphemes.
        /// Each non-empty line is expected to consist of three tab-separated
        /// fields, which are read as raw form, part of speech and original form
        /// (in that order). Lines that do not match are skipped and reported
        /// through <see cref="System.Diagnostics.Debug"/>.
        /// </summary>
        /// <param name="rawResult">The raw output text.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="rawResult"/> is null.
        /// </exception>
        public TreeTaggerResult(string rawResult)
        {
            if (rawResult == null)
            {
                throw new ArgumentNullException("rawResult");
            }

            this.rawResult = rawResult;
            this.morphemeList = new List<Morpheme>();

            // Accept every common line ending. Splitting on "\r\n" alone leaves
            // "\n"-terminated output as one big string, which the Singleline regex
            // would then match as a single bogus morpheme.
            string[] lines = rawResult.Split(lineSeparators, StringSplitOptions.RemoveEmptyEntries);
            foreach (string line in lines)
            {
                Match match = regexResultLine.Match(line);
                if (match.Success)
                {
                    string raw = match.Groups[1].Value;
                    string pos = match.Groups[2].Value;
                    string original = match.Groups[3].Value;
                    this.morphemeList.Add(new Morpheme(pos, raw, original));
                }
                else
                {
                    // Best effort: a malformed line is skipped rather than aborting the parse.
                    System.Diagnostics.Debug.WriteLine("failed parsing line from TreeTagger");
                }
            }
        }

        /// <summary>
        /// Indexer over the parsed morphemes.
        /// </summary>
        /// <param name="index">Zero-based position of the morpheme.</param>
        /// <returns>The morpheme at position <paramref name="index"/>.</returns>
        public Morpheme this[int index]
        {
            get { return this.morphemeList[index]; }
        }

        /// <summary>
        /// The parsed morphemes as an array. A new array is created on every access.
        /// </summary>
        public Morpheme[] Morphemes
        {
            get { return this.morphemeList.ToArray(); }
        }


        #region IMorphologicalAnalyzerResult members

        /// <summary>
        /// Returns the Original of each morpheme as an array.
        /// </summary>
        /// <returns>Array of Original values.</returns>
        public string[] GetOriginalArray()
        {
            return remainOriginalFilter.DoFilter(this.Morphemes);
        }

        /// <summary>
        /// Returns the POS of each morpheme as an array.
        /// </summary>
        /// <returns>Array of POS values.</returns>
        public string[] GetPOSArray()
        {
            return remainPosFilter.DoFilter(this.Morphemes);
        }

        /// <summary>
        /// Returns the Raw of each morpheme as an array.
        /// </summary>
        /// <returns>Array of Raw values.</returns>
        public string[] GetRawArray()
        {
            return remainRawFilter.DoFilter(this.Morphemes);
        }

        /// <summary>
        /// Explicit interface implementation exposing the morphemes as
        /// <see cref="IMorpheme"/> instances. A new array is created on every access.
        /// </summary>
        IMorpheme[] IMorphologicalAnalyzerResult.Morphemes
        {
            get { return this.morphemeList.ToArray(); }
        }

        #endregion
    }
}
