*** chasen-2.3.0/lib/tokenizer.c Tue Jun 18 11:33:40 2002 --- chasen-2.3.0-nmz/lib/tokenizer.c Mon Feb 17 14:08:22 2003 *************** *** 53,59 **** KATAKANA, /* KATAKANA LETTER (SMALL) [A-KE] */ SMALL_KATAKANA, /* KATAKANA LETTER SMALL AIUEO, TU, YAYUYO, WA */ FULL_LATIN, /* FULLWIDTH LATIN (CAPITAL|SMALL) LETTER [A-Z] */ ! HALF_LATIN, /* LATIN (CAPITAL|SMALL) LETTER [A-Z] */ JA_OTHER, }; --- 53,61 ---- KATAKANA, /* KATAKANA LETTER (SMALL) [A-KE] */ SMALL_KATAKANA, /* KATAKANA LETTER SMALL AIUEO, TU, YAYUYO, WA */ FULL_LATIN, /* FULLWIDTH LATIN (CAPITAL|SMALL) LETTER [A-Z] */ ! HALF_LATIN, /* HALFWIDTH LATIN (CAPITAL|SMALL) LETTER [A-Z] */ ! HALF_DIGIT, /* HALFWIDTH DIGIT [0-9] */ ! HALF_PUNCT, /* HALFWIDTH PUNCTUATION */ JA_OTHER, }; *************** *** 337,345 **** { if (state == JA_SPACE) { tok->_anno_type[cursor] = -1; ! } else if ((state == HALF_LATIN) || ! (state == FULL_LATIN)) { ! ; /* do nothing */ } else if (((*state0 == KATAKANA) && ((state == PROLONGED) || (state == SMALL_KATAKANA))) || --- 339,350 ---- { if (state == JA_SPACE) { tok->_anno_type[cursor] = -1; ! } else if (state == HALF_LATIN) { ! ; ! } else if (state == HALF_DIGIT) { ! state = HALF_LATIN; ! } else if (state == HALF_PUNCT) { ! state = HALF_LATIN; } else if (((*state0 == KATAKANA) && ((state == PROLONGED) || (state == SMALL_KATAKANA))) || *************** *** 375,381 **** return HALF_LATIN; } else if (is_space(str[0])) { return JA_SPACE; ! } } else if (mblen == 2) { if ((str[0] == 0xa1) && (str[1] == 0xbc)) { return PROLONGED; --- 380,390 ---- return HALF_LATIN; } else if (is_space(str[0])) { return JA_SPACE; ! } else if (isdigit(str[0])) { ! return HALF_DIGIT; ! } else if (ispunct(str[0])) { ! return HALF_PUNCT; ! } } else if (mblen == 2) { if ((str[0] == 0xa1) && (str[1] == 0xbc)) { return PROLONGED; *************** *** 440,446 **** return HALF_LATIN; } else if (is_space(str[0])) { return JA_SPACE; ! } } else if (mblen == 3) { if ((str[0] == 0xe3) && (str[1] == 0x83) && (str[2] == 0xbc)) { return PROLONGED; --- 449,459 ---- return HALF_LATIN; } else if (is_space(str[0])) { return JA_SPACE; ! } else if (isdigit(str[0])) { ! return HALF_DIGIT; ! } else if (ispunct(str[0])) { ! return HALF_PUNCT; ! } } else if (mblen == 3) { if ((str[0] == 0xe3) && (str[1] == 0x83) && (str[2] == 0xbc)) { return PROLONGED;