浏览代码

Merge pull request #120 from fzhangtj/master

fix chinese/japanese word break
/main
GitHub 6 年前
当前提交
4dc82ee5
共有 1 个文件被更改,包括 26 次插入和 1 次删除
  1. 27
      Runtime/ui/txt/wordbreaker.cs

27
Runtime/ui/txt/wordbreaker.cs


if (this._current == this._size) {
return -1;
}
bool preBoundaryChar = isBoundaryChar(this._text[this._current + this._offset]);
if (preBoundaryChar) {
return this._current;
}
break;
}
if (isBoundaryChar(this._text[this._current + this._offset])) {
break;
}
var currentType = WordSeparate.classifyChar(this._text, this._current + this._offset);

return (char) (((uint) (lead) << 10) + (uint) (trail - U16_SURROGATE_OFFSET));
}
/// <summary>
/// Reports whether a single UTF-16 code unit is treated as a boundary character:
/// either punctuation, a Han ideograph, or a Japanese kana.
/// </summary>
/// <param name="code">The UTF-16 code unit to classify.</param>
/// <returns>
/// <c>true</c> when <paramref name="code"/> is punctuation (per <see cref="char.IsPunctuation(char)"/>),
/// lies in one of the BMP Han ideograph blocks, or lies in the Hiragana/Katakana blocks;
/// otherwise <c>false</c>.
/// </returns>
/// <remarks>
/// Only code units in the Basic Multilingual Plane can be matched here, because the
/// parameter is a single <c>char</c>; supplementary-plane ideographs (encoded as
/// surrogate pairs) are not recognised by this method.
/// </remarks>
public static bool isBoundaryChar(char code) {
    if (char.IsPunctuation(code)) {
        return true;
    }

    // CJK Unified Ideographs Extension A: rarer Han characters that sit just below
    // the base block and need the same per-character treatment.
    if (code >= 0x3400 && code <= 0x4DBF) {
        return true;
    }

    if (code >= 0x4E00 && code <= 0x9FFF) { // cjk https://en.wikipedia.org/wiki/CJK_Unified_Ideographs
        return true;
    }

    // CJK Compatibility Ideographs: duplicate encodings of Han characters kept for
    // round-tripping with legacy character sets.
    if (code >= 0xF900 && code <= 0xFAFF) {
        return true;
    }

    // https://social.msdn.microsoft.com/Forums/en-US/0d1888de-9745-4dd1-80fd-d3c29d3e381d/checking-for-japanese-characters-in-a-string?forum=vcmfcatl
    if (code >= 0x3040 && code <= 0x30FF) { // Hiragana or Katakana
        return true;
    }

    return false;
}
void nextUntilCodePoint() {
while (this._current < this._size
&& (char.IsLowSurrogate(this._text[this._current + this._offset])

正在加载...
取消
保存