浏览代码

Optimize word breaker.

/main
Yuncong Zhang 5 年前
当前提交
6c901c28
共有 2 个文件被更改,包括 19 次插入25 次删除
  1. 12
      Runtime/ui/txt/word_separate.cs
  2. 32
      Runtime/ui/txt/wordbreaker.cs

12
Runtime/ui/txt/word_separate.cs


namespace Unity.UIWidgets.ui {
struct WordSeparate {
internal enum characterType {
internal enum CharacterType {
LetterLike,
Symbol,
WhiteSpace

}
internal static characterType classifyChar(string text, int index) {
internal static CharacterType classifyChar(string text, int index) {
internal static characterType classifyChar(char ch) {
internal static CharacterType classifyChar(char ch) {
return characterType.WhiteSpace;
return CharacterType.WhiteSpace;
return characterType.LetterLike;
return CharacterType.LetterLike;
return characterType.Symbol;
return CharacterType.Symbol;
}
}
}

32
Runtime/ui/txt/wordbreaker.cs


}
char c = this._text.charAt(this._current);
WordSeparate.characterType preType = WordSeparate.classifyChar(c);
bool preBoundaryChar = isBoundaryChar(c);
this._current++;
if (preBoundaryChar) {

this._findBoundaryCharOrTypeChange();
return this._current;
}
void _findBoundaryCharOrTypeChange() {
char c = this._text.charAt(this._current);
bool preType = char.IsWhiteSpace(c);
c = this._text.charAt(this._current);
c = this._text.charAt(this._current);
var currentType = WordSeparate.classifyChar(c);
if ((currentType == WordSeparate.characterType.WhiteSpace)
!= (preType == WordSeparate.characterType.WhiteSpace)) {
bool currentType = char.IsWhiteSpace(c);
if (currentType != preType) {
return this._current;
}
void _detectEmailOrUrl() {

}
/// <summary>
/// Reports whether <paramref name="code"/> is a boundary character: a punctuation
/// mark, a CJK unified ideograph, or a Hiragana/Katakana character.
/// </summary>
/// <param name="code">The UTF-16 code unit to classify.</param>
/// <returns>
/// <c>true</c> when <paramref name="code"/> is punctuation or lies in one of the
/// CJK/kana ranges checked below; otherwise <c>false</c>.
/// </returns>
public static bool isBoundaryChar(char code) {
    if (char.IsPunctuation(code)) {
        return true;
    }

    // CJK Unified Ideographs block: https://en.wikipedia.org/wiki/CJK_Unified_Ideographs
    bool isCjkIdeograph = code >= 0x4E00 && code <= 0x9FFF;

    // Hiragana or Katakana, see
    // https://social.msdn.microsoft.com/Forums/en-US/0d1888de-9745-4dd1-80fd-d3c29d3e381d/checking-for-japanese-characters-in-a-string?forum=vcmfcatl
    bool isKana = code >= 0x3040 && code <= 0x30FF;

    // Single exit point: the previous body carried a second, unreachable return
    // statement that repeated these same three checks.
    return isCjkIdeograph || isKana;
}
void nextUntilCodePoint() {

正在加载...
取消
保存