Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| dir = os.path.dirname(__file__) | |
| EMOJI_DATA_PATH = os.path.join(dir, "emojis_tr_twitter.json") | |
| with open(EMOJI_DATA_PATH, "r") as f: | |
| emojis = json.load(f) | |
| _SEARCH_TREE = None | |
| def _get_search_tree(): | |
| global _SEARCH_TREE | |
| if _SEARCH_TREE is None: | |
| _SEARCH_TREE = {} | |
| for emj in emojis: | |
| sub_tree = _SEARCH_TREE | |
| lastidx = len(emj) - 1 | |
| for i, char in enumerate(emj): | |
| if char not in sub_tree: | |
| sub_tree[char] = {} | |
| sub_tree = sub_tree[char] | |
| if i == lastidx: | |
| sub_tree["data"] = emojis[emj] | |
| return _SEARCH_TREE | |
| def demojize( | |
| string, | |
| delimiters=("<emoji> ", " </emoji>"), | |
| language="tr", | |
| version=None, | |
| handle_version=None, | |
| ): | |
| if language == "alias": | |
| language = "tr" | |
| _use_aliases = True | |
| else: | |
| _use_aliases = False | |
| tree = _get_search_tree() | |
| result = [] | |
| i = 0 | |
| length = len(string) | |
| while i < length: | |
| consumed = False | |
| char = string[i] | |
| if char in tree: | |
| j = i + 1 | |
| sub_tree = tree[char] | |
| while j < length and string[j] in sub_tree: | |
| sub_tree = sub_tree[string[j]] | |
| j += 1 | |
| if "data" in sub_tree: | |
| emj_data = sub_tree["data"] | |
| code_points = string[i:j] | |
| replace_str = None | |
| if version is not None and emj_data["E"] > version: | |
| if callable(handle_version): | |
| emj_data = emj_data.copy() | |
| emj_data["match_start"] = i | |
| emj_data["match_end"] = j | |
| replace_str = handle_version(code_points, emj_data) | |
| elif handle_version is not None: | |
| replace_str = str(handle_version) | |
| else: | |
| replace_str = None | |
| elif language in emj_data: | |
| if _use_aliases and "alias" in emj_data: | |
| replace_str = ( | |
| delimiters[0] + emj_data["alias"][0][:-1] + delimiters[1] | |
| ) | |
| else: | |
| replace_str = ( | |
| delimiters[0] + emj_data[language][1:-1] + delimiters[1] | |
| ) | |
| else: | |
| # The emoji exists, but it is not translated, so we keep the emoji | |
| replace_str = code_points | |
| i = j - 1 | |
| consumed = True | |
| if replace_str: | |
| result.append(replace_str) | |
| if not consumed and char != "\ufe0e" and char != "\ufe0f": | |
| result.append(char) | |
| i += 1 | |
| return "".join(result) | |