0x1949 Team - FAZEMRX - MANAGER
Edit File: Utf8.php
<?php declare(strict_types=1); /** * @package s9e\RegexpBuilder * @copyright Copyright (c) 2016-2022 The s9e authors * @license http://www.opensource.org/licenses/mit-license.php The MIT License */ namespace s9e\RegexpBuilder\Input; use InvalidArgumentException; use function array_map, ord, preg_match_all; class Utf8 extends BaseImplementation { /** * @var bool Whether to use surrogates to represent higher codepoints */ protected $useSurrogates; /** * {@inheritdoc} */ public function __construct(array $options = []) { $this->useSurrogates = !empty($options['useSurrogates']); } /** * {@inheritdoc} */ public function split(string $string): array { if (preg_match_all('(.)us', $string, $matches) === false) { throw new InvalidArgumentException('Invalid UTF-8 string'); } return ($this->useSurrogates) ? $this->charsToCodepointsWithSurrogates($matches[0]) : $this->charsToCodepoints($matches[0]); } /** * Convert a list of UTF-8 characters into a list of Unicode codepoint * * @param string[] $chars * @return integer[] */ protected function charsToCodepoints(array $chars): array { return array_map([$this, 'cp'], $chars); } /** * Convert a list of UTF-8 characters into a list of Unicode codepoint with surrogates * * @param string[] $chars * @return integer[] */ protected function charsToCodepointsWithSurrogates(array $chars): array { $codepoints = []; foreach ($chars as $char) { $cp = $this->cp($char); if ($cp < 0x10000) { $codepoints[] = $cp; } else { $codepoints[] = 0xD7C0 + ($cp >> 10); $codepoints[] = 0xDC00 + ($cp & 0x3FF); } } return $codepoints; } /** * Compute and return the Unicode codepoint for given UTF-8 char * * @param string $char UTF-8 char * @return integer */ protected function cp(string $char): int { $cp = ord($char[0]); if ($cp >= 0xF0) { $cp = ($cp << 18) + (ord($char[1]) << 12) + (ord($char[2]) << 6) + ord($char[3]) - 0x3C82080; } elseif ($cp >= 0xE0) { $cp = ($cp << 12) + (ord($char[1]) << 6) + ord($char[2]) - 0xE2080; } elseif ($cp >= 0xC0) { $cp = ($cp << 6) + ord($char[1]) - 0x3080; } return $cp; } }