configFactory = $config_factory;
    $this->aliasStorageHelper = $alias_storage_helper;
    $this->languageManager = $language_manager;
    $this->cacheBackend = $cache_backend;
    $this->transliteration = $transliteration;
    $this->moduleHandler = $module_handler;
  }

  /**
   * {@inheritdoc}
   */
  public function cleanAlias($alias) {
    // Normalizes an already-built alias: collapses separators and slashes,
    // then truncates it to the allowed length.
    $config = $this->configFactory->get('pathauto.settings');

    // The effective limit is the smaller of the configured maximum and the
    // limit reported by the alias storage helper.
    $alias_max_length = min($config->get('max_length'), $this->aliasStorageHelper->getAliasSchemaMaxLength());

    // Trim duplicate, leading, and trailing separators. Do this before
    // cleaning slashes since a pattern like
    // "[token1]/[token2]-[token3]/[token4]" could end up like
    // "value1/-/value2" and if slashes were cleaned first this would result in
    // a duplicate slash.
    $output = $this->getCleanSeparators($alias);

    // Trim duplicate, leading, and trailing slashes.
    $output = $this->getCleanSeparators($output, '/');

    // Shorten to a logical place based on word boundaries.
    return Unicode::truncate($output, $alias_max_length, TRUE);
  }

  /**
   * {@inheritdoc}
   */
  public function getCleanSeparators($string, $separator = NULL) {
    // Fall back to the configured separator. The settings are only loaded
    // when the caller did not supply a separator.
    if (!isset($separator)) {
      $separator = $this->configFactory->get('pathauto.settings')->get('separator');
    }

    // Without a separator there is nothing to clean. The string cast keeps a
    // NULL (unset) setting away from string functions, which reject NULL on
    // PHP 8.1+.
    if ((string) $separator === '') {
      return $string;
    }

    // Trim any leading or trailing separators.
    $output = trim($string, $separator);

    // Escape the separator for use in regular expressions.
    $seppattern = preg_quote($separator, '/');

    // Replace multiple separators with a single one.
    $output = preg_replace("/$seppattern+/", $separator, $output);

    if ($separator !== '/') {
      // Replace trailing separators around slashes.
      return preg_replace("/\/+$seppattern\/+|$seppattern\/+|\/+$seppattern/", "/", $output);
    }

    // If the separator is a slash, we need to re-add the leading slash
    // dropped by the trim function.
    return '/' . $output;
  }

  /**
   * {@inheritdoc}
   */
  public function cleanString($string, array $options = []) {
    // Cleans one string for use inside an alias: strips markup, applies the
    // punctuation rules, optionally transliterates and reduces to ASCII,
    // drops ignored words, normalizes separators, optionally lowercases and
    // finally truncates. Results are memoized per language code and input.
    if (empty($this->cleanStringCache)) {
      // Generate and cache variables used in this method.
      $config = $this->configFactory->get('pathauto.settings');
      $this->cleanStringCache = [
        'separator' => $config->get('separator'),
        'strings' => [],
        'transliterate' => $config->get('transliterate'),
        'punctuation' => [],
        'reduce_ascii' => (bool) $config->get('reduce_ascii'),
        'ignore_words_regex' => FALSE,
        'lowercase' => (bool) $config->get('case'),
        'maxlength' => min($config->get('max_component_length'), $this->aliasStorageHelper->getAliasSchemaMaxLength()),
      ];

      // Generate and cache the punctuation replacements for strtr().
      $punctuation = $this->getPunctuationCharacters();
      foreach ($punctuation as $name => $details) {
        $action = $config->get('punctuation.' . $name);
        // The switch deliberately keeps loose comparison against the
        // configured value.
        switch ($action) {
          case PathautoGeneratorInterface::PUNCTUATION_REMOVE:
            $this->cleanStringCache['punctuation'][$details['value']] = '';
            break;

          case PathautoGeneratorInterface::PUNCTUATION_REPLACE:
            $this->cleanStringCache['punctuation'][$details['value']] = $this->cleanStringCache['separator'];
            break;

          case PathautoGeneratorInterface::PUNCTUATION_DO_NOTHING:
            // Literally do nothing.
            break;
        }
      }

      // Generate and cache the ignored words regular expression. The word
      // list is split on commas and whitespace and joined into alternatives
      // delimited by word boundaries.
      $ignore_words = $config->get('ignore_words');
      $ignore_words_regex = preg_replace(['/^[,\s]+|[,\s]+$/', '/[,\s]+/'], ['', '\b|\b'], $ignore_words);
      if ($ignore_words_regex) {
        $this->cleanStringCache['ignore_words_regex'] = '\b' . $ignore_words_regex . '\b';
        if (function_exists('mb_eregi_replace')) {
          // Use the multibyte case-insensitive replace when it is available.
          mb_regex_encoding('UTF-8');
          $this->cleanStringCache['ignore_words_callback'] = 'mb_eregi_replace';
        }
        else {
          // Otherwise fall back to PCRE, which needs delimiters and an
          // explicit case-insensitive flag.
          $this->cleanStringCache['ignore_words_callback'] = 'preg_replace';
          $this->cleanStringCache['ignore_words_regex'] = '/' . $this->cleanStringCache['ignore_words_regex'] . '/i';
        }
      }
    }

    // Empty strings do not need any processing.
    if ($string === '' || $string === NULL) {
      return '';
    }

    // Resolve the language code: a language object in the options wins over a
    // plain langcode, and 'en' is used when neither is given.
    $langcode = 'en';
    if (!empty($options['language'])) {
      $langcode = $options['language']->getId();
    }
    elseif (!empty($options['langcode'])) {
      $langcode = $options['langcode'];
    }

    // Check if the string has already been processed, and if so return the
    // cached result.
    if (isset($this->cleanStringCache['strings'][$langcode][(string) $string])) {
      return $this->cleanStringCache['strings'][$langcode][(string) $string];
    }

    // Remove all HTML tags from the string.
    $output = Html::decodeEntities($string);
    $output = PlainTextOutput::renderFromHtml($output);

    // Replace or drop punctuation based on user settings.
    $output = strtr($output, $this->cleanStringCache['punctuation']);

    // Optionally transliterate.
    if ($this->cleanStringCache['transliterate']) {
      // If the reduce strings to letters and numbers is enabled, don't bother
      // replacing unknown characters with a question mark. Use an empty string
      // instead.
      $output = $this->transliteration->transliterate($output, $langcode, $this->cleanStringCache['reduce_ascii'] ? '' : '?');

      // Replace or drop punctuation again as the transliteration process can
      // convert special characters to punctuation.
      $output = strtr($output, $this->cleanStringCache['punctuation']);
    }

    // Reduce strings to letters and numbers.
    if ($this->cleanStringCache['reduce_ascii']) {
      $output = preg_replace('/[^a-zA-Z0-9\/]+/', $this->cleanStringCache['separator'], $output);
    }

    // Get rid of words that are on the ignore list, unless doing so would
    // leave nothing but whitespace; in that case the text is kept as it was.
    if ($this->cleanStringCache['ignore_words_regex']) {
      $words_removed = $this->cleanStringCache['ignore_words_callback']($this->cleanStringCache['ignore_words_regex'], '', $output);
      if (mb_strlen(trim($words_removed)) > 0) {
        $output = $words_removed;
      }
    }

    // Always replace whitespace with the separator.
    $output = preg_replace('/\s+/', $this->cleanStringCache['separator'], $output);

    // Trim duplicates and remove trailing and leading separators.
    $output = $this->getCleanSeparators($this->getCleanSeparators($output, $this->cleanStringCache['separator']));

    // Optionally convert to lower case.
    if ($this->cleanStringCache['lowercase']) {
      $output = mb_strtolower($output);
    }

    // Shorten to a logical place based on word boundaries.
    $output = Unicode::truncate($output, $this->cleanStringCache['maxlength'], TRUE);

    // Cache this result in the static array.
    $this->cleanStringCache['strings'][$langcode][(string) $string] = $output;

    return $output;
  }

  /**
   * {@inheritdoc}
   */
  public function getPunctuationCharacters() {
    // Serve the list already held on this object when there is one.
    if (!empty($this->punctuationCharacters)) {
      return $this->punctuationCharacters;
    }

    // Next try the cache backend, keyed by the current language because the
    // labels are translated.
    $langcode = $this->languageManager->getCurrentLanguage()->getId();
    $cid = 'pathauto:punctuation:' . $langcode;
    if ($cache = $this->cacheBackend->get($cid)) {
      $this->punctuationCharacters = $cache->data;
      return $this->punctuationCharacters;
    }

    // Build the list from scratch. Each entry maps a settings key to the
    // character itself ('value') and its translated label ('name').
    $punctuation = [
      'double_quotes' => ['value' => '"', 'name' => $this->t('Double quotation marks')],
      'quotes' => ['value' => '\'', 'name' => $this->t("Single quotation marks (apostrophe)")],
      'backtick' => ['value' => '`', 'name' => $this->t('Back tick')],
      'comma' => ['value' => ',', 'name' => $this->t('Comma')],
      'period' => ['value' => '.', 'name' => $this->t('Period')],
      'hyphen' => ['value' => '-', 'name' => $this->t('Hyphen')],
      'underscore' => ['value' => '_', 'name' => $this->t('Underscore')],
      'colon' => ['value' => ':', 'name' => $this->t('Colon')],
      'semicolon' => ['value' => ';', 'name' => $this->t('Semicolon')],
      'pipe' => ['value' => '|', 'name' => $this->t('Vertical bar (pipe)')],
      'left_curly' => ['value' => '{', 'name' => $this->t('Left curly bracket')],
      'left_square' => ['value' => '[', 'name' => $this->t('Left square bracket')],
      'right_curly' => ['value' => '}', 'name' => $this->t('Right curly bracket')],
      'right_square' => ['value' => ']', 'name' => $this->t('Right square bracket')],
      'plus' => ['value' => '+', 'name' => $this->t('Plus sign')],
      'equal' => ['value' => '=', 'name' => $this->t('Equal sign')],
      'asterisk' => ['value' => '*', 'name' => $this->t('Asterisk')],
      'ampersand' => ['value' => '&', 'name' => $this->t('Ampersand')],
      'percent' => ['value' => '%', 'name' => $this->t('Percent sign')],
      'caret' => ['value' => '^', 'name' => $this->t('Caret')],
      'dollar' => ['value' => '$', 'name' => $this->t('Dollar sign')],
      'hash' => ['value' => '#', 'name' => $this->t('Number sign (pound sign, hash)')],
      'at' => ['value' => '@', 'name' => $this->t('At sign')],
      'exclamation' => ['value' => '!', 'name' => $this->t('Exclamation mark')],
      'tilde' => ['value' => '~', 'name' => $this->t('Tilde')],
      'left_parenthesis' => ['value' => '(', 'name' => $this->t('Left parenthesis')],
      'right_parenthesis' => ['value' => ')', 'name' => $this->t('Right parenthesis')],
      'question_mark' => ['value' => '?', 'name' => $this->t('Question mark')],
      'less_than' => ['value' => '<', 'name' => $this->t('Less-than sign')],
      'greater_than' => ['value' => '>', 'name' => $this->t('Greater-than sign')],
      'slash' => ['value' => '/', 'name' => $this->t('Slash')],
      'back_slash' => ['value' => '\\', 'name' => $this->t('Backslash')],
    ];

    // Allow modules to alter the punctuation list and cache the result.
    $this->moduleHandler->alter('pathauto_punctuation_chars', $punctuation);
    $this->cacheBackend->set($cid, $punctuation);
    $this->punctuationCharacters = $punctuation;

    return $this->punctuationCharacters;
  }

  /**
   * {@inheritdoc}
   */
  public function cleanTokenValues(&$replacements, $data = [], $options = []) {
    // The safe-token pattern depends only on configuration, so it is built
    // once up front rather than on every iteration.
    $config = $this->configFactory->get('pathauto.settings');
    $safe_tokens = implode('|', (array) $config->get('safe_tokens'));
    $safe_token_pattern = '/(\[|\:)(' . $safe_tokens . ')(:|\]$)/';

    foreach ($replacements as $token => $value) {
      // Only clean non-path tokens; values of tokens matching the safe list
      // are left exactly as they are.
      if (!preg_match($safe_token_pattern, $token)) {
        $replacements[$token] = $this->cleanString($value, $options);
      }
    }
  }

  /**
   * {@inheritdoc}
   */
  public function resetCaches() {
    // Drops the settings and per-string results memoized by cleanString().
    // The punctuation list held in $this->punctuationCharacters is not
    // touched here.
    $this->cleanStringCache = [];
  }

}