0x1949 Team - FAZEMRX - MANAGER
Edit File: SearchAPI-Fulltext.php
<?php

/**
 * Simple Machines Forum (SMF)
 *
 * @package SMF
 * @author Simple Machines https://www.simplemachines.org
 * @copyright 2023 Simple Machines and individual contributors
 * @license https://www.simplemachines.org/about/smf/license.php BSD
 *
 * @version 2.1.4
 */

if (!defined('SMF'))
	die('No direct access...');

/**
 * Class fulltext_search
 * Used for fulltext index searching
 */
class fulltext_search extends search_api
{
	/**
	 * @var array Which words are banned
	 */
	protected $bannedWords = array();

	/**
	 * @var int The minimum word length
	 */
	protected $min_word_length = 4;

	/**
	 * @var array Which databases support this method?
	 */
	protected $supported_databases = array('mysql', 'postgresql');

	/**
	 * The constructor function
	 *
	 * Marks the API as unsupported when $db_type is not one of
	 * $supported_databases; otherwise loads the comma separated banned
	 * word list from $modSettings and determines the minimum word length.
	 */
	public function __construct()
	{
		global $modSettings, $db_type;

		// Is this database supported?
		if (!in_array($db_type, $this->supported_databases))
		{
			$this->is_supported = false;
			return;
		}

		$this->bannedWords = empty($modSettings['search_banned_words']) ? array() : explode(',', $modSettings['search_banned_words']);
		$this->min_word_length = $this->_getMinWordLength();
	}

	/**
	 * {@inheritDoc}
	 *
	 * searchSort, prepareIndexes and indexedWordQuery are implemented by
	 * this class; every other method name is delegated to the parent.
	 */
	public function supportsMethod($methodName, $query_params = null)
	{
		switch ($methodName)
		{
			case 'searchSort':
			case 'prepareIndexes':
			case 'indexedWordQuery':
				return true;

			// All other methods, too bad dunno you... but maybe parent got support.
			default:
				return parent::supportsMethod($methodName, $query_params);
		}
	}

	/**
	 * fulltext_search::_getMinWordLength()
	 *
	 * What is the minimum word length full text supports?
	 *
	 * Returns 0 for PostgreSQL. Otherwise asks the server for its
	 * ft_min_word_len variable and falls back to 4 when the query fails or
	 * does not return exactly one row.
	 *
	 * @return int The minimum word length
	 */
	protected function _getMinWordLength()
	{
		global $smcFunc, $db_type;

		if ($db_type == 'postgresql')
			return 0;

		// Try to determine the minimum number of letters for a fulltext search.
		$request = $smcFunc['db_search_query']('max_fulltext_length', ' SHOW VARIABLES LIKE {string:fulltext_minimum_word_length}',
			array(
				'fulltext_minimum_word_length' => 'ft_min_word_len',
			)
		);

		// 4 is the MySQL default...
		$min_word_length = 4;

		if ($request !== false)
		{
			if ($smcFunc['db_num_rows']($request) == 1)
			{
				list (, $value) = $smcFunc['db_fetch_row']($request);

				// The database hands the value back as a string; callers expect an int.
				$min_word_length = (int) $value;
			}

			// Always release the result, even when the row count was unexpected.
			$smcFunc['db_free_result']($request);
		}

		return $min_word_length;
	}

	/**
	 * {@inheritDoc}
	 *
	 * Comparator that orders words by descending length, with a penalty of
	 * 1000 applied to any word found in the global $excludedWords list so
	 * those words sort after the others.
	 */
	public function searchSort($a, $b)
	{
		global $excludedWords, $smcFunc;

		// Guard against the global not being populated; in_array() needs an array.
		$excluded = (array) $excludedWords;

		$x = $smcFunc['strlen']($a) - (in_array($a, $excluded) ? 1000 : 0);
		$y = $smcFunc['strlen']($b) - (in_array($b, $excluded) ? 1000 : 0);

		// Higher score first.
		return $y <=> $x;
	}

	/**
	 * {@inheritDoc}
	 *
	 * Always registers the word (quoted as a phrase when text2words() does
	 * not yield exactly one sub-word) under 'indexed_words'. Unless
	 * $modSettings['search_force_index'] is set, words the fulltext index
	 * would ignore are additionally registered under 'words' and
	 * 'complex_words' so they can be matched against the message body.
	 */
	public function prepareIndexes($word, array &$wordsSearch, array &$wordsExclude, $isExcluded)
	{
		global $modSettings, $smcFunc;

		$subwords = text2words($word, null, false);

		// Anything other than exactly one sub-word is searched as a quoted phrase.
		$fulltextWord = count($subwords) === 1 ? $word : '"' . $word . '"';

		if (empty($modSettings['search_force_index']))
		{
			// A boolean capable search engine and not forced to only use an index, we may use a non indexed search
			// this is harder on the server so we are restrictive here
			if (count($subwords) > 1 && preg_match('~[.:@$]~', $word))
			{
				// using special characters that a full index would ignore and the remaining words are short which would also be ignored
				if (($smcFunc['strlen'](current($subwords)) < $this->min_word_length) && ($smcFunc['strlen'](next($subwords)) < $this->min_word_length))
				{
					$wordsSearch['words'][] = trim($word, "/*- ");
					$wordsSearch['complex_words'][] = $fulltextWord;
				}
			}
			elseif ($smcFunc['strlen'](trim($word, "/*- ")) < $this->min_word_length)
			{
				// short words have feelings too
				$wordsSearch['words'][] = trim($word, "/*- ");
				$wordsSearch['complex_words'][] = $fulltextWord;
			}
		}

		$wordsSearch['indexed_words'][] = $fulltextWord;

		if ($isExcluded)
			$wordsExclude[] = $fulltextWord;
	}

	/**
	 * {@inheritDoc}
	 *
	 * Builds the WHERE conditions from $words and $search_data['params'],
	 * then runs a single search query selecting matching message ids
	 * (prefixed with INSERT IGNORE INTO $search_data['insert_into'] when
	 * $smcFunc['db_support_ignore'] is set).
	 *
	 * Side effect: on PostgreSQL $modSettings['search_simple_fulltext'] is
	 * forced to true.
	 *
	 * @return mixed Whatever $smcFunc['db_search_query'] returns
	 */
	public function indexedWordQuery(array $words, array $search_data)
	{
		global $modSettings, $smcFunc;

		// Specify the function to search with. Regex is for word boundaries.
		$is_search_regex = !empty($modSettings['search_match_words']) && !$search_data['no_regexp'];
		$query_match_type = $is_search_regex ? 'RLIKE' : 'LIKE';

		$word_boundary_wrapper = function(string $str) use ($smcFunc): string
		{
			return sprintf($smcFunc['db_supports_pcre'] ? '\\b%s\\b' : '[[:<:]]%s[[:>:]]', $str);
		};

		// NOTE(review): '^' is rewritten to '[^]', which regex engines usually read as an
		// unterminated negated class - confirm whether a '^' can ever reach this point.
		$escape_sql_regex = function(string $str): string
		{
			return addcslashes(preg_replace('/[\[\]$.+*?&^|{}()]/', '[$0]', $str), '\\\'');
		};

		// Turns a search term into the right-hand side for the chosen LIKE/RLIKE operator.
		$build_pattern = function($term) use ($is_search_regex, $word_boundary_wrapper, $escape_sql_regex, $smcFunc)
		{
			if ($is_search_regex)
				return $word_boundary_wrapper($escape_sql_regex($term));

			return '%' . $smcFunc['db_escape_wildcard_string']($term) . '%';
		};

		$query_select = array(
			'id_msg' => 'm.id_msg',
		);
		$query_where = array();
		$query_params = $search_data['params'];

		$is_postgresql = $smcFunc['db_title'] === POSTGRE_TITLE;

		// PostgreSQL always takes the simple fulltext path.
		if ($is_postgresql)
			$modSettings['search_simple_fulltext'] = true;

		if ($query_params['id_search'])
			$query_select['id_search'] = '{int:id_search}';

		// Words the index cannot handle are matched directly against the body.
		$count = 0;
		if (empty($modSettings['search_simple_fulltext']))
			foreach ($words['words'] as $regularWord)
			{
				$negation = in_array($regularWord, $query_params['excluded_words']) ? 'NOT ' : '';

				$query_where[] = 'm.body ' . $negation . $query_match_type . ' {string:complex_body_' . $count . '}';
				$query_params['complex_body_' . $count++] = $build_pattern($regularWord);
			}

		if ($query_params['user_query'])
			$query_where[] = '{raw:user_query}';

		if ($query_params['board_query'])
			$query_where[] = 'm.id_board {raw:board_query}';

		if ($query_params['topic'])
			$query_where[] = 'm.id_topic = {int:topic}';

		if ($query_params['min_msg_id'])
			$query_where[] = 'm.id_msg >= {int:min_msg_id}';

		if ($query_params['max_msg_id'])
			$query_where[] = 'm.id_msg <= {int:max_msg_id}';

		$count = 0;
		if (!empty($query_params['excluded_phrases']) && empty($modSettings['search_force_index']))
			foreach ($query_params['excluded_phrases'] as $phrase)
			{
				$query_where[] = 'subject NOT ' . $query_match_type . ' {string:exclude_subject_phrase_' . $count . '}';
				$query_params['exclude_subject_phrase_' . $count++] = $build_pattern($phrase);
			}

		$count = 0;
		if (!empty($query_params['excluded_subject_words']) && empty($modSettings['search_force_index']))
			foreach ($query_params['excluded_subject_words'] as $excludedWord)
			{
				$query_where[] = 'subject NOT ' . $query_match_type . ' {string:exclude_subject_words_' . $count . '}';
				$query_params['exclude_subject_words_' . $count++] = $build_pattern($excludedWord);
			}

		if (!empty($modSettings['search_simple_fulltext']))
		{
			if ($is_postgresql)
			{
				$language_ftx = $smcFunc['db_search_language']();

				$query_where[] = 'to_tsvector({string:language_ftx},body) @@ plainto_tsquery({string:language_ftx},{string:body_match})';
				$query_params['language_ftx'] = $language_ftx;
			}
			else
				$query_where[] = 'MATCH (body) AGAINST ({string:body_match})';

			$query_params['body_match'] = implode(' ', array_diff($words['indexed_words'], $query_params['excluded_index_words']));
		}
		else
		{
			// Only reachable for non-PostgreSQL databases, since PostgreSQL is forced onto the simple path above.

			// remove any indexed words that are used in the complex body search terms
			$words['indexed_words'] = array_diff($words['indexed_words'], $words['complex_words']);

			// Required words get a '+', excluded ones a '-'.
			$boolean_terms = array();
			foreach ($words['indexed_words'] as $fulltextWord)
				$boolean_terms[] = (in_array($fulltextWord, $query_params['excluded_index_words']) ? '-' : '+') . $fulltextWord;

			// implode() always yields a string, unlike substr() on an empty string before PHP 8.
			$query_params['boolean_match'] = implode(' ', $boolean_terms);

			// if we have bool terms to search, add them in
			if ($query_params['boolean_match'])
				$query_where[] = 'MATCH (body) AGAINST ({string:boolean_match} IN BOOLEAN MODE)';
		}

		$insert_clause = '';
		if ($smcFunc['db_support_ignore'])
			$insert_clause = ' INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . ' (' . implode(', ', array_keys($query_select)) . ')';

		$limit_clause = '';
		if (!empty($search_data['max_results']))
			$limit_clause = ' LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results']);

		$ignoreRequest = $smcFunc['db_search_query']('insert_into_log_messages_fulltext',
			$insert_clause . ' SELECT ' . implode(', ', $query_select) . ' FROM {db_prefix}messages AS m WHERE ' . implode(' AND ', $query_where) . $limit_clause,
			$query_params
		);

		return $ignoreRequest;
	}
}

?>