Chris@0: get('index.minimum_word_size') Chris@0: * characters. Chris@0: * Chris@0: * @see SearchQuery::getStatus() Chris@0: */ Chris@0: const NO_POSITIVE_KEYWORDS = 1; Chris@0: Chris@0: /** Chris@0: * Indicates that part of the search expression was ignored. Chris@0: * Chris@0: * To prevent Denial of Service attacks, only Chris@0: * \Drupal::config('search.settings')->get('and_or_limit') expressions Chris@0: * (positive keywords, phrases, negative keywords) are allowed; this flag Chris@0: * indicates that expressions existed past that limit and they were removed. Chris@0: * Chris@0: * @see SearchQuery::getStatus() Chris@0: */ Chris@0: const EXPRESSIONS_IGNORED = 2; Chris@0: Chris@0: /** Chris@0: * Indicates that lower-case "or" was in the search expression. Chris@0: * Chris@0: * The word "or" in lower case was found in the search expression. This Chris@0: * probably means someone was trying to do an OR search but used lower-case Chris@0: * instead of upper-case. Chris@0: * Chris@0: * @see SearchQuery::getStatus() Chris@0: */ Chris@0: const LOWER_CASE_OR = 4; Chris@0: Chris@0: /** Chris@0: * Indicates that no positive keyword matches were found. Chris@0: * Chris@0: * @see SearchQuery::getStatus() Chris@0: */ Chris@0: const NO_KEYWORD_MATCHES = 8; Chris@0: Chris@0: /** Chris@0: * The keywords and advanced search options that are entered by the user. Chris@0: * Chris@0: * @var string Chris@0: */ Chris@0: protected $searchExpression; Chris@0: Chris@0: /** Chris@0: * The type of search (search type). Chris@0: * Chris@0: * This maps to the value of the type column in search_index, and is usually Chris@0: * equal to the machine-readable name of the plugin or the search page. Chris@0: * Chris@0: * @var string Chris@0: */ Chris@0: protected $type; Chris@0: Chris@0: /** Chris@0: * Parsed-out positive and negative search keys. 
Chris@0: * Chris@0: * @var array Chris@0: */ Chris@0: protected $keys = ['positive' => [], 'negative' => []]; Chris@0: Chris@0: /** Chris@0: * Indicates whether the query conditions are simple or complex (LIKE). Chris@0: * Chris@0: * @var bool Chris@0: */ Chris@0: protected $simple = TRUE; Chris@0: Chris@0: /** Chris@0: * Conditions that are used for exact searches. Chris@0: * Chris@0: * This is always used for the second step in the query, but is not part of Chris@0: * the preparation step unless $this->simple is FALSE. Chris@0: * Chris@18: * @var Drupal\Core\Database\Query\ConditionInterface[] Chris@0: */ Chris@0: protected $conditions; Chris@0: Chris@0: /** Chris@0: * Indicates how many matches for a search query are necessary. Chris@0: * Chris@0: * @var int Chris@0: */ Chris@0: protected $matches = 0; Chris@0: Chris@0: /** Chris@0: * Array of positive search words. Chris@0: * Chris@0: * These words have to match against {search_index}.word. Chris@0: * Chris@0: * @var array Chris@0: */ Chris@0: protected $words = []; Chris@0: Chris@0: /** Chris@0: * Multiplier to normalize the keyword score. Chris@0: * Chris@0: * This value is calculated by the preparation step, and is used as a Chris@0: * multiplier of the word scores to make sure they are between 0 and 1. Chris@0: * Chris@0: * @var float Chris@0: */ Chris@0: protected $normalize = 0; Chris@0: Chris@0: /** Chris@0: * Indicates whether the preparation step has been executed. Chris@0: * Chris@0: * @var bool Chris@0: */ Chris@0: protected $executedPrepare = FALSE; Chris@0: Chris@0: /** Chris@0: * A bitmap of status conditions, described in getStatus(). Chris@0: * Chris@0: * @var int Chris@0: * Chris@0: * @see SearchQuery::getStatus() Chris@0: */ Chris@0: protected $status = 0; Chris@0: Chris@0: /** Chris@0: * The word score expressions. 
/**
 * The word score expressions.
 *
 * @var array
 *
 * @see SearchQuery::addScore()
 */
protected $scores = [];

/**
 * Arguments for the score expressions.
 *
 * @var array
 */
protected $scoresArguments = [];

/**
 * The number of 'i.relevance' occurrences in score expressions.
 *
 * @var int
 */
protected $relevance_count = 0;

/**
 * Multipliers for score expressions.
 *
 * @var array
 */
protected $multiply = [];

/**
 * Sets the search query expression.
 *
 * Also tags the query with 'search_' . $type and resets the exact-match
 * condition group and the status bitmap.
 *
 * @param string $expression
 *   A search string, which can contain keywords and options.
 * @param string $type
 *   The search type. This maps to {search_index}.type in the database.
 *
 * @return $this
 */
public function searchExpression($expression, $type) {
  $this->searchExpression = $expression;
  $this->type = $type;

  // Add query tag.
  $this->addTag('search_' . $type);

  // Initialize conditions and status.
  $this->conditions = new Condition('AND');
  $this->status = 0;

  return $this;
}

/**
 * Parses the search query into SQL conditions.
 *
 * Sets up the following variables:
 * - $this->keys
 * - $this->words
 * - $this->conditions
 * - $this->simple
 * - $this->matches
 *
 * May also set the SearchQuery::EXPRESSIONS_IGNORED and
 * SearchQuery::LOWER_CASE_OR flags in $this->status.
 */
protected function parseSearchExpression() {
  // Matches words optionally prefixed by a - sign. A word in this case is
  // something between two spaces, optionally quoted.
  preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $this->searchExpression, $keywords, PREG_SET_ORDER);

  if (count($keywords) == 0) {
    return;
  }

  // Classify tokens.
  $in_or = FALSE;
  $limit_combinations = \Drupal::config('search.settings')->get('and_or_limit');
  // The first search expression does not count as AND.
  $and_count = -1;
  $or_count = 0;
  foreach ($keywords as $match) {
    if ($or_count && $and_count + $or_count >= $limit_combinations) {
      // Ignore all further search expressions to prevent Denial-of-Service
      // attacks using a high number of AND/OR combinations.
      $this->status |= SearchQuery::EXPRESSIONS_IGNORED;
      break;
    }

    // Strip off phrase quotes. The pattern above guarantees that $match[2]
    // is never empty, so reading offset 0 is safe. Square brackets are used
    // because curly-brace string offsets were removed in PHP 8.
    $phrase = FALSE;
    if ($match[2][0] === '"') {
      $match[2] = substr($match[2], 1, -1);
      $phrase = TRUE;
      $this->simple = FALSE;
    }

    // Simplify keyword according to indexing rules and external
    // preprocessors. Use same process as during search indexing, so it
    // will match search index.
    $words = search_simplify($match[2]);
    // Re-explode in case simplification added more words, except when
    // matching a phrase.
    $words = $phrase ? [$words] : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);

    // Negative matches.
    if ($match[1] === '-') {
      $this->keys['negative'] = array_merge($this->keys['negative'], $words);
    }
    // OR operator: instead of a single keyword, we store an array of all
    // OR'd keywords.
    elseif ($match[2] === 'OR' && count($this->keys['positive'])) {
      $last = array_pop($this->keys['positive']);
      // Starting a new OR?
      if (!is_array($last)) {
        $last = [$last];
      }
      $this->keys['positive'][] = $last;
      $in_or = TRUE;
      $or_count++;
      continue;
    }
    // AND operator: implied, so just ignore it.
    elseif ($match[2] === 'AND' || $match[2] === 'and') {
      continue;
    }
    // Plain keyword.
    else {
      if ($match[2] === 'or') {
        // Lower-case "or" instead of "OR" is a warning condition.
        $this->status |= SearchQuery::LOWER_CASE_OR;
      }
      if ($in_or) {
        // Add to last element (which is an array).
        $last_index = count($this->keys['positive']) - 1;
        $this->keys['positive'][$last_index] = array_merge($this->keys['positive'][$last_index], $words);
      }
      else {
        $this->keys['positive'] = array_merge($this->keys['positive'], $words);
        $and_count++;
      }
    }
    $in_or = FALSE;
  }

  // Convert keywords into SQL statements.
  $has_and = FALSE;
  $has_or = FALSE;
  // Positive matches.
  foreach ($this->keys['positive'] as $key) {
    // Group of ORed terms.
    if (is_array($key) && count($key)) {
      // If we had already found one OR, this is another one AND-ed with the
      // first, meaning it is not a simple query.
      if ($has_or) {
        $this->simple = FALSE;
      }
      $has_or = TRUE;
      $has_new_scores = FALSE;
      $queryor = new Condition('OR');
      foreach ($key as $or) {
        list($num_new_scores) = $this->parseWord($or);
        $has_new_scores = $has_new_scores || $num_new_scores > 0;
        $queryor->condition('d.data', "% $or %", 'LIKE');
      }
      if (count($queryor)) {
        $this->conditions->condition($queryor);
        // A group of OR keywords only needs to match once.
        if ($has_new_scores) {
          $this->matches++;
        }
      }
    }
    // Single ANDed term.
    else {
      $has_and = TRUE;
      list($num_new_scores, $num_valid_words) = $this->parseWord($key);
      $this->conditions->condition('d.data', "% $key %", 'LIKE');
      if (!$num_valid_words) {
        $this->simple = FALSE;
      }
      // Each AND keyword needs to match at least once.
      $this->matches += $num_new_scores;
    }
  }
  if ($has_and && $has_or) {
    $this->simple = FALSE;
  }

  // Negative matches.
  foreach ($this->keys['negative'] as $key) {
    $this->conditions->condition('d.data', "% $key %", 'NOT LIKE');
    $this->simple = FALSE;
  }
}

/**
 * Parses a word or phrase for parseSearchExpression().
 *
 * Splits a phrase into words on single spaces. Each word that is numeric or
 * at least 'index.minimum_word_size' characters long counts as valid, and is
 * added to $this->words if it is not already there.
 *
 * @param string $word
 *   The word or phrase to parse.
 *
 * @return int[]
 *   A list containing the number of words newly added to $this->words,
 *   followed by the total number of valid words in the phrase.
 */
protected function parseWord($word) {
  $num_new_scores = 0;
  $num_valid_words = 0;
  // Look the setting up once rather than once per word.
  $minimum_word_size = \Drupal::config('search.settings')->get('index.minimum_word_size');

  // Determine the scorewords of this word/phrase.
  foreach (explode(' ', $word) as $s) {
    if (is_numeric($s) || mb_strlen($s) >= $minimum_word_size) {
      if (!isset($this->words[$s])) {
        $this->words[$s] = $s;
        $num_new_scores++;
      }
      $num_valid_words++;
    }
  }

  // Return the number of added words and the number of valid words.
  return [$num_new_scores, $num_valid_words];
}
/**
 * Prepares the query and calculates the normalization factor.
 *
 * The search expression is parsed, the keyword conditions are attached to the
 * query, and a first pass is run to find the highest keyword score, which is
 * stored as the normalization value used when weighting results.
 *
 * Error and warning conditions can apply. Call getStatus() after calling
 * this method to retrieve them.
 *
 * @return bool
 *   TRUE if at least one keyword matched the search index; FALSE if not.
 */
public function prepareAndNormalize() {
  $this->parseSearchExpression();
  $this->executedPrepare = TRUE;

  // Without any positive keywords there is nothing to join or normalize,
  // even though the query itself could technically proceed.
  if (empty($this->words)) {
    $this->status |= SearchQuery::NO_POSITIVE_KEYWORDS;
    return FALSE;
  }

  // Basic search query: the indexed word must be one of the entered keywords.
  $any_word = new Condition('OR');
  foreach ($this->words as $keyword) {
    $any_word->condition('i.word', $keyword);
  }
  $this->condition($any_word);

  // Keyword normalization information.
  $this->join('search_total', 't', 'i.word = t.word');
  $this
    ->condition('i.type', $this->type)
    ->groupBy('i.type')
    ->groupBy('i.sid');

  if ($this->simple) {
    // For a simple query the required number of matching words is known, so
    // impose it up front. For non-simple queries this criterion could lead
    // to incorrectly deciding not to continue with the full query.
    $this->having('COUNT(*) >= :matches', [':matches' => $this->matches]);

    // Work on a copy of the query for the normalization pass.
    $normalize_query = clone $this->query;
  }
  else {
    // Work on a copy of the query for the normalization pass. Complex
    // queries also need the LIKE conditions; simple ones do not.
    $normalize_query = clone $this->query;
    $normalize_query->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type AND i.langcode = d.langcode');
    if (count($this->conditions)) {
      $normalize_query->condition($this->conditions);
    }
  }

  // The normalization value is the highest summed score of the positive
  // keywords. The query may carry extra fields added by the caller.
  $normalize_query->addExpression('SUM(i.score * t.count)', 'calculated_score');
  $top_row = $normalize_query
    ->range(0, 1)
    ->orderBy('calculated_score', 'DESC')
    ->execute()
    ->fetchObject();
  if (isset($top_row->calculated_score)) {
    $this->normalize = (float) $top_row->calculated_score;
  }

  // A zero normalization value means the positive keywords matched nothing.
  if (!$this->normalize) {
    $this->status |= SearchQuery::NO_KEYWORD_MATCHES;
    return FALSE;
  }

  return TRUE;
}

/**
 * {@inheritdoc}
 */
public function preExecute(SelectInterface $query = NULL) {
  // Run the preparation pass lazily, exactly once.
  if (!$this->executedPrepare) {
    $this->prepareAndNormalize();
  }

  // No normalization value means no keyword matches; skip the parent step.
  return $this->normalize ? parent::preExecute($query) : FALSE;
}
/**
 * Adds a custom score expression to the search query.
 *
 * Score expressions are used to order search results. If no calls to
 * addScore() have taken place, a default keyword relevance score will be
 * used. However, if at least one call to addScore() has taken place, the
 * keyword relevance score is not automatically added.
 *
 * Note that you must use this method to add ordering to your searches, and
 * not call orderBy() directly, when using the SearchQuery extender. This is
 * because of the two-pass system the SearchQuery class uses to normalize
 * scores.
 *
 * @param string $score
 *   The score expression, which should evaluate to a number between 0 and 1.
 *   The string 'i.relevance' in a score expression will be replaced by a
 *   measure of keyword relevance between 0 and 1.
 * @param array $arguments
 *   Query arguments needed to provide values to the score expression.
 * @param float $multiply
 *   If set, the score is multiplied with this value. However, all scores
 *   with multipliers are then divided by the total of all multipliers, so
 *   that overall, the normalization is maintained.
 *
 * @return $this
 */
public function addScore($score, $arguments = [], $multiply = FALSE) {
  if ($multiply) {
    $i = count($this->multiply);
    // Modify the score expression so it is multiplied by the multiplier,
    // with a divisor to renormalize. Note that the ROUND here is necessary
    // for PostgreSQL and SQLite in order to ensure that the :multiply_* and
    // :total_* arguments are treated as a numeric type, because the
    // PostgreSQL PDO driver sometimes puts values in as strings instead of
    // numbers in complex expressions like this.
    $score = "(ROUND(:multiply_$i, 4)) * COALESCE(($score), 0) / (ROUND(:total_$i, 4))";
    // Add an argument for the multiplier. The :total_$i argument is taken
    // care of in the execute() method, which is when the total divisor is
    // calculated.
    $arguments[':multiply_' . $i] = $multiply;
    $this->multiply[] = $multiply;
  }

  // Search scoring needs a way to include a keyword relevance in the score.
  // For historical reasons, this is done by putting 'i.relevance' into the
  // search expression. So, use string replacement to change this to a
  // calculated query expression, counting the number of occurrences so
  // in the execute() method we can add arguments. One occurrence is replaced
  // per iteration so that each gets its own :normalization_N placeholder.
  while (strpos($score, 'i.relevance') !== FALSE) {
    $pieces = explode('i.relevance', $score, 2);
    $score = implode('((ROUND(:normalization_' . $this->relevance_count . ', 4)) * i.score * t.count)', $pieces);
    $this->relevance_count++;
  }

  $this->scores[] = $score;
  $this->scoresArguments += $arguments;

  return $this;
}

/**
 * Executes the search.
 *
 * The complex conditions are applied to the query including score
 * expressions and ordering.
 *
 * Error and warning conditions can apply. Call getStatus() after calling
 * this method to retrieve them.
 *
 * @return \Drupal\Core\Database\StatementInterface|null
 *   A query result set containing the results of the query.
 */
public function execute() {
  if (!$this->preExecute($this)) {
    return NULL;
  }

  // Add conditions to the query.
  $this->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type AND i.langcode = d.langcode');
  if (count($this->conditions)) {
    $this->condition($this->conditions);
  }

  // Add default score (keyword relevance) if there are not any defined.
  if (empty($this->scores)) {
    $this->addScore('i.relevance');
  }

  if (count($this->multiply)) {
    // Re-normalize scores with multipliers by dividing by the total of all
    // multipliers. The expressions were altered in addScore(), so here just
    // add the arguments for the total.
    $sum = array_sum($this->multiply);
    foreach (array_keys($this->multiply) as $i) {
      $this->scoresArguments[':total_' . $i] = $sum;
    }
  }

  // Add arguments for the keyword relevance normalization number. The
  // division is safe: preExecute() returns FALSE when $this->normalize is
  // zero, so this point is not reached in that case.
  $normalization = 1.0 / $this->normalize;
  for ($i = 0; $i < $this->relevance_count; $i++) {
    $this->scoresArguments[':normalization_' . $i] = $normalization;
  }

  // Add all scores together to form a query field.
  $this->addExpression('SUM(' . implode(' + ', $this->scores) . ')', 'calculated_score', $this->scoresArguments);

  // If an order has not yet been set for this query, add a default order
  // that sorts by the calculated sum of scores.
  if (count($this->getOrderBy()) == 0) {
    $this->orderBy('calculated_score', 'DESC');
  }

  // Add query metadata.
  $this
    ->addMetaData('normalize', $this->normalize)
    ->fields('i', ['type', 'sid']);
  return $this->query->execute();
}

/**
 * Builds the default count query for SearchQuery.
 *
 * Since SearchQuery always uses GROUP BY, we can default to a subquery. We
 * also add the same conditions as execute() because countQuery() is called
 * first.
 *
 * @return object
 *   A select query, created from $this->connection, that counts the rows of
 *   the inner search query.
 */
public function countQuery() {
  if (!$this->executedPrepare) {
    $this->prepareAndNormalize();
  }

  // Clone the inner query.
  $inner = clone $this->query;

  // Add conditions to query. The join must match the one used in execute(),
  // including the language code; otherwise index rows get paired with
  // dataset rows of other languages and the count can differ from the
  // number of results actually returned.
  $inner->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type AND i.langcode = d.langcode');
  if (count($this->conditions)) {
    $inner->condition($this->conditions);
  }

  // Remove existing fields and expressions, they are not needed for a count
  // query.
  $fields =& $inner->getFields();
  $fields = [];
  $expressions =& $inner->getExpressions();
  $expressions = [];

  // Add sid as the only field and count them as a subquery.
  $count = $this->connection->select($inner->fields('i', ['sid']), NULL);

  // Add the COUNT() expression.
  $count->addExpression('COUNT(*)');

  return $count;
}

/**
 * Returns the query status bitmap.
 *
 * @return int
 *   A bitmap indicating query status. Zero indicates there were no problems.
 *   A non-zero value is a combination of one or more of the following flags:
 *   - SearchQuery::NO_POSITIVE_KEYWORDS
 *   - SearchQuery::EXPRESSIONS_IGNORED
 *   - SearchQuery::LOWER_CASE_OR
 *   - SearchQuery::NO_KEYWORD_MATCHES
 */
public function getStatus() {
  return $this->status;
}