Making WordPress.org

Changeset 13778


Ignore:
Timestamp:
06/06/2024 04:13:41 AM (9 months ago)
Author:
dd32
Message:

Plugin Directory: Search: Improve the search code for phrase matching.

This does not add support for proper phrase matching in the search, but rather corrects the code to properly handle Jetpack Phrase search mode.

Previously, most of the customizations in our search code were being skipped, as the structure of the Jetpack ES query was in an unexpected form.

See #2642.

Location:
sites/trunk/wordpress.org/public_html/wp-content/plugins/plugin-directory
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • sites/trunk/wordpress.org/public_html/wp-content/plugins/plugin-directory/class-plugin-directory.php

    r13721 r13778  
    975975        // Sanitize / cleanup the search query a little bit.
    976976        if ( $wp_query->is_search() ) {
    977             $s = $wp_query->get( 's' );
     977            $s = wp_unslash( $wp_query->get( 's' ) );
    978978            $s = urldecode( $s );
    979979
     
    988988            }
    989989
    990             // Trim off special characters, only allowing wordy characters at the end of searches.
    991             $s = preg_replace( '!(\W+)$!iu', '', $s );
    992             // ..and whitespace
     990            // Trim whitespace
    993991            $s = trim( $s );
    994992
    995             $wp_query->set( 's', $s );
     993            // If we're searching for a phrase, only trim non-quotey+wordy characters.
     994            if ( str_starts_with( $s, '"' ) || str_starts_with( $s, "'" ) ) {
     995                $s = preg_replace( '!(\s*[^\'"\w]+)$!iu', '', $s );
     996            } else {
     997                // If we're searching for a word, trim all non-wordy characters.
     998                $s = preg_replace( '!(\s*\W+)$!iu', '', $s );
     999            }
     1000
     1001            $wp_query->set( 's', wp_slash( $s ) );
    9961002
    9971003            // If the search is in the block directory, require that.
  • sites/trunk/wordpress.org/public_html/wp-content/plugins/plugin-directory/class-plugin-search.php

    r13749 r13778  
    249249        }
    250250
     251        // In phrase-search mode, the should is not present, and it's instead simply a `must` query.
     252        $es_query_args[ 'query' ][ 'function_score' ][ 'query' ][ 'bool' ][ 'should' ] ??= [];
     253
     254        // We'll always be adding function scoring.
     255        $es_query_args[ 'query' ][ 'function_score' ][ 'functions' ] ??= [];
     256
    251257        // The should match is where we add the fields to be searched in, and the weighting of them (boost).
    252         $should_match = [];
    253         if ( isset( $es_query_args[ 'query' ][ 'function_score' ][ 'query' ][ 'bool' ][ 'should' ] ) ) {
    254             $should_match = & $es_query_args[ 'query' ][ 'function_score' ][ 'query' ][ 'bool' ][ 'should' ];
    255         }
    256 
    257         $search_phrase = $should_match[0][ 'multi_match' ][ 'query' ] ?? '';
     258        $should_match   = & $es_query_args[ 'query' ][ 'function_score' ][ 'query' ][ 'bool' ][ 'should' ];
     259
     260        // The must match is where the base query is present.
     261        $must_match     = & $es_query_args[ 'query' ][ 'function_score' ][ 'query' ][ 'bool' ][ 'must' ];
    258262
    259263        // The function score is where calculations on fields occur.
    260         $function_score = [];
    261         if ( isset( $es_query_args[ 'query' ][ 'function_score' ][ 'functions' ] ) ) {
    262             $function_score = & $es_query_args[ 'query' ][ 'function_score' ][ 'functions' ];
    263         }
     264        $function_score = & $es_query_args[ 'query' ][ 'function_score' ][ 'functions' ];
     265
     266        // Determine what's actually being searched for according to ES.
     267        $search_phrase  = $must_match[0][ 'multi_match' ][ 'query' ] ?? ( $should_match[0][ 'multi_match' ][ 'query' ] ?? '' );
     268
     269        // $phrase_search_mode = ( 'phrase' === $must_match[0][ 'multi_match' ][ 'type' ] );
    264270
    265271        // Set boost on the match query, from jetpack_search_es_wp_query_args.
    266         if ( isset( $es_query_args[ 'query' ][ 'function_score' ][ 'query' ][ 'bool' ][ 'must' ][0][ 'multi_match' ] ) ) {
    267             $es_query_args[ 'query' ][ 'function_score' ][ 'query' ][ 'bool' ][ 'must' ][0][ 'multi_match' ][ 'boost' ] = 0.1;
    268         }
    269 
    270         // This extends the search to additionally search in the title, excerpt, description and plugin_tags.
     272        if ( isset( $must_match[0][ 'multi_match' ] ) ) {
     273            $must_match[0][ 'multi_match' ][ 'boost' ] = 0.1;
     274        }
     275
     276        // This extends the word search to additionally search in the title, excerpt, description and plugin_tags.
     277        // Note: This is not present in phrase searching mode.
    271278        if ( isset( $should_match[0][ 'multi_match' ] ) ) {
    272279            $should_match[0][ 'multi_match' ][ 'boost' ]  = 2;
Note: See TracChangeset for help on using the changeset viewer.