Making WordPress.org

Changeset 10228


Ignore:
Timestamp:
09/02/2020 04:05:29 AM (5 years ago)
Author:
dd32
Message:

Plugin Directory: Search: Put less importance on the description of block plugins, and filter out results scoring below 1% of the top result's score.

This is an attempt at making the search return only relevant results for block searches. For non-block searches we want pages upon pages of results, but block searches should be more exact.
A search without any highly relevant results will still return weakly related results, and is mostly unaffected by this change.

Testing has been done by re-running common searches and picking out keywords from existing block plugins and seeing what gets returned.

Note: block_name/block_title were removed as they're not being indexed properly in ElasticSearch.

See https://github.com/WordPress/gutenberg/issues/24910.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • sites/trunk/wordpress.org/public_html/wp-content/plugins/plugin-directory/libs/site-search/jetpack-search.php

    r10212 r10228  
    431431
    432432        // Block Search.
    433         if ( !empty( $query->query['block_search'] ) ) {
     433        $is_block_search = !empty( $query->query['block_search'] );
     434        if ( $is_block_search ) {
    434435            $es_wp_query_args['block_search'] = $query->query['block_search'];
    435436
     
    456457        $es_query_args['fields'] = array(
    457458            'slug',
     459            'post_id',
     460            'blog_id',
     461
     462            /*
     463            // For debugging, enabling extra fields to be returned can be helpful.
     464            'title_en',
     465            'excerpt_en',
    458466            'support_threads_resolved',
    459467            'support_threads_percentage',
     
    466474            'rating',
    467475            'plugin_modified',
    468             'post_id',
    469             'blog_id',
     476            // */
    470477        );
    471 
    472478        // This filter is harder to use if you're unfamiliar with ES but it allows complete control over the query
    473479        $es_query_args      = apply_filters( 'jetpack_search_es_query_args', $es_query_args, $query );
     
    480486            $this->found_posts = 0;
    481487            return '';
     488        }
     489
     490        // Try filtering out super irrelevant lower 1% results from searches for Blocks.
     491        if ( $is_block_search && 1 === $page ) {
     492            // Include the 99%.
     493            $cutoff = 0.01 * $this->search_result['results']['max_score'];
     494
     495            foreach ( $this->search_result['results']['hits'] as $i => $result ) {
     496                if ( $result['_score'] < $cutoff ) {
     497                    unset( $this->search_result['results']['hits'][ $i ] );
     498
     499                    // If we've removed an entry, pretend that this is all we've got.
     500                    $this->search_result['results']['total'] = count( $this->search_result['results'] );
     501                }
     502            }
     503
    482504        }
    483505
     
    779801        $is_block_search = ! empty( $args['block_search'] );
    780802
     803        // How much weighting to put on the Description field.
     804        // Blocks get a much lower value here, as it's more title/excerpt (short description) based.
     805        $desc_boost = $is_block_search ? 0.05 : 1;
     806
    781807        if ( $args['locale'] && $args['locale'] !== 'en' && substr( $args['locale'], 0, 3 ) !== 'en_' ) {
    782808            $locale = $args['locale'];
     
    791817            // so rather than 0.1 we use a much smaller multiplier of en content
    792818            $en_boost             = 0.00001;
     819            $desc_en_boost        = $desc_boost * $en_boost;
     820
    793821            $matching_fields      = array(
    794822                'all_content_' . $locale,
     
    798826                'title_' . $locale,
    799827                'excerpt_' . $locale,
    800                 'description_' . $locale,
     828                'description_' . $locale . '^' . $desc_boost,
    801829                'title_en^' . $en_boost,
    802830                'excerpt_en^' . $en_boost,
    803                 'description_en^' . $en_boost,
     831                'description_en^' . $desc_en_boost,
    804832                'taxonomy.plugin_tags.name',
    805833            );
    806             if ( $is_block_search ) {
    807                 $boost_phrase_fields[] = 'block_title_' . $locale;
    808                 $boost_phrase_fields[] = 'block_title_en^' . $en_boost;
    809             }
    810834            $boost_ngram_fields   = array(
    811835                'title_' . $locale . '.ngram',
     
    817841                'slug_text',
    818842            );
    819             if ( $is_block_search ) {
    820                 $boost_title_fields[] = 'block_title_' . $locale;
    821                 $boost_title_fields[] = 'block_title_en^' . $en_boost;
    822                 $boost_title_fields[] = 'block_name';
    823             }
    824843            $boost_content_fields = array(
    825844                'excerpt_' . $locale,
    826                 'description_' . $locale,
     845                'description_' . $locale . '^' . $desc_boost,
    827846                'excerpt_en^' . $en_boost,
    828                 'description_en^' . $en_boost,
     847                'description_en^' . $desc_en_boost,
    829848                'taxonomy.plugin_tags.name',
    830849            );
     
    836855                'title_en',
    837856                'excerpt_en',
    838                 'description_en',
     857                'description_en^' . $desc_boost,
    839858                'taxonomy.plugin_tags.name',
    840859            );
    841             if ( $is_block_search ) {
    842                 $boost_phrase_fields[] = 'block_title_en';
    843             }
    844860            $boost_ngram_fields   = array(
    845861                'title_en.ngram',
     
    849865                'slug_text',
    850866            );
    851             if ( $is_block_search ) {
    852                 $boost_title_fields[] = 'block_title_en';
    853                 $boost_title_fields[] = 'block_name';
    854             }
    855867            $boost_content_fields = array(
    856868                'excerpt_en',
    857                 'description_en',
     869                'description_en^' . $desc_boost,
    858870                'taxonomy.plugin_tags.name',
    859871            );
    860872        }
    861873
    862         //
    863874        // Build the query - potentially extracting more filters
    864875        // TODO: add auto phrase searching
Note: See TracChangeset for help on using the changeset viewer.