Changeset 6287 for sites/trunk/wordpress.org/public_html/wp-content/plugins/plugin-directory/libs/site-search/jetpack-search.php
- Timestamp:
- 12/19/2017 04:22:37 PM (7 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sites/trunk/wordpress.org/public_html/wp-content/plugins/plugin-directory/libs/site-search/jetpack-search.php
r6197 r6287 50 50 */ 51 51 52 require_once ( __DIR__ . '/class.jetpack-searchresult-posts-iterator.php' );52 require_once __DIR__ . '/class.jetpack-searchresult-posts-iterator.php'; 53 53 54 54 class Jetpack_Search { … … 67 67 protected static $instance; 68 68 69 // Languages with custom analyzers, other languages are supported,69 // Languages with custom analyzers, other languages are supported, 70 70 // but are analyzed with the default analyzer. 71 71 public static $analyzed_langs = array( 'ar', 'bg', 'ca', 'cs', 'da', 'de', 'el', 'en', 'es', 'eu', 'fa', 'fi', 'fr', 'he', 'hi', 'hu', 'hy', 'id', 'it', 'ja', 'ko', 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' ); 72 72 73 const CACHE_GROUP = 'jetpack-search';74 const CACHE_EXPIRY = 300;75 const ERROR_COUNT_KEY = 'error-count-';73 const CACHE_GROUP = 'jetpack-search'; 74 const CACHE_EXPIRY = 300; 75 const ERROR_COUNT_KEY = 'error-count-'; 76 76 const ERROR_COUNT_WINDOW = 60; // seconds 77 77 … … 80 80 } 81 81 82 public function __clone() { wp_die( "Please don't __clone WPCOM_elasticsearch" ); } 83 84 public function __wakeup() { wp_die( "Please don't __wakeup WPCOM_elasticsearch" ); } 82 public function __clone() { 83 wp_die( "Please don't __clone WPCOM_elasticsearch" ); } 84 85 public function __wakeup() { 86 wp_die( "Please don't __wakeup WPCOM_elasticsearch" ); } 85 87 86 88 public static function instance() { 87 89 if ( ! isset( self::$instance ) ) { 88 self::$instance = new Jetpack_Search ;90 self::$instance = new Jetpack_Search(); 89 91 self::$instance->setup(); 90 92 } … … 93 95 94 96 public function setup() { 95 //TODO: only enable if this site is public (otherwise we don't have content) 96 //TODO: check that the module is activated 97 97 // TODO: only enable if this site is public (otherwise we don't have content) 98 // TODO: check that the module is activated 98 99 $this->jetpack_blog_id = Jetpack::get_option( 'id' ); 99 100 … … 105 106 public function set_lang( $lang = false ) { 106 107 if ( ! $lang ) { 107 // TODO: don't think this works for Jetpack108 // TODO: don't think this works for Jetpack 108 109 $blog = get_blog_details( $blog_id ); 109 110 $lang = get_lang_code_by_id( $blog->lang_id ); … … 112 113 } 113 114 114 // ///////////////////////////////////////////////////115 // 115 116 // Lots of hooks 116 117 117 public function init_hooks() { 118 118 // Checks to see if we need to worry about found_posts 119 119 add_filter( 'post_limits_request', array( $this, 'filter__post_limits_request' ), 999, 2 ); 120 120 121 # Note: Advanced Post Cache hooks in at 10 so it's important to hook in before that 122 121 // Note: Advanced Post Cache hooks in at 10 so it's important to hook in before that 123 122 // Force $q['cache_results'] = false; this prevents the un-inflated WP_Post objects from being stored in cache 124 123 add_action( 'pre_get_posts', array( $this, 'action__pre_get_posts' ), 5 ); … … 142 141 143 142 // Debug 144 if ( file_exists( __DIR__ . '/jetpack-search-debug.php' ) ) 145 include_once( __DIR__ . '/jetpack-search-debug.php' ); 143 if ( file_exists( __DIR__ . '/jetpack-search-debug.php' ) ) { 144 include_once __DIR__ . '/jetpack-search-debug.php'; 145 } 146 146 } 147 147 … … 152 152 */ 153 153 public function register_loop_hooks() { 154 add_action( 'loop_start', 155 add_action( 'loop_end', 154 add_action( 'loop_start', array( $this, 'action__loop_start' ) ); 155 add_action( 'loop_end', array( $this, 'action__loop_end' ) ); 156 156 } 157 157 … … 162 162 */ 163 163 public function unregister_loop_hooks() { 164 remove_action( 'the_post', 165 remove_action( 'loop_end', 166 } 167 168 169 // ///////////////////////////////////////////////////////164 remove_action( 'the_post', array( $this, 'action__the_post' ) ); 165 remove_action( 'loop_end', array( $this, 'action__loop_end' ) ); 166 } 167 168 169 // 170 170 // Raw Search Query 171 172 /* 171 /* 173 172 * Return a count of the number of search API errors within the last ERROR_COUNT_WINDOW seconds 174 173 */ 175 174 protected function get_error_volume() { 176 175 // Use a dual-tick window like nonces 177 $tick = ceil( time() / ( self::ERROR_COUNT_WINDOW/2) );178 179 return intval( wp_cache_get( self::ERROR_COUNT_KEY . $tick, self::CACHE_GROUP ) ) 180 + intval( wp_cache_get( self::ERROR_COUNT_KEY . ( $tick -1), self::CACHE_GROUP ) );181 } 182 183 /* 176 $tick = ceil( time() / ( self::ERROR_COUNT_WINDOW / 2 ) ); 177 178 return intval( wp_cache_get( self::ERROR_COUNT_KEY . $tick, self::CACHE_GROUP ) ) 179 + intval( wp_cache_get( self::ERROR_COUNT_KEY . ( $tick - 1 ), self::CACHE_GROUP ) ); 180 } 181 182 /* 184 183 * Increment the recent error volume by $count. 185 184 */ 186 185 protected function increment_error_volume( $count = 1 ) { 187 186 // wp_cache_incr() bails if the key does not exist 188 $tick = ceil( time() / ( self::ERROR_COUNT_WINDOW/2) );187 $tick = ceil( time() / ( self::ERROR_COUNT_WINDOW / 2 ) ); 189 188 wp_cache_add( self::ERROR_COUNT_KEY . $tick, 0, self::CACHE_GROUP, self::ERROR_COUNT_WINDOW ); 190 189 return wp_cache_incr( self::ERROR_COUNT_KEY . $tick, $count, self::CACHE_GROUP ); … … 204 203 // For 20 errors, a 50% chance 205 204 // For 40+ errors, a 0% chance 206 207 205 $threshold = ceil( 10 / ( 1 + pow( $error_volume / 20, 4 ) ) ); 208 206 return mt_rand( 1, 10 ) <= $threshold; … … 213 211 */ 214 212 protected function search_error( $reason ) { 215 trigger_error( 'Plugin directory search: ' .$reason, E_USER_WARNING );213 trigger_error( 'Plugin directory search: ' . $reason, E_USER_WARNING ); 216 214 return $this->increment_error_volume(); 217 215 } … … 224 222 */ 225 223 public function search( $es_args ) { 226 $service_url = 'https://public-api.wordpress.com/rest/v1/sites/' . $this->jetpack_blog_id . '/search';224 $service_url = 'https://public-api.wordpress.com/rest/v1/sites/' . $this->jetpack_blog_id . '/search'; 227 225 $json_es_args = json_encode( $es_args ); 228 $cache_key = md5( $json_es_args );229 $lock_key = 'lock-'.$cache_key;230 226 $cache_key = md5( $json_es_args ); 227 $lock_key = 'lock-' . $cache_key; 228 231 229 $response = wp_cache_get( $cache_key, self::CACHE_GROUP ); 232 230 … … 238 236 if ( $this->error_volume_is_low() ) { 239 237 $request = wp_remote_post( $service_url, array( 240 'headers' => array(238 'headers' => array( 241 239 'Content-Type' => 'application/json', 242 240 ), 243 'timeout' => 10,241 'timeout' => 10, 244 242 'user-agent' => 'WordPress.org/jetpack_search', 245 'body' => $json_es_args,243 'body' => $json_es_args, 246 244 ) ); 247 245 } else { … … 258 256 wp_cache_set( $lock_key, 1, self::CACHE_GROUP, mt_rand( 3, 7 ) ); 259 257 260 if ( is_wp_error( $request ) ) 261 $this->search_error( 'http error '.$request->get_error_message(), E_USER_WARNING ); 262 else 263 $this->search_error( 'http status '.wp_remote_retrieve_response_code( $request ), E_USER_WARNING ); 258 if ( is_wp_error( $request ) ) { 259 $this->search_error( 'http error ' . $request->get_error_message(), E_USER_WARNING ); 260 } else { 261 $this->search_error( 'http status ' . wp_remote_retrieve_response_code( $request ), E_USER_WARNING ); 262 } 264 263 265 264 // If we have a stale cached response, return that. Otherwise, return the error object. 266 if ( $response ) 265 if ( $response ) { 267 266 return $response; // Stale cached response. 267 } 268 268 return $request; // Fresh error object. 269 269 } … … 271 271 $fresh_response = json_decode( wp_remote_retrieve_body( $request ), true ); 272 272 273 if ( ! $fresh_response || isset( $fresh_response['error'] ) ) {273 if ( ! $fresh_response || isset( $fresh_response['error'] ) ) { 274 274 // As above, lock further requests for the same search for a few seconds 275 275 wp_cache_set( $lock_key, 1, self::CACHE_GROUP, mt_rand( 3, 7 ) ); 276 276 277 if ( isset( $fresh_response['error'] ) ) 278 $this->search_error( 'remote error ' .$fresh_response['error'], E_USER_WARNING );279 else277 if ( isset( $fresh_response['error'] ) ) { 278 $this->search_error( 'remote error ' . $fresh_response['error'], E_USER_WARNING ); 279 } else { 280 280 $this->search_error( 'invalid json response', E_USER_WARNING ); 281 } 281 282 282 283 // Return a stale response if we have one 283 if ( $response ) 284 if ( $response ) { 284 285 return $response; 286 } 285 287 return $fresh_response; // Fresh error object as a last resort 286 288 … … 294 296 } else { 295 297 // Stampede protection has kicked in, AND we have no stale cached value to display. That's bad - possibly indicates cache exhaustion 296 if ( false === $response ) 298 if ( false === $response ) { 297 299 trigger_error( 'Plugin directory search: no cached results available during stampede.', E_USER_WARNING ); 300 } 298 301 } 299 302 … … 301 304 } 302 305 303 //TODO: add secured search for posts/comments 304 305 ///////////////////////////////////////////////////////// 306 // TODO: add secured search for posts/comments 307 // 306 308 // Insert the ES results into the Loop when searching 307 309 // 308 309 310 public function filter__post_limits_request( $limits, $query ) { 310 if ( ! $query->is_search() ) 311 if ( ! $query->is_search() ) { 311 312 return $limits; 313 } 312 314 313 315 if ( empty( $limits ) || $query->get( 'no_found_rows' ) ) { … … 321 323 322 324 public function filter__the_posts( $posts, $query ) { 323 if ( ! $query->is_main_query() || ! $query->is_search() ) 325 if ( ! $query->is_main_query() || ! $query->is_search() ) { 324 326 return $posts; 327 } 325 328 326 329 if ( ! is_array( $this->search_result ) || ! isset( $this->search_result['results'] ) ) { … … 340 343 $post = new stdClass(); 341 344 342 $post->ID 343 $post->blog_id 345 $post->ID = $result['fields']['post_id']; 346 $post->blog_id = $result['fields']['blog_id']; 344 347 345 348 // Run through get_post() to add all expected properties (even if they're empty) 346 349 $post = get_post( $post ); 347 350 348 if ( $post ) 351 if ( $post ) { 349 352 $posts[] = $post; 353 } 350 354 } 351 355 … … 359 363 global $wpdb; 360 364 361 if ( ! $query->is_main_query() || ! $query->is_search() ) 365 if ( ! $query->is_main_query() || ! $query->is_search() ) { 362 366 return $sql; 363 364 $page = ( $query->get( 'paged' ) ) ? absint( $query->get( 'paged' ) ) : 1; 367 } 368 369 $page = ( $query->get( 'paged' ) ) ? absint( $query->get( 'paged' ) ) : 1; 365 370 $posts_per_page = $query->get( 'posts_per_page' ); 366 371 367 372 // ES API does not allow more than 15 results at a time 368 if ( $posts_per_page > 200 ) 373 if ( $posts_per_page > 200 ) { 369 374 $posts_per_page = 200; 375 } 370 376 371 377 // ES API does not allow fetching past the 10,000th post 372 378 $page = min( $page, floor( 9999 / $posts_per_page ) ); 373 379 374 $date_cutoff = strftime( '%Y-%m-%d', strtotime( '-8 years' ) );375 $date_today = strftime( '%Y-%m-%d' );376 $version_cutoff = ( defined( 'WP_CORE_STABLE_BRANCH') ? sprintf( '%0.1f', WP_CORE_STABLE_BRANCH - 0.5) : '4.0' );380 $date_cutoff = strftime( '%Y-%m-%d', strtotime( '-8 years' ) ); 381 $date_today = strftime( '%Y-%m-%d' ); 382 $version_cutoff = ( defined( 'WP_CORE_STABLE_BRANCH' ) ? sprintf( '%0.1f', WP_CORE_STABLE_BRANCH - 0.5 ) : '4.0' ); 377 383 378 384 // Start building the WP-style search query args … … 385 391 'order' => $query->get( 'order' ), 386 392 // plugin directory specific: 387 #'date_range'=> array( 'field' => 'modified', 'gte' => $date_cutoff ),388 #'tested_range'=> array( 'field' => 'tested', 'gte' => $version_cutoff ),389 'filters' 393 // 'date_range' => array( 'field' => 'modified', 'gte' => $date_cutoff ), 394 // 'tested_range' => array( 'field' => 'tested', 'gte' => $version_cutoff ), 395 'filters' => array( 390 396 array( 'term' => array( 'disabled' => array( 'value' => false ) ) ), 391 397 ), … … 400 406 401 407 $es_wp_query_args['locale'] = get_locale(); 402 408 403 409 // You can use this filter to modify the search query parameters, such as controlling the post_type. 404 410 // These arguments are in the format for convert_wp_es_to_es_args(), i.e. WP-style. … … 408 414 $es_query_args = $this->convert_wp_es_to_es_args( $es_wp_query_args ); 409 415 410 // Only trust ES to give us IDs, not the content since it is a mirror416 // Only trust ES to give us IDs, not the content since it is a mirror 411 417 $es_query_args['fields'] = array( 412 418 'slug', … … 422 428 'plugin_modified', 423 429 'post_id', 424 'blog_id' 430 'blog_id', 425 431 ); 426 432 427 433 // This filter is harder to use if you're unfamiliar with ES but it allows complete control over the query 428 $es_query_args = apply_filters( 'jetpack_search_es_query_args', $es_query_args, $query );434 $es_query_args = apply_filters( 'jetpack_search_es_query_args', $es_query_args, $query ); 429 435 $this->search_query = $es_query_args; 430 436 … … 447 453 448 454 public function filter__found_posts_query( $sql, $query ) { 449 if ( ! $query->is_main_query() || ! $query->is_search() ) 455 if ( ! $query->is_main_query() || ! $query->is_search() ) { 450 456 return $sql; 457 } 451 458 452 459 return ''; … … 454 461 455 462 public function filter__found_posts( $found_posts, $query ) { 456 if ( ! $query->is_main_query() || ! $query->is_search() ) 463 if ( ! $query->is_main_query() || ! $query->is_search() ) { 457 464 return $found_posts; 465 } 458 466 459 467 return $this->found_posts; … … 462 470 public function action__pre_get_posts( $query ) { 463 471 // Treat an API request for the recommended tab as a search, even though there is no search string in the query 464 if ( defined( 'WPORG_IS_API' ) && WPORG_IS_API && isset( $query->query['browse'] ) && $query->query['browse'] === 'recommended' ) 472 if ( defined( 'WPORG_IS_API' ) && WPORG_IS_API && isset( $query->query['browse'] ) && $query->query['browse'] === 'recommended' ) { 465 473 $query->is_search = true; 466 467 if ( ! $query->is_main_query() || ! $query->is_search() ) 474 } 475 476 if ( ! $query->is_main_query() || ! $query->is_search() ) { 468 477 return; 478 } 469 479 470 480 $query->set( 'cache_results', false ); … … 490 500 491 501 // Restore the original blog, if we're not on it 492 if ( get_current_blog_id() !== $this->original_blog_id ) 502 if ( get_current_blog_id() !== $this->original_blog_id ) { 493 503 switch_to_blog( $this->original_blog_id ); 504 } 494 505 } 495 506 … … 499 510 $post = $this->get_post_by_index( $wp_query->current_post ); 500 511 501 if ( ! $post ) 512 if ( ! $post ) { 502 513 return; 514 } 503 515 504 516 // Do some additional setup that normally happens in setup_postdata(), but gets skipped 505 517 // in this plugin because the posts hadn't yet been inflated. 506 $authordata 507 508 $currentday = mysql2date('d.m.y', $post->post_date, false);509 $currentmonth = mysql2date('m', $post->post_date, false);510 511 $numpages = 1;518 $authordata = get_userdata( $post->post_author ); 519 520 $currentday = mysql2date( 'd.m.y', $post->post_date, false ); 521 $currentmonth = mysql2date( 'm', $post->post_date, false ); 522 523 $numpages = 1; 512 524 $multipage = 0; 513 $page = get_query_var('page');514 if ( ! $page ) 525 $page = get_query_var( 'page' ); 526 if ( ! $page ) { 515 527 $page = 1; 516 if ( is_single() || is_page() || is_feed() ) 528 } 529 if ( is_single() || is_page() || is_feed() ) { 517 530 $more = 1; 531 } 518 532 $content = $post->post_content; 519 533 if ( false !== strpos( $content, '<!--nextpage-->' ) ) { 520 if ( $page > 1 ) 534 if ( $page > 1 ) { 521 535 $more = 1; 536 } 522 537 $content = str_replace( "\n<!--nextpage-->\n", '<!--nextpage-->', $content ); 523 538 $content = str_replace( "\n<!--nextpage-->", '<!--nextpage-->', $content ); 524 539 $content = str_replace( "<!--nextpage-->\n", '<!--nextpage-->', $content ); 525 540 // Ignore nextpage at the beginning of the content. 526 if ( 0 === strpos( $content, '<!--nextpage-->' ) ) 541 if ( 0 === strpos( $content, '<!--nextpage-->' ) ) { 527 542 $content = substr( $content, 15 ); 528 $pages = explode('<!--nextpage-->', $content); 529 $numpages = count($pages); 530 if ( $numpages > 1 ) 543 } 544 $pages = explode( '<!--nextpage-->', $content ); 545 $numpages = count( $pages ); 546 if ( $numpages > 1 ) { 531 547 $multipage = 1; 548 } 532 549 } else { 533 550 $pages = array( $post->post_content ); … … 543 560 544 561 public function get_search_result( $raw = false ) { 545 if ( $raw ) 562 if ( $raw ) { 546 563 return $this->search_result; 564 } 547 565 548 566 return ( ! empty( $this->search_result ) && ! is_wp_error( $this->search_result ) && is_array( $this->search_result ) && ! empty( $this->search_result['results'] ) ) ? $this->search_result['results'] : false; … … 553 571 } 554 572 555 // ///////////////////////////////////////////////573 // 556 574 // Standard Filters Applied to the search query 557 575 // 558 559 576 public function filter__add_date_filter_to_query( $es_wp_query_args, $query ) { 560 577 if ( $query->get( 'year' ) ) { … … 580 597 } 581 598 582 $es_wp_query_args['date_range'] = array( 'field' => 'date', 'gte' => $date_start, 'lte' => $date_end ); 599 $es_wp_query_args['date_range'] = array( 600 'field' => 'date', 601 'gte' => $date_start, 602 'lte' => $date_end, 603 ); 583 604 } 584 605 … … 586 607 } 587 608 588 // ///////////////////////////////////////////////609 // 589 610 // Helpers for manipulating queries 590 611 // 591 592 612 // Someday: Should we just use ES_WP_Query??? 593 594 613 // Converts WP-style args to ES args 595 614 function convert_wp_es_to_es_args( $args ) { … … 608 627 609 628 'date_range' => null, // array( 'field' => 'date', 'gt' => 'YYYY-MM-dd', 'lte' => 'YYYY-MM-dd' ); date formats: 'YYYY-MM-dd' or 'YYYY-MM-dd HH:MM:SS' 610 'tested_range' 611 'filters' 629 'tested_range' => null, 630 'filters' => array(), 612 631 613 632 'orderby' => null, // Defaults to 'relevance' if query is set, otherwise 'date'. Pass an array for multiple orders. … … 637 656 ); 638 657 639 //TODO: limit size to 15 640 658 // TODO: limit size to 15 641 659 // ES "from" arg (offset) 642 660 if ( $args['offset'] ) { … … 646 664 } 647 665 648 if ( ! is_array( $args['author_name'] ) ) {666 if ( ! is_array( $args['author_name'] ) ) { 649 667 $args['author_name'] = array( $args['author_name'] ); 650 668 } … … 652 670 // ES stores usernames, not IDs, so transform 653 671 if ( ! empty( $args['author'] ) ) { 654 if ( ! is_array( $args['author'] ) )672 if ( ! is_array( $args['author'] ) ) { 655 673 $args['author'] = array( $args['author'] ); 674 } 656 675 foreach ( $args['author'] as $author ) { 657 676 $user = get_user_by( 'id', $author ); … … 663 682 } 664 683 665 // ////////////////////////////////////////////////684 // 666 685 // Build the filters from the query elements. 667 686 // Filters rock because they are cached from one query to the next … … 671 690 672 691 if ( $args['post_type'] ) { 673 if ( ! is_array( $args['post_type'] ) )692 if ( ! is_array( $args['post_type'] ) ) { 674 693 $args['post_type'] = array( $args['post_type'] ); 694 } 675 695 $filters[] = array( 'terms' => array( 'post_type' => $args['post_type'] ) ); 676 696 } … … 680 700 } 681 701 682 if ( ! empty( $args['date_range'] ) && isset( $args['date_range']['field'] ) ) {702 if ( ! empty( $args['date_range'] ) && isset( $args['date_range']['field'] ) ) { 683 703 $field = $args['date_range']['field']; 684 704 unset( $args['date_range']['field'] ); … … 686 706 } 687 707 688 if ( ! empty( $args['tested_range'] ) && isset( $args['tested_range']['field'] ) ) {708 if ( ! empty( $args['tested_range'] ) && isset( $args['tested_range']['field'] ) ) { 689 709 $field = $args['tested_range']['field']; 690 710 unset( $args['tested_range']['field'] ); … … 721 741 $locale = $args['locale']; 722 742 723 // Because most plugins don't have any translations we need to743 // Because most plugins don't have any translations we need to 724 744 // correct for the very low scores that locale-specific fields. 725 745 // end up getting. This is caused by the average field length being … … 729 749 // As of 2017-01-23 it looked like we were off by about 10,000x, 730 750 // so rather than 0.1 we use a much smaller multiplier of en content 731 $en_boost = 0.00001;732 $matching_fields = array(751 $en_boost = 0.00001; 752 $matching_fields = array( 733 753 'all_content_' . $locale, 734 'all_content_en^' . $en_boost 735 ); 736 $boost_phrase_fields = array(754 'all_content_en^' . $en_boost, 755 ); 756 $boost_phrase_fields = array( 737 757 'title_' . $locale, 738 758 'excerpt_' . $locale, … … 743 763 'taxonomy.plugin_tags.name', 744 764 ); 745 $boost_ngram_fields = array(765 $boost_ngram_fields = array( 746 766 'title_' . $locale . '.ngram', 747 'title_en.ngram^' . $en_boost 748 ); 749 $boost_title_fields = array(767 'title_en.ngram^' . $en_boost, 768 ); 769 $boost_title_fields = array( 750 770 'title_' . $locale, 751 771 'title_en^' . $en_boost, … … 760 780 ); 761 781 } else { 762 $matching_fields = array(763 'all_content_en' 764 ); 765 $boost_phrase_fields = array(782 $matching_fields = array( 783 'all_content_en', 784 ); 785 $boost_phrase_fields = array( 766 786 'title_en', 767 787 'excerpt_en', … … 769 789 'taxonomy.plugin_tags.name', 770 790 ); 771 $boost_ngram_fields = array(772 'title_en.ngram' 773 ); 774 $boost_title_fields = array(791 $boost_ngram_fields = array( 792 'title_en.ngram', 793 ); 794 $boost_title_fields = array( 775 795 'title_en', 776 796 'slug_text', … … 782 802 ); 783 803 } 784 785 // /////////////////////////////////////////////////////////804 805 // 786 806 // Build the query - potentially extracting more filters 787 // 788 // 789 // 807 // TODO: add auto phrase searching 808 // TODO: add fuzzy searching to correct for spelling mistakes 809 // TODO: boost title, tag, and category matches 790 810 if ( $args['query'] ) { 791 811 $analyzer = Jetpack_Search::get_analyzer_name( $this->blog_lang ); 792 $query = array(812 $query = array( 793 813 'bool' => array( 794 'must' => array(814 'must' => array( 795 815 'multi_match' => array( 796 'query' => $args['query'],797 'fields' => $matching_fields,798 'boost' => 0.1,816 'query' => $args['query'], 817 'fields' => $matching_fields, 818 'boost' => 0.1, 799 819 'operator' => 'and', 800 820 ), … … 805 825 'query' => $args['query'], 806 826 'fields' => $boost_phrase_fields, 807 'type' => 'phrase',808 'boost' => 2827 'type' => 'phrase', 828 'boost' => 2, 809 829 ), 810 830 ), … … 813 833 'query' => $args['query'], 814 834 'fields' => $boost_ngram_fields, 815 'type' => 'phrase',816 'boost' => 0.2835 'type' => 'phrase', 836 'boost' => 0.2, 817 837 ), 818 838 ), … … 821 841 'query' => $args['query'], 822 842 'fields' => $boost_title_fields, 823 'type' => 'best_fields',824 'boost' => 2843 'type' => 'best_fields', 844 'boost' => 2, 825 845 ), 826 846 ), … … 829 849 'query' => $args['query'], 830 850 'fields' => $boost_content_fields, 831 'type' => 'best_fields',832 'boost' => 2851 'type' => 'best_fields', 852 'boost' => 2, 833 853 ), 834 854 ), … … 840 860 'contributors', 841 861 ), 842 'type' => 'best_fields',843 'boost' => 2862 'type' => 'best_fields', 863 'boost' => 2, 844 864 ), 845 865 ), … … 854 874 } 855 875 } else { 856 $query = array( 'match_all' => array() );876 $query = array( 'match_all' => array() ); 857 877 $es_query_args['query'] = Jetpack_Search::score_query_by_recency( $query ); 858 878 if ( ! $args['orderby'] ) { … … 877 897 // todo: add support for sorting by title, num likes, num comments, num views, etc 878 898 switch ( $orderby ) { 879 case 'relevance' 880 // never order by score ascending899 case 'relevance': 900 // never order by score ascending 881 901 $es_query_args['sort'][] = array( '_score' => array( 'order' => 'desc' ) ); 882 902 break; 883 case 'date' 903 case 'date': 884 904 $es_query_args['sort'][] = array( 'date' => array( 'order' => $args['order'] ) ); 885 905 break; 886 case 'ID' 906 case 'ID': 887 907 $es_query_args['sort'][] = array( 'id' => array( 'order' => $args['order'] ) ); 888 908 break; 889 case 'author' 909 case 'author': 890 910 $es_query_args['sort'][] = array( 'author.raw' => array( 'order' => $args['order'] ) ); 891 911 break; 892 912 } 893 913 } 894 if ( empty( $es_query_args['sort'] ) ) 914 if ( empty( $es_query_args['sort'] ) ) { 895 915 unset( $es_query_args['sort'] ); 896 916 } 897 917 898 918 if ( ! empty( $filters ) ) { … … 911 931 } else { 912 932 $split_lang = explode( '-', $lang_code ); 913 if ( in_array( $split_lang[0], Jetpack_Search::$analyzed_langs ) ) 933 if ( in_array( $split_lang[0], Jetpack_Search::$analyzed_langs ) ) { 914 934 $analyzer = $split_lang[0] . '_analyzer'; 935 } 915 936 } 916 937 return $analyzer; 917 938 } 918 939 919 // //////////////////////////////////////////940 // 920 941 // ES Filter Manipulation 921 922 942 /* 923 943 * And an existing filter object with a list of additional filters. … … 925 945 */ 926 946 public static function and_es_filters( $curr_filter, $filters ) { 927 if ( ! is_array( $curr_filter ) || isset( $curr_filter['match_all'] ) ) {928 if ( 1 == count( $filters ) ) 947 if ( ! is_array( $curr_filter ) || isset( $curr_filter['match_all'] ) ) { 948 if ( 1 == count( $filters ) ) { 929 949 return $filters[0]; 950 } 930 951 931 952 return array( 'and' => $filters ); … … 935 956 } 936 957 937 // //////////////////////////////////////////958 // 938 959 // ES Query Manipulation 939 940 960 public static function score_query_by_recency( $query ) { 941 // Newer content gets weighted slightly higher942 $date_scale = '360d';961 // Newer content gets weighted slightly higher 962 $date_scale = '360d'; 943 963 $date_offset = '180d'; 944 $date_decay = 0.5;964 $date_decay = 0.5; 945 965 $date_origin = date( 'Y-m-d' ); 946 966 … … 949 969 'query' => array( 950 970 'function_score' => array( 951 'query' => $query,952 'functions' => array(971 'query' => $query, 972 'functions' => array( 953 973 array( 954 'exp' => array(974 'exp' => array( 955 975 'plugin_modified' => array( 956 976 'origin' => $date_origin, 957 977 'offset' => $date_offset, 958 'scale' => $date_scale,959 'decay' => $date_decay,978 'scale' => $date_scale, 979 'decay' => $date_decay, 960 980 ), 961 981 ), 962 982 ), 963 array( 983 array( 964 984 'exp' => array( 965 985 'tested' => array( 966 986 'origin' => sprintf( '%0.1f', WP_CORE_STABLE_BRANCH ), 967 987 'offset' => 0.1, 968 'scale' => 0.4,969 'decay' => 0.6,988 'scale' => 0.4, 989 'decay' => 0.6, 970 990 ), 971 991 ), … … 973 993 array( 974 994 'field_value_factor' => array( 975 'field' => 'active_installs',976 'factor' => 0.375,995 'field' => 'active_installs', 996 'factor' => 0.375, 977 997 'modifier' => 'log2p', 978 'missing' => 1,998 'missing' => 1, 979 999 ), 980 1000 ), 981 1001 array( 982 // there aren't that many plugins with more than 1 million (6 total)1002 // there aren't that many plugins with more than 1 million (6 total) 983 1003 // we don't need to differentiate them as much 984 1004 'filter' => array( 985 'range' => array( 'active_installs' => array( 986 'lte' => 1000000 987 ) ) 1005 'range' => array( 1006 'active_installs' => array( 1007 'lte' => 1000000, 1008 ), 1009 ), 988 1010 ), 989 'exp' => array(1011 'exp' => array( 990 1012 'active_installs' => array( 991 1013 'origin' => 1000000, 992 1014 'offset' => 0, 993 'scale' => 900000,994 'decay' => 0.75,1015 'scale' => 900000, 1016 'decay' => 0.75, 995 1017 ), 996 1018 ), … … 998 1020 array( 999 1021 'field_value_factor' => array( 1000 'field' => 'support_threads_resolved',1001 'factor' => 0.25,1022 'field' => 'support_threads_resolved', 1023 'factor' => 0.25, 1002 1024 'modifier' => 'log2p', 1003 'missing' => 0.5,1025 'missing' => 0.5, 1004 1026 ), 1005 1027 ), 1006 1028 array( 1007 1029 'field_value_factor' => array( 1008 'field' => 'rating',1009 'factor' => 0.25,1030 'field' => 'rating', 1031 'factor' => 0.25, 1010 1032 'modifier' => 'sqrt', 1011 'missing' => 2.5,1033 'missing' => 2.5, 1012 1034 ), 1013 1035 ), 1014 1036 ), 1015 1037 'boost_mode' => 'multiply', 1016 ) 1038 ), 1017 1039 ), 1018 ) 1040 ), 1019 1041 ); 1020 1042 }
Note: See TracChangeset
for help on using the changeset viewer.