Ticket #2823: 2823.diff
File 2823.diff, 33.2 KB (added by , 6 years ago) |
---|
-
1.0/index.php
function send_response( $response, $ttl 147 147 148 148 echo wp_json_encode( $response ); 149 149 } 150 150 151 151 /** 152 152 * Guess the location based on a city inside the given input 153 153 * 154 154 * @param string $location_name 155 155 * @param string $timezone 156 156 * @param string $country_code 157 157 * 158 158 * @return false|object false on failure; an object on success 159 159 */ 160 160 function guess_location_from_city( $location_name, $timezone, $country_code ) { 161 161 $guess = guess_location_from_geonames( $location_name, $timezone, $country_code ); 162 $location_word_count = str_word_count( $location_name ); 163 $location_name_parts = explode( ' ', $location_name ); 162 if ( $guess ) { 163 return $guess; 164 } 164 165 165 166 /* 166 167 * Multi-word queries may contain cities, regions, and countries, so try to extract just the city 167 168 * 168 169 * This won't work for most ideographic languages, because they don't use the space character as a word 169 * delimiter. That's ok, though, because `guess_ideographic_location_from_geonames()` should cover those 170 * cases. 170 * delimiter. 171 171 */ 172 $location_name_parts = preg_split( '/\s+/u', $location_name ); 173 $location_word_count = count( $location_name_parts ); 174 172 175 if ( ! $guess && $location_word_count >= 2 ) { 173 176 // Catch input like "Portland Maine" 174 $guess = guess_location_from_geonames( $location_name_parts[0], $timezone, $country_code );177 $guess = guess_location_from_geonames( $location_name_parts[0], $timezone, $country_code, $wildcard = false ); 175 178 } 176 179 177 180 if ( ! $guess && $location_word_count >= 3 ) { 178 181 // Catch input like "Sao Paulo Brazil" 179 182 $city_name = sprintf( '%s %s', $location_name_parts[0], $location_name_parts[1] ); 180 $guess = guess_location_from_geonames( $city_name, $timezone, $country_code ); 181 } 182 183 // Normalize all errors to boolean false for consistency 184 if ( empty ( $guess ) ) { 185 $guess = false; 183 $guess = guess_location_from_geonames( $city_name, $timezone, $country_code, $wildcard = false ); 186 184 } 187 185 188 186 return $guess; 189 187 } 190 188 191 189 /** 192 190 * Look for the given location in the Geonames database 193 191 * 194 192 * @param string $location_name 195 193 * @param string $timezone 196 194 * @param string $country 197 195 * 198 196 * @return stdClass|null 199 197 */ 200 function guess_location_from_geonames( $location_name, $timezone, $country ) {198 function guess_location_from_geonames( $location_name, $timezone, $country, $wildcard = true ) { 201 199 global $wpdb; 202 200 // Look for a location that matches the name. 203 201 // The FIELD() orderings give preference to rows that match the country and/or timezone, without excluding rows that don't match. 204 202 // And we sort by population desc, assuming that the biggest matching location is the most likely one. 205 203 206 // Strip all quotes from the search query, and then enclose it in double quotes, to force an exact literal search 207 $quoted_location_name = sprintf( 208 '"%s"', 209 strtr( $location_name, [ '"' => '', "'" => '' ] ) 210 ); 211 204 // Exact match 212 205 $row = $wpdb->get_row( $wpdb->prepare( " 213 206 SELECT name, latitude, longitude, country 214 FROM geoname 207 FROM geoname_summary 215 208 WHERE 216 MATCH( name, asciiname, alternatenames ) 217 AGAINST( %s IN BOOLEAN MODE ) 209 name = %s 218 210 ORDER BY 219 211 FIELD( %s, country ) DESC, 220 212 FIELD( %s, timezone ) DESC, 221 213 population DESC 222 214 LIMIT 1", 223 $ quoted_location_name,215 $location_name, 224 216 $country, 225 217 $timezone 226 218 ) ); 227 219 228 if ( ! is_a( $row, 'stdClass' ) && 'ASCII' !== mb_detect_encoding( $location_name ) ) { 229 $row = guess_location_from_geonames_fallback( $location_name, $country, $timezone, 'exact', 'ideographic' ); 230 } 220 // Wildcard match 221 if ( ! $row && $wildcard && 'ASCII' !== mb_detect_encoding( $location_name ) ) { 222 $row = $wpdb->get_row( $wpdb->prepare( " 223 SELECT name, latitude, longitude, country 224 FROM geoname_summary 225 WHERE 226 name LIKE %s 227 ORDER BY 228 FIELD( %s, country ) DESC, 229 FIELD( %s, timezone ) DESC, 230 population DESC 231 LIMIT 1", 232 $location_name . '%', 233 $country, 234 $timezone 235 ) ); 236 } 237 238 // Suffix the "State", good in some countries (western countries) horrible in others (where geonames data is not as complete, or region names are similar (but not quite the same) to city names) 239 // LEFT JOIN admin1codes ac ON gs.statecode = ac.code 240 // if ( $row->state && $row->state != $row->name && $row->name NOT CONTAINED WITHIN $row->state? ) { 241 // $row->name .= ', ' . $row->state; 242 // } 231 243 232 244 return $row; 233 245 } 234 246 235 247 /** 236 * Look for the given location in the Geonames database using a LIKE query237 *238 * This is a fallback for situations where the full-text search in `guess_location_from_geonames()` resulted239 * in a false-negative.240 *241 * One situation where this happens is with queries in ideographic languages, because MySQL < 5.7.6 doesn't242 * support full-text searches for them, because it can't determine where the word boundaries are.243 * See https://dev.mysql.com/doc/refman/5.7/en/fulltext-restrictions.html244 *245 * There are also edge cases where the exact query doesn't exist in the database, but a loose LIKE query will find246 * a similar alternate, like `Osakashi`.247 *248 * @param string $location_name249 * @param string $country250 * @param string $timezone251 * @param string $mode 'exact' to only return exact matches from the database;252 * 'loose' to return any match. This has a high chance of false positives.253 * @param string $restrict_counties 'ideographic' to only search in countries where ideographic languages are common;254 * 'none' to search all countries255 *256 * @return stdClass|null257 */258 function guess_location_from_geonames_fallback( $location_name, $country, $timezone, $mode = 'exact', $restrict_counties = 'ideographic' ) {259 global $wpdb;260 261 $where = $ideographic_countries = $ideographic_country_placeholders = '';262 263 /*264 * The name is wrapped in commas in order to ensure that we're only matching the exact location, which is265 * delimited by commas. Otherwise, there would be false positives in situations where `$location_name`266 * appears in other rows, which happens sometimes.267 *268 * Because this will only match entries that are prefixed _and_ postfixed with a comma, it will never match the269 * first and last entries in the column. That's ok, though, because the first entry is often an airport code270 * in English, which is shorter than `ft_min_word_len` anyway. The last entry is often ideographic, so it'd be nice271 * to match it, but this is good enough for now.272 */273 $escaped_location_name = sprintf(274 'loose' === $mode ? '%%%s%%' : '%%,%s,%%',275 $wpdb->esc_like( $location_name )276 );277 278 $prepare_args = array( $escaped_location_name, $country, $timezone );279 280 if ( 'ideographic' == $restrict_counties ) {281 $ideographic_countries = get_ideographic_counties();282 $ideographic_country_placeholders = get_prepare_placeholders( count( $ideographic_countries ), '%s' );283 284 $where .= "country IN ( $ideographic_country_placeholders ) AND";285 286 $prepare_args = array_merge( $ideographic_countries, $prepare_args );287 }288 289 /*290 * REPLACE() is used because sometimes the `alternatenames` column contains entries where the `asciiname` is291 * prefixed to an ideographic name; for example: `,Karachi - كراچى,`292 *293 * If that prefix is not removed, then the LIKE query will fail in those cases, because294 * `$escaped_location_name` is wrapped in commas.295 *296 * The query is restricted to countries where ideographic languages are common, in order to avoid a full-table297 * scan.298 */299 $query = "300 SELECT name, latitude, longitude, country301 FROM `geoname`302 WHERE303 $where304 REPLACE( alternatenames, CONCAT( asciiname, ' - ' ), '' ) LIKE %s305 ORDER BY306 FIELD( %s, country ) DESC,307 FIELD( %s, timezone ) DESC,308 population DESC309 LIMIT 1";310 311 $prepared_query = $wpdb->prepare( $query, $prepare_args );312 313 return $wpdb->get_row( $prepared_query );314 }315 316 /**317 * Get an array of countries where ideographic languages are common318 *319 * Derived from https://en.wikipedia.org/wiki/List_of_writing_systems#List_of_writing_scripts_by_adoption320 *321 * @todo Some of these individual countries may be able to be removed, to further narrow the rows that need to be322 * scanned by `guess_ideographic_location_from_geonames()`. Some of the entire categories could possibly be323 * removed too, but let's err on the side of caution for now.324 */325 function get_ideographic_counties() {326 $middle_east = array( 'AE', 'BH', 'CY', 'EG', 'IL', 'IR', 'IQ', 'JO', 'KW', 'LB', 'OM', 'PS', 'QA', 'SA', 'SY', 'TR', 'YE' );327 $north_africa = array( 'DZ', 'EH', 'EG', 'LY', 'MA', 'SD', 'SS', 'TN' );328 329 $abjad_countries = array_merge( $middle_east, $north_africa, array( 'CN', 'IL', 'IN', 'MY', 'PK' ) );330 $abugida_countries = array( 'BD', 'BT', 'ER', 'ET', 'ID', 'IN', 'KH', 'LA', 'LK', 'MV', 'MY', 'MU', 'MM', 'NP', 'PK', 'SG', 'TH' );331 $logographic_countries = array( 'CN', 'JP', 'KR', 'MY', 'SG');332 333 $all_ideographic_countries = array_merge( $abjad_countries, $abugida_countries, $logographic_countries );334 335 return array_unique( $all_ideographic_countries );336 }337 338 /**339 * Build a string of placeholders to pass to `WPDB::prepare()`340 *341 * Sometimes it's convenient to be able to generate placeholders for `prepare()` dynamically. For example, when342 * looping through a multi-dimensional array where the sub-arrays have distinct counts; or when the total343 * number of items is too large to conveniently count by hand.344 *345 * See https://iandunn.name/2016/03/31/generating-dynamic-placeholders-for-wpdb-prepare/346 *347 * @param int $number The number of placeholders needed348 * @param string $format An sprintf()-like format accepted by WPDB::prepare()349 *350 * @return string351 */352 function get_prepare_placeholders( $number, $format ) {353 return implode( ', ', array_fill( 0, $number, $format ) );354 }355 356 /**357 248 * Determine a location for the given IPv4 address 358 249 * 359 250 * NOTE: The location that is found here cannot be returned to the client. 360 251 * See `rebuild_location_from_geonames()`. 361 252 * 362 253 * @todo - Add support for IPv6 addresses. Otherwise, this will quickly lose effectiveness. As of March 2017, IPv6 363 254 * adoption is at 16% globally and rising relatively fast. Some countries are as high as 30%. 364 255 * See https://www.google.com/intl/en/ipv6/statistics.html#tab=ipv6-adoption for current stats. 365 256 * 366 257 * @todo - Core sends anonymized IPs like `2a03:2880:2110:df07::`, so make sure those work when implementing IPv6 367 258 * 368 259 * @param string $dotted_ip 369 260 * 370 261 * @return null|object `null` on failure; an object on success 371 262 */ 372 263 function guess_location_from_ip( $dotted_ip ) { 373 264 global $wpdb; 374 265 375 266 $long_ip = ip2long( $dotted_ip ); 376 if ( $long_ip === false ) 377 return; 267 if ( $long_ip === false ) { 268 return false; 269 } 378 270 379 271 $row = $wpdb->get_row( $wpdb->prepare( " 380 272 SELECT ip_city, ip_latitude, ip_longitude, country_short 381 273 FROM ip2location 382 274 WHERE ip_to >= %d 383 275 ORDER BY ip_to ASC 384 276 LIMIT 1", 385 277 $long_ip 386 278 ) ); 387 279 388 280 // Unknown location: 389 281 if ( ! $row || '-' == $row->country_short ) { 390 return ;282 return false; 391 283 } 392 284 393 285 return $row; 394 286 } 395 287 396 288 /** 397 289 * Rebuild the location given to the client from the event source data 398 290 * 399 291 * We cannot publicly expose location data that we retrieve from the `ip2location` database, because that would 400 292 * violate their licensing terms. We can only use the information internally, for the purposes of completing the 401 293 * program's business logic (determining nearby events). 402 294 * 403 295 * Once we have nearby events, though, we can take advantage of the data that's available in the `wporg_events` table. 404 296 * That table contains the locations details for the event's venue, which was sourced from the respective APIs 405 297 * (WordCamp.org, Meetup.com, etc). We can return the venue's location data without violating any terms. … … function get_location( $args = array() ) 447 339 if ( isset( $args['country'] ) ) { 448 340 $location = array( 449 341 'country' => $args['country'], 450 342 ); 451 343 } 452 344 453 345 $country_code = get_country_code_from_locale( $args['locale'] ?? '' ); 454 346 455 347 // Coordinates provided 456 348 if ( 457 349 ! $location && ( 458 350 ! empty( $args['latitude'] ) && is_numeric( $args['latitude'] ) && 459 351 ! empty( $args['longitude'] ) && is_numeric( $args['longitude'] ) 460 352 ) 461 353 ) { 462 $city = get_city_from_coordinates( $args['latitude'], $args['longitude'] );463 464 354 $location = array( 465 'description' => $city ? $city : "{$args['latitude']}, {$args['longitude']}",355 'description' => false, 466 356 'latitude' => $args['latitude'], 467 357 'longitude' => $args['longitude'] 468 358 ); 469 359 } 470 360 471 361 // City was provided by the user: 472 362 if ( ! $location && isset( $args['location_name'] ) ) { 473 363 $guess = guess_location_from_city( $args['location_name'], $args['timezone'] ?? '', $country_code ); 474 364 475 365 if ( $guess ) { 476 366 $location = array( 477 367 'description' => $guess->name, 478 368 'latitude' => $guess->latitude, 479 369 'longitude' => $guess->longitude, 480 370 'country' => $guess->country, 481 371 ); 482 372 } else { 483 373 $guess = guess_location_from_country( $args['location_name'] ); 484 374 485 if ( ! $location &&$guess ) {375 if ( $guess ) { 486 376 $location = array( 487 377 'country' => $guess['country_short'], 488 378 'description' => $guess['country_long'], 489 379 ); 490 380 } 491 381 } 492 382 } 493 383 494 /*495 * If all else fails, cast a wide net and try to find something before giving up, even496 * if the chance of success if lower than normal. Returning false is guaranteed failure, so this improves things497 * even if it only works 10% of the time.498 *499 * This must be done as the very last thing before giving up, because the likelihood of false positives is high.500 */501 if ( ! $location && isset( $args['location_name'] ) ) {502 if ( 'ASCII' === mb_detect_encoding( $args['location_name'] ) ) {503 $guess = guess_location_from_geonames_fallback( $args['location_name'], $country_code, $args['timezone'] ?? '', 'loose', 'none' );504 } else {505 $guess = guess_location_from_geonames_fallback( $args['location_name'], $country_code, $args['timezone'] ?? '', 'loose', 'ideographic' );506 }507 508 if ( $guess ) {509 $location = array(510 'description' => $guess->name,511 'latitude' => $guess->latitude,512 'longitude' => $guess->longitude,513 'country' => $guess->country,514 );515 }516 }517 518 384 if ( ! $location ) { 519 385 if ( isset( $args['location_name'] ) || isset( $args['ip'] ) || ! empty( $args['latitude'] ) || ! empty( $args['longitude'] ) ) { 520 386 // If any of these are specified, and no localitity was guessed based on the above checks, bail with no location. 521 387 $location = false; 522 388 } else { 523 389 // No specific location details. 524 390 $location = array(); 525 391 } 526 392 } 527 393 528 394 // IP: 529 395 if ( ! $location && isset( $args['ip'] ) && ! isset( $args['location_name'] ) ) { 530 396 $guess = guess_location_from_ip( $args['ip'] ); 531 397 532 398 if ( $guess ) { … … function get_country_code_from_locale( $ 572 438 * 573 439 * This isn't perfect because some of the country names in the database are in a format that regular 574 440 * people wouldn't type -- e.g., "Venezuela, Bolvarian Republic Of" -- but this will still match a 575 441 * majority of them. 576 442 * 577 443 * Currently, this only works with English names because that's the only data we have. 578 444 * 579 445 * @param string $location_name 580 446 * 581 447 * @return false|array false on failure; an array with country details on success 582 448 */ 583 449 function guess_location_from_country( $location_name ) { 584 450 // Check if they entered only the country name, e.g. "Germany" or "New Zealand" 585 451 $country = get_country_from_name( $location_name ); 586 452 $location_word_count = str_word_count( $location_name ); 587 $location_name_parts = explode( ' ', $location_name ); 588 $valid_country_codes = get_valid_country_codes(); 453 $location_name_parts = preg_split( '/\s+/u', $location_name ); 589 454 590 455 /* 591 456 * Multi-word queries may contain cities, regions, and countries, so try to extract just the country 592 457 */ 593 458 if ( ! $country && $location_word_count >= 2 ) { 594 459 // Catch input like "Vancouver Canada" 595 460 $country_id = $location_name_parts[ $location_word_count - 1 ]; 596 461 $country = get_country_from_name( $country_id ); 597 462 } 598 463 599 464 if ( ! $country && $location_word_count >= 3 ) { 600 465 // Catch input like "Santiago De Los Caballeros, Dominican Republic" 601 466 $country_name = sprintf( 602 467 '%s %s', 603 468 $location_name_parts[ $location_word_count - 2 ], … … function guess_location_from_country( $l 609 474 if ( ! $country && $location_word_count >= 4 ) { 610 475 // Catch input like "Kaga-Bandoro, Central African Republic" 611 476 $country_name = sprintf( 612 477 '%s %s %s', 613 478 $location_name_parts[ $location_word_count - 3 ], 614 479 $location_name_parts[ $location_word_count - 2 ], 615 480 $location_name_parts[ $location_word_count - 1 ] 616 481 ); 617 482 $country = get_country_from_name( $country_name ); 618 483 } 619 484 620 485 return $country; 621 486 } 622 487 623 488 /** 624 * Get a list of valid country codes625 *626 * @return array627 */628 function get_valid_country_codes() {629 global $wpdb;630 631 return $wpdb->get_col( "SELECT DISTINCT country FROM geoname" );632 }633 634 /**635 489 * Get the country that corresponds to the given country name 636 490 * 637 491 * @param string $country_name 638 492 * 639 493 * @return false|array false on failure; an array with country details on success 640 494 */ 641 495 function get_country_from_name( $country_name ) { 642 496 global $wpdb; 643 497 644 $country = $wpdb->get_row( $wpdb->prepare( " 645 SELECT country_short, country_long 646 FROM ip2location 647 WHERE 648 country_long = %s OR 649 country_short = %s 650 LIMIT 1", 651 $country_name, 652 $country_name 653 ), 'ARRAY_A' ); 654 655 // Convert all errors to boolean false for consistency 656 if ( empty( $country ) ) { 657 $country = false; 498 $field = 'name'; 499 if ( strlen( $country_name ) == 2 ) { 500 $field = 'country'; 658 501 } 659 502 660 return $country; 661 } 662 663 /** 664 * Get the name of the city that's closest to the given coordinates 665 * 666 * @todo - This can probably be optimized by SELECT'ing from a derived table of the closest rows, instead of the 667 * entire table, similar to the technique described at 668 * http://www.techfounder.net/2009/02/02/selecting-closest-values-in-mysql/ 669 * There's only 140k rows in the table, though, so this is performant for now. 670 * 671 * NOTE: If this causes any performance issues, it's possible that it could be removed entirely. The Core client 672 * saves the location locally, so it could display that instead of using this. However, there were some 673 * edge cases early in development that caused us to add this. I don't remember what they were, though, and 674 * didn't properly document them in r5128. So, if we ever want to attempt removing this, we'll need to test 675 * for unintended side effects. The Core client would need to be updated to display the saved location, so 676 * removing this would probably require creating a new version of the endpoint, and leaving this version for 677 * older installs. 678 * 679 * @param float $latitude 680 * @param float $longitude 681 * 682 * @return false|string 683 */ 684 function get_city_from_coordinates( $latitude, $longitude ) { 685 global $wpdb; 686 687 $results = $wpdb->get_col( $wpdb->prepare( " 503 return $wpdb->get_row( $wpdb->prepare( " 688 504 SELECT 689 name, 690 ABS( %f - latitude ) AS latitude_distance, 691 ABS( %f - longitude ) AS longitude_distance 692 FROM geoname 693 HAVING 694 latitude_distance < 0.3 AND -- 0.3 degrees is about 30 miles 695 longitude_distance < 0.3 696 ORDER by latitude_distance ASC, longitude_distance ASC 505 country as country_short, 506 name as country_long 507 FROM countrycodes 508 WHERE 509 $field = %s 697 510 LIMIT 1", 698 $latitude, 699 $longitude 700 ) ); 701 702 return isset( $results[0] ) ? $results[0] : false; 511 $country_name 512 ), 'ARRAY_A' ); 703 513 } 704 514 705 515 function get_events( $args = array() ) { 706 516 global $wpdb, $cache_life, $cache_group; 707 517 708 518 // Sort to ensure consistent cache keys. 709 519 ksort( $args ); 710 520 711 521 // number should be between 0 and 100, with a default of 10. 712 522 $args['number'] = $args['number'] ?? 10; 713 523 $args['number'] = max( 0, min( $args['number'], 100 ) ); 714 524 715 525 $cache_key = 'events:' . md5( serialize( $args ) ); 716 526 if ( false !== ( $data = wp_cache_get( $cache_key, $cache_group ) ) ) { 717 527 return $data; -
1.0/tests/test-index.php
1 1 <?php 2 2 3 3 namespace Dotorg\API\Events; 4 4 5 5 if ( 'cli' !== php_sapi_name() ) { 6 6 die(); 7 7 } 8 8 9 // For query time stats 10 define( 'SAVEQUERIES', true ); 11 9 12 /** 10 13 * Main entry point 11 14 */ 12 15 function run_tests() { 16 global $wpdb; 13 17 define( 'RUNNING_TESTS', true ); 14 18 require_once( dirname( __DIR__ ) . '/index.php' ); 15 19 16 20 $failed = 0; 17 21 $failed += test_get_location(); 18 $failed += test_get_city_from_coordinates();19 22 23 printf( 24 "\nTook %f seconds for %d queries (%f/q)", 25 $sum = array_sum( array_column( $wpdb->queries, 1 ) ), 26 $count = count( $wpdb->queries ), 27 $sum/$count 28 ); 20 29 printf( "\n\nFinished running all tests. %d failed.\n", $failed ); 30 21 31 } 22 32 23 33 /** 24 34 * Output the results of an individual test 25 35 * 26 36 * @param int $case_id 27 37 * @param bool $passed 28 38 * @param mixed $expected_result 29 39 * @param mixed $actual_result 30 40 */ 31 41 function output_results( $case_id, $passed, $expected_result, $actual_result ) { 32 42 printf( 33 43 "\n* %s: %s", 34 44 $case_id, 35 45 $passed ? 'PASSED' : '_FAILED_' … … function get_location_test_cases() { 131 141 /* 132 142 * The country name, locale, and timezone are given 133 143 */ 134 144 'country-exonym-1-word' => array( 135 145 'input' => array( 136 146 'location_name' => 'Indonesia', 137 147 'locale' => 'id_ID', 138 148 'timezone' => 'Asia/Jakarta', 139 149 ), 140 150 'expected' => array( 141 151 'country' => 'ID', 142 152 'description' => 'indonesia', 143 153 ), 144 154 ), 145 155 146 /*147 * This is matching a city inside the country before it the country searches run, but that's ok since it's148 * good enough for our use cases149 */150 156 'country-exonym-2-words' => array( 151 157 'input' => array( 152 158 'location_name' => 'Bosnia and Herzegovina', 153 159 'locale' => 'bs_BA', 154 160 'timezone' => 'Europe/Sarajevo', 155 161 ), 156 162 'expected' => array( 157 'description' => 'pale', 158 'latitude' => '43.817', 159 'longitude' => '18.569', 160 'country' => 'BA' 163 'country' => 'BA', 164 'description' => 'bosnia and herzegovina', 161 165 ), 162 166 ), 163 167 164 168 165 169 /* 166 170 * A location couldn't be found 167 171 */ 168 172 'city-invalid-private-ip' => array( 169 173 'input' => array( 170 174 'location_name' => 'Rivendell', 171 175 'ip' => '127.0.0.1' 172 176 ), 173 177 'expected' => false, 174 178 ), 175 179 … … function get_location_test_cases() { 265 269 'expected' => array( 266 270 'description' => 'sydney', 267 271 'latitude' => '-33.868', 268 272 'longitude' => '151.207', 269 273 'country' => 'AU', 270 274 ), 271 275 ), 272 276 273 277 'city-south-america' => array( 274 278 'input' => array( 275 279 'location_name' => 'Sao Paulo', 276 280 'locale' => 'pt_BR', 277 281 'timezone' => 'America/Sao_Paulo', 278 282 ), 279 283 'expected' => array( 280 'description' => 's ão paulo',284 'description' => 'sao paulo', 281 285 'latitude' => '-23.548', 282 286 'longitude' => '-46.636', 283 287 'country' => 'BR', 284 288 ), 285 289 ), 286 290 287 291 // Users will often type them without the dash, bypassing an exact match 288 292 'city-with-dashes-in-formal-name' => array( 289 293 'input' => array( 290 294 'location_name' => 'Osakashi', 291 295 'locale' => 'ja', 292 296 'timezone' => 'Asia/Tokyo', 293 297 ), 294 298 'expected' => array( 295 'description' => 'osaka ',299 'description' => 'osakashi', 296 300 'latitude' => '34.694', 297 301 'longitude' => '135.502', 298 302 'country' => 'JP', 299 303 ), 300 304 ), 301 305 302 306 // If a location is provided, the fallback search should be attempted before an IP search 303 307 'fallback-with-public-ip' => array( 304 308 'input' => array( 305 309 'location_name' => 'Osakashi', 306 310 'locale' => 'ja', 307 311 'timezone' => 'Asia/Tokyo', 308 312 'ip' => '153.163.68.148', // Tokyo 309 313 ), 310 314 'expected' => array( 311 'description' => 'osaka ',315 'description' => 'osakashi', 312 316 'latitude' => '34.694', 313 317 'longitude' => '135.502', 314 318 'country' => 'JP', 315 319 ), 316 320 ), 317 321 318 322 'city-with-apostrophe-in-formal-name' => array( 319 323 'input' => array( 320 324 'location_name' => "Coeur d'Alene", 321 325 'locale' => 'en_US', 322 326 'timezone' => 'America/Los_Angeles', 323 327 ), 324 328 'expected' => array( 325 329 'description' => "coeur d'alene", 326 330 'latitude' => '47.678', … … function get_location_test_cases() { 343 347 'expected' => array( 344 348 'description' => "doña ana", 345 349 'latitude' => '32.390', 346 350 'longitude' => '-106.814', 347 351 'country' => 'US', 348 352 ), 349 353 ), 350 354 351 355 'city-with-diacritics-in-formal-name-but-not-in-query' => array( 352 356 'input' => array( 353 357 'location_name' => "Dona Ana", 354 358 'locale' => 'en_US', 355 359 'timezone' => 'America/Denver', 356 360 ), 357 361 'expected' => array( 358 'description' => "do ña ana",362 'description' => "dona ana", 359 363 'latitude' => '32.390', 360 364 'longitude' => '-106.814', 361 365 'country' => 'US', 362 366 ), 363 367 ), 364 368 365 369 'city-with-period-in-query' => array( 366 370 'input' => array( 367 371 'location_name' => "St. Louis", 368 372 'locale' => 'en_US', 369 373 'timezone' => 'America/Chicago', 370 374 ), 371 375 'expected' => array( 372 376 'description' => "st. louis", 373 377 'latitude' => '38.627', 374 378 'longitude' => '-90.198', 375 379 'country' => 'US', 376 380 ), 377 381 ), 378 382 379 383 'city-with-period-in-formal-name-but-not-in-query' => array( 380 384 'input' => array( 381 385 'location_name' => "St Louis", 382 386 'locale' => 'en_US', 383 387 'timezone' => 'America/Chicago', 384 388 ), 385 389 'expected' => array( 386 'description' => "st .louis",390 'description' => "st louis", 387 391 'latitude' => '38.627', 388 392 'longitude' => '-90.198', 389 393 'country' => 'US', 390 394 ), 391 395 ), 392 396 393 397 /* 394 398 * The city endonym, locale, and timezone are given 395 399 * 396 400 * @todo 397 401 * This is currently failling. A query from PHP shows row id 2220957 has "Yaound?" instead of 398 402 * "Yaoundé", but it's correct in the database itself. 399 403 */ 400 404 'city-endonym-accents-africa' => array( 401 405 'input' => array( … … function get_location_test_cases() { 406 410 'expected' => array( 407 411 'description' => 'yaoundé', 408 412 'latitude' => '3.867', 409 413 'longitude' => '11.517', 410 414 'country' => 'CM', 411 415 ), 412 416 ), 413 417 414 418 'city-endonym-non-latin-africa' => array( 415 419 'input' => array( 416 420 'location_name' => 'አዲስ አበ', 417 421 'locale' => 'am', 418 422 'timezone' => 'Africa/Addis_Ababa', 419 423 ), 420 424 'expected' => array( 421 'description' => ' addis ababa',425 'description' => 'አዲስ አበባ', 422 426 'latitude' => '9.025', 423 427 'longitude' => '38.747', 424 428 'country' => 'ET', 425 429 ), 426 430 ), 427 431 428 432 'city-endonym-ideographic-asia1' => array( 429 433 'input' => array( 430 434 'location_name' => '白浜町宇佐崎南', 431 435 'locale' => 'ja', 432 436 'timezone' => 'Asia/Tokyo', 433 437 ), 434 438 'expected' => array( 435 'description' => ' shirahamachō-usazakiminami',439 'description' => '白浜町宇佐崎南', 436 440 'latitude' => '34.783', 437 441 'longitude' => '134.717', 438 442 'country' => 'JP', 439 443 ), 440 444 ), 441 445 442 446 'city-endonym-ideographic-asia2' => array( 443 447 'input' => array( 444 448 'location_name' => 'تهران', 445 449 'locale' => 'fa_IR', 446 450 'timezone' => 'Asia/Tehran', 447 451 ), 448 452 'expected' => array( 449 'description' => ' tehran',453 'description' => 'تهران', 450 454 'latitude' => '35.694', 451 455 'longitude' => '51.422', 452 456 'country' => 'IR', 453 457 ), 454 458 ), 455 459 456 460 'city-endonym-ideographic-asia3' => array( 457 461 'input' => array( 458 462 'location_name' => 'كراچى', 459 463 'locale' => 'ur', 460 464 'timezone' => 'Asia/Karachi', 461 465 ), 462 466 'expected' => array( 463 'description' => ' karachi',467 'description' => 'كراچى', 464 468 'latitude' => '24.906', 465 469 'longitude' => '67.082', 466 470 'country' => 'PK', 467 471 ), 468 472 ), 469 473 470 474 'city-endonym-ideographic-asia4' => array( 471 475 'input' => array( 472 476 'location_name' => '京都', 473 477 'locale' => 'ja', 474 478 'timezone' => 'Asia/Tokyo', 475 479 ), 476 480 'expected' => array( 477 'description' => ' kyoto',481 'description' => '京都', 478 482 'latitude' => '35.021', 479 483 'longitude' => '135.754', 480 484 'country' => 'JP', 481 485 ), 482 486 ), 483 487 484 488 'city-endonym-ideographic-asia5' => array( 485 489 'input' => array( 486 490 'location_name' => '東京', 487 491 'locale' => 'ja', 488 492 'timezone' => 'Asia/Tokyo', 489 493 ), 490 494 'expected' => array( 491 'description' => ' tokyo',495 'description' => '東京', 492 496 'latitude' => '35.690', 493 497 'longitude' => '139.692', 494 498 'country' => 'JP', 495 499 ), 496 500 ), 497 501 498 502 // The database only has 大阪市 ("Osaka-shi"), not 大阪 ("Osaka"), so an exact match will for 大阪 will fail 499 503 'city-endonym-ideographic-municipal-unit-asia' => array( 500 504 'input' => array( 501 505 'location_name' => '大阪', 502 506 'locale' => 'ja', 503 507 'timezone' => 'Asia/Tokyo', 504 508 ), 505 509 'expected' => array( 506 'description' => ' osaka',510 'description' => '大阪市', 507 511 'latitude' => '34.694', 508 512 'longitude' => '135.502', 509 513 'country' => 'JP', 510 514 ), 511 515 ), 512 516 513 517 'city-endonym-europe' => array( 514 518 'input' => array( 515 519 'location_name' => 'Wien', 516 520 'locale' => 'de_DE', 517 521 'timezone' => 'Europe/Berlin', 518 522 ), 519 523 'expected' => array( 520 'description' => ' vienna',524 'description' => 'wien', 521 525 'latitude' => '48.208', 522 526 'longitude' => '16.372', 523 527 'country' => 'AT', 524 528 ), 525 529 ), 526 530 527 531 'city-endonym-europe2' => array( 528 532 'input' => array( 529 533 'location_name' => 'Москва', 530 534 'locale' => 'ru_RU', 531 535 'timezone' => 'Europe/Moscow', 532 536 ), 533 537 'expected' => array( 534 'description' => ' moscow',538 'description' => 'Москва', 535 539 'latitude' => '55.752', 536 540 'longitude' => '37.616', 537 541 'country' => 'RU', 538 542 ), 539 543 ), 540 544 541 545 'city-endonym-accents-north-america' => array( 542 546 'input' => array( 543 547 'location_name' => 'Ciudad de México', 544 548 'locale' => 'en_MX', 545 549 'timezone' => 'America/Mexico_City', 546 550 ), 547 551 'expected' => array( 548 'description' => ' mexico city',552 'description' => 'ciudad de méxico', 549 553 'latitude' => '19.428', 550 554 'longitude' => '-99.128', 551 555 'country' => 'MX', 552 556 ), 553 557 ), 554 558 555 559 'city-endonym-accents-oceania' => array( 556 560 'input' => array( 557 561 'location_name' => 'Hagåtña', 558 562 'locale' => 'en_US', 559 563 'timezone' => 'Pacific/Guam', 560 564 ), 561 565 'expected' => array( 562 566 'description' => 'hagåtña', 563 567 'latitude' => '13.476', … … function get_location_test_cases() { 674 678 ), 675 679 ), 676 680 677 681 /* 678 682 * Coordinates should take precedence over IP addresses 679 683 */ 680 684 'coordinates-over-ip-us' => array( 681 685 'input' => array( 682 686 'latitude' => '47.6062100', 683 687 'longitude' => '-122.3320700', 684 688 'ip' => '192.0.70.251', // San Francisco, USA 685 689 'timezone' => 'America/Los_Angeles', 686 690 'locale' => 'en_US', 687 691 ), 688 692 'expected' => array( 689 'description' => 'seattle',693 'description' => false, 690 694 'latitude' => '47.606', 691 695 'longitude' => '-122.332', 692 696 ), 693 697 ), 694 698 695 699 'coordinates-over-ip-africa' => array( 696 700 'input' => array( 697 701 'latitude' => '-19.634233', 698 702 'longitude' => '17.331767', 699 703 'ip' => '41.190.96.5', // Tsumeb, Namibia 700 704 'timezone' => 'Africa/Windhoek', 701 705 'locale' => 'af', 702 706 ), 703 707 'expected' => array( 704 'description' => 'otavi',708 'description' => false, 705 709 'latitude' => '-19.634', 706 710 'longitude' => '17.332', 707 711 ), 708 712 ), 709 713 710 714 /* 711 715 * Only the IP is given 712 716 */ 713 717 'ip-africa' => array( 714 718 'input' => array( 'ip' => '41.191.232.22' ), 715 719 'expected' => array( 716 720 'description' => 'harare', 717 721 'latitude' => '-17.829', 718 722 'longitude' => '31.054', 719 723 'country' => 'ZW', … … function get_location_test_cases() { 768 772 'ip-south-america' => array( 769 773 'input' => array( 'ip' => '181.66.32.136' ), 770 774 'expected' => array( 771 775 'description' => 'lima', 772 776 'latitude' => '-12.043', 773 777 'longitude' => '-77.028', 774 778 'country' => 'PE', 775 779 'internal' => true, 776 780 ), 777 781 ), 778 782 ); 779 783 780 784 return $cases; 781 785 } 782 786 783 /**784 * Test `get_city_from_coordinates()`785 *786 * @todo This can probably be refactored along with test_get_location() into a more abstract/DRY general-purpose787 * test runner.788 *789 * @return bool The number of failures790 */791 function test_get_city_from_coordinates() {792 $failed = 0;793 $cases = get_city_from_coordinates_test_cases();794 795 printf( "\n\nRunning %d city from coordinate tests\n", count( $cases ) );796 797 foreach ( $cases as $case_id => $case ) {798 $case['input'] = add_cachebusting_parameter( $case['input'] );799 $actual_result = get_city_from_coordinates( $case['input']['latitude'], $case['input']['longitude'] );800 $passed = $case['expected'] === $actual_result;801 802 output_results( $case_id, $passed, $case['expected'], $actual_result );803 804 if ( ! $passed ) {805 $failed++;806 }807 }808 809 return $failed;810 }811 812 /**813 * Get the cases for testing `get_city_from_coordinates()`814 *815 * @return array816 */817 function get_city_from_coordinates_test_cases() {818 $cases = array(819 'lower-latitude-higher-longitude' => array(820 'input' => array(821 'latitude' => '60.199',822 'longitude' => '24.660'823 ),824 'expected' => 'Espoo',825 ),826 827 'higher-latitude-lower-longitude' => array(828 'input' => array(829 'latitude' => '22.000',830 'longitude' => '95.900'831 ),832 'expected' => 'Mandalay',833 ),834 835 'middle-of-no-and-where' => array(836 'input' => array(837 'latitude' => '-23.121',838 'longitude' => '125.071'839 ),840 'expected' => false,841 ),842 );843 844 return $cases;845 }846 847 787 run_tests();