- Timestamp:
- 06/06/2017 05:39:24 PM (7 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sites/trunk/api.wordpress.org/public_html/events/1.0/index.php
r5541 r5543 187 187 function guess_location_from_city( $location_name, $timezone, $country_code ) { 188 188 $guess = guess_location_from_geonames( $location_name, $timezone, $country_code ); 189 $location_word_count = str_word_count( $location_name ); 190 $location_name_parts = explode( ' ', $location_name ); 189 190 if ( $guess ) { 191 return $guess; 192 } 191 193 192 194 /* … … 194 196 * 195 197 * This won't work for most ideographic languages, because they don't use the space character as a word 196 * delimiter. That's ok, though, because `guess_ideographic_location_from_geonames()` should cover those 197 * cases. 198 * delimiter. 198 199 */ 200 $location_name_parts = preg_split( '/\s+/u', $location_name ); 201 $location_word_count = count( $location_name_parts ); 202 199 203 if ( ! $guess && $location_word_count >= 2 ) { 200 204 // Catch input like "Portland Maine" 201 $guess = guess_location_from_geonames( $location_name_parts[0], $timezone, $country_code );205 $guess = guess_location_from_geonames( $location_name_parts[0], $timezone, $country_code, $wildcard = false ); 202 206 } 203 207 … … 205 209 // Catch input like "Sao Paulo Brazil" 206 210 $city_name = sprintf( '%s %s', $location_name_parts[0], $location_name_parts[1] ); 207 $guess = guess_location_from_geonames( $city_name, $timezone, $country_code ); 208 } 209 210 // Normalize all errors to boolean false for consistency 211 if ( empty ( $guess ) ) { 212 $guess = false; 211 $guess = guess_location_from_geonames( $city_name, $timezone, $country_code, $wildcard = false ); 213 212 } 214 213 … … 225 224 * @return stdClass|null 226 225 */ 227 function guess_location_from_geonames( $location_name, $timezone, $country ) {226 function guess_location_from_geonames( $location_name, $timezone, $country, $wildcard = true ) { 228 227 global $wpdb; 229 228 // Look for a location that matches the name. … … 231 230 // And we sort by population desc, assuming that the biggest matching location is the most likely one. 
232 231 233 // Strip all quotes from the search query, and then enclose it in double quotes, to force an exact literal search 234 $quoted_location_name = sprintf( 235 '"%s"', 236 strtr( $location_name, [ '"' => '', "'" => '' ] ) 237 ); 238 232 // Exact match 239 233 $row = $wpdb->get_row( $wpdb->prepare( " 240 234 SELECT name, latitude, longitude, country 241 FROM geoname 242 WHERE 243 MATCH( name, asciiname, alternatenames ) 244 AGAINST( %s IN BOOLEAN MODE ) 235 FROM geoname_summary 236 WHERE name = %s 245 237 ORDER BY 246 238 FIELD( %s, country ) DESC, … … 248 240 population DESC 249 241 LIMIT 1", 250 $ quoted_location_name,242 $location_name, 251 243 $country, 252 244 $timezone 253 245 ) ); 254 246 255 if ( ! is_a( $row, 'stdClass' ) && 'ASCII' !== mb_detect_encoding( $location_name ) ) { 256 $row = guess_location_from_geonames_fallback( $location_name, $country, $timezone, 'exact', 'ideographic' ); 257 } 247 // Wildcard match 248 if ( ! $row && $wildcard && 'ASCII' !== mb_detect_encoding( $location_name ) ) { 249 $row = $wpdb->get_row( $wpdb->prepare( " 250 SELECT name, latitude, longitude, country 251 FROM geoname_summary 252 WHERE name LIKE %s 253 ORDER BY 254 FIELD( %s, country ) DESC, 255 FIELD( %s, timezone ) DESC, 256 population DESC 257 LIMIT 1", 258 $location_name . '%', 259 $country, 260 $timezone 261 ) ); 262 } 263 264 // Suffix the "State", good in some countries (western countries) horrible in others 265 // (where geonames data is not as complete, or region names are similar (but not quite the same) to city names) 266 // LEFT JOIN admin1codes ac ON gs.statecode = ac.code 267 // if ( $row->state && $row->state != $row->name && $row->name NOT CONTAINED WITHIN $row->state? ) { 268 // $row->name .= ', ' . 
$row->state; 269 // } 258 270 259 271 return $row; 260 272 } 261 273 262 /**263 * Look for the given location in the Geonames database using a LIKE query264 *265 * This is a fallback for situations where the full-text search in `guess_location_from_geonames()` resulted266 * in a false-negative.267 *268 * One situation where this happens is with queries in ideographic languages, because MySQL < 5.7.6 doesn't269 * support full-text searches for them, because it can't determine where the word boundaries are.270 * See https://dev.mysql.com/doc/refman/5.7/en/fulltext-restrictions.html271 *272 * There are also edge cases where the exact query doesn't exist in the database, but a loose LIKE query will find273 * a similar alternate, like `Osakashi`.274 *275 * @param string $location_name276 * @param string $country277 * @param string $timezone278 * @param string $mode 'exact' to only return exact matches from the database;279 * 'loose' to return any match. This has a high chance of false positives.280 * @param string $restrict_counties 'ideographic' to only search in countries where ideographic languages are common;281 * 'none' to search all countries282 *283 * @return stdClass|null284 */285 function guess_location_from_geonames_fallback( $location_name, $country, $timezone, $mode = 'exact', $restrict_counties = 'ideographic' ) {286 global $wpdb;287 288 $where = $ideographic_countries = $ideographic_country_placeholders = '';289 290 /*291 * The name is wrapped in commas in order to ensure that we're only matching the exact location, which is292 * delimited by commas. Otherwise, there would be false positives in situations where `$location_name`293 * appears in other rows, which happens sometimes.294 *295 * Because this will only match entries that are prefixed _and_ postfixed with a comma, it will never match the296 * first and last entries in the column. 
That's ok, though, because the first entry is often an airport code297 * in English, which is shorter than `ft_min_word_len` anyway. The last entry is often ideographic, so it'd be nice298 * to match it, but this is good enough for now.299 */300 $escaped_location_name = sprintf(301 'loose' === $mode ? '%%%s%%' : '%%,%s,%%',302 $wpdb->esc_like( $location_name )303 );304 305 $prepare_args = array( $escaped_location_name, $country, $timezone );306 307 if ( 'ideographic' == $restrict_counties ) {308 $ideographic_countries = get_ideographic_counties();309 $ideographic_country_placeholders = get_prepare_placeholders( count( $ideographic_countries ), '%s' );310 311 $where .= "country IN ( $ideographic_country_placeholders ) AND";312 313 $prepare_args = array_merge( $ideographic_countries, $prepare_args );314 }315 316 /*317 * REPLACE() is used because sometimes the `alternatenames` column contains entries where the `asciiname` is318 * prefixed to an ideographic name; for example: `,Karachi - كراچى,`319 *320 * If that prefix is not removed, then the LIKE query will fail in those cases, because321 * `$escaped_location_name` is wrapped in commas.322 *323 * The query is restricted to countries where ideographic languages are common, in order to avoid a full-table324 * scan.325 */326 $query = "327 SELECT name, latitude, longitude, country328 FROM `geoname`329 WHERE330 $where331 REPLACE( alternatenames, CONCAT( asciiname, ' - ' ), '' ) LIKE %s332 ORDER BY333 FIELD( %s, country ) DESC,334 FIELD( %s, timezone ) DESC,335 population DESC336 LIMIT 1";337 338 $prepared_query = $wpdb->prepare( $query, $prepare_args );339 340 return $wpdb->get_row( $prepared_query );341 }342 343 /**344 * Get an array of countries where ideographic languages are common345 *346 * Derived from https://en.wikipedia.org/wiki/List_of_writing_systems#List_of_writing_scripts_by_adoption347 *348 * @todo Some of these individual countries may be able to be removed, to further narrow the rows that need to be349 * 
scanned by `guess_ideographic_location_from_geonames()`. Some of the entire categories could possibly be350 * removed too, but let's err on the side of caution for now.351 */352 function get_ideographic_counties() {353 $middle_east = array( 'AE', 'BH', 'CY', 'EG', 'IL', 'IR', 'IQ', 'JO', 'KW', 'LB', 'OM', 'PS', 'QA', 'SA', 'SY', 'TR', 'YE' );354 $north_africa = array( 'DZ', 'EH', 'EG', 'LY', 'MA', 'SD', 'SS', 'TN' );355 356 $abjad_countries = array_merge( $middle_east, $north_africa, array( 'CN', 'IL', 'IN', 'MY', 'PK' ) );357 $abugida_countries = array( 'BD', 'BT', 'ER', 'ET', 'ID', 'IN', 'KH', 'LA', 'LK', 'MV', 'MY', 'MU', 'MM', 'NP', 'PK', 'SG', 'TH' );358 $logographic_countries = array( 'CN', 'JP', 'KR', 'MY', 'SG');359 360 $all_ideographic_countries = array_merge( $abjad_countries, $abugida_countries, $logographic_countries );361 362 return array_unique( $all_ideographic_countries );363 }364 365 /**366 * Build a string of placeholders to pass to `WPDB::prepare()`367 *368 * Sometimes it's convenient to be able to generate placeholders for `prepare()` dynamically. For example, when369 * looping through a multi-dimensional array where the sub-arrays have distinct counts; or when the total370 * number of items is too large to conveniently count by hand.371 *372 * See https://iandunn.name/2016/03/31/generating-dynamic-placeholders-for-wpdb-prepare/373 *374 * @param int $number The number of placeholders needed375 * @param string $format An sprintf()-like format accepted by WPDB::prepare()376 *377 * @return string378 */379 function get_prepare_placeholders( $number, $format ) {380 return implode( ', ', array_fill( 0, $number, $format ) );381 }382 274 383 275 /** … … 580 472 ); 581 473 } 582 }583 }584 585 /*586 * If all else fails, cast a wide net and try to find something before giving up, even587 * if the chance of success if lower than normal. 
Returning false is guaranteed failure, so this improves things588 * even if it only works 10% of the time.589 *590 * This must be done as the very last thing before giving up, because the likelihood of false positives is high.591 */592 if ( ! $location && isset( $args['location_name'] ) ) {593 if ( 'ASCII' === mb_detect_encoding( $args['location_name'] ) ) {594 $guess = guess_location_from_geonames_fallback( $args['location_name'], $country_code, $args['timezone'] ?? '', 'loose', 'none' );595 } else {596 $guess = guess_location_from_geonames_fallback( $args['location_name'], $country_code, $args['timezone'] ?? '', 'loose', 'ideographic' );597 }598 599 if ( $guess ) {600 $location = array(601 'description' => $guess->name,602 'latitude' => $guess->latitude,603 'longitude' => $guess->longitude,604 'country' => $guess->country,605 );606 474 } 607 475 }
Note: See TracChangeset for help on using the changeset viewer.