- Timestamp: 04/07/2017 05:20:54 AM (8 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sites/trunk/api.wordpress.org/public_html/events/1.0/index.php
r5275 r5277 150 150 /* 151 151 * Multi-word queries may contain cities, regions, and countries, so try to extract just the city 152 * 153 * This won't work for most ideographic languages, because they don't use the space character as a word 154 * delimiter. That's ok, though, because `guess_ideographic_location_from_geonames()` should cover those 155 * cases. 152 156 */ 153 157 if ( ! $guess && $location_word_count >= 2 ) { … … 207 211 ) ); 208 212 213 if ( ! is_a( $row, 'stdClass' ) && 'ASCII' !== mb_detect_encoding( $location_name ) ) { 214 $row = guess_ideographic_location_from_geonames( $location_name, $country, $timezone ); 215 } 216 209 217 return $row; 218 } 219 220 /** 221 * Look for the given ideographic location in the Geonames database 222 * 223 * This is a fallback for situations where the full-text search in `guess_location_from_geonames()` resulted 224 * in a false-negative. MySQL < 5.7.6 doesn't support full-text searches on ideographic languages, because 225 * it cannot determine where the word boundaries are. 226 * 227 * See https://dev.mysql.com/doc/refman/5.7/en/fulltext-restrictions.html 228 * 229 * @param string $location_name 230 * @param string $country 231 * @param string $timezone 232 * 233 * @return stdClass|null 234 */ 235 function guess_ideographic_location_from_geonames( $location_name, $country, $timezone ) { 236 global $wpdb; 237 238 $ideographic_countries = get_ideographic_counties(); 239 $ideographic_country_placeholders = get_prepare_placeholders( count( $ideographic_countries ), '%s' ); 240 241 /* 242 * The name is wrapped in commas in order to ensure that we're only matching the exact location, which is 243 * delimited by commas. Otherwise, there would be false positives in situations where `$location_name` 244 * appears in other rows, which happens sometimes. 245 * 246 * Because this will only match entries that are prefixed _and_ postfixed with a comma, it will never match the 247 * first and last entries in the column. 
That's ok, though, because the first entry is always an airport code 248 * in English, which will be matched by other functions. The last entry is often ideographic, so it'd be nice 249 * to match it, but this is good enough for now. 250 */ 251 $escaped_location_name = sprintf( '%%,%s,%%', $wpdb->esc_like( $location_name ) ); 252 253 /* 254 * REPLACE() is used because sometimes the `alternatenames` column contains entries where the `asciiname` is 255 * prefixed to an ideographic name; for example: `,Karachi - كراچى,` 256 * 257 * If that prefix is not removed, then the LIKE query will fail in those cases, because 258 * `$escaped_location_name` is wrapped in commas. 259 * 260 * The query is restricted to countries where ideographic languages are common, in order to avoid a full-table 261 * scan. 262 */ 263 $query = " 264 SELECT name, latitude, longitude, country 265 FROM `geoname` 266 WHERE 267 country IN ( $ideographic_country_placeholders ) AND 268 REPLACE( alternatenames, CONCAT( asciiname, ' - ' ), '' ) LIKE %s 269 ORDER BY 270 FIELD( %s, country ) DESC, 271 FIELD( %s, timezone ) DESC, 272 population DESC 273 LIMIT 1"; 274 275 $prepared_query = $wpdb->prepare( 276 $query, 277 array_merge( $ideographic_countries, array( $escaped_location_name, $country, $timezone ) ) 278 ); 279 280 return $wpdb->get_row( $prepared_query ); 281 } 282 283 /** 284 * Get an array of countries where ideographic languages are common 285 * 286 * Derived from https://en.wikipedia.org/wiki/List_of_writing_systems#List_of_writing_scripts_by_adoption 287 * 288 * @todo Some of these individual countries may be able to be removed, to further narrow the rows that need to be 289 * scanned by `guess_ideographic_location_from_geonames()`. Some of the entire categories could possibly be 290 * removed too, but let's err on the side of caution for now. 
291 */ 292 function get_ideographic_counties() { 293 $middle_east = array( 'AE', 'BH', 'CY', 'EG', 'IL', 'IR', 'IQ', 'JO', 'KW', 'LB', 'OM', 'PS', 'QA', 'SA', 'SY', 'TR', 'YE' ); 294 $north_africa = array( 'DZ', 'EH', 'EG', 'LY', 'MA', 'SD', 'SS', 'TN' ); 295 296 $abjad_countries = array_merge( $middle_east, $north_africa, array( 'CN', 'IL', 'IN', 'MY', 'PK' ) ); 297 $abugida_countries = array( 'BD', 'BT', 'ER', 'ET', 'ID', 'IN', 'KH', 'LA', 'LK', 'MV', 'MY', 'MU', 'MM', 'NP', 'PK', 'SG', 'TH' ); 298 $logographic_countries = array( 'CN', 'JP', 'KR', 'MY', 'SG'); 299 300 $all_ideographic_countries = array_merge( $abjad_countries, $abugida_countries, $logographic_countries ); 301 302 return array_unique( $all_ideographic_countries ); 303 } 304 305 /** 306 * Build a string of placeholders to pass to `WPDB::prepare()` 307 * 308 * Sometimes it's convenient to be able to generate placeholders for `prepare()` dynamically. For example, when 309 * looping through a multi-dimensional array where the sub-arrays have distinct counts; or when the total 310 * number of items is too large to conveniently count by hand. 311 * 312 * See https://iandunn.name/2016/03/31/generating-dynamic-placeholders-for-wpdb-prepare/ 313 * 314 * @param int $number The number of placeholders needed 315 * @param string $format An sprintf()-like format accepted by WPDB::prepare() 316 * 317 * @return string 318 */ 319 function get_prepare_placeholders( $number, $format ) { 320 return implode( ', ', array_fill( 0, $number, $format ) ); 210 321 } 211 322
Note: See TracChangeset for help on using the changeset viewer.