Making WordPress.org

Ticket #2823: 2823.2.diff

File 2823.2.diff, 30.3 KB (added by dd32, 6 years ago)

Approach using Google Geocode to power user lookups instead of attempting to roll our own geocode service - Google does it better, and doesn't receive any personal user details with a server-side request

  • 1.0/index.php

     
    11<?php
    22
    33namespace Dotorg\API\Events;
    4 use stdClass;
    54
    65/**
    76 * Main entry point
    87 */
    98function main() {
    10         global $cache_group, $cache_life;
     9        global $cache_group, $cache_life, $cache_life_google_geolocate;
    1110
    1211        bootstrap();
    1312        wp_cache_init();
    1413
    1514        // The test suite just needs the functions defined and doesn't want any headers or output
    1615        if ( defined( 'RUNNING_TESTS' ) && RUNNING_TESTS ) {
    1716                return;
    1817        }
    1918
    2019        $cache_group   = 'events';
    2120        $cache_life    = 12 * 60 * 60;
     21        $cache_life_google_geolocate = $cache_life * 5;
    2222        $ttl           = 12 * 60 * 60; // Time the client should cache the document.
    2323        $location_args = parse_request();
    2424        $location      = get_location( $location_args );
    25         $response      = build_response( $location, $location_args );
     25
     26        // Increase the client cache time when no location can be detected.
     27        // This avoids increasing load on w.org when it's unlikely the data will change often.
     28        if ( ! $location ) {
     29                $ttl = 3 * 24 * 60 * 60;
     30        }
     31
     32        $response      = build_response( $location );
    2633
    2734        send_response( $response, $ttl );
    2835}
    2936
    3037/**
    3138 * Include dependencies
    3239 */
    3340function bootstrap() {
    3441        $base_dir = dirname( dirname(__DIR__ ) );
    3542
    3643        require( $base_dir . '/init.php' );
    3744        require( $base_dir . '/includes/hyperdb/bb-10-hyper-db.php' );
    3845        include( $base_dir . '/includes/object-cache.php' );
    3946        include( $base_dir . '/includes/wp-json-encode.php' );
    4047}
    function bootstrap() { 
    4552 * @return array
    4653 */
    4754function parse_request() {
    4855        $location_args = array();
    4956
    5057        // If a precise location is known, use a GET request. The values here should come from the `location` key of the result of a POST request.
    5158        if ( isset( $_GET['latitude'] ) ) {
    5259                $location_args['latitude'] = $_GET['latitude'];
    5360                $location_args['longitude'] = $_GET['longitude'];
    5461        }
    5562
    5663        if ( isset( $_GET['country'] ) ) {
    5764                $location_args['country'] = $_GET['country'];
    5865        }
    5966
    60         // If a precise location is not known, create a POST request with a bunch of data which can be used to determine a precise location for future GET requests.
    61         if ( isset( $_POST['location_data'] ) ) {
    62                 $location_args = $_POST['location_data'];
    63         }
    64 
    65         // Simplified parameters for lookup by location (city) name, with optional timezone and locale params for extra context.
     67        // Simplified parameter for lookup by location (city) name
    6668        if ( isset( $_REQUEST['location'] ) ) {
    6769                $location_args['location_name'] = trim( $_REQUEST['location'] );
    68                 $location_args['location_name'] = str_replace( ',', '', $location_args['location_name'] );
    69         }
    70 
    71         if ( isset( $_REQUEST['timezone'] ) ) {
    72                 $location_args['timezone'] = $_REQUEST['timezone'];
    73         }
    74 
    75         if ( isset( $_REQUEST['locale'] ) ) {
    76                 $location_args['locale'] = $_REQUEST['locale'];
    7770        }
    7871
    7972        if ( isset( $_REQUEST['ip'] ) ) {
    8073                /*
    8174                 * In local development environments, the IP sent by the Events widget will typically be
    8275                 * private. In those cases, we can use the web server's IP address, which is the user's
    8376                 * actual public address.
    8477                 */
    8578                $public_ip = filter_var(
    8679                    $_REQUEST['ip'],
    8780                    FILTER_VALIDATE_IP,
    8881                    FILTER_FLAG_IPV4 | FILTER_FLAG_IPV6 | FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
    8982                );
    9083
    9184                $location_args['ip'] = $public_ip ? $public_ip : $_SERVER['REMOTE_ADDR'];
    9285        }
    9386
    9487        return $location_args;
    9588}
    9689
    9790/**
    9891 * Build the API's response to the client's request
    9992 *
    10093 * @param array $location
    101  * @param array $location_args
    10294 *
    10395 * @return array
    10496 */
    105 function build_response( $location, $location_args ) {
    106         $events = array();
    107 
    108         if ( $location ) {
     97function build_response( $location ) {
     98        if ( false === $location ) {
     99                // No location was determined for the request. Bail with an error.
     100                $events = array();
     101                $error = 'no_location_available';
     102        } else {
    109103                $event_args = array();
    110104
    111105                if ( isset( $_REQUEST['number'] ) ) {
    112106                        $event_args['number'] = $_REQUEST['number'];
    113107                }
    114108
    115109                if ( ! empty( $location['latitude'] ) ) {
    116110                        $event_args['nearby'] = array(
    117                                 'latitude' => $location['latitude'],
     111                                'latitude'  => $location['latitude'],
    118112                                'longitude' => $location['longitude'],
    119113                        );
    120114                }
    121115
    122116                if ( ! empty( $location['country'] ) ) {
    123117                        $event_args['country'] = $location['country'];
    124118                }
    125119
    126120                $events = get_events( $event_args );
    127121
    128                 /*
    129                  * There are two conditions which can cause a location to not have a description:
    130                  * 1) When the request only passed latitude/longitude coordinates. We don't look up
    131                  *    a location here because it's too expensive. See r5497.
    132                  * 2) When the location was determined by geolocating the IP. We don't return the
    133                  *    location here because it would violate the ip2location EULA. See r5491.
    134                  *
    135                  * For WP 4.8-beta1 those conditions were handled by setting "fuzzy" locations
    136                  * instead; the location of the first upcoming event was used, since it will be
    137                  * within driving distance of the location that was geolocated.
    138                  *
    139                  * After beta1 was released, though, there was a lot of feedback about the locations
    140                  * being too inaccurate, so we're going to try a different approach for beta2. See
    141                  * #40702-core.
    142                  *
    143                  * @todo Update the user-agent strings if 40702-geoip.2.diff doesn't make it into beta2
    144                  * @todo Remove this back-compat code after beta2 has been out for a few days
    145                  */
    146                 $use_fuzzy_locations = false !== strpos( $_SERVER['HTTP_USER_AGENT'], '4.7' ) || false !== strpos( $_SERVER['HTTP_USER_AGENT'], '4.8-beta1' );
    147                 if ( $use_fuzzy_locations ) {
    148                         if ( empty( $location['description'] ) || ( isset( $location['internal'] ) && $location['internal'] ) ) {
    149                                 $location = rebuild_location_from_event_source( $events );
    150                         }
    151                 } elseif ( isset( $location['internal'] ) && $location['internal'] ) {
    152                         // Let the client know that a location was successfully determined based on their IP
    153                         $location = array( 'ip' => $location_args['ip'] );
     122                if ( isset( $location['internal'] ) && $location['internal'] ) {
     123                        $location = rebuild_location_from_event_source( $events );
    154124                }
    155         } else {
    156                 $error = 'no_location_available';
    157125        }
    158126
    159127        return compact( 'error', 'location', 'events', 'ttl' );
    160128}
    161129
    162130/**
    163131 * Send the API's response to the client's request
    164132 *
    165133 * @param array $response
    166134 * @param int   $ttl
    167135 */
    168136function send_response( $response, $ttl ) {
    169137        header( 'Expires: ' . gmdate( 'r', time() + $ttl ) );
    170138        header( 'Access-Control-Allow-Origin: *' );
    171139        header( 'Content-Type: application/json; charset=UTF-8' );
    172140
    173141        echo wp_json_encode( $response );
    174142}
    175143
    176144/**
    177  * Guess the location based on a city inside the given input
    178  *
    179  * @param string $location_name
    180  * @param string $timezone
    181  * @param string $country_code
    182  *
    183  * @return false|object false on failure; an object on success
    184  */
    185 function guess_location_from_city( $location_name, $timezone, $country_code ) {
    186         $guess = guess_location_from_geonames( $location_name, $timezone, $country_code );
    187         $location_word_count = str_word_count( $location_name );
    188         $location_name_parts = explode( ' ', $location_name );
    189 
    190         /*
    191          * Multi-word queries may contain cities, regions, and countries, so try to extract just the city
    192          *
    193          * This won't work for most ideographic languages, because they don't use the space character as a word
    194          * delimiter. That's ok, though, because `guess_ideographic_location_from_geonames()` should cover those
    195          * cases.
    196          */
    197         if ( ! $guess && $location_word_count >= 2 ) {
    198                 // Catch input like "Portland Maine"
    199                 $guess = guess_location_from_geonames( $location_name_parts[0], $timezone, $country_code );
    200         }
    201 
    202         if ( ! $guess && $location_word_count >= 3 ) {
    203                 // Catch input like "Sao Paulo Brazil"
    204                 $city_name = sprintf( '%s %s', $location_name_parts[0], $location_name_parts[1] );
    205                 $guess     = guess_location_from_geonames( $city_name, $timezone, $country_code );
    206         }
    207 
    208         // Normalize all errors to boolean false for consistency
    209         if ( empty ( $guess ) ) {
    210                 $guess = false;
    211         }
    212 
    213         return $guess;
    214 }
    215 
    216 /**
    217  * Look for the given location in the Geonames database
    218  *
    219  * @param string $location_name
    220  * @param string $timezone
    221  * @param string $country
    222  *
    223  * @return stdClass|null
    224  */
    225 function guess_location_from_geonames( $location_name, $timezone, $country ) {
    226         global $wpdb;
    227         // Look for a location that matches the name.
    228         // The FIELD() orderings give preference to rows that match the country and/or timezone, without excluding rows that don't match.
    229         // And we sort by population desc, assuming that the biggest matching location is the most likely one.
    230 
    231         // Strip all quotes from the search query, and then enclose it in double quotes, to force an exact literal search
    232         $quoted_location_name = sprintf(
    233                 '"%s"',
    234                 strtr( $location_name, [ '"' => '', "'" => '' ] )
    235         );
    236 
    237         $row = $wpdb->get_row( $wpdb->prepare( "
    238                 SELECT name, latitude, longitude, country
    239                 FROM geoname
    240                 WHERE
    241                         MATCH( name, asciiname, alternatenames )
    242                         AGAINST( %s IN BOOLEAN MODE )
    243                 ORDER BY
    244                         FIELD( %s, country  ) DESC,
    245                         FIELD( %s, timezone ) DESC,
    246                         population DESC
    247                 LIMIT 1",
    248                 $quoted_location_name,
    249                 $country,
    250                 $timezone
    251         ) );
    252 
    253         if ( ! is_a( $row, 'stdClass' ) && 'ASCII' !== mb_detect_encoding( $location_name ) ) {
    254                 $row = guess_location_from_geonames_fallback( $location_name, $country, $timezone, 'exact', 'ideographic' );
    255         }
    256 
    257         return $row;
    258 }
    259 
    260 /**
    261  * Look for the given location in the Geonames database using a LIKE query
    262  *
    263  * This is a fallback for situations where the full-text search in `guess_location_from_geonames()` resulted
    264  * in a false-negative.
    265  *
    266  * One situation where this happens is with queries in ideographic languages, because MySQL < 5.7.6 doesn't
    267  * support full-text searches for them, because it can't determine where the word boundaries are.
    268  * See https://dev.mysql.com/doc/refman/5.7/en/fulltext-restrictions.html
    269  *
    270  * There are also edge cases where the exact query doesn't exist in the database, but a loose LIKE query will find
    271  * a similar alternate, like `Osakashi`.
    272  *
    273  * @param string $location_name
    274  * @param string $country
    275  * @param string $timezone
    276  * @param string $mode          'exact' to only return exact matches from the database;
    277  *                              'loose' to return any match. This has a high chance of false positives.
    278  * @param string $restrict_counties 'ideographic' to only search in countries where ideographic languages are common;
    279  *                                  'none' to search all countries
    280  *
    281  * @return stdClass|null
    282  */
    283 function guess_location_from_geonames_fallback( $location_name, $country, $timezone, $mode = 'exact', $restrict_counties = 'ideographic' ) {
    284         global $wpdb;
    285 
    286         $where = $ideographic_countries = $ideographic_country_placeholders = '';
    287 
    288         /*
    289          * The name is wrapped in commas in order to ensure that we're only matching the exact location, which is
    290          * delimited by commas. Otherwise, there would be false positives in situations where `$location_name`
    291          * appears in other rows, which happens sometimes.
    292          *
    293          * Because this will only match entries that are prefixed _and_ postfixed with a comma, it will never match the
    294          * first and last entries in the column. That's ok, though, because the first entry is often an airport code
    295          * in English, which is shorter than `ft_min_word_len` anyway. The last entry is often ideographic, so it'd be nice
    296          * to match it, but this is good enough for now.
    297          */
    298         $escaped_location_name = sprintf(
    299                 'loose' === $mode ? '%%%s%%' : '%%,%s,%%',
    300                 $wpdb->esc_like( $location_name )
    301         );
    302 
    303         $prepare_args = array( $escaped_location_name, $country, $timezone );
    304 
    305         if ( 'ideographic' == $restrict_counties ) {
    306                 $ideographic_countries            = get_ideographic_counties();
    307                 $ideographic_country_placeholders = get_prepare_placeholders( count( $ideographic_countries ), '%s' );
    308 
    309                 $where .= "country IN ( $ideographic_country_placeholders ) AND";
    310 
    311                 $prepare_args = array_merge( $ideographic_countries, $prepare_args );
    312         }
    313 
    314         /*
    315          * REPLACE() is used because sometimes the `alternatenames` column contains entries where the `asciiname` is
    316          * prefixed to an ideographic name; for example: `,Karachi - كراچى,`
    317          *
    318          * If that prefix is not removed, then the LIKE query will fail in those cases, because
    319          * `$escaped_location_name` is wrapped in commas.
    320          *
    321          * The query is restricted to countries where ideographic languages are common, in order to avoid a full-table
    322          * scan.
    323          */
    324         $query = "
    325                 SELECT name, latitude, longitude, country
    326                 FROM `geoname`
    327                 WHERE
    328                         $where
    329                         REPLACE( alternatenames, CONCAT( asciiname, ' - ' ), '' ) LIKE %s
    330                 ORDER BY
    331                         FIELD( %s, country  ) DESC,
    332                         FIELD( %s, timezone ) DESC,
    333                         population DESC
    334                 LIMIT 1";
    335 
    336         $prepared_query = $wpdb->prepare( $query, $prepare_args );
    337 
    338         return $wpdb->get_row( $prepared_query );
    339 }
    340 
    341 /**
    342  * Get an array of countries where ideographic languages are common
    343  *
    344  * Derived from https://en.wikipedia.org/wiki/List_of_writing_systems#List_of_writing_scripts_by_adoption
    345  *
    346  * @todo Some of these individual countries may be able to be removed, to further narrow the rows that need to be
    347  *       scanned by `guess_ideographic_location_from_geonames()`. Some of the entire categories could possibly be
    348  *       removed too, but let's err on the side of caution for now.
    349  */
    350 function get_ideographic_counties() {
    351         $middle_east  = array( 'AE', 'BH', 'CY', 'EG', 'IL', 'IR', 'IQ', 'JO', 'KW', 'LB', 'OM', 'PS', 'QA', 'SA', 'SY', 'TR', 'YE' );
    352         $north_africa = array( 'DZ', 'EH', 'EG', 'LY', 'MA', 'SD', 'SS', 'TN' );
    353 
    354         $abjad_countries       = array_merge( $middle_east, $north_africa, array( 'CN', 'IL', 'IN', 'MY', 'PK' ) );
    355         $abugida_countries     = array( 'BD', 'BT', 'ER', 'ET', 'ID', 'IN', 'KH', 'LA', 'LK', 'MV', 'MY', 'MU', 'MM', 'NP', 'PK', 'SG', 'TH' );
    356         $logographic_countries = array( 'CN', 'JP', 'KR', 'MY', 'SG');
    357 
    358         $all_ideographic_countries = array_merge( $abjad_countries, $abugida_countries, $logographic_countries );
    359 
    360         return array_unique( $all_ideographic_countries );
    361 }
    362 
    363 /**
    364  * Build a string of placeholders to pass to `WPDB::prepare()`
    365  *
    366  * Sometimes it's convenient to be able to generate placeholders for `prepare()` dynamically. For example, when
    367  * looping through a multi-dimensional array where the sub-arrays have distinct counts; or when the total
    368  * number of items is too large to conveniently count by hand.
    369  *
    370  * See https://iandunn.name/2016/03/31/generating-dynamic-placeholders-for-wpdb-prepare/
    371  *
    372  * @param int    $number The number of placeholders needed
    373  * @param string $format An sprintf()-like format accepted by WPDB::prepare()
    374  *
    375  * @return string
    376  */
    377 function get_prepare_placeholders( $number, $format ) {
    378         return implode( ', ', array_fill( 0, $number, $format ) );
    379 }
    380 
    381 /**
    382145 * Determine a location for the given IPv4 address
    383146 *
    384147 * NOTE: The location that is found here cannot be returned to the client.
    385148 *       See `rebuild_location_from_event_source()`.
    386149 *
    387150 * @todo - Add support for IPv6 addresses. Otherwise, this will quickly lose effectiveness. As of March 2017, IPv6
    388151 *         adoption is at 16% globally and rising relatively fast. Some countries are as high as 30%.
    389152 *         See https://www.google.com/intl/en/ipv6/statistics.html#tab=ipv6-adoption for current stats.
    390153 *
    391154 * @todo - Core sends anonymized IPs like `2a03:2880:2110:df07::`, so make sure those work when implementing IPv6
    392155 *
    393156 * @param string $dotted_ip
    394157 *
    395158 * @return null|object `null` on failure; an object on success
    396159 */
    397160function guess_location_from_ip( $dotted_ip ) {
    398161        global $wpdb;
    399162
    400163        $long_ip = ip2long( $dotted_ip );
    401         if ( $long_ip === false )
    402                 return;
     164        if ( $long_ip === false ) {
     165                return false;
     166        }
    403167
    404         $row = $wpdb->get_row( $wpdb->prepare( "
    405                 SELECT ip_city, ip_latitude, ip_longitude, country_short
     168        $row = $wpdb->get_row( $sql = $wpdb->prepare( "
     169                SELECT ip_latitude, ip_longitude, country_short
    406170                FROM ip2location
    407171                WHERE ip_to >= %d
    408172                ORDER BY ip_to ASC
    409173                LIMIT 1",
    410174                $long_ip
    411175        ) );
    412176
    413177        // Unknown location:
    414178        if ( ! $row || '-' == $row->country_short ) {
    415                 return;
     179                return false;
    416180        }
    417181
    418         return $row;
     182        return array(
     183                'description' => false,
     184                'latitude'    => round( $row->ip_latitude,  2 ),
     185                'longitude'   => round( $row->ip_longitude, 2 ),
     186                'internal'    => true,
     187        );
     188
    419189}
    420190
    421191/**
    422192 * Rebuild the location given to the client from the event source data
    423193 *
    424194 * We cannot publicly expose location data that we retrieve from the `ip2location` database, because that would
    425195 * violate their licensing terms. We can only use the information internally, for the purposes of completing the
    426196 * program's business logic (determining nearby events).
    427197 *
    428198 * Once we have nearby events, though, we can take advantage of the data that's available in the `wporg_events` table.
    429199 * That table contains the locations details for the event's venue, which was sourced from the respective APIs
    430200 * (WordCamp.org, Meetup.com, etc). We can return the venue's location data without violating any terms.
    431201 *
    432202 * See https://meta.trac.wordpress.org/ticket/2823#comment:15
    433203 * See https://meta.trac.wordpress.org/ticket/2823#comment:21
    434204 *
    435  * This isn't ideal, since the location it picks is potentially an hour's driving time from the user. If we get a
    436  * lot of complaints, we could potentially change this to search the `geonames` database for the name of the city
    437  * that was returned by the `ip2location` database. That should be more accurate, but it would require an extra
    438  * database lookup, and could potentially fail to return any results.
    439  *
    440205 * @param array $events
    441206 *
    442  * @return array|false
     207 * @return array
    443208 */
    444209function rebuild_location_from_event_source( $events ) {
    445         $location = false;
    446 
    447210        foreach ( $events as $event ) {
    448211                if ( ! empty( $event['location']['location'] ) && ! empty( $event['location']['latitude'] ) ) {
    449                         $location = $event['location'];
    450                         $location['description'] = $location['location'];
    451                         unset( $location['location'] );
    452 
    453                         /*
    454                          * If the event is a WordCamp, continue searching until a meetup is found. Meetups have a much smaller
    455                          * search radius in `get_events()`, so they'll be closer to the user's location. Some cities will only
    456                          * have WordCamps scheduled at the moment, though, so we can fall back to those.
    457                          */
    458                         if ( 'meetup' === $event['type'] ) {
    459                                 break;
    460                         }
     212                        return array(
     213                                'description' => $event['location']['location'],
     214                                'latitude'    => round( $event['location']['latitude'], 2 ),
     215                                'longitude'   => round( $event['location']['longitude'], 2 ),
     216                        );
    461217                }
    462218        }
    463219
    464         return $location;
     220        return false;
    465221}
    466222
    467223/**
    468224 * Determine a location for the given parameters
    469225 *
    470226 * @param array $args
    471227 *
    472228 * @return false|array
    473229 */
    474230function get_location( $args = array() ) {
    475         global $cache_life, $cache_group;
    476 
    477         $cache_key = 'get_location:' . md5( serialize( $args ) );
    478         $location  = wp_cache_get( $cache_key, $cache_group );
    479 
    480         if ( false !== $location ) {
    481                 return $location;
    482         }
    483 
    484         // For a country request, no lat/long are returned.
    485         if ( isset( $args['country'] ) ) {
    486                 $location = array(
    487                         'country' => $args['country'],
    488                 );
    489         }
    490 
    491         $country_code = get_country_code_from_locale( $args['locale'] ?? '' );
    492 
    493231        // Coordinates provided
    494232        if (
    495                 ! $location && (
    496                         ! empty( $args['latitude'] )  && is_numeric( $args['latitude'] ) &&
    497                         ! empty( $args['longitude'] ) && is_numeric( $args['longitude'] )
    498                 )
     233                ! empty( $args['latitude'] )  && is_numeric( $args['latitude'] ) &&
     234                ! empty( $args['longitude'] ) && is_numeric( $args['longitude'] )
    499235        ) {
    500                 $location = array(
     236                return array(
    501237                        'description' => false,
    502238                        'latitude'    => $args['latitude'],
    503239                        'longitude'   => $args['longitude']
    504240                );
    505241        }
    506242
    507         // City was provided by the user:
    508         if ( ! $location && isset( $args['location_name'] ) ) {
    509                 $guess = guess_location_from_city( $args['location_name'], $args['timezone'] ?? '', $country_code  );
    510 
    511                 if ( $guess ) {
    512                         $location = array(
    513                                 'description' => $guess->name,
    514                                 'latitude' => $guess->latitude,
    515                                 'longitude' => $guess->longitude,
    516                                 'country' => $guess->country,
    517                         );
    518                 } else {
    519                         $guess = guess_location_from_country( $args['location_name'] );
    520 
    521                         if ( ! $location && $guess ) {
    522                                 $location = array(
    523                                         'country'     => $guess['country_short'],
    524                                         'description' => $guess['country_long'],
    525                                 );
    526                         }
    527                 }
     243        // For a country request, no lat/long are returned.
     244        if ( !empty( $args['country'] ) ) {
     245                return array(
     246                        'country' => $args['country'],
     247                );
    528248        }
    529249
    530         /*
    531          * If all else fails, cast a wide net and try to find something before giving up, even
    532          * if the chance of success is lower than normal. Returning false is guaranteed failure, so this improves things
    533          * even if it only works 10% of the time.
    534          *
    535          * This must be done as the very last thing before giving up, because the likelihood of false positives is high.
    536          */
    537         if ( ! $location && isset( $args['location_name'] ) ) {
    538                 if ( 'ASCII' === mb_detect_encoding( $args['location_name'] ) ) {
    539                         $guess = guess_location_from_geonames_fallback( $args['location_name'], $country_code, $args['timezone'] ?? '', 'loose', 'none' );
    540                 } else {
    541                         $guess = guess_location_from_geonames_fallback( $args['location_name'], $country_code, $args['timezone'] ?? '', 'loose', 'ideographic' );
    542                 }
    543 
     250        // City was provided by the user:
     251        if ( !empty( $args['location_name'] ) ) {
     252                $guess = guess_location_from_city_by_google( $args['location_name'] );
    544253                if ( $guess ) {
    545                         $location = array(
    546                                 'description' => $guess->name,
    547                                 'latitude'    => $guess->latitude,
    548                                 'longitude'   => $guess->longitude,
    549                                 'country'     => $guess->country,
    550                         );
    551                 }
    552         }
    553 
    554         if ( ! $location ) {
    555                 if ( isset( $args['location_name'] ) || isset( $args['ip'] ) || ! empty( $args['latitude'] ) || ! empty( $args['longitude'] ) ) {
    556                         // If any of these are specified, and no locality was guessed based on the above checks, bail with no location.
    557                         $location = false;
    558                 } else {
    559                         // No specific location details.
    560                         $location = array();
     254                        return $guess;
    561255                }
    562256        }
    563257
    564258        // IP:
    565         if ( ! $location && isset( $args['ip'] ) && ! isset( $args['location_name'] ) ) {
     259        if ( !empty( $args['ip'] ) ) {
    566260                $guess = guess_location_from_ip( $args['ip'] );
    567 
    568261                if ( $guess ) {
    569                         $location = array(
    570                                 'description' => $guess->ip_city,
    571                                 'latitude'    => $guess->ip_latitude,
    572                                 'longitude'   => $guess->ip_longitude,
    573                                 'country'     => $guess->country_short,
    574                                 'internal'    => true, // this location cannot be shared publicly, see `rebuild_location_from_geonames()`
    575                         );
     262                        return $guess;
    576263                }
    577264        }
    578265
    579         wp_cache_set( $cache_key, $location, $cache_group, $cache_life );
    580         return $location;
     266        return false;
    581267}
    582268
    583 /**
    584  * Extract the country code from the given locale
    585  *
    586  * @param string $locale
    587  *
    588  * @return string|null
    589  */
    590 function get_country_code_from_locale( $locale ) {
    591         /*
    592          * `en_US` is ignored, because it's the default locale in Core, and many users never set it. That
    593          * leads to a lot of false-positives; e.g., Hampton-Sydney, Virginia, USA instead of Sydney, Australia.
    594          */
    595         if ( empty( $locale ) || 'en_US' === $locale ) {
    596                 return null;
     269function guess_location_from_city_by_google( $location ) {
     270        global $cache_group, $cache_life_google_geolocate;
     271        if ( ! $location ) {
     272                return false;
    597273        }
    598274
    599         preg_match( '/^[a-z]+[-_]([a-z]+)$/i', $locale, $match );
    600 
    601         $country_code = $match[1] ?? null;
    602 
    603         return $country_code;
    604 }
    605 
    606 /**
    607  * Guess the location based on a country identifier inside the given input
    608  *
    609  * This isn't perfect because some of the country names in the database are in a format that regular
    610  * people wouldn't type -- e.g., "Venezuela, Bolivarian Republic Of" -- but this will still match a
    611  * majority of them.
    612  *
    613  * Currently, this only works with English names because that's the only data we have.
    614  *
    615  * @param string $location_name
    616  *
    617  * @return false|array false on failure; an array with country details on success
    618  */
    619 function guess_location_from_country( $location_name ) {
    620         // Check if they entered only the country name, e.g. "Germany" or "New Zealand"
    621         $country             = get_country_from_name( $location_name );
    622         $location_word_count = str_word_count( $location_name );
    623         $location_name_parts = explode( ' ', $location_name );
    624         $valid_country_codes = get_valid_country_codes();
    625 
    626         /*
    627          * Multi-word queries may contain cities, regions, and countries, so try to extract just the country
    628          */
    629         if ( ! $country && $location_word_count >= 2 ) {
    630                 // Catch input like "Vancouver Canada"
    631                 $country_id   = $location_name_parts[ $location_word_count - 1 ];
    632                 $country      = get_country_from_name( $country_id );
    633         }
    634 
    635         if ( ! $country && $location_word_count >= 3 ) {
    636                 // Catch input like "Santiago De Los Caballeros, Dominican Republic"
    637                 $country_name = sprintf(
    638                         '%s %s',
    639                         $location_name_parts[ $location_word_count - 2 ],
    640                         $location_name_parts[ $location_word_count - 1 ]
    641                 );
    642                 $country = get_country_from_name( $country_name );
     275        $cache_key = 'geolocate:' . ( strlen( $location ) > 230 ? 'md5:' . md5( $location ) : $location );
     276        if ( false !== ( $location_data = wp_cache_get( $cache_key, $cache_group ) ) ) {
     277                return $location_data;
    643278        }
    644279
    645         if ( ! $country && $location_word_count >= 4 ) {
    646                 // Catch input like "Kaga-Bandoro, Central African Republic"
    647                 $country_name = sprintf(
    648                         '%s %s %s',
    649                         $location_name_parts[ $location_word_count - 3 ],
    650                         $location_name_parts[ $location_word_count - 2 ],
    651                         $location_name_parts[ $location_word_count - 1 ]
    652                 );
    653                 $country = get_country_from_name( $country_name );
     280        $api_unavailable_cache_key = 'geolocate:apifailure';
     281        if ( wp_cache_get( $api_unavailable_cache_key, $cache_group ) >= 20 /* FAILURE_THRESHOLD */ ) {
     282                return false;
    654283        }
    655284
    656         return $country;
    657 }
     285        $stream_context = stream_context_create( array(
     286                'http' => array(
     287                        'user_agent' => 'WordPress.org Events API/1.0; https://api.wordpress.org/events/1.0/',
     288                        'follow_location' => false,
     289                        'timeout' => 1
     290                ),
     291        ) );
    658292
    659 /**
    660  * Get a list of valid country codes
    661  *
    662  * @return array
    663  */
    664 function get_valid_country_codes() {
    665         global $wpdb;
     293        $location_data = json_decode( file_get_contents(
     294                'https://maps.googleapis.com/maps/api/geocode/json?' .
     295                        ( defined( 'GOOGLE_GEOLOCATE_SERVICE_API_KEY' ) ? 'key=' . GOOGLE_GEOLOCATE_SERVICE_API_KEY : '' ) .
     296                        '&address=' . urlencode( $location ),
     297                $use_include_path = false,
     298                $stream_context
     299        ) );
    666300
    667         return $wpdb->get_col( "SELECT DISTINCT country FROM geoname" );
    668 }
     301        // Bump the number of API calls made this hour and this day:
     302        $apicalls_cache_key = 'geolocate:apicalls:hour:' . gmdate( 'Y-m-d-H' );
     303        if ( ! $hour_requests = wp_cache_incr( $apicalls_cache_key, 1, $cache_group ) ) {
     304                wp_cache_add( $apicalls_cache_key, 1, $cache_group, 24 * 60 * 60 );
     305        }
     306        $apicalls_cache_key = 'geolocate:apicalls:day:' . gmdate( 'Y-m-d' );
     307        if ( ! $day_requests = wp_cache_incr( $apicalls_cache_key, 1, $cache_group ) ) {
     308                wp_cache_add( $apicalls_cache_key, 1, $cache_group, 24 * 60 * 60 );
     309        }
     310
     311        if ( ! $location_data || ( 'OK' != $location_data->status && 'ZERO_RESULTS' != $location_data->status ) ) {
     312                // API Failure or rejection. Cache this search failure for 5 mins
     313                wp_cache_set( $cache_key, array(), $cache_group, 5 * 60 );
     314
     315                // Failure, bump the failure count for this minute to warn future scripts
     316                if ( ! wp_cache_incr( $api_unavailable_cache_key, 1, $cache_group ) ) {
     317                        wp_cache_add( $api_unavailable_cache_key, 1, $cache_group, 60 );
    669318
    670 /**
    671  * Get the country that corresponds to the given country name
    672  *
    673  * @param string $country_name
    674  *
    675  * @return false|array false on failure; an array with country details on success
    676  */
    677 function get_country_from_name( $country_name ) {
    678         global $wpdb;
     319                        // Once a minute, trigger a warning
     320                        trigger_error( "Events API Geocode failure: {$location_data->status} - {$location_data->error_message} (API Requests - Day: {$day_requests}, Hour: {$hour_requests})" , E_USER_WARNING );
    679321
    680         $country = $wpdb->get_row( $wpdb->prepare( "
    681                 SELECT country_short, country_long
    682                 FROM ip2location
    683                 WHERE
    684                         country_long  = %s OR
    685                         country_short = %s
    686                 LIMIT 1",
    687                 $country_name,
    688                 $country_name
    689         ), 'ARRAY_A' );
     322                }
     323                return false;
     324        }
     325
     326        $location = array();
     327        foreach ( $location_data->results as $possible_location ) {
     328                // The formatted_address returned by Google is not normally in a user-friendly city, state, country format, and often has extra useless details added in.
     329                // This reduces it to City, StateLevel, Country.
     330                // TODO: Full context makes for a better UX for multiple locations, however core currently works best with a singular location.
     331                // TODO: Flip out `description` and `long_desc` as appropriate?
     332
     333                $short_name = ''; // City
     334                $name = array_reduce( $possible_location->address_components, function( $carry, $item ) use ( &$short_name ) {
     335                        if (
     336                                in_array( 'political', $item->types ) &&
     337                                ! array_intersect(
     338                                        // These political areas are not needed in formatted addresses
     339                                        array(
     340                                                'administrative_area_level_5',
     341                                                'administrative_area_level_4',
     342                                                'administrative_area_level_3',
     343                                                'administrative_area_level_2',
     344                                                'sublocality',
     345                                                'neighborhood'
     346                                        ),
     347                                        $item->types
     348                                )
     349                        ) {
     350                                if ( $carry ) {
     351                                        $carry .= ', ' . $item->long_name;
     352                                } else {
     353                                        $carry = $short_name = $item->long_name;
     354                                }
     355                        }
     356                        return $carry;
     357                }, '' );
     358
     359                if ( ! $location ) {
     360                        $location = array(
     361                                'description' => $short_name,
     362                                'long_desc'   => $name,
     363                                'latitude'    => round( $possible_location->geometry->location->lat, 2 ),
     364                                'longitude'   => round( $possible_location->geometry->location->lng, 2 ),
     365                                'alternate' => array(),
     366                        );
     367                } else {
     368                        $location['alternate'][] = array(
     369                                'description' => $short_name,
     370                                'long_desc'   => $name,
     371                                'latitude'    => round( $possible_location->geometry->location->lat, 2 ),
     372                                'longitude'   => round( $possible_location->geometry->location->lng, 2 ),
     373                        );
     374                }
    690375
    691         // Convert all errors to boolean false for consistency
    692         if ( empty( $country ) ) {
    693                 $country = false;
    694376        }
    695377
    696         return $country;
     378        wp_cache_set( $cache_key, $location, $cache_group, $cache_life_google_geolocate );
     379
     380        return $location;
    697381}
    698382
    699383function get_events( $args = array() ) {
    700384        global $wpdb, $cache_life, $cache_group;
    701385
    702386        // Sort to ensure consistent cache keys.
    703387        ksort( $args );
    704388
    705389        // number should be between 0 and 100, with a default of 10.
    706390        $args['number'] = $args['number'] ?? 10;
    707391        $args['number'] = max( 0, min( $args['number'], 100 ) );
    708392
    709393        $cache_key = 'events:' . md5( serialize( $args ) );
    710394        if ( false !== ( $data = wp_cache_get( $cache_key, $cache_group ) ) ) {
    711395                return $data;