Making WordPress.org

Changeset 12483


Ignore:
Timestamp:
03/20/2023 01:17:16 AM (3 years ago)
Author:
dd32
Message:

HelpScout: When parsing bounce emails, check in the attachments (which is likely the original email) when no emails within the body content match a WordPress.org account.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • sites/trunk/api.wordpress.org/public_html/dotorg/helpscout/common.php

    r12481 r12483  
    4343 */
    function get_email_thread( $thread_id, $force = false ) {
        if ( $thread_id ) {
            // Request the conversation with its individual threads embedded,
            // going through the shared caching wrapper.
            $endpoint = '/v2/conversations/' . $thread_id . '?embed=threads';

            return cached_helpscout_get( $endpoint, $force );
        }

        // No thread ID was supplied, so there is nothing to look up.
        return false;
    }
     51
     52/**
     53 * Caching wrapper around the HelpScout GET API.
     54 *
     55 * TODO: This should probably be moved to the MU plugin.
     56 */
     57function cached_helpscout_get( $url, $force = false, $instance = false ) {
     58    wp_cache_add_global_groups( 'helpscout-cache' );
     59    $client    = get_client( $instance );
     60    $cache_key = "{$client->name}:" . sha1( $url );
     61
     62    if ( $data = wp_cache_get( $cache_key, 'helpscout-cache' ) ) {
     63        if ( ! $data ) {
    5664            return $thread;
    5765        }
    5866    }
    5967
    60     $email_obj = $client->get( '/v2/conversations/' . $thread_id . '?embed=threads' );
    61 
    62     wp_cache_set( $cache_key, $email_obj, 'helpscout-thread', 6 * HOUR_IN_SECONDS );
    63 
    64     return $email_obj;
     68    $data = $client->get( $url );
     69
     70    wp_cache_set( $cache_key, $data, 'helpscout-cache', 6 * HOUR_IN_SECONDS );
     71
     72    return $data;
    6573}
    6674
     
    118126            $email_obj = get_email_thread( $request->ticket->id ?? 0 );
    119127            if ( ! empty( $email_obj->_embedded->threads ) ) {
     128                $attachment_api_urls = [];
     129
    120130                foreach ( $email_obj->_embedded->threads as $thread ) {
    121131                    if ( 'customer' !== $thread->type ) {
     
    125135                    // Extract emails from the mailer-daemon.
    126136                    $email_body = strip_tags( str_replace( '<br>', "\n", $thread->body ) );
    127 
    128                     // Extract `To:`, `X-Orig-To:`, and fallback to all emails.
    129                     $emails = [];
    130                     if ( preg_match( '!^(x-orig-to:|to:|Final-Recipient:(\s*rfc\d+;)?)\s*(?P<email>.+@.+)$!im', $email_body, $m ) ) {
    131                         $m['email'] = str_replace( [ '&lt;', '&gt;' ], '', $m['email'] );
    132                         $m['email'] = trim( $m['email'], '<> ' );
    133 
    134                         $emails = [ $m['email'] ];
    135                     } else {
    136                         // Ugly regex for emails, but it's good for mailer-daemon emails.
    137                         if ( preg_match_all( '![^\s;"]+@[^\s;&"]+\.[^\s;&"]+[a-z]!', $email_body, $m ) ) {
    138                             $emails = array_unique( array_diff( $m[0], [ $request->mailbox->email ] ) );
     137                    $user       = get_user_from_emails( extract_emails_from_text( $email_body ) );
     138                    if ( $user ) {
     139                        break;
     140                    }
     141
     142                    // Track the attachments too, sometimes the email included in the body of the email is a final forwarded destination, but the attachment contains the real email.
     143                    foreach ( $thread->_embedded->attachments ?? [] as $attachment ) {
     144                        if (
     145                            ! $attachment->width && // Exclude imagey attachments.
     146                            $attachment->size < 100 * KB_IN_BYTES &&
     147                            (
     148                                str_contains( $attachment->mimeType, 'message' ) ||
     149                                str_contains( $attachment->mimeType, 'text' ) ||
     150                                str_contains( $attachment->mimeType, 'rfc' )
     151                            )
     152                        ) {
     153                            $attachment_api_urls[] = $attachment->_links->data->href;
    139154                        }
    140155                    }
    141 
    142                     foreach ( $emails as $maybe_email ) {
    143                         $user = get_user_by( 'email', $maybe_email );
    144                         if ( $user ) {
    145                             break;
     156                }
     157
     158                // If we didn't find a user, try to extract the email from the attachments (Which is likely the original email)
     159                if ( ! $user && $attachment_api_urls ) {
     160                    foreach ( $attachment_api_urls as $attachment_api_url ) {
     161                        $data = cached_helpscout_get( $attachment_api_url )->data ?? '';
     162                        if ( $data ) {
     163                            $data = base64_decode( $data ) ?: $data;
     164                            $user = get_user_from_emails( extract_emails_from_text( $data ) );
     165                            if ( $user ) {
     166                                break;
     167                            }
    146168                        }
    147169                    }
     
    154176}
    155177
     178/**
     179 * Extract email-like strings from a string.
     180 *
     181 * @param string $text The text to look in.
     182 * @return array
     183 */
     184function extract_emails_from_text( $text ) {
     185    // Extract `To:`, `X-Orig-To:`, and fallback to all emails.
     186    $emails = [];
     187    if ( preg_match( '!^(x-orig-to:|to:|Final-Recipient:(\s*rfc\d+;)?)\s*(?P<email>.+@.+)$!im', $text, $m ) ) {
     188        $m['email'] = str_replace( [ '&lt;', '&gt;' ], '', $m['email'] );
     189        $m['email'] = trim( $m['email'], '<> ' );
     190
     191        $emails = [ $m['email'] ];
     192    } else {
     193        // Ugly regex for emails, but it's good for mailer-daemon emails.
     194        if ( preg_match_all( '![^\s;"]+@[^\s;&"]+\.[^\s;&"]+[a-z]!', $text, $m ) ) {
     195            $emails = array_unique( array_diff( $m[0], [ $request->mailbox->email ] ) );
     196        }
     197    }
     198
     199    return $emails;
     200}
     201
     202/**
     203 * Given a list of emails, find the first user that matches.
     204 *
     205 * @param array $emails The list of emails to check.
     206 * @return WP_User|false
     207 */
     208function get_user_from_emails( $emails ) {
     209    foreach ( $emails as $maybe_email ) {
     210        $user = get_user_by( 'email', $maybe_email );
     211        if ( $user ) {
     212            return $user;
     213        }
     214    }
     215
     216    return false;
     217}
     218
     219/**
     220 * Get the possible plugins or themes from the email.
     221 */
    156222function get_plugin_or_theme_from_email( $request ) {
    157223    $subject = $request->ticket->subject ?? '';
Note: See TracChangeset for help on using the changeset viewer.