Making WordPress.org


Ignore:
Timestamp:
03/01/2021 06:05:09 AM (4 years ago)
Author:
dd32
Message:

Translate: Expand upon the GlotPress tag warnings to validate the href values separately to the rest of the tags.

This change also allows the tags to be translated in any order, but it doesn't validate that the HTML is correctly nested (a follow-up change is needed).

See #5155.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • sites/trunk/wordpress.org/public_html/wp-content/plugins/wporg-gp-custom-warnings/wporg-gp-custom-warnings.php

    r10689 r10730  
    112112
    113113    /**
    114      * Extends the GlotPress tags warning to allow some URL changes.
    115      *
    116      * @param string    $original    The original string.
    117      * @param string    $translation The translated string.
    118      * @param GP_Locale $locale      The locale.
     114     * Replaces the GlotPress tags warning to allow some URL changes.
     115     *
     116     * Differences from GlotPress:
     117     *  - URLs (href + src) are run through `self::warning_mismatching_urls()`
     118     *    - The domain may change for some safe domains
     119     *    - The protocol may change between https & http
     120     *    - The URL may include/remove a trailing slash
     121     *  - The value of translatable/url attributes is excluded from the error message if it's not related to the issue at hand.
     122     *  - Tags are sorted, so <em>One</em> <strong>Two</strong> can be translated as <strong>foo</strong> <em>bar</em> without generating a warning.
     123     *  - TODO: Tags are not validated to be nested correctly. GlotPress handles this by validating the ordering of the tags remained the same.
     124     *  - TODO: Allow Japanese (and other locales?) to remove certain style/formatting tags that don't apply in the locale.
     125     *
     126     * @param string    $original    The source string.
     127     * @param string    $translation The translation.
     128     * @param GP_Locale $locale      The locale of the translation.
     129     * @return string|true True if check is OK, otherwise warning message.
    119130     */
    120131    public function warning_tags( $original, $translation, $locale ) {
    121         // Allow URL changes in `href` attributes by substituting the original URL when appropriate
    122         // if that passes the checks, assume it's okay, otherwise throw the warning with the original payload.
    123 
    124         $altered_translation = $translation;
    125         foreach ( $this->allowed_domain_changes as $domain => $regex ) {
    126             if ( false === stripos( $original, '://' . $domain ) ) {
     132        $tag_pattern       = '(<[^>]*>)';
     133        $tag_re            = "/$tag_pattern/Us";
     134        $original_parts    = [];
     135        $translation_parts = [];
     136
     137        if ( preg_match_all( $tag_re, $original, $m ) ) {
     138            $original_parts = $m[1];
     139        }
     140        if ( preg_match_all( $tag_re, $translation, $m ) ) {
     141            $translation_parts = $m[1];
     142        }
     143
     144        if ( count( $original_parts ) > count( $translation_parts ) ) {
     145            return 'Missing tags from translation. Expected: ' . implode( ' ', array_diff( $original_parts, $translation_parts ) );
     146        }
     147        if ( count( $original_parts ) < count( $translation_parts ) ) {
     148            return 'Too many tags in translation. Found: ' . implode( ' ', array_diff( $translation_parts, $original_parts ) );
     149        }
     150
     151        // TODO: Validate nesting of HTML is same.
     152        // GlotPress handled this by requiring the HTML be in the same order.
     153
     154        // Sort the tags; from this point on, any order is okay as long as all the tags are present.
     155        rsort( $original_parts );
     156        rsort( $translation_parts );
     157
     158        $changeable_attributes = array(
     159            // We allow certain attributes to be different in translations.
     160            'title',
     161            'aria-label',
     162            // src and href will be checked separately.
     163            'src',
     164            'href',
     165        );
     166
     167        $attribute_regex       = '/(\s*(?P<attr>%s))=([\'"])(?P<value>.+)\\3(\s*)/i';
     168        $attribute_replace     = '$1=$3...$3$5';
     169        $changeable_attr_regex = sprintf( $attribute_regex, implode( '|', $changeable_attributes ) );
     170        $link_attr_regex       = sprintf( $attribute_regex, 'href|src' );
     171
     172        // Items are sorted, so if all is well, will match up.
     173        $parts_tags = array_combine( $original_parts, $translation_parts );
     174
     175        $warnings = [];
     176        foreach ( $parts_tags as $original_tag => $translation_tag ) {
     177            if ( $original_tag === $translation_tag ) {
    127178                continue;
    128179            }
    129180
    130             // Make an assumption that the first protocol for the given domain is the protocol in use.
    131             $protocol = 'https';
    132             if ( preg_match( '!(https?)://' . $regex . '!', $original, $m ) ) {
    133                 $protocol = $m[1];
    134             }
    135 
    136             $altered_translation = preg_replace_callback(
    137                 '!(href=[\'"]?)(https?)://(' . $regex . ')!i',
    138                 function( $m ) use( $protocol, $domain ) {
    139                     return $m[1] . $protocol . '://' . $domain;
    140                 },
    141                 $altered_translation
    142             );
    143         }
    144 
    145         if ( $altered_translation !== $translation ) {
    146             $altered_warning = GP::$builtin_translation_warnings->warning_tags( $original, $altered_translation, $locale );
    147             if ( true === $altered_warning ) {
    148                 return true;
    149             }
    150         }
    151 
    152         // Pass through to the core GlotPress warning method.
    153         return GP::$builtin_translation_warnings->warning_tags( $original, $translation, $locale );
    154     }
     181            // Remove any attributes that can be expected to differ.
     182            $original_tag    = preg_replace( $changeable_attr_regex, $attribute_replace, $original_tag );
     183            $translation_tag = preg_replace( $changeable_attr_regex, $attribute_replace, $translation_tag );
     184
     185            if ( $original_tag !== $translation_tag ) {
     186                $warnings[] = "Expected $original_tag, got $translation_tag.";
     187            }
     188        }
     189
     190        // Now check that the URLs mentioned within href & src attributes match.
     191        $original_links    = '';
     192        $translation_links = '';
     193
     194        if ( preg_match_all( $link_attr_regex, implode( ' ', $original_parts ), $m ) ) {
     195            $original_links = implode( "\n", $m['value'] );
     196        }
     197        if ( preg_match_all( $link_attr_regex, implode( ' ', $translation_parts ), $m ) ) {
     198            $translation_links = implode( "\n", $m['value'] );
     199        }
     200
     201        // Validate the URLs if present.
     202        if ( $original_links || $translation_links ) {
     203            $url_warnings = $this->warning_mismatching_urls( $original_links, $translation_links );
     204
     205            if ( true !== $url_warnings ) {
     206                $warnings[] = $url_warnings;
     207            }
     208        }
     209
     210        if ( empty( $warnings ) ) {
     211            return true;
     212        }
     213
     214        return implode( "\n", $warnings );
     215   }
    155216
    156217    /**
Note: See TracChangeset for help on using the changeset viewer.