Chris@0
|
1 <?php
|
Chris@0
|
2
|
Chris@0
|
3 namespace Drupal\Component\Utility;
|
Chris@0
|
4
|
Chris@0
|
5 /**
|
Chris@0
|
6 * Provides user agent related utility functions.
|
Chris@0
|
7 *
|
Chris@0
|
8 * @ingroup utility
|
Chris@0
|
9 */
|
Chris@0
|
class UserAgent {

  /**
   * Identifies user agent language from the Accept-language HTTP header.
   *
   * The algorithm works as follows:
   * - parse every language range of the header together with its qvalue
   *   (a missing qvalue counts as 1).
   * - map user agent language codes to standard language codes using
   *   $mappings.
   * - add generic user agent language codes (for example "fr" for "fr-ca") if
   *   they aren't already specified, but with a slightly lower qvalue than the
   *   specific code they were derived from.
   * - for each available language code, look up the qvalue of its longest
   *   prefix that the user agent sent, falling back to the wildcard "*".
   * - return the available language code with the highest qvalue; if 2 or
   *   more available language codes have the same qvalue, the one listed
   *   first in $langcodes wins.
   *
   * The result depends on a per-request header, so callers must not store it
   * in a cache that is shared between users.
   *
   * @param string $http_accept_language
   *   The value of the "Accept-Language" HTTP header.
   * @param array $langcodes
   *   An array of available language codes to pick from.
   * @param array $mappings
   *   (optional) Custom mappings to support user agents that are sending non
   *   standard language codes, keyed by the lowercase user agent language code
   *   with the standard language code as value. No mapping is assumed by
   *   default.
   *
   * @return string|false
   *   The selected language code, exactly as it is spelled in $langcodes, or
   *   FALSE if no valid language can be identified.
   */
  public static function getBestMatchingLangcode($http_accept_language, $langcodes, $mappings = []) {
    // The Accept-Language header contains information about the language
    // preferences configured in the user's user agent / operating system.
    // RFC 2616 (section 14.4) defines the Accept-Language header as follows:
    //   Accept-Language = "Accept-Language" ":"
    //                  1#( language-range [ ";" "q" "=" qvalue ] )
    //   language-range  = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" )
    // Optional whitespace is allowed around the ";" that introduces the
    // qvalue (RFC 7231, section 5.3.1), so both forms must be accepted.
    // Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5",
    // "da, en-gb; q=0.8, en; q=0.7".
    $pattern = '@(?<=[, ]|^)([a-zA-Z-]+|\*)(?:\s*;\s*q=([0-9.]+))?(?:$|\s*,\s*)@';

    // Maps lowercase language code => qvalue multiplied by 1000.
    $ua_langcodes = [];
    // The cast keeps a missing (NULL) header from reaching trim().
    if (preg_match_all($pattern, trim((string) $http_accept_language), $matches, PREG_SET_ORDER)) {
      foreach ($matches as $match) {
        // We can safely use strtolower() here, tags are ASCII.
        $langcode = strtolower($match[1]);
        if ($mappings && array_key_exists($langcode, $mappings)) {
          $langcode = strtolower($mappings[$langcode]);
        }

        // With PREG_SET_ORDER an unmatched trailing group is absent, which
        // means the language range was sent without a qvalue.
        $qvalue = isset($match[2]) ? (float) $match[2] : 1;
        // RFC2616 mandates that the decimal part is no more than three
        // digits, so we multiply the qvalue by 1000 to avoid floating point
        // comparisons.
        $scaled_qvalue = (int) ($qvalue * 1000);

        // Take the highest qvalue for this langcode. Although the request
        // supposedly contains unique langcodes, our mapping possibly resolves
        // to the same langcode for different qvalues. Keep the highest.
        $known_qvalue = isset($ua_langcodes[$langcode]) ? $ua_langcodes[$langcode] : 0;
        $ua_langcodes[$langcode] = max($scaled_qvalue, $known_qvalue);
      }
    }

    // We should take pristine values from the HTTP headers, but Internet
    // Explorer from version 7 sends only specific language tags (eg. fr-CA)
    // without the corresponding generic tag (fr) unless explicitly configured.
    // In that case, we assume that the lowest value of the specific tags is the
    // value of the generic language to be as close to the HTTP 1.1 spec as
    // possible. Sorting ascending guarantees the lowest specific tag is seen
    // first and therefore is the one that defines the generic tag.
    // See http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4 and
    // http://blogs.msdn.com/b/ie/archive/2006/10/17/accept-language-header-for-internet-explorer-7.aspx
    asort($ua_langcodes);
    foreach ($ua_langcodes as $langcode => $qvalue) {
      // For Chinese languages the generic tag is either zh-hans or zh-hant, so
      // we need to handle this separately, we can not split $langcode on the
      // first occurrence of '-' otherwise we get a non-existing language zh.
      // All other languages use a langcode without a '-', so we can safely
      // split on the first occurrence of it.
      $chinese_script = substr($langcode, 0, 7);
      if (strlen($langcode) > 7 && ($chinese_script == 'zh-hant' || $chinese_script == 'zh-hans')) {
        $generic_tag = $chinese_script;
      }
      else {
        $generic_tag = strtok($langcode, '-');
      }
      if (!empty($generic_tag) && !isset($ua_langcodes[$generic_tag])) {
        // Add the generic langcode, but make sure it has a lower qvalue as the
        // more specific one, so the more specific one gets selected if it's
        // defined by both the user agent and us.
        $ua_langcodes[$generic_tag] = $qvalue - 0.1;
      }
    }

    // Find the available language with the greatest qvalue, following the
    // rules of RFC 2616 (section 14.4). If several languages have the same
    // qvalue, the one listed first in $langcodes is kept, because only a
    // strictly greater qvalue replaces the current best match.
    $best_match_langcode = FALSE;
    $max_qvalue = 0;
    // The wildcard qvalue does not depend on the language being examined.
    $wildcard_qvalue = isset($ua_langcodes['*']) ? $ua_langcodes['*'] : 0;
    foreach ($langcodes as $langcode_case_sensitive) {
      // Language tags are case insensitive (RFC2616, sec 3.10).
      $langcode = strtolower($langcode_case_sensitive);

      // If nothing matches below, the default qvalue is the one of the wildcard
      // language, if set, or is 0 (which will never match).
      $qvalue = $wildcard_qvalue;

      // Find the longest possible prefix of the user agent supplied language
      // ('the language-range') that matches this site language ('the language
      // tag'), dropping one trailing subtag per iteration.
      $prefix = $langcode;
      do {
        if (isset($ua_langcodes[$prefix])) {
          $qvalue = $ua_langcodes[$prefix];
          break;
        }
        $last_dash = strrpos($prefix, '-');
        $prefix = ($last_dash === FALSE) ? '' : substr($prefix, 0, $last_dash);
      } while ($prefix !== '');

      // Find the best match.
      if ($qvalue > $max_qvalue) {
        $best_match_langcode = $langcode_case_sensitive;
        $max_qvalue = $qvalue;
      }
    }

    return $best_match_langcode;
  }

}
|