Module | Addressable::IDNA |
In: |
lib/addressable/idna/pure.rb
lib/addressable/idna/native.rb |
UNICODE_TABLE | = | File.expand_path( File.join(File.dirname(__FILE__), '../../..', 'data/unicode.data') ) |
This module is loosely based on idn_actionmailer by Mick Staugaard, the
unicode library by Yoshida Masato, and the punycode implementation by
Kazuhiro Nishiyama. Most of the code was copied verbatim, but some
reformatting was done, and some translation from C was done.
Without their code to work from as a base, we'd all still be relying on the presence of libidn. Which nobody ever seems to have installed. Original sources: github.com/staugaard/idn_actionmailer, www.yoshidam.net/Ruby.html#unicode, rubyforge.org/frs/?group_id=2550 |
|
ACE_PREFIX | = | "xn--" | ||
UTF8_REGEX | = | /\A(?: [\x09\x0A\x0D\x20-\x7E] # ASCII | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 )*\z/mnx | ||
UTF8_REGEX_MULTIBYTE | = | /(?: [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 )/mnx | ||
HANGUL_SBASE | = | 0xac00 | ||
HANGUL_LBASE | = | 0x1100 | ||
HANGUL_LCOUNT | = | 19 | ||
HANGUL_VBASE | = | 0x1161 | ||
HANGUL_VCOUNT | = | 21 | ||
HANGUL_TBASE | = | 0x11a7 | ||
HANGUL_TCOUNT | = | 28 | ||
HANGUL_NCOUNT | = | HANGUL_VCOUNT * HANGUL_TCOUNT # 588 | ||
HANGUL_SCOUNT | = | HANGUL_LCOUNT * HANGUL_NCOUNT # 11172 | ||
UNICODE_DATA_COMBINING_CLASS | = | 0 | ||
UNICODE_DATA_EXCLUSION | = | 1 | ||
UNICODE_DATA_CANONICAL | = | 2 | ||
UNICODE_DATA_COMPATIBILITY | = | 3 | ||
UNICODE_DATA_UPPERCASE | = | 4 | ||
UNICODE_DATA_LOWERCASE | = | 5 | ||
UNICODE_DATA_TITLECASE | = | 6 | ||
UNICODE_DATA | = | File.open(UNICODE_TABLE, "rb") do |file| Marshal.load(file.read) end | This is a sparse Unicode table. Codepoints without entries are assumed to have the value: [0, 0, nil, nil, nil, nil, nil] |