# File lib/itex_stringsupport.rb, line 107
#
# Checks whether the receiver is valid UTF-8 after its numeric character
# references (NCRs) have been expanded.
#
# Returns the result of `=~` against UTF8_REGEX: the integer offset of the
# match (truthy) when the bytes match, or nil when they do not.
# NOTE(review): check_ncrs, as_bytes and UTF8_REGEX are defined outside this
# block; their exact behavior should be confirmed there.
def is_utf8?
  # Expand NCRs to UTF-8 text, take the result through as_bytes, and ensure
  # the resulting string of bytes is valid UTF-8.
  #
  # A split/pack based expansion might look faster, but it isn't:
  #   pieces = self.split(/&#[xX]([a-fA-F0-9]+);/)
  #   1.step(pieces.length-1, 2) {|i| pieces[i] = [pieces[i].hex].pack('U*')}
  #   pieces = pieces.join.split(/&#(\d+);/)
  #   1.step(pieces.length-1, 2) {|i| pieces[i] = [pieces[i].to_i].pack('U*')}
  #   text = pieces.join
  check_ncrs.as_bytes =~ UTF8_REGEX
end