require 'uri'

module WaybackClassic
  # Inspects the `utf8` query parameter of a request as an encoding canary.
  #
  # A client that submitted the parameter as UTF-8 yields the single code
  # point U+2713; certain other code point sequences are recognised as coming
  # from clients that submitted the form in a legacy encoding. When such a
  # sequence is seen, #encoding_override names that encoding and #encode /
  # #quotify adapt their output accordingly.
  class LegacyClientEncoding
    # Convenience constructor; forwards all arguments to .new.
    #
    # @return [LegacyClientEncoding]
    def self.detect(*args)
      new(*args)
    end

    # @return [String, nil] the decoded value of the `utf8` query parameter,
    #   or nil when it was absent or empty
    # @return [String, nil] name of the detected legacy encoding, or nil when
    #   no known legacy canary was recognised
    attr_reader :utf8, :encoding_override

    # Reads the canary from env["QUERY_STRING"] and, when a non-empty `utf8`
    # parameter is present, rewrites env["QUERY_STRING"] without it.
    #
    # @param env [#[], #[]=] environment-like object holding "QUERY_STRING";
    #   it is mutated when the canary is stripped
    def initialize(env = ENV)
      @utf8 = nil

      # TODO: Handle un-encoded values, somehow?
      pairs = URI.decode_www_form(env["QUERY_STRING"] || "")

      # When the parameter is repeated the last occurrence wins.
      canary = pairs.to_h["utf8"]

      unless canary.nil? || canary.empty?
        @utf8 = canary
        # Re-encode from the ordered pair list rather than from a Hash so
        # that repeated keys (e.g. "a=1&a=2") are not collapsed into one.
        remaining = pairs.reject { |key, _value| key == "utf8" }
        env["QUERY_STRING"] = URI.encode_www_form(remaining)
      end

      @encoding_override = @utf8 ? override_for(@utf8) : nil
    end

    # Transcodes +value+ into the detected legacy encoding, then relabels the
    # resulting bytes as UTF-8 (the bytes themselves stay in the legacy
    # encoding). Characters that cannot be converted, and invalid byte
    # sequences in +value+, are substituted with a replacement character
    # instead of raising.
    #
    # @param value [String]
    # @return [String] +value+ itself when no override is active, otherwise a
    #   new string
    def encode(value)
      return value unless @encoding_override

      value
        .encode(@encoding_override, invalid: :replace, undef: :replace)
        .force_encoding("UTF-8")
    end

    # Wraps +value+ in quotation marks: plain ASCII double quotes when a
    # legacy encoding override is active, curly quotes otherwise.
    #
    # @param value [#to_s]
    # @return [String]
    def quotify(value)
      if @encoding_override
        "\"#{value}\""
      else
        "“#{value}”"
      end
    end

    private

    # Maps the code points of the canary value to a legacy encoding name.
    #
    # @param canary [String] decoded `utf8` parameter value
    # @return [String, nil] encoding name, or nil for unrecognised canaries
    def override_for(canary)
      canary_bytes = canary.codepoints

      # Note: UTF-8 would be [0x2713]
      case canary_bytes
      # Safari forced to Shift_JIS mode
      when [0xfffd, 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3b],
           # Dream Passport 3
           [0xfffd, 0x13]
        "Shift_JIS" # or GB 2312
      # when [0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3b,
      #       0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3b,
      #       0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3b]
      #   "ISO-2022-JP"
      # when [0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3b,
      #       0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3b]
      #   "EUC-JP" # or Big5, or Korean Windows
      # when [0x2704, 0x31, 0xfffd, 0x37]
      #   "GB 18030" # or ISO Latin2
      end
    end
  end
end