-------------------------
-- UTF-8 helper functions
-------------------------

--
-- Drop the final character of a UTF-8 encoded string, removing
-- every byte of that character when it is multi-byte.
--
-- Strings of zero or one byte always yield "".
--
function Utf8_RemoveLastCharacter(utf8String)
  local size = #utf8String
  if size < 2 then
    return ""
  end

  -- Step backwards over trailing continuation bytes (10xxxxxx),
  -- never moving in front of the first byte of the string
  local start = size
  while start > 1 and bit32.band(utf8String:byte(start), 0xC0) == 0x80 do
    start = start - 1
  end

  -- start now indexes the byte that opens the last character (a plain
  -- ASCII byte, a lead byte, or byte 1); keep everything before it
  return utf8String:sub(1, start - 1)
end

--
-- Count the characters in a UTF-8 string: every byte that is not
-- a continuation byte (10xxxxxx) starts one character
--
function Utf8_StrLen(utf8String)
  local byteCount = #utf8String
  local continuationCount = 0

  for index = 1, byteCount do
    local topBits = bit32.band(utf8String:byte(index), 0xC0)
    if topBits == 0x80 then
      continuationCount = continuationCount + 1
    end
  end

  -- Whatever is left after discarding continuation bytes is the
  -- number of character-starting bytes
  return byteCount - continuationCount
end

--
-- Converts a UCS-2 (16-bit) code point to its UTF-8 encoding,
-- returned as a string of one to three bytes.
--
-- Code points above U+FFFF are not supported and yield "".
-- Surrogate values (U+D800 to U+DFFF) get no special treatment
-- and are encoded like any other three-byte code point.
--
function Utf8_FromUcs2(ucs2Codepoint)
  -- U+0000..U+007F: a single byte, identical to ASCII
  if ucs2Codepoint < 0x80 then
    return string.char(ucs2Codepoint)
  end

  -- U+0080..U+07FF: 110xxxxx 10xxxxxx
  if ucs2Codepoint < 0x800 then
    return string.char(
      bit32.bor(bit32.rshift(ucs2Codepoint, 6), 0xC0),
      bit32.bor(bit32.band(ucs2Codepoint, 0x3F), 0x80))
  end

  -- U+0800..U+FFFF: 1110xxxx 10xxxxxx 10xxxxxx
  -- The upper bound is inclusive so that U+FFFF itself is encoded
  -- instead of falling through to the empty-string result
  if ucs2Codepoint <= 0xFFFF then
    return string.char(
      bit32.bor(bit32.rshift(ucs2Codepoint, 12), 0xE0),
      bit32.bor(bit32.band(bit32.rshift(ucs2Codepoint, 6), 0x3F), 0x80),
      bit32.bor(bit32.band(ucs2Codepoint, 0x3F), 0x80))
  end

  -- Outside the 16-bit range
  return ""
end

--
-- Decodes a UTF-8 string into a table of unicode code points
-- (utf-32 values), one entry per character, followed by a
-- terminating 0 entry.
--
-- Only the lead byte of each sequence is inspected: lead bytes
-- 0xFE and 0xFF raise an error, continuation bytes are not
-- validated, and a sequence cut short by the end of the string
-- still contributes its partial value.
--
function Utf8_ToUcs2(utf8Codepoint)
  local codepoints = {}
  local remaining = 0   -- bytes still expected for the sequence in progress
  local current = nil   -- code point being assembled; nil until the first byte

  for pos = 1, #utf8Codepoint do
    local byte = utf8Codepoint:byte(pos)

    if remaining ~= 0 then
      -- Continuation byte: append its low six bits
      current = bit32.bor(bit32.lshift(current, 6), bit32.band(byte, 0x3F))
    else
      -- A new sequence starts here; store the one just completed
      if current ~= nil then
        codepoints[#codepoints + 1] = current
      end

      -- The sequence length is derived from the lead byte alone
      local length
      if byte < 0x80 then
        length = 1
      elseif byte < 0xE0 then
        length = 2
      elseif byte < 0xF0 then
        length = 3
      elseif byte < 0xF8 then
        length = 4
      elseif byte < 0xFC then
        length = 5
      elseif byte < 0xFE then
        length = 6
      else
        -- keyboard does not generate invalid utf-8
        error("invalid UTF-8 character sequence")
      end

      remaining = length
      -- Keep only the payload bits of the lead byte
      current = bit32.band(byte, 2^(8 - length) - 1)
    end

    remaining = remaining - 1
  end

  -- Store the last code point (absent for an empty string), then the terminator
  if current ~= nil then
    codepoints[#codepoints + 1] = current
  end
  codepoints[#codepoints + 1] = 0

  return codepoints
end
