21.02

2025-12-08 02:07:06 -06:00 · 2021-07-22 23:00:14 +01:00
parent 4a960640a3
commit 585698650f
619 changed files with 34904 additions and 10859 deletions
--- a/CPP/Common/CRC.cpp
+++ b/CPP/Common/CRC.cpp
@@ -4,4 +4,4 @@

 #include "../../C/7zCrc.h"

-struct CCRCTableInit { CCRCTableInit() { CrcGenerateTable(); } } g_CRCTableInit;
+static struct CCRCTableInit { CCRCTableInit() { CrcGenerateTable(); } } g_CRCTableInit;
--- a/CPP/Common/C_FileIO.cpp
+++ b/CPP/Common/C_FileIO.cpp
@@ -1,92 +1,3 @@
 // Common/C_FileIO.cpp

-#include "C_FileIO.h"
-
-#include <fcntl.h>
-#ifdef _WIN32
-#include <io.h>
-#else
-#include <unistd.h>
-#endif
-
-namespace NC {
-namespace NFile {
-namespace NIO {
-
-bool CFileBase::OpenBinary(const char *name, int flags)
-{
-  #ifdef O_BINARY
-  flags |= O_BINARY;
-  #endif
-  Close();
-  _handle = ::open(name, flags, 0666);
-  return _handle != -1;
-}
-
-bool CFileBase::Close()
-{
-  if (_handle == -1)
-    return true;
-  if (close(_handle) != 0)
-    return false;
-  _handle = -1;
-  return true;
-}
-
-bool CFileBase::GetLength(UInt64 &length) const
-{
-  off_t curPos = Seek(0, SEEK_CUR);
-  off_t lengthTemp = Seek(0, SEEK_END);
-  Seek(curPos, SEEK_SET);
-  length = (UInt64)lengthTemp;
-  return true;
-}
-
-off_t CFileBase::Seek(off_t distanceToMove, int moveMethod) const
-{
-  return ::lseek(_handle, distanceToMove, moveMethod);
-}
-
-/////////////////////////
-// CInFile
-
-bool CInFile::Open(const char *name)
-{
-  return CFileBase::OpenBinary(name, O_RDONLY);
-}
-
-bool CInFile::OpenShared(const char *name, bool)
-{
-  return Open(name);
-}
-
-ssize_t CInFile::Read(void *data, size_t size)
-{
-  return read(_handle, data, size);
-}
-
-/////////////////////////
-// COutFile
-
-bool COutFile::Create(const char *name, bool createAlways)
-{
-  if (createAlways)
-  {
-    Close();
-    _handle = ::creat(name, 0666);
-    return _handle != -1;
-  }
-  return OpenBinary(name, O_CREAT | O_EXCL | O_WRONLY);
-}
-
-bool COutFile::Open(const char *name, DWORD creationDisposition)
-{
-  return Create(name, false);
-}
-
-ssize_t COutFile::Write(const void *data, size_t size)
-{
-  return write(_handle, data, size);
-}
-
-}}}
+#include "StdAfx.h"
--- a/CPP/Common/C_FileIO.h
+++ b/CPP/Common/C_FileIO.h
@@ -3,51 +3,4 @@
 #ifndef __COMMON_C_FILEIO_H
 #define __COMMON_C_FILEIO_H

-#include <stdio.h>
-#include <sys/types.h>
-
-#include "MyTypes.h"
-#include "MyWindows.h"
-
-#ifdef _WIN32
-#ifdef _MSC_VER
-typedef size_t ssize_t;
-#endif
-#endif
-
-namespace NC {
-namespace NFile {
-namespace NIO {
-
-class CFileBase
-{
-protected:
-  int _handle;
-  bool OpenBinary(const char *name, int flags);
-public:
-  CFileBase(): _handle(-1) {};
-  ~CFileBase() { Close(); }
-  bool Close();
-  bool GetLength(UInt64 &length) const;
-  off_t Seek(off_t distanceToMove, int moveMethod) const;
-};
-
-class CInFile: public CFileBase
-{
-public:
-  bool Open(const char *name);
-  bool OpenShared(const char *name, bool shareForWrite);
-  ssize_t Read(void *data, size_t size);
-};
-
-class COutFile: public CFileBase
-{
-public:
-  bool Create(const char *name, bool createAlways);
-  bool Open(const char *name, DWORD creationDisposition);
-  ssize_t Write(const void *data, size_t size);
-};
-
-}}}
-
 #endif
--- a/CPP/Common/CommandLineParser.cpp
+++ b/CPP/Common/CommandLineParser.cpp
@@ -84,7 +84,7 @@ bool CParser::ParseString(const UString &s, const CSwitchForm *switchForms, unsi
    if (IsString1PrefixedByString2_NoCase_Ascii((const wchar_t *)s + pos, key))
    {
      switchIndex = i;
-      maxLen = switchLen;
+      maxLen = (int)switchLen;
    }
  }

@@ -94,7 +94,7 @@ bool CParser::ParseString(const UString &s, const CSwitchForm *switchForms, unsi
    return false;
  }

-  pos += maxLen;
+  pos += (unsigned)maxLen;
  
  CSwitchResult &sw = _switches[switchIndex];
  const CSwitchForm &form = switchForms[switchIndex];
@@ -107,7 +107,7 @@ bool CParser::ParseString(const UString &s, const CSwitchForm *switchForms, unsi

  sw.ThereIs = true;

-  int rem = s.Len() - pos;
+  const unsigned rem = s.Len() - pos;
  if (rem < form.MinLen)
  {
    ErrorMessage = "Too short switch:";
@@ -178,7 +178,7 @@ bool CParser::ParseStrings(const CSwitchForm *switchForms, unsigned numSwitches,
    {
      if (s.IsEqualTo(kStopSwitchParsing))
      {
-        StopSwitchIndex = NonSwitchStrings.Size();
+        StopSwitchIndex = (int)NonSwitchStrings.Size();
        continue;
      }
      if (!s.IsEmpty() && IsItSwitchChar(s[0]))
--- a/CPP/Common/CommandLineParser.h
+++ b/CPP/Common/CommandLineParser.h
@@ -38,7 +38,7 @@ struct CSwitchResult
  int PostCharIndex;
  UStringVector PostStrings;
  
-  CSwitchResult(): ThereIs(false) {};
+  CSwitchResult(): ThereIs(false) {}
 };
  
 class CParser
--- a/CPP/Common/Common.h
+++ b/CPP/Common/Common.h
@@ -40,4 +40,18 @@ you can change this h file or h files included in this file.
  #define MY_ARRAY_NEW(p, T, size) p = new T[size];
 #endif

+#if (defined(__GNUC__) && (__GNUC__ >= 8)) // MY_ATTR_NORETURN: "function never returns" marker, spelled per compiler
+  #define MY_ATTR_NORETURN __attribute__((noreturn))
+#elif (defined(__clang__) && (__clang_major__ >= 3))
+  #if __has_feature(cxx_attributes) // clang: use the [[...]] attribute syntax when the feature test reports it
+    #define MY_ATTR_NORETURN [[noreturn]]
+  #else
+    #define MY_ATTR_NORETURN __attribute__ ((noreturn))
+  #endif
+#elif (defined(_MSC_VER) && (_MSC_VER >= 1900))
+  #define MY_ATTR_NORETURN [[noreturn]]
+#else
+  #define MY_ATTR_NORETURN // any other compiler: the marker expands to nothing
+#endif
+
 #endif
--- a/CPP/Common/CrcReg.cpp
+++ b/CPP/Common/CrcReg.cpp
@@ -16,8 +16,10 @@ typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size,
 UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table);

 extern CRC_FUNC g_CrcUpdate;
-extern CRC_FUNC g_CrcUpdateT8;
 extern CRC_FUNC g_CrcUpdateT4;
+extern CRC_FUNC g_CrcUpdateT8;
+extern CRC_FUNC g_CrcUpdateT0_32;
+extern CRC_FUNC g_CrcUpdateT0_64;

 EXTERN_C_END

@@ -41,25 +43,20 @@ public:

 bool CCrcHasher::SetFunctions(UInt32 tSize)
 {
-  _updateFunc = g_CrcUpdate;
+  CRC_FUNC f = NULL;
+       if (tSize ==  0) f = g_CrcUpdate;
+  else if (tSize ==  1) f = CrcUpdateT1;
+  else if (tSize ==  4) f = g_CrcUpdateT4;
+  else if (tSize ==  8) f = g_CrcUpdateT8;
+  else if (tSize == 32) f = g_CrcUpdateT0_32;
+  else if (tSize == 64) f = g_CrcUpdateT0_64;
  
-  if (tSize == 1)
-    _updateFunc = CrcUpdateT1;
-  else if (tSize == 4)
+  if (!f)
  {
-    if (g_CrcUpdateT4)
-      _updateFunc = g_CrcUpdateT4;
-    else
-      return false;
+    _updateFunc = g_CrcUpdate;
+    return false;
  }
-  else if (tSize == 8)
-  {
-    if (g_CrcUpdateT8)
-      _updateFunc = g_CrcUpdateT8;
-    else
-      return false;
-  }
-  
+  _updateFunc = f;
  return true;
 }

--- a/CPP/Common/Defs.h
+++ b/CPP/Common/Defs.h
@@ -10,6 +10,7 @@ template <class T> inline int MyCompare(T a, T b)
  { return a == b ? 0 : (a < b ? -1 : 1); }

 inline int BoolToInt(bool v) { return (v ? 1 : 0); }
+inline unsigned BoolToUInt(bool v) { return (v ? (unsigned)1 : (unsigned)0); } // unsigned counterpart of BoolToInt: true -> 1, false -> 0
 inline bool IntToBool(int v) { return (v != 0); }

 #endif
--- a/CPP/Common/DynLimBuf.cpp
+++ b/CPP/Common/DynLimBuf.cpp
@@ -51,7 +51,7 @@ CDynLimBuf & CDynLimBuf::operator+=(char c) throw()
    _chars = newBuf;
    _size = n;
  }
-  _chars[_pos++] = c;
+  _chars[_pos++] = (Byte)c;
  return *this;
 }

--- a/CPP/Common/DynamicBuffer.h
+++ b/CPP/Common/DynamicBuffer.h
@@ -54,7 +54,7 @@ public:
    memcpy(GetCurPtrAndGrow(size), data, size * sizeof(T));
  }

-  const size_t GetPos() const { return _pos; }
+  size_t GetPos() const { return _pos; }

  // void Empty() { _pos = 0; }
 };
--- a/CPP/Common/IntToString.cpp
+++ b/CPP/Common/IntToString.cpp
@@ -10,20 +10,20 @@
  unsigned char temp[tempSize]; unsigned i = 0; \
  while (val >= 10) { temp[i++] = (unsigned char)('0' + (unsigned)(val % 10)); val /= 10; } \
  *s++ = (charType)('0' + (unsigned)val); \
-  while (i != 0) { i--; *s++ = temp[i]; } \
-  *s = 0;
+  while (i != 0) { i--; *s++ = (charType)temp[i]; } \
+  *s = 0; \
+  return s;

-void ConvertUInt32ToString(UInt32 val, char *s) throw()
+char * ConvertUInt32ToString(UInt32 val, char *s) throw()
 {
  CONVERT_INT_TO_STR(char, 16);
 }

-void ConvertUInt64ToString(UInt64 val, char *s) throw()
+char * ConvertUInt64ToString(UInt64 val, char *s) throw()
 {
  if (val <= (UInt32)0xFFFFFFFF)
  {
-    ConvertUInt32ToString((UInt32)val, s);
-    return;
+    return ConvertUInt32ToString((UInt32)val, s);
  }
  CONVERT_INT_TO_STR(char, 24);
 }
@@ -119,17 +119,16 @@ void ConvertUInt32ToHex8Digits(UInt32 val, wchar_t *s)
 }
 */

-void ConvertUInt32ToString(UInt32 val, wchar_t *s) throw()
+wchar_t * ConvertUInt32ToString(UInt32 val, wchar_t *s) throw()
 {
  CONVERT_INT_TO_STR(wchar_t, 16);
 }

-void ConvertUInt64ToString(UInt64 val, wchar_t *s) throw()
+wchar_t * ConvertUInt64ToString(UInt64 val, wchar_t *s) throw()
 {
  if (val <= (UInt32)0xFFFFFFFF)
  {
-    ConvertUInt32ToString((UInt32)val, s);
-    return;
+    return ConvertUInt32ToString((UInt32)val, s);
  }
  CONVERT_INT_TO_STR(wchar_t, 24);
 }
@@ -141,7 +140,7 @@ void ConvertInt64ToString(Int64 val, char *s) throw()
    *s++ = '-';
    val = -val;
  }
-  ConvertUInt64ToString(val, s);
+  ConvertUInt64ToString((UInt64)val, s);
 }

 void ConvertInt64ToString(Int64 val, wchar_t *s) throw()
@@ -151,7 +150,7 @@ void ConvertInt64ToString(Int64 val, wchar_t *s) throw()
    *s++ = L'-';
    val = -val;
  }
-  ConvertUInt64ToString(val, s);
+  ConvertUInt64ToString((UInt64)val, s);
 }


--- a/CPP/Common/IntToString.h
+++ b/CPP/Common/IntToString.h
@@ -5,11 +5,13 @@

 #include "MyTypes.h"

-void ConvertUInt32ToString(UInt32 value, char *s) throw();
-void ConvertUInt64ToString(UInt64 value, char *s) throw();
+// return: the pointer to the "terminating" null character after written characters

-void ConvertUInt32ToString(UInt32 value, wchar_t *s) throw();
-void ConvertUInt64ToString(UInt64 value, wchar_t *s) throw();
+char * ConvertUInt32ToString(UInt32 value, char *s) throw();
+char * ConvertUInt64ToString(UInt64 value, char *s) throw();
+
+wchar_t * ConvertUInt32ToString(UInt32 value, wchar_t *s) throw();
+wchar_t * ConvertUInt64ToString(UInt64 value, wchar_t *s) throw();

 void ConvertUInt64ToOct(UInt64 value, char *s) throw();

--- a/CPP/Common/Lang.cpp
+++ b/CPP/Common/Lang.cpp
@@ -31,7 +31,7 @@ bool CLang::OpenFromString(const AString &s2)

  for (const char *p = kLangSignature;; i++)
  {
-    Byte c = *p++;
+    Byte c = (Byte)(*p++);
    if (c == 0)
      break;
    if (s[i] != c)
@@ -122,10 +122,10 @@ bool CLang::Open(CFSTR fileName, const char *id)
    return false;
  
  AString s;
-  unsigned len = (unsigned)length;
+  const unsigned len = (unsigned)length;
  char *p = s.GetBuf(len);
-  UInt32 processed;
-  if (!file.Read(p, len, processed))
+  size_t processed;
+  if (!file.ReadFull(p, len, processed))
    return false;
  file.Close();
  if (len != processed)
@@ -159,5 +159,5 @@ const wchar_t *CLang::Get(UInt32 id) const throw()
  int index = _ids.FindInSorted(id);
  if (index < 0)
    return NULL;
-  return _text + (size_t)_offsets[index];
+  return _text + (size_t)_offsets[(unsigned)index];
 }
--- a/CPP/Common/ListFileUtils.cpp
+++ b/CPP/Common/ListFileUtils.cpp
@@ -4,14 +4,19 @@

 #include "../../C/CpuArch.h"

-#include "../Windows/FileIO.h"
-
 #include "ListFileUtils.h"
 #include "MyBuffer.h"
 #include "StringConvert.h"
 #include "UTFConvert.h"

-static const char kQuoteChar = '\"';
+#include "../Windows/FileIO.h"
+
+#define CSysInFile NWindows::NFile::NIO::CInFile // input-file class used for reading list files in this file
+#define MY_GET_LAST_ERROR ::GetLastError() // expression that fetches the error code stored in lastError on failure
+
+
+#define kQuoteChar '\"'
+

 static void AddName(UStringVector &strings, UString &s)
 {
@@ -25,19 +30,37 @@ static void AddName(UStringVector &strings, UString &s)
    strings.Add(s);
 }

+
+static bool My_File_Read(CSysInFile &file, void *data, size_t size, DWORD &lastError) // reads exactly `size` bytes into `data`; on failure sets lastError and returns false
+{
+  size_t processed; // filled in by ReadFull() with the number of bytes actually read
+  if (!file.ReadFull(data, size, processed))
+  {
+    lastError = MY_GET_LAST_ERROR; // the read call itself failed: report the system error code
+    return false;
+  }
+  if (processed != size) // short read: fewer bytes were delivered than requested
+  {
+    lastError = 1; // error: size of listfile was changed
+    return false;
+  }
+  return true; // success: lastError is left untouched
+}
+
+
 bool ReadNamesFromListFile2(CFSTR fileName, UStringVector &strings, UINT codePage, DWORD &lastError)
 {
  lastError = 0;
-  NWindows::NFile::NIO::CInFile file;
+  CSysInFile file;
  if (!file.Open(fileName))
  {
-    lastError = ::GetLastError();
+    lastError = MY_GET_LAST_ERROR;
    return false;
  }
  UInt64 fileSize;
  if (!file.GetLength(fileSize))
  {
-    lastError = ::GetLastError();
+    lastError = MY_GET_LAST_ERROR;
    return false;
  }
  if (fileSize >= ((UInt32)1 << 31) - 32)
@@ -48,16 +71,12 @@ bool ReadNamesFromListFile2(CFSTR fileName, UStringVector &strings, UINT codePag
    if ((fileSize & 1) != 0)
      return false;
    CByteArr buf((size_t)fileSize);
-    UInt32 processed;
-    if (!file.Read(buf, (UInt32)fileSize, processed))
-    {
-      lastError = ::GetLastError();
-      return false;
-    }
-    if (processed != fileSize)
+
+    if (!My_File_Read(file, buf, (size_t)fileSize, lastError))
      return false;
+
    file.Close();
-    unsigned num = (unsigned)fileSize / 2;
+    const unsigned num = (unsigned)fileSize / 2;
    wchar_t *p = u.GetBuf(num);
    if (codePage == MY__CP_UTF16)
      for (unsigned i = 0; i < num; i++)
@@ -82,22 +101,21 @@ bool ReadNamesFromListFile2(CFSTR fileName, UStringVector &strings, UINT codePag
  {
    AString s;
    char *p = s.GetBuf((unsigned)fileSize);
-    UInt32 processed;
-    if (!file.Read(p, (UInt32)fileSize, processed))
-    {
-      lastError = ::GetLastError();
-      return false;
-    }
-    if (processed != fileSize)
+
+    if (!My_File_Read(file, p, (size_t)fileSize, lastError))
      return false;
+
    file.Close();
-    s.ReleaseBuf_CalcLen((unsigned)processed);
-    if (s.Len() != processed)
+    s.ReleaseBuf_CalcLen((unsigned)fileSize);
+    if (s.Len() != fileSize)
      return false;
    
    // #ifdef CP_UTF8
    if (codePage == CP_UTF8)
    {
+      // we must check UTF8 here, if convert function doesn't check
+      if (!CheckUTF8_AString(s))
+        return false;
      if (!ConvertUTF8ToUnicode(s, u))
        return false;
    }
--- a/CPP/Common/MyBuffer.h
+++ b/CPP/Common/MyBuffer.h
@@ -4,6 +4,7 @@
 #define __COMMON_MY_BUFFER_H

 #include "Defs.h"
+#include "MyTypes.h"

 /* 7-Zip now uses CBuffer only as CByteBuffer.
   So there is no need to use MY_ARRAY_NEW macro in CBuffer code. */
@@ -91,6 +92,12 @@ public:
    _size = newSize;
  }

+  void Wipe() // overwrites all _size items with zero bytes; _size and the allocation are left unchanged
+  {
+    void *(*volatile wipeFunc)(void *, int, size_t) = memset; // call through a volatile pointer: the optimizer cannot elide the zeroing as a dead store before the buffer is released
+    if (_size != 0) wipeFunc(_items, 0, _size * sizeof(T));
+  }
+
  CBuffer& operator=(const CBuffer &buffer)
  {
    if (&buffer != this)
@@ -127,6 +134,17 @@ bool operator!=(const CBuffer<T>& b1, const CBuffer<T>& b2)
 typedef CBuffer<unsigned char> CByteBuffer;


+class CByteBuffer_Wipe: public CByteBuffer // CByteBuffer variant whose destructor zero-fills the contents via Wipe()
+{
+  CLASS_NO_COPY(CByteBuffer_Wipe) // placed in the private section: objects of this class are not meant to be copied
+public:
+  // CByteBuffer_Wipe(): CBuffer<unsigned char>() {}
+  CByteBuffer_Wipe(size_t size): CBuffer<unsigned char>(size) {} // forwards `size` to the CBuffer constructor
+  ~CByteBuffer_Wipe() { Wipe(); } // runs before the base-class destructor
+};
+
+
+
 template <class T> class CObjArray
 {
 protected:
--- a/CPP/Common/MyBuffer2.h
+++ b/CPP/Common/MyBuffer2.h
@@ -57,6 +57,15 @@ public:
    ISzAlloc_Free(&g_AlignedAlloc, _data);
  }

+  CAlignedBuffer(size_t size): _size(0) // allocates `size` bytes from g_AlignedAlloc; throws (int)1 if the allocator returns NULL
+  {
+    _data = NULL; // NOTE(review): overwritten by the next line, so this store looks redundant
+    _data = (Byte *)ISzAlloc_Alloc(&g_AlignedAlloc, size);
+    if (!_data)
+      throw 1;
+    _size = size; // recorded only after the allocation succeeded
+  }
+
  void Free()
  {
    ISzAlloc_Free(&g_AlignedAlloc, _data);
--- a/CPP/Common/MyCom.h
+++ b/CPP/Common/MyCom.h
@@ -4,6 +4,7 @@
 #define __MY_COM_H

 #include "MyWindows.h"
+#include "MyTypes.h"

 #ifndef RINOK
 #define RINOK(x) { HRESULT __result_ = (x); if (__result_ != S_OK) return __result_; }
@@ -81,7 +82,7 @@ inline HRESULT StringToBstr(LPCOLESTR src, BSTR *bstr)
 class CMyComBSTR
 {
  BSTR m_str;
-
+  CLASS_NO_COPY(CMyComBSTR)
 public:
  CMyComBSTR(): m_str(NULL) {}
  ~CMyComBSTR() { ::SysFreeString(m_str); }
@@ -89,13 +90,23 @@ public:
  operator LPCOLESTR() const { return m_str; }
  // operator bool() const { return m_str != NULL; }
  // bool operator!() const { return m_str == NULL; }
+
+  void Wipe_and_Free() // zero-fills the SysStringLen(m_str) characters, then calls Empty(); does nothing when m_str is NULL
+  {
+    void *(*volatile wipeFunc)(void *, int, size_t) = memset; // volatile call target keeps the zeroing from being optimized out as a dead store
+    if (!m_str)
+      return;
+    wipeFunc(m_str, 0, ::SysStringLen(m_str) * sizeof(*m_str));
+    Empty();
+  }
+
 private:
  // operator BSTR() const { return m_str; }

  CMyComBSTR(LPCOLESTR src) { m_str = ::SysAllocString(src); }
  // CMyComBSTR(int nSize) { m_str = ::SysAllocStringLen(NULL, nSize); }
  // CMyComBSTR(int nSize, LPCOLESTR sz) { m_str = ::SysAllocStringLen(sz, nSize);  }
-  CMyComBSTR(const CMyComBSTR& src) { m_str = src.MyCopy(); }
+  // CMyComBSTR(const CMyComBSTR& src) { m_str = src.MyCopy(); }
  
  /*
  CMyComBSTR(REFGUID src)
@@ -107,6 +118,7 @@ private:
  }
  */
  
+  /*
  CMyComBSTR& operator=(const CMyComBSTR& src)
  {
    if (m_str != src.m_str)
@@ -117,6 +129,7 @@ private:
    }
    return *this;
  }
+  */
  
  CMyComBSTR& operator=(LPCOLESTR src)
  {
@@ -158,6 +171,15 @@ private:
 };


+class CMyComBSTR_Wipe: public CMyComBSTR // CMyComBSTR variant whose destructor calls Wipe_and_Free()
+{
+  CLASS_NO_COPY(CMyComBSTR_Wipe) // placed in the private section: objects of this class are not meant to be copied
+public:
+  CMyComBSTR_Wipe(): CMyComBSTR() {}
+  ~CMyComBSTR_Wipe() { Wipe_and_Free(); } // runs before the base-class destructor
+};
+
+

 /*
  If CMyUnknownImp doesn't use virtual destructor, the code size is smaller.
@@ -168,17 +190,24 @@ private:
      virtual ~class_1();
    In that case, class_1::Release() calls correct destructor of class_2.

-  Also you can use virtual ~CMyUnknownImp(), if you want to disable warning
+  We use virtual ~CMyUnknownImp() to disable warning
    "class has virtual functions, but destructor is not virtual".
+
+  also we can use virtual ~IUnknown() {} in MyWindows.h
 */

 class CMyUnknownImp
 {
+  CLASS_NO_COPY(CMyUnknownImp)
 public:
  ULONG __m_RefCount;
  CMyUnknownImp(): __m_RefCount(0) {}

-  // virtual
+  #ifdef _WIN32
+  #if defined(__GNUC__) || defined(__clang__)
+  virtual // to disable GCC/CLANG warnings
+  #endif
+  #endif
  ~CMyUnknownImp() {}
 };

--- a/CPP/Common/MyGuidDef.h
+++ b/CPP/Common/MyGuidDef.h
@@ -18,6 +18,9 @@ typedef struct {
 #define REFGUID const GUID *
 #endif

+// typedef GUID IID;
+typedef GUID CLSID;
+
 #define REFCLSID REFGUID
 #define REFIID REFGUID

--- a/CPP/Common/MyInitGuid.h
+++ b/CPP/Common/MyInitGuid.h
@@ -19,13 +19,17 @@ Also we need IID_IUnknown that is initialized in some file for linking:
  Other: we define IID_IUnknown in this file
 */

+#ifdef __clang__
+  #pragma clang diagnostic ignored "-Wmissing-variable-declarations"
+#endif
+
 #ifdef _WIN32

 #ifdef UNDER_CE
 #include <basetyps.h>
 #endif

-#include <initguid.h>
+#include <InitGuid.h>

 #ifdef UNDER_CE
 DEFINE_GUID(IID_IUnknown,
--- a/CPP/Common/MyString.cpp
+++ b/CPP/Common/MyString.cpp
@@ -237,11 +237,25 @@ bool UString::IsPrefixedBy_Ascii_NoCase(const char *s) const throw()
  }
 }

+bool StringsAreEqual_Ascii(const char *u, const char *a) throw() // true when both null-terminated strings are identical, char by char
+{
+  for (;;)
+  {
+    char c = *a;
+    if (c != *u) // first mismatch (including one string ending earlier) decides
+      return false;
+    if (c == 0) // both strings ended at the same position
+      return true;
+    a++;
+    u++;
+  }
+}
+
 bool StringsAreEqual_Ascii(const wchar_t *u, const char *a) throw()
 {
  for (;;)
  {
-    unsigned char c = *a;
+    unsigned char c = (unsigned char)*a;
    if (c != *u)
      return false;
    if (c == 0)
@@ -632,9 +646,8 @@ AString &AString::operator+=(const AString &s)

 void AString::Add_UInt32(UInt32 v)
 {
-  char sz[16];
-  ConvertUInt32ToString(v, sz);
-  (*this) += sz;
+  Grow(10);
+  _len = (unsigned)(ConvertUInt32ToString(v, _chars + _len) - _chars);
 }

 void AString::SetFrom(const char *s, unsigned len) // no check
@@ -835,7 +848,7 @@ void AString::Replace(char oldChar, char newChar) throw()
  char *chars = _chars;
  while ((unsigned)pos < _len)
  {
-    pos = Find(oldChar, pos);
+    pos = Find(oldChar, (unsigned)pos);
    if (pos < 0)
      break;
    chars[(unsigned)pos] = newChar;
@@ -857,11 +870,11 @@ void AString::Replace(const AString &oldString, const AString &newString)
  int pos = 0;
  while ((unsigned)pos < _len)
  {
-    pos = Find(oldString, pos);
+    pos = Find(oldString, (unsigned)pos);
    if (pos < 0)
      break;
-    Delete(pos, oldLen);
-    Insert(pos, newString);
+    Delete((unsigned)pos, oldLen);
+    Insert((unsigned)pos, newString);
    pos += newLen;
    // number++;
  }
@@ -1150,9 +1163,31 @@ void UString::SetFrom(const wchar_t *s, unsigned len) // no check
  _len = len;
 }

-void UString::SetFromBstr(BSTR s)
+void UString::SetFromBstr(LPCOLESTR s)
 {
-  unsigned len = ::SysStringLen(s);
+  unsigned len = ::SysStringLen((BSTR)(void *)(s));
+
+  /*
+  #if WCHAR_MAX > 0xffff
+  size_t num_wchars = 0;
+  for (size_t i = 0; i < len;)
+  {
+    wchar_t c = s[i++];
+    if (c >= 0xd800 && c < 0xdc00 && i + 1 != len)
+    {
+      wchar_t c2 = s[i];
+      if (c2 >= 0xdc00 && c2 < 0x10000)
+      {
+        c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
+        i++;
+      }
+    }
+    num_wchars++;
+  }
+  len = num_wchars;
+  #endif
+  */
+
  if (len > _limit)
  {
    wchar_t *newBuf = MY_STRING_NEW_wchar_t(len + 1);
@@ -1161,8 +1196,33 @@ void UString::SetFromBstr(BSTR s)
    _limit = len;
  }
  _len = len;
+
+  /*
+  #if WCHAR_MAX > 0xffff
+
+  wchar_t *chars = _chars;
+  for (size_t i = 0; i <= len; i++)
+  {
+    wchar_t c = *s++;
+    if (c >= 0xd800 && c < 0xdc00 && i + 1 != len)
+    {
+      wchar_t c2 = *s;
+      if (c2 >= 0xdc00 && c2 < 0x10000)
+      {
+        s++;
+        c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
+      }
+    }
+    chars[i] = c;
+  }
+
+  #else
+  */
+
  // if (s)
    wmemcpy(_chars, s, len + 1);
+  
+  // #endif
 }

 UString &UString::operator=(const char *s)
@@ -1229,9 +1289,8 @@ UString &UString::operator+=(const char *s)

 void UString::Add_UInt32(UInt32 v)
 {
-  char sz[16];
-  ConvertUInt32ToString(v, sz);
-  (*this) += sz;
+  Grow(10);
+  _len = (unsigned)(ConvertUInt32ToString(v, _chars + _len) - _chars);
 }


@@ -1341,7 +1400,7 @@ void UString::InsertAtFront(wchar_t c)
 }

 /*
-void UString::Insert(unsigned index, wchar_t c)
+void UString::Insert_wchar_t(unsigned index, wchar_t c)
 {
  InsertSpace(index, 1);
  _chars[index] = c;
@@ -1409,7 +1468,7 @@ void UString::Replace(wchar_t oldChar, wchar_t newChar) throw()
  wchar_t *chars = _chars;
  while ((unsigned)pos < _len)
  {
-    pos = Find(oldChar, pos);
+    pos = Find(oldChar, (unsigned)pos);
    if (pos < 0)
      break;
    chars[(unsigned)pos] = newChar;
@@ -1431,11 +1490,11 @@ void UString::Replace(const UString &oldString, const UString &newString)
  int pos = 0;
  while ((unsigned)pos < _len)
  {
-    pos = Find(oldString, pos);
+    pos = Find(oldString, (unsigned)pos);
    if (pos < 0)
      break;
-    Delete(pos, oldLen);
-    Insert(pos, newString);
+    Delete((unsigned)pos, oldLen);
+    Insert((unsigned)pos, newString);
    pos += newLen;
    // number++;
  }
@@ -1609,6 +1668,8 @@ int MyStringCompareNoCase(const char *s1, const char *s2)
 }
 */

+#if !defined(USE_UNICODE_FSTRING) || !defined(_UNICODE)
+
 static inline UINT GetCurrentCodePage()
 {
  #if defined(UNDER_CE) || !defined(_WIN32)
@@ -1618,6 +1679,8 @@ static inline UINT GetCurrentCodePage()
  #endif
 }

+#endif
+
 #ifdef USE_UNICODE_FSTRING

 #ifndef _UNICODE
@@ -1637,9 +1700,9 @@ FString fas2fs(const AString &s)
  return MultiByteToUnicodeString(s, GetCurrentCodePage());
 }

-#endif
+#endif //  _UNICODE

-#else
+#else // USE_UNICODE_FSTRING

 UString fs2us(const FChar *s)
 {
@@ -1656,4 +1719,4 @@ FString us2fs(const wchar_t *s)
  return UnicodeStringToMultiByte(s, GetCurrentCodePage());
 }

-#endif
+#endif // USE_UNICODE_FSTRING
--- a/CPP/Common/MyString.h
+++ b/CPP/Common/MyString.h
@@ -159,7 +159,7 @@ inline wchar_t MyCharUpper(wchar_t c) throw()
      return (wchar_t)MyCharUpper_WIN(c);
    #endif
  #else
-    return (wchar_t)towupper(c);
+    return (wchar_t)towupper((wint_t)c);
  #endif
 }

@@ -207,6 +207,7 @@ int MyStringCompareNoCase(const wchar_t *s1, const wchar_t *s2) throw();

 // ---------- ASCII ----------
 // char values in ASCII strings must be less then 128
+bool StringsAreEqual_Ascii(const char *u, const char *a) throw();
 bool StringsAreEqual_Ascii(const wchar_t *u, const char *a) throw();
 bool StringsAreEqualNoCase_Ascii(const char *s1, const char *s2) throw();
 bool StringsAreEqualNoCase_Ascii(const wchar_t *s1, const char *s2) throw();
@@ -231,7 +232,7 @@ bool StringsAreEqualNoCase_Ascii(const wchar_t *s1, const wchar_t *s2) throw();
  cls &operator=(const t *); \
  cls &operator+=(t); \
  cls &operator+=(const t *); \
-  FORBID_STRING_OPS_2(cls, t); \
+  FORBID_STRING_OPS_2(cls, t) \

 /*
  cls &operator+(t); \
@@ -266,7 +267,7 @@ class AString
  AString(const AString &s, char c); // it's for String + char
  AString(const char *s1, unsigned num1, const char *s2, unsigned num2);

-  friend AString operator+(const AString &s, char c) { return AString(s, c); } ;
+  friend AString operator+(const AString &s, char c) { return AString(s, c); }
  // friend AString operator+(char c, const AString &s); // is not supported

  friend AString operator+(const AString &s1, const AString &s2);
@@ -300,6 +301,7 @@ public:
  void Empty() { _len = 0; _chars[0] = 0; }

  operator const char *() const { return _chars; }
+  char *Ptr_non_const() const { return _chars; }
  const char *Ptr() const { return _chars; }
  const char *Ptr(unsigned pos) const { return _chars + pos; }
  const char *RightPtr(unsigned num) const { return _chars + _len - num; }
@@ -438,8 +440,30 @@ public:
      _chars[index] = 0;
    }
  }
+  
+  void Wipe_and_Empty() // zero-fills the whole allocation (_limit + 1 chars) and sets the length to 0; does nothing when _chars is NULL
+  {
+    void *(*volatile wipeFunc)(void *, int, size_t) = memset; // volatile call target keeps the zeroing from being optimized out as a dead store
+    if (!_chars)
+      return;
+    wipeFunc(_chars, 0, (_limit + 1) * sizeof(*_chars));
+    _len = 0;
+  }
 };

+
+class AString_Wipe: public AString // AString variant whose destructor calls Wipe_and_Empty()
+{
+  CLASS_NO_COPY(AString_Wipe) // placed in the private section: objects of this class are not meant to be copied
+public:
+  AString_Wipe(): AString() {} // only default construction is offered; the alternatives below are disabled
+  // AString_Wipe(const AString &s): AString(s) {}
+  // AString_Wipe &operator=(const AString &s) { AString::operator=(s); return *this; }
+  // AString_Wipe &operator=(const char *s) { AString::operator=(s); return *this; }
+  ~AString_Wipe() { Wipe_and_Empty(); } // runs before the base-class destructor
+};
+
+
 bool operator<(const AString &s1, const AString &s2);
 bool operator>(const AString &s1, const AString &s2);

@@ -500,7 +524,7 @@ class UString
  UString(const UString &s, wchar_t c); // it's for String + char
  UString(const wchar_t *s1, unsigned num1, const wchar_t *s2, unsigned num2);

-  friend UString operator+(const UString &s, wchar_t c) { return UString(s, c); } ;
+  friend UString operator+(const UString &s, wchar_t c) { return UString(s, c); }
  // friend UString operator+(wchar_t c, const UString &s); // is not supported

  friend UString operator+(const UString &s1, const UString &s2);
@@ -539,6 +563,7 @@ public:
  void Empty() { _len = 0; _chars[0] = 0; }

  operator const wchar_t *() const { return _chars; }
+  wchar_t *Ptr_non_const() const { return _chars; }
  const wchar_t *Ptr() const { return _chars; }
  const wchar_t *Ptr(unsigned pos) const { return _chars + pos; }
  const wchar_t *RightPtr(unsigned num) const { return _chars + _len - num; }
@@ -578,7 +603,7 @@ public:
  UString &operator=(const wchar_t *s);
  UString &operator=(const UString &s);
  void SetFrom(const wchar_t *s, unsigned len); // no check
-  void SetFromBstr(BSTR s);
+  void SetFromBstr(LPCOLESTR s);
  UString &operator=(const char *s);
  UString &operator=(const AString &s) { return operator=(s.Ptr()); }

@@ -659,7 +684,7 @@ public:
  }

  void InsertAtFront(wchar_t c);
-  // void Insert(unsigned index, wchar_t c);
+  // void Insert_wchar_t(unsigned index, wchar_t c);
  void Insert(unsigned index, const wchar_t *s);
  void Insert(unsigned index, const UString &s);

@@ -680,8 +705,30 @@ public:
      _chars[index] = 0;
    }
  }
+  
+  void Wipe_and_Empty() // zero-fills the whole allocation (_limit + 1 wchar_t items) and sets the length to 0; does nothing when _chars is NULL
+  {
+    void *(*volatile wipeFunc)(void *, int, size_t) = memset; // volatile call target keeps the zeroing from being optimized out as a dead store
+    if (!_chars)
+      return;
+    wipeFunc(_chars, 0, (_limit + 1) * sizeof(*_chars));
+    _len = 0;
+  }
 };

+
+class UString_Wipe: public UString // UString variant whose destructor calls Wipe_and_Empty()
+{
+  CLASS_NO_COPY(UString_Wipe) // placed in the private section: objects of this class are not meant to be copied
+public:
+  UString_Wipe(): UString() {} // only default construction is offered; the alternatives below are disabled
+  // UString_Wipe(const UString &s): UString(s) {}
+  // UString_Wipe &operator=(const UString &s) { UString::operator=(s); return *this; }
+  // UString_Wipe &operator=(const wchar_t *s) { UString::operator=(s); return *this; }
+  ~UString_Wipe() { Wipe_and_Empty(); } // runs before the base-class destructor
+};
+
+
 bool operator<(const UString &s1, const UString &s2);
 bool operator>(const UString &s1, const UString &s2);

@@ -866,3 +913,20 @@ typedef const FChar *CFSTR;
 typedef CObjectVector<FString> FStringVector;

 #endif
+
+
+
+#if defined(_WIN32) // _WCHART_IS_16BIT: defined as 1 when wchar_t is treated as a 16-bit type
+  // #include <wchar.h>
+  // WCHAR_MAX is defined as ((wchar_t)-1)
+  #define _WCHART_IS_16BIT 1 // _WIN32 builds: set unconditionally, without consulting WCHAR_MAX
+#elif (defined(WCHAR_MAX) && (WCHAR_MAX <= 0xffff)) \
+   || (defined(__SIZEOF_WCHAR_T__) && (__SIZEOF_WCHAR_T__ == 2))
+  #define _WCHART_IS_16BIT 1 // other builds: set only when WCHAR_MAX or __SIZEOF_WCHAR_T__ indicates 2 bytes
+#endif
+
+#if WCHAR_PATH_SEPARATOR == L'\\' // replacement char is defined only when the path separator is a backslash
+// WSL scheme
+#define WCHAR_IN_FILE_NAME_BACKSLASH_REPLACEMENT  ((wchar_t)((unsigned)(0xF000) + (unsigned)'\\')) // 0xF000 plus the backslash character code
+// #define WCHAR_IN_FILE_NAME_BACKSLASH_REPLACEMENT  '_'
+#endif
--- a/CPP/Common/MyTypes.h
+++ b/CPP/Common/MyTypes.h
@@ -32,4 +32,15 @@ struct CBoolPair
  cls(const cls &); \
  cls &operator=(const cls &);

+class CUncopyable // base class meant to be inherited (see MY_UNCOPYABLE) so that the derived class is not copyable
+{
+protected:
+  CUncopyable() {} // allow constructor
+  // ~CUncopyable() {}
+CLASS_NO_COPY(CUncopyable) // expands inside the protected section: copy constructor and copy assignment are declared here
+};
+
+#define MY_UNCOPYABLE  :private CUncopyable // base-clause text: presumably written right after a class name, as in "class X MY_UNCOPYABLE"
+// #define MY_UNCOPYABLE
+
 #endif
--- a/CPP/Common/MyVector.h
+++ b/CPP/Common/MyVector.h
@@ -35,7 +35,7 @@ class CRecordVector

 public:

-  CRecordVector(): _items(0), _size(0), _capacity(0) {}
+  CRecordVector(): _items(NULL), _size(0), _capacity(0) {}
  
  CRecordVector(const CRecordVector &v): _items(0), _size(0), _capacity(0)
  {
@@ -257,7 +257,7 @@ public:
      unsigned mid = (left + right) / 2;
      const T midVal = (*this)[mid];
      if (item == midVal)
-        return mid;
+        return (int)mid;
      if (item < midVal)
        right = mid;
      else
@@ -274,7 +274,7 @@ public:
      const T& midVal = (*this)[mid];
      int comp = item.Compare(midVal);
      if (comp == 0)
-        return mid;
+        return (int)mid;
      if (comp < 0)
        right = mid;
      else
@@ -428,7 +428,7 @@ public:
  // void Reserve(unsigned newCapacity) { _v.Reserve(newCapacity); }
  void ClearAndReserve(unsigned newCapacity) { Clear(); _v.ClearAndReserve(newCapacity); }

-  CObjectVector() {};
+  CObjectVector() {}
  CObjectVector(const CObjectVector &v)
  {
    unsigned size = v.Size();
@@ -568,7 +568,7 @@ public:
      const T& midVal = (*this)[mid];
      int comp = item.Compare(midVal);
      if (comp == 0)
-        return mid;
+        return (int)mid;
      if (comp < 0)
        right = mid;
      else
@@ -624,9 +624,9 @@ public:
    { _v.Sort(compare, param); }

  static int CompareObjectItems(void *const *a1, void *const *a2, void * /* param */)
-    { return (*(*((const T **)a1))).Compare(*(*((const T **)a2))); }
+    { return (*(*((const T *const *)a1))).Compare(*(*((const T *const *)a2))); }

-  void Sort() { _v.Sort(CompareObjectItems, 0); }
+  void Sort() { _v.Sort(CompareObjectItems, NULL); }
 };

 #define FOR_VECTOR(_i_, _v_) for (unsigned _i_ = 0; _i_ < (_v_).Size(); _i_++)
--- a/CPP/Common/MyWindows.cpp
+++ b/CPP/Common/MyWindows.cpp
@@ -5,6 +5,10 @@
 #ifndef _WIN32

 #include <stdlib.h>
+#include <time.h>
+#ifdef __GNUC__
+#include <sys/time.h>
+#endif

 #include "MyWindows.h"

@@ -38,11 +42,11 @@ BSTR SysAllocStringByteLen(LPCSTR s, UINT len)
  /* Original SysAllocStringByteLen in Win32 maybe fills only unaligned null OLECHAR at the end.
     We provide also aligned null OLECHAR at the end. */

-  if (len >= (k_BstrSize_Max - sizeof(OLECHAR) - sizeof(OLECHAR) - sizeof(CBstrSizeType)))
+  if (len >= (k_BstrSize_Max - (UINT)sizeof(OLECHAR) - (UINT)sizeof(OLECHAR) - (UINT)sizeof(CBstrSizeType)))
    return NULL;

-  UINT size = (len + sizeof(OLECHAR) + sizeof(OLECHAR) - 1) & ~(sizeof(OLECHAR) - 1);
-  void *p = AllocateForBSTR(size + sizeof(CBstrSizeType));
+  UINT size = (len + (UINT)sizeof(OLECHAR) + (UINT)sizeof(OLECHAR) - 1) & ~((UINT)sizeof(OLECHAR) - 1);
+  void *p = AllocateForBSTR(size + (UINT)sizeof(CBstrSizeType));
  if (!p)
    return NULL;
  *(CBstrSizeType *)p = (CBstrSizeType)len;
@@ -56,11 +60,11 @@ BSTR SysAllocStringByteLen(LPCSTR s, UINT len)

 BSTR SysAllocStringLen(const OLECHAR *s, UINT len)
 {
-  if (len >= (k_BstrSize_Max - sizeof(OLECHAR) - sizeof(CBstrSizeType)) / sizeof(OLECHAR))
+  if (len >= (k_BstrSize_Max - (UINT)sizeof(OLECHAR) - (UINT)sizeof(CBstrSizeType)) / (UINT)sizeof(OLECHAR))
    return NULL;

-  UINT size = len * sizeof(OLECHAR);
-  void *p = AllocateForBSTR(size + sizeof(CBstrSizeType) + sizeof(OLECHAR));
+  UINT size = len * (UINT)sizeof(OLECHAR);
+  void *p = AllocateForBSTR(size + (UINT)sizeof(CBstrSizeType) + (UINT)sizeof(OLECHAR));
  if (!p)
    return NULL;
  *(CBstrSizeType *)p = (CBstrSizeType)size;
@@ -98,7 +102,7 @@ UINT SysStringLen(BSTR bstr)
 {
  if (!bstr)
    return 0;
-  return *((CBstrSizeType *)bstr - 1) / sizeof(OLECHAR);
+  return *((CBstrSizeType *)bstr - 1) / (UINT)sizeof(OLECHAR);
 }


@@ -139,7 +143,150 @@ LONG CompareFileTime(const FILETIME* ft1, const FILETIME* ft2)

 DWORD GetLastError()
 {
-  return 0;
+  return (DWORD)errno;
+}
+
+void SetLastError(DWORD dw)
+{
+  errno = (int)dw;
+}
+
+
+static LONG TIME_GetBias()
+{
+  time_t utc = time(NULL);
+  struct tm *ptm = localtime(&utc);
+  int localdaylight = ptm->tm_isdst; /* daylight for local timezone */
+  ptm = gmtime(&utc);
+  ptm->tm_isdst = localdaylight; /* use local daylight, not that of Greenwich */
+  LONG bias = (int)(mktime(ptm)-utc);
+  return bias;
+}
+
+#define TICKS_PER_SEC 10000000
+/*
+#define SECS_PER_DAY (24 * 60 * 60)
+#define SECS_1601_TO_1970  ((369 * 365 + 89) * (UInt64)SECS_PER_DAY)
+#define TICKS_1601_TO_1970 (SECS_1601_TO_1970 * TICKS_PER_SEC)
+*/
+
+#define GET_TIME_64(pft) ((pft)->dwLowDateTime | ((UInt64)(pft)->dwHighDateTime << 32))
+
+#define SET_FILETIME(ft, v64) \
+   (ft)->dwLowDateTime = (DWORD)v64; \
+   (ft)->dwHighDateTime = (DWORD)(v64 >> 32);
+
+
+BOOL WINAPI FileTimeToLocalFileTime(const FILETIME *fileTime, FILETIME *localFileTime)
+{
+  UInt64 v = GET_TIME_64(fileTime);
+  v = (UInt64)((Int64)v - (Int64)TIME_GetBias() * TICKS_PER_SEC);
+  SET_FILETIME(localFileTime, v);
+  return TRUE;
+}
+
+BOOL WINAPI LocalFileTimeToFileTime(const FILETIME *localFileTime, FILETIME *fileTime)
+{
+  UInt64 v = GET_TIME_64(localFileTime);
+  v = (UInt64)((Int64)v + (Int64)TIME_GetBias() * TICKS_PER_SEC);
+  SET_FILETIME(fileTime, v);
+  return TRUE;
+}
+
+/*
+VOID WINAPI GetSystemTimeAsFileTime(FILETIME *ft)
+{
+  UInt64 t = 0;
+  timeval tv;
+  if (gettimeofday(&tv, NULL) == 0)
+  {
+    t = tv.tv_sec * (UInt64)TICKS_PER_SEC + TICKS_1601_TO_1970;
+    t += tv.tv_usec * 10;
+  }
+  SET_FILETIME(ft, t);
+}
+*/
+
+DWORD WINAPI GetTickCount(VOID)
+{
+  #ifndef _WIN32
+  // gettimeofday() doesn't work in some MINGW builds for an unknown reason
+  timeval tv;
+  if (gettimeofday(&tv, NULL) == 0)
+  {
+    // tv_sec and tv_usec are (long)
+    return (DWORD)((UInt64)(Int64)tv.tv_sec * (UInt64)1000 + (UInt64)(Int64)tv.tv_usec / 1000);
+  }
+  #endif
+  return (DWORD)time(NULL) * 1000;
+}
+
+
+#define PERIOD_4 (4 * 365 + 1)
+#define PERIOD_100 (PERIOD_4 * 25 - 1)
+#define PERIOD_400 (PERIOD_100 * 4 + 1)
+
+BOOL WINAPI FileTimeToSystemTime(const FILETIME *ft, SYSTEMTIME *st)
+{
+  UInt32 v;
+  UInt64 v64 = GET_TIME_64(ft);
+  v64 /= 10000;
+  st->wMilliseconds = (WORD)(v64 % 1000); v64 /= 1000;
+  st->wSecond       = (WORD)(v64 %   60); v64 /= 60;
+  st->wMinute       = (WORD)(v64 %   60); v64 /= 60;
+  v = (UInt32)v64;
+  st->wHour         = (WORD)(v %   24); v /= 24;
+
+  // 1601-01-01 was Monday
+  st->wDayOfWeek = (WORD)((v + 1) % 7);
+
+  UInt32 leaps, year, day, mon;
+  leaps = (3 * ((4 * v + (365 - 31 - 28) * 4 + 3) / PERIOD_400) + 3) / 4;
+  v += 28188 + leaps;
+  // leaps - the number of exceptions from PERIOD_4 rules starting from 1600-03-01
+  // (1959 / 64) - converts day from 03-01 to month
+  year = (20 * v - 2442) / (5 * PERIOD_4);
+  day = v - (year * PERIOD_4) / 4;
+  mon = (64 * day) / 1959;
+  st->wDay = (WORD)(day - (1959 * mon) / 64);
+  mon -= 1;
+  year += 1524;
+  if (mon > 12)
+  {
+    mon -= 12;
+    year++;
+  }
+  st->wMonth = (WORD)mon;
+  st->wYear = (WORD)year;
+
+  /*
+  unsigned year, mon;
+  unsigned char ms[] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
+  unsigned t;
+
+  year = (WORD)(1601 + v / PERIOD_400 * 400);
+  v %= PERIOD_400;
+
+  t = v / PERIOD_100; if (t ==  4) t =  3; year += t * 100; v -= t * PERIOD_100;
+  t = v / PERIOD_4;   if (t == 25) t = 24; year += t * 4;   v -= t * PERIOD_4;
+  t = v / 365;        if (t ==  4) t =  3; year += t;       v -= t * 365;
+
+  st->wYear = (WORD)year;
+
+  if (year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))
+    ms[1] = 29;
+  for (mon = 0;; mon++)
+  {
+    unsigned d = ms[mon];
+    if (v < d)
+      break;
+    v -= d;
+  }
+  st->wDay = (WORD)(v + 1);
+  st->wMonth = (WORD)(mon + 1);
+  */
+
+  return TRUE;
 }

 #endif
--- a/CPP/Common/MyWindows.h
+++ b/CPP/Common/MyWindows.h
@@ -5,14 +5,14 @@

 #ifdef _WIN32

-#include <windows.h>
+#include <Windows.h>

 #ifdef UNDER_CE
  #undef VARIANT_TRUE
  #define VARIANT_TRUE ((VARIANT_BOOL)-1)
 #endif

-#else
+#else // _WIN32

 #include <stddef.h> // for wchar_t
 #include <string.h>
@@ -20,7 +20,9 @@

 #include "MyGuidDef.h"

+// WINAPI is __stdcall in Windows-MSVC in windef.h
 #define WINAPI
+#define EXTERN_C MY_EXTERN_C

 typedef char CHAR;
 typedef unsigned char UCHAR;
@@ -35,17 +37,12 @@ typedef unsigned short USHORT;
 typedef unsigned short WORD;
 typedef short VARIANT_BOOL;

-typedef int INT;
-typedef Int32 INT32;
-typedef unsigned int UINT;
-typedef UInt32 UINT32;
-typedef INT32 LONG;   // LONG, ULONG and DWORD must be 32-bit
-typedef UINT32 ULONG;
+#define LOWORD(l) ((WORD)((DWORD_PTR)(l) & 0xffff))
+#define HIWORD(l) ((WORD)((DWORD_PTR)(l) >> 16))

-#undef DWORD
-typedef UINT32 DWORD;
-
-typedef long BOOL;
+// MS uses long for BOOL, but long is 32-bit in MS. So we use int.
+// typedef long BOOL;
+typedef int BOOL;

 #ifndef FALSE
  #define FALSE 0
@@ -53,7 +50,7 @@ typedef long BOOL;
 #endif

 // typedef size_t ULONG_PTR;
-typedef size_t DWORD_PTR;
+// typedef size_t DWORD_PTR;
 // typedef uintptr_t UINT_PTR;
 // typedef ptrdiff_t UINT_PTR;

@@ -80,28 +77,33 @@ typedef struct _FILETIME
 } FILETIME;

 #define HRESULT LONG
-#define FAILED(Status) ((HRESULT)(Status)<0)
+#define SUCCEEDED(hr) ((HRESULT)(hr) >= 0)
+#define FAILED(hr)    ((HRESULT)(hr) < 0)
 typedef ULONG PROPID;
 typedef LONG SCODE;

-#define ERROR_NEGATIVE_SEEK 131L

 #define S_OK    ((HRESULT)0x00000000L)
 #define S_FALSE ((HRESULT)0x00000001L)
-#define E_NOTIMPL ((HRESULT)0x80004001L)
+#define E_NOTIMPL     ((HRESULT)0x80004001L)
 #define E_NOINTERFACE ((HRESULT)0x80004002L)
-#define E_ABORT ((HRESULT)0x80004004L)
-#define E_FAIL ((HRESULT)0x80004005L)
-#define STG_E_INVALIDFUNCTION ((HRESULT)0x80030001L)
-#define E_OUTOFMEMORY ((HRESULT)0x8007000EL)
-#define E_INVALIDARG ((HRESULT)0x80070057L)
+#define E_ABORT       ((HRESULT)0x80004004L)
+#define E_FAIL        ((HRESULT)0x80004005L)
+#define STG_E_INVALIDFUNCTION     ((HRESULT)0x80030001L)
+#define CLASS_E_CLASSNOTAVAILABLE ((HRESULT)0x80040111L)
+

 #ifdef _MSC_VER
 #define STDMETHODCALLTYPE __stdcall
+#define STDAPICALLTYPE    __stdcall
 #else
+// do we need __export here?
 #define STDMETHODCALLTYPE
+#define STDAPICALLTYPE
 #endif

+#define STDAPI  EXTERN_C HRESULT STDAPICALLTYPE
+
 #define STDMETHOD_(t, f) virtual t STDMETHODCALLTYPE f
 #define STDMETHOD(f) STDMETHOD_(HRESULT, f)
 #define STDMETHODIMP_(type) type STDMETHODCALLTYPE
@@ -120,9 +122,8 @@ struct IUnknown
  STDMETHOD(QueryInterface) (REFIID iid, void **outObject) PURE;
  STDMETHOD_(ULONG, AddRef)() PURE;
  STDMETHOD_(ULONG, Release)() PURE;
-  #ifndef _WIN32
  virtual ~IUnknown() {}
-  #endif
+  // We use virtual ~IUnknown() here for binary compatibility with 7z.so from p7zip
 };

 typedef IUnknown *LPUNKNOWN;
@@ -214,8 +215,14 @@ MY_EXTERN_C UINT SysStringByteLen(BSTR bstr);
 MY_EXTERN_C UINT SysStringLen(BSTR bstr);

 MY_EXTERN_C DWORD GetLastError();
+MY_EXTERN_C void SetLastError(DWORD dwCode);
 MY_EXTERN_C LONG CompareFileTime(const FILETIME* ft1, const FILETIME* ft2);

+MY_EXTERN_C DWORD GetCurrentThreadId();
+MY_EXTERN_C DWORD GetCurrentProcessId();
+
+#define MAX_PATH 1024
+
 #define CP_ACP    0
 #define CP_OEMCP  1
 #define CP_UTF8   65001
@@ -227,5 +234,35 @@ typedef enum tagSTREAM_SEEK
  STREAM_SEEK_END = 2
 } STREAM_SEEK;

-#endif
+
+
+typedef struct _SYSTEMTIME
+{
+  WORD wYear;
+  WORD wMonth;
+  WORD wDayOfWeek;
+  WORD wDay;
+  WORD wHour;
+  WORD wMinute;
+  WORD wSecond;
+  WORD wMilliseconds;
+} SYSTEMTIME;
+
+BOOL WINAPI FileTimeToLocalFileTime(const FILETIME *fileTime, FILETIME *localFileTime);
+BOOL WINAPI LocalFileTimeToFileTime(const FILETIME *localFileTime, FILETIME *fileTime);
+BOOL WINAPI FileTimeToSystemTime(const FILETIME *fileTime, SYSTEMTIME *systemTime);
+// VOID WINAPI GetSystemTimeAsFileTime(FILETIME *systemTimeAsFileTime);
+
+DWORD GetTickCount();
+
+
+#define CREATE_NEW          1
+#define CREATE_ALWAYS       2
+#define OPEN_EXISTING       3
+#define OPEN_ALWAYS         4
+#define TRUNCATE_EXISTING   5
+
+
+#endif // _WIN32
+
 #endif
--- a/CPP/Common/MyXml.cpp
+++ b/CPP/Common/MyXml.cpp
@@ -7,9 +7,9 @@
 static bool IsValidChar(char c)
 {
  return
-    c >= 'a' && c <= 'z' ||
-    c >= 'A' && c <= 'Z' ||
-    c >= '0' && c <= '9' ||
+    (c >= 'a' && c <= 'z') ||
+    (c >= 'A' && c <= 'Z') ||
+    (c >= '0' && c <= '9') ||
    c == '-';
 }

@@ -24,7 +24,7 @@ int CXmlItem::FindProp(const char *propName) const throw()
 {
  FOR_VECTOR (i, Props)
    if (Props[i].Name == propName)
-      return i;
+      return (int)i;
  return -1;
 }

@@ -32,7 +32,7 @@ AString CXmlItem::GetPropVal(const char *propName) const
 {
  int index = FindProp(propName);
  if (index >= 0)
-    return Props[index].Value;
+    return Props[(unsigned)index].Value;
  return AString();
 }

@@ -45,7 +45,7 @@ int CXmlItem::FindSubTag(const char *tag) const throw()
 {
  FOR_VECTOR (i, SubItems)
    if (SubItems[i].IsTagged(tag))
-      return i;
+      return (int)i;
  return -1;
 }

@@ -75,7 +75,7 @@ AString CXmlItem::GetSubStringForTag(const char *tag) const
 {
  int index = FindSubTag(tag);
  if (index >= 0)
-    return SubItems[index].GetSubString();
+    return SubItems[(unsigned)index].GetSubString();
  return AString();
 }

--- a/CPP/Common/Sha1Prepare.cpp
+++ b/CPP/Common/Sha1Prepare.cpp
@@ -0,0 +1,7 @@
+// Sha1Prepare.cpp
+
+#include "StdAfx.h"
+
+#include "../../C/Sha1.h"
+
+static struct CSha1Prepare { CSha1Prepare() { Sha1Prepare(); } } g_Sha1Prepare;
--- a/CPP/Common/Sha1Reg.cpp
+++ b/CPP/Common/Sha1Reg.cpp
@@ -4,37 +4,67 @@

 #include "../../C/Sha1.h"

+#include "../Common/MyBuffer2.h"
 #include "../Common/MyCom.h"

 #include "../7zip/Common/RegisterCodec.h"

 class CSha1Hasher:
  public IHasher,
+  public ICompressSetCoderProperties,
  public CMyUnknownImp
 {
-  CSha1 _sha;
+  CAlignedBuffer _buf;
  Byte mtDummy[1 << 7];
  
+  CSha1 *Sha() { return (CSha1 *)(void *)(Byte *)_buf; }
 public:
-  CSha1Hasher() { Sha1_Init(&_sha); }
+  CSha1Hasher():
+    _buf(sizeof(CSha1))
+  {
+    Sha1_SetFunction(Sha(), 0);
+    Sha1_InitState(Sha());
+  }

-  MY_UNKNOWN_IMP1(IHasher)
+  MY_UNKNOWN_IMP2(IHasher, ICompressSetCoderProperties)
  INTERFACE_IHasher(;)
+  STDMETHOD(SetCoderProperties)(const PROPID *propIDs, const PROPVARIANT *props, UInt32 numProps);
 };

 STDMETHODIMP_(void) CSha1Hasher::Init() throw()
 {
-  Sha1_Init(&_sha);
+  Sha1_InitState(Sha());
 }

 STDMETHODIMP_(void) CSha1Hasher::Update(const void *data, UInt32 size) throw()
 {
-  Sha1_Update(&_sha, (const Byte *)data, size);
+  Sha1_Update(Sha(), (const Byte *)data, size);
 }

 STDMETHODIMP_(void) CSha1Hasher::Final(Byte *digest) throw()
 {
-  Sha1_Final(&_sha, digest);
+  Sha1_Final(Sha(), digest);
+}
+
+
+STDMETHODIMP CSha1Hasher::SetCoderProperties(const PROPID *propIDs, const PROPVARIANT *coderProps, UInt32 numProps)
+{
+  unsigned algo = 0;
+  for (UInt32 i = 0; i < numProps; i++)
+  {
+    const PROPVARIANT &prop = coderProps[i];
+    if (propIDs[i] == NCoderPropID::kDefaultProp)
+    {
+      if (prop.vt != VT_UI4)
+        return E_INVALIDARG;
+      if (prop.ulVal > 2)
+        return E_NOTIMPL;
+      algo = (unsigned)prop.ulVal;
+    }
+  }
+  if (!Sha1_SetFunction(Sha(), algo))
+    return E_NOTIMPL;
+  return S_OK;
 }

 REGISTER_HASHER(CSha1Hasher, 0x201, "SHA1", SHA1_DIGEST_SIZE)
--- a/CPP/Common/Sha256Prepare.cpp
+++ b/CPP/Common/Sha256Prepare.cpp
@@ -0,0 +1,7 @@
+// Sha256Prepare.cpp
+
+#include "StdAfx.h"
+
+#include "../../C/Sha256.h"
+
+static struct CSha256Prepare { CSha256Prepare() { Sha256Prepare(); } } g_Sha256Prepare;
--- a/CPP/Common/Sha256Reg.cpp
+++ b/CPP/Common/Sha256Reg.cpp
@@ -4,37 +4,67 @@

 #include "../../C/Sha256.h"

+#include "../Common/MyBuffer2.h"
 #include "../Common/MyCom.h"

 #include "../7zip/Common/RegisterCodec.h"

 class CSha256Hasher:
  public IHasher,
+  public ICompressSetCoderProperties,
  public CMyUnknownImp
 {
-  CSha256 _sha;
+  CAlignedBuffer _buf;
  Byte mtDummy[1 << 7];

+  CSha256 *Sha() { return (CSha256 *)(void *)(Byte *)_buf; }
 public:
-  CSha256Hasher() { Sha256_Init(&_sha); }
+  CSha256Hasher():
+    _buf(sizeof(CSha256))
+  {
+    Sha256_SetFunction(Sha(), 0);
+    Sha256_InitState(Sha());
+  }

-  MY_UNKNOWN_IMP1(IHasher)
+  MY_UNKNOWN_IMP2(IHasher, ICompressSetCoderProperties)
  INTERFACE_IHasher(;)
+  STDMETHOD(SetCoderProperties)(const PROPID *propIDs, const PROPVARIANT *props, UInt32 numProps);
 };

 STDMETHODIMP_(void) CSha256Hasher::Init() throw()
 {
-  Sha256_Init(&_sha);
+  Sha256_InitState(Sha());
 }

 STDMETHODIMP_(void) CSha256Hasher::Update(const void *data, UInt32 size) throw()
 {
-  Sha256_Update(&_sha, (const Byte *)data, size);
+  Sha256_Update(Sha(), (const Byte *)data, size);
 }

 STDMETHODIMP_(void) CSha256Hasher::Final(Byte *digest) throw()
 {
-  Sha256_Final(&_sha, digest);
+  Sha256_Final(Sha(), digest);
+}
+
+
+STDMETHODIMP CSha256Hasher::SetCoderProperties(const PROPID *propIDs, const PROPVARIANT *coderProps, UInt32 numProps)
+{
+  unsigned algo = 0;
+  for (UInt32 i = 0; i < numProps; i++)
+  {
+    const PROPVARIANT &prop = coderProps[i];
+    if (propIDs[i] == NCoderPropID::kDefaultProp)
+    {
+      if (prop.vt != VT_UI4)
+        return E_INVALIDARG;
+      if (prop.ulVal > 2)
+        return E_NOTIMPL;
+      algo = (unsigned)prop.ulVal;
+    }
+  }
+  if (!Sha256_SetFunction(Sha(), algo))
+    return E_NOTIMPL;
+  return S_OK;
 }

 REGISTER_HASHER(CSha256Hasher, 0xA, "SHA256", SHA256_DIGEST_SIZE)
--- a/CPP/Common/StdInStream.cpp
+++ b/CPP/Common/StdInStream.cpp
@@ -2,7 +2,9 @@

 #include "StdAfx.h"

+#ifdef _WIN32
 #include <tchar.h>
+#endif

 #include "StdInStream.h"
 #include "StringConvert.h"
@@ -14,14 +16,18 @@

 #define kFileOpenMode TEXT("r")

-extern int g_CodePage;
-
 CStdInStream g_StdIn(stdin);

 bool CStdInStream::Open(LPCTSTR fileName) throw()
 {
  Close();
-  _stream = _tfopen(fileName, kFileOpenMode);
+  _stream =
+    #ifdef _WIN32
+      _tfopen
+    #else
+      fopen
+    #endif
+      (fileName, kFileOpenMode);
  _streamIsOpen = (_stream != 0);
  return _streamIsOpen;
 }
@@ -56,7 +62,7 @@ bool CStdInStream::ScanUStringUntilNewLine(UString &dest)
  dest.Empty();
  AString s;
  bool res = ScanAStringUntilNewLine(s);
-  int codePage = g_CodePage;
+  int codePage = CodePage;
  if (codePage == -1)
    codePage = CP_OEMCP;
  if (codePage == CP_UTF8)
--- a/CPP/Common/StdInStream.h
+++ b/CPP/Common/StdInStream.h
@@ -13,8 +13,14 @@ class CStdInStream
  FILE *_stream;
  bool _streamIsOpen;
 public:
-  CStdInStream(): _stream(0), _streamIsOpen(false) {};
-  CStdInStream(FILE *stream): _stream(stream), _streamIsOpen(false) {};
+  int CodePage;
+
+  CStdInStream(FILE *stream = NULL):
+      _stream(stream),
+      _streamIsOpen(false),
+      CodePage(-1)
+      {};
+
  ~CStdInStream() { Close(); }

  bool Open(LPCTSTR fileName) throw();
--- a/CPP/Common/StdOutStream.cpp
+++ b/CPP/Common/StdOutStream.cpp
@@ -2,7 +2,9 @@

 #include "StdAfx.h"

+#ifdef _WIN32
 #include <tchar.h>
+#endif

 #include "IntToString.h"
 #include "StdOutStream.h"
@@ -11,8 +13,6 @@

 #define kFileOpenMode "wt"

-extern int g_CodePage;
-
 CStdOutStream g_StdOut(stdout);
 CStdOutStream g_StdErr(stderr);

@@ -47,34 +47,29 @@ CStdOutStream & endl(CStdOutStream & outStream) throw()

 CStdOutStream & CStdOutStream::operator<<(const wchar_t *s)
 {
-  int codePage = g_CodePage;
-  if (codePage == -1)
-    codePage = CP_OEMCP;
-  AString dest;
-  if (codePage == CP_UTF8)
-    ConvertUnicodeToUTF8(s, dest);
-  else
-    UnicodeStringToMultiByte2(dest, s, (UINT)codePage);
-  return operator<<((const char *)dest);
-}
-
-void StdOut_Convert_UString_to_AString(const UString &s, AString &temp)
-{
-  int codePage = g_CodePage;
-  if (codePage == -1)
-    codePage = CP_OEMCP;
-  if (codePage == CP_UTF8)
-    ConvertUnicodeToUTF8(s, temp);
-  else
-    UnicodeStringToMultiByte2(temp, s, (UINT)codePage);
+  AString temp;
+  UString s2(s);
+  PrintUString(s2, temp);
+  return *this;
 }

 void CStdOutStream::PrintUString(const UString &s, AString &temp)
 {
-  StdOut_Convert_UString_to_AString(s, temp);
+  Convert_UString_to_AString(s, temp);
  *this << (const char *)temp;
 }

+void CStdOutStream::Convert_UString_to_AString(const UString &src, AString &dest)
+{
+  int codePage = CodePage;
+  if (codePage == -1)
+    codePage = CP_OEMCP;
+  if (codePage == CP_UTF8)
+    ConvertUnicodeToUTF8(src, dest);
+  else
+    UnicodeStringToMultiByte2(dest, src, (UINT)codePage);
+}
+

 static const wchar_t kReplaceChar = '_';

--- a/CPP/Common/StdOutStream.h
+++ b/CPP/Common/StdOutStream.h
@@ -14,9 +14,15 @@ class CStdOutStream
  bool _streamIsOpen;
 public:
  bool IsTerminalMode;
+  int CodePage;
+
+  CStdOutStream(FILE *stream = 0):
+      _stream(stream),
+      _streamIsOpen(false),
+      IsTerminalMode(false),
+      CodePage(-1)
+      {};

-  CStdOutStream(): _stream(0), _streamIsOpen(false), IsTerminalMode(false) {};
-  CStdOutStream(FILE *stream): _stream(stream), _streamIsOpen(false) {};
  ~CStdOutStream() { Close(); }

  // void AttachStdStream(FILE *stream) { _stream  = stream; _streamIsOpen = false; }
@@ -52,6 +58,7 @@ public:

  CStdOutStream & operator<<(const wchar_t *s);
  void PrintUString(const UString &s, AString &temp);
+  void Convert_UString_to_AString(const UString &src, AString &dest);

  void Normalize_UString__LF_Allowed(UString &s);
  void Normalize_UString(UString &s);
@@ -66,6 +73,4 @@ CStdOutStream & endl(CStdOutStream & outStream) throw();
 extern CStdOutStream g_StdOut;
 extern CStdOutStream g_StdErr;

-void StdOut_Convert_UString_to_AString(const UString &s, AString &temp);
-
 #endif
--- a/CPP/Common/StringConvert.cpp
+++ b/CPP/Common/StringConvert.cpp
@@ -5,9 +5,18 @@
 #include "StringConvert.h"

 #ifndef _WIN32
+// #include <stdio.h>
 #include <stdlib.h>
 #endif

+#if !defined(_WIN32) || defined(ENV_HAVE_LOCALE)
+#include "UTFConvert.h"
+#endif
+
+#ifdef ENV_HAVE_LOCALE
+#include <locale.h>
+#endif
+
 static const char k_DefultChar = '_';

 #ifdef _WIN32
@@ -71,7 +80,7 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
    d[i] = 0;
    dest.ReleaseBuf_SetLen(i);
    */
-    unsigned len = MultiByteToWideChar(codePage, 0, src, src.Len(), NULL, 0);
+    unsigned len = (unsigned)MultiByteToWideChar(codePage, 0, src, (int)src.Len(), NULL, 0);
    if (len == 0)
    {
      if (GetLastError() != 0)
@@ -79,7 +88,7 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
    }
    else
    {
-      len = MultiByteToWideChar(codePage, 0, src, src.Len(), dest.GetBuf(len), len);
+      len = (unsigned)MultiByteToWideChar(codePage, 0, src, (int)src.Len(), dest.GetBuf(len), (int)len);
      if (len == 0)
        throw 282228;
      dest.ReleaseBuf_SetEnd(len);
@@ -175,7 +184,7 @@ static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT co
    }
    */

-    unsigned len = WideCharToMultiByte(codePage, 0, src, src.Len(), NULL, 0, NULL, NULL);
+    unsigned len = (unsigned)WideCharToMultiByte(codePage, 0, src, (int)src.Len(), NULL, 0, NULL, NULL);
    if (len == 0)
    {
      if (GetLastError() != 0)
@@ -186,8 +195,8 @@ static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT co
      BOOL defUsed = FALSE;
      bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
      // defaultChar = defaultChar;
-      len = WideCharToMultiByte(codePage, 0, src, src.Len(),
-          dest.GetBuf(len), len,
+      len = (unsigned)WideCharToMultiByte(codePage, 0, src, (int)src.Len(),
+          dest.GetBuf(len), (int)len,
          (isUtf ? NULL : &defaultChar),
          (isUtf ? NULL : &defUsed)
          );
@@ -213,23 +222,137 @@ AString SystemStringToOemString(const CSysString &src)
 #endif
 */

-#else
+#else // _WIN32

-void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePage */)
+// #include <stdio.h>
+/*
+  If wchar_t is 32-bit (#if WCHAR_MAX > 0xffff)
+  and a utf-8 string contains a big unicode character (> 0xffff),
+  then we still use a 16-bit surrogate pair in UString.
+  It simplifies other code where utf-16 encoding is used.
+  So we use surrogate-conversion code only in this file.
+*/
+
+/*
+   mbstowcs() returns an error if there is an error in the utf-8 stream,
+   mbstowcs() returns an error if there is a surrogate code point (d800-dfff) in the utf-8 stream
+*/
+
+/*
+static void MultiByteToUnicodeString2_Native(UString &dest, const AString &src)
 {
  dest.Empty();
  if (src.IsEmpty())
    return;

-  size_t limit = ((size_t)src.Len() + 1) * 2;
+  const size_t limit = ((size_t)src.Len() + 1) * 2;
  wchar_t *d = dest.GetBuf((unsigned)limit);
-  size_t len = mbstowcs(d, src, limit);
+  const size_t len = mbstowcs(d, src, limit);
  if (len != (size_t)-1)
  {
    dest.ReleaseBuf_SetEnd((unsigned)len);
    return;
  }
+  dest.ReleaseBuf_SetEnd(0);
+}
+*/
+
+bool g_ForceToUTF8 = true; // false;
+
+void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
+{
+  dest.Empty();
+  if (src.IsEmpty())
+    return;
+
+  if (codePage == CP_UTF8 || g_ForceToUTF8)
+  {
+    ConvertUTF8ToUnicode(src, dest);
+    return;
+  }
+
+  const size_t limit = ((size_t)src.Len() + 1) * 2;
+  wchar_t *d = dest.GetBuf((unsigned)limit);
+  const size_t len = mbstowcs(d, src, limit);
+  if (len != (size_t)-1)
+  {
+    dest.ReleaseBuf_SetEnd((unsigned)len);
+
+    #if WCHAR_MAX > 0xffff
+    d = dest.GetBuf();
+    for (size_t i = 0;; i++)
+    {
+      // wchar_t c = dest[i];
+      wchar_t c = d[i];
+      if (c == 0)
+        break;
+      if (c >= 0x10000 && c < 0x110000)
+      {
+        /*
+        c -= 0x10000;
+        unsigned c0 = 0xd800 + ((c >> 10) & 0x3FF);
+        dest.ReplaceOneCharAtPos(i, c0);
+        i++;
+        c = 0xdc00 + (c & 0x3FF);
+        dest.Insert_wchar_t(i, c);
+        */
+        UString temp = d + i; // copy of the tail: d is rewritten in place below
+
+        for (size_t t = 0;; t++)
+        {
+          wchar_t w = temp[t];
+          if (w == 0)
+            break;
+          if (i == limit)
+            break; // unexpected error
+          if (w >= 0x10000 && w < 0x110000)
+          {
+            if (i + 1 == limit)
+              break; // unexpected error
+            w -= 0x10000;
+            d[i++] = (unsigned)0xd800 + (((unsigned)w >> 10) & 0x3FF);
+            w = 0xdc00 + (w & 0x3FF);
+          }
+          d[i++] = w;
+        }
+        dest.ReleaseBuf_SetEnd((unsigned)i);
+        break; // whole tail is converted; without this, i++ steps past the new terminator
+      }
+    }
+    #endif
+ 
+    /*
+    printf("\nMultiByteToUnicodeString2 (%d) %s\n", (int)src.Len(),  src.Ptr());
+    printf("char:    ");
+    for (unsigned i = 0; i < src.Len(); i++)
+      printf (" %02x", (int)(Byte)src[i]);
+    printf("\n");
+    printf("\n-> (%d) %ls\n", (int)dest.Len(), dest.Ptr());
+    printf("wchar_t: ");
+    for (unsigned i = 0; i < dest.Len(); i++)
+    {
+      printf (" %02x", (int)dest[i]);
+    }
+    printf("\n");
+    */
+
+    return;
+  }
+
+  /* if there is mbstowcs() error, we have two ways:
+     
+     1) change 0x80+ characters to some character: '_'
+        in that case we lose data, but we have correct UString()
+        and that scheme can show errors to user in early stages,
+        when file converted back to mbs() cannot be found
+
+     2) transfer bad characters in some UTF-16 range.
+        it can be non-original Unicode character.
+        but later we still can restore original character.
+  */
+
  
+  // printf("\nmbstowcs  ERROR !!!!!! s=%s\n", src.Ptr());
  {
    unsigned i;
    const char *s = (const char *)src;
@@ -238,6 +361,8 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePa
      Byte c = (Byte)s[i];
      if (c == 0)
        break;
+      // we can use the ascii-compatible character '_'
+      // if (c > 0x7F) c = '_'; // we replace "bad" character
      d[i++] = (wchar_t)c;
    }
    d[i] = 0;
@@ -245,43 +370,131 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePa
  }
 }

-static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT /* codePage */, char defaultChar, bool &defaultCharWasUsed)
+static void UnicodeStringToMultiByte2_Native(AString &dest, const UString &src)
 {
  dest.Empty();
-  defaultCharWasUsed = false;
  if (src.IsEmpty())
    return;

-  size_t limit = ((size_t)src.Len() + 1) * 6;
+  const size_t limit = ((size_t)src.Len() + 1) * 6;
  char *d = dest.GetBuf((unsigned)limit);
-  size_t len = wcstombs(d, src, limit);
+
+  const size_t len = wcstombs(d, src, limit);
+
  if (len != (size_t)-1)
  {
    dest.ReleaseBuf_SetEnd((unsigned)len);
    return;
  }
+  dest.ReleaseBuf_SetEnd(0);
+}
+
+
+static void UnicodeStringToMultiByte2(AString &dest, const UString &src2, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
+{
+  // if (codePage == 1234567) // for debug purposes
+  if (codePage == CP_UTF8 || g_ForceToUTF8)
+  {
+    defaultCharWasUsed = false;
+    ConvertUnicodeToUTF8(src2, dest);
+    return;
+  }
+
+  UString src = src2;
+  #if WCHAR_MAX > 0xffff
+  {
+    src.Empty(); // rebuild src from src2, merging utf-16 surrogate pairs into single 32-bit wchar_t
+    for (unsigned i = 0; i < src2.Len();)
+    {
+      wchar_t c = src2[i];
+      if (c >= 0xd800 && c < 0xdc00 && i + 1 != src2.Len())
+      {
+        const wchar_t c2 = src2[i + 1];
+        if (c2 >= 0xdc00 && c2 < 0xe000) // low surrogates are dc00-dfff only; e000-ffff are ordinary BMP chars
+        {
+          // printf("\nSurrogate [%d]: %4x %4x -> ", i, (int)c, (int)c2);
+          c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
+          // printf("%4x\n", (int)c);
+          i++;
+        }
+      }
+      src += c;
+      i++;
+    }
+  }
+  #endif
+
+  dest.Empty();
+  defaultCharWasUsed = false;
+  if (src.IsEmpty())
+    return;
+
+  const size_t len = wcstombs(NULL, src, 0);
+
+  if (len != (size_t)-1)
+  {
+    const unsigned limit = ((unsigned)len);
+    if (limit == len)
+    {
+      char *d = dest.GetBuf(limit);
+
+      /*
+      {
+        printf("\nwcstombs; len = %d %ls \n", (int)src.Len(), src.Ptr());
+        for (unsigned i = 0; i < src.Len(); i++)
+          printf (" %02x", (int)src[i]);
+        printf("\n");
+        printf("\ndest Limit = %d \n", limit);
+      }
+      */
+
+      const size_t len2 = wcstombs(d, src, len + 1);
+      
+      if (len2 != (size_t)-1 && len2 <= limit)
+      {
+        /*
+        printf("\nOK : destLen = %d : %s\n", (int)len, dest.Ptr());
+        for (unsigned i = 0; i < len2; i++)
+          printf(" %02x", (int)(Byte)dest[i]);
+        printf("\n");
+        */
+        dest.ReleaseBuf_SetEnd((unsigned)len2);
+        return;
+      }
+    }
+  }

  {
    const wchar_t *s = (const wchar_t *)src;
+    char *d = dest.GetBuf(src.Len());
+
    unsigned i;
    for (i = 0;;)
    {
      wchar_t c = s[i];
      if (c == 0)
        break;
-      if (c >= 0x100)
+      if (c >=
+            0x100
+            // 0x80
+          )
      {
        c = defaultChar;
        defaultCharWasUsed = true;
      }
+
      d[i++] = (char)c;
    }
    d[i] = 0;
    dest.ReleaseBuf_SetLen(i);
+    /*
+    printf("\nUnicodeStringToMultiByte2; len = %d \n", (int)src.Len());
+    printf("ERROR: %s\n", dest.Ptr());
+    */
  }
 }

-#endif
+#endif // _WIN32


 UString MultiByteToUnicodeString(const AString &src, UINT codePage)
@@ -317,3 +530,228 @@ AString UnicodeStringToMultiByte(const UString &src, UINT codePage)
  UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
  return dest;
 }
+
+
+
+
+
+#ifdef _WIN32
+#define U_to_A(a, b, c)  UnicodeStringToMultiByte2(a, b, c) // forward the args: a bare function name is a no-op statement
+// #define A_to_U(a, b, c)  MultiByteToUnicodeString2(a, b, c)
+#else
+// void MultiByteToUnicodeString2_Native(UString &dest, const AString &src);
+#define U_to_A(a, b, c)  UnicodeStringToMultiByte2_Native(a, b)
+// #define A_to_U(a, b, c)  MultiByteToUnicodeString2_Native(a, b)
+#endif
+
+#if !defined(_WIN32) || defined(ENV_HAVE_LOCALE)
+
+bool IsNativeUTF8()
+{
+  UString u;
+  AString a, a2;
+  // for (unsigned c = 0x80; c < (UInt32)0x10000; c += (c >> 9) + 1)
+  for (unsigned c = 0x80; c < (UInt32)0xD000; c += (c >> 2) + 1)
+  {
+    u.Empty();
+    u += (wchar_t)c;
+    /*
+    if (Unicode_Is_There_Utf16SurrogateError(u))
+      continue;
+    #ifndef _WIN32
+    if (Unicode_Is_There_BmpEscape(u))
+      continue;
+    #endif
+    */
+    ConvertUnicodeToUTF8(u, a);
+    U_to_A(a2, u, CP_OEMCP);
+    if (a != a2)
+      return false;
+  }
+  return true;
+}
+
+#endif
+
+
+#ifdef ENV_HAVE_LOCALE
+
+const char *GetLocale(void)
+{
+  #ifdef ENV_HAVE_LOCALE
+    // printf("\n\nsetlocale(LC_CTYPE, NULL) : return : ");
+    const char *s = setlocale(LC_CTYPE, NULL);
+    if (!s)
+    {
+      // printf("[NULL]\n");
+      s = "C";
+    }
+    else
+    {
+      // ubuntu returns "C" after program start
+      // printf("\"%s\"\n", s);
+    }
+    return s;
+  #elif defined(LOCALE_IS_UTF8)
+    return "utf8";
+  #else
+    return "C";
+  #endif
+}
+
+#ifdef _WIN32
+  static void Set_ForceToUTF8(bool) {}
+#else
+  static void Set_ForceToUTF8(bool val) { g_ForceToUTF8 = val; }
+#endif
+
+static bool Is_Default_Basic_Locale(const char *locale)
+{
+  const AString a (locale);
+  if (a.IsEqualTo_Ascii_NoCase("")
+      || a.IsEqualTo_Ascii_NoCase("C")
+      || a.IsEqualTo_Ascii_NoCase("POSIX"))
+      return true;
+  return false;
+}
+
+static bool Is_Default_Basic_Locale()
+{
+  return Is_Default_Basic_Locale(GetLocale());
+}
+
+
+void MY_SetLocale()
+{
+  #ifdef ENV_HAVE_LOCALE
+  /*
+  {
+    const char *s = GetLocale();
+    printf("\nGetLocale() : returned : \"%s\"\n", s);
+  }
+  */
+  
+  unsigned start = 0;
+  // unsigned lim = 0;
+  unsigned lim = 3;
+
+  /*
+  #define MY_SET_LOCALE_FLAGS__FROM_ENV 1
+  #define MY_SET_LOCALE_FLAGS__TRY_UTF8 2
+
+  unsigned flags =
+      MY_SET_LOCALE_FLAGS__FROM_ENV |
+      MY_SET_LOCALE_FLAGS__TRY_UTF8
+
+  if (flags != 0)
+  {
+    if (flags & MY_SET_LOCALE_FLAGS__FROM_ENV)
+      lim = (flags & MY_SET_LOCALE_FLAGS__TRY_UTF8) ? 3 : 1;
+    else
+    {
+      start = 1;
+      lim = 2;
+    }
+  }
+  */
+
+  for (unsigned i = start; i < lim; i++)
+  {
+    /*
+    man7: "If locale is an empty string, "", each part of the locale that
+    should be modified is set according to the environment variables."
+    for glibc: the value is taken from the first of these that is set:
+      1) the environment variable LC_ALL,
+      2) the environment variable with the same name as the category (e.g. LC_CTYPE),
+      3) the environment variable LANG
+    The locale "C" or "POSIX" is a portable locale; it exists on all conforming systems.
+    
+    for WIN32 : MSDN :
+      Sets the locale to the default, which is the user-default
+      ANSI code page obtained from the operating system.
+      The locale name is set to the value returned by GetUserDefaultLocaleName.
+      The code page is set to the value returned by GetACP
+  */
+    const char *newLocale = "";
+    
+    #ifdef __APPLE__
+    
+    /* look also CFLocale
+       there is no C.UTF-8 in macos
+       macos has UTF-8 locale only with some language like en_US.UTF-8
+       what is best way to set UTF-8 locale in macos? */
+    if (i == 1)
+      newLocale = "en_US.UTF-8";
+   
+    /* file open with non-utf8 sequences returns
+      #define EILSEQ    92    // "Illegal byte sequence"
+    */
+#else
+    // newLocale = "C";
+    if (i == 1)
+    {
+      newLocale = "C.UTF-8";    // main UTF-8 locale in ubuntu
+      // newLocale = ".utf8";    // supported in new Windows 10 build 17134 (April 2018 Update), the Universal C Runtime
+      // newLocale = "en_US.utf8"; // supported by ubuntu ?
+      // newLocale = "en_US.UTF-8";
+      /* setlocale() in ubuntu allows locales with minor character changes in strings
+        "en_US.UTF-8" /  "en_US.utf8" */
+    }
+    
+#endif
+    
+    // printf("\nsetlocale(LC_ALL, \"%s\") : returned: ", newLocale);
+    
+    // const char *s =
+    setlocale(LC_ALL, newLocale);
+    
+    /*
+    if (!s)
+      printf("NULL: can't set locale");
+    else
+      printf("\"%s\"\n", s);
+    */
+    
+    // request current locale of program
+    const char *locale = GetLocale();
+    if (locale)
+    {
+      AString a (locale);
+      a.MakeLower_Ascii();
+      // if (a.Find("utf") >= 0)
+      {
+        if (IsNativeUTF8())
+        {
+          Set_ForceToUTF8(true);
+          return;
+        }
+      }
+      if (!Is_Default_Basic_Locale(locale))
+      {
+        // if there is some non-default and non-utf locale, we want to use it
+        break; // comment it for debug
+      }
+    }
+  }
+
+  if (IsNativeUTF8())
+  {
+    Set_ForceToUTF8(true);
+    return;
+  }
+
+  if (Is_Default_Basic_Locale())
+  {
+    Set_ForceToUTF8(true);
+    return;
+  }
+
+  Set_ForceToUTF8(false);
+
+  #elif defined(LOCALE_IS_UTF8)
+    // assume LC_CTYPE="utf8"
+  #else
+    // assume LC_CTYPE="C"
+  #endif
+}
+#endif
--- a/CPP/Common/StringConvert.h
+++ b/CPP/Common/StringConvert.h
@@ -85,4 +85,26 @@ inline AString GetOemString(const UString &u)
 AString SystemStringToOemString(const CSysString &src);
 #endif

+
+#ifdef _WIN32
+/* we don't need locale functions in Windows
+   but we can define ENV_HAVE_LOCALE here for debug purposes */
+// #define ENV_HAVE_LOCALE
+#else
+#define ENV_HAVE_LOCALE
+#endif
+
+#ifdef ENV_HAVE_LOCALE
+void MY_SetLocale();
+const char *GetLocale(void);
+#endif
+
+#if !defined(_WIN32) || defined(ENV_HAVE_LOCALE)
+bool IsNativeUTF8();
+#endif
+
+#ifndef _WIN32
+extern bool g_ForceToUTF8;
+#endif
+
 #endif
--- a/CPP/Common/StringToInt.cpp
+++ b/CPP/Common/StringToInt.cpp
@@ -17,7 +17,7 @@ static const UInt64 k_UInt64_max = UINT64_CONST(0xFFFFFFFFFFFFFFFF);
      if (c < '0' || c > '9') { if (end) *end = s; return res; } \
      if (res > (k_ ## uintType ## _max) / 10) return 0; \
      res *= 10; \
-      unsigned v = (c - '0'); \
+      unsigned v = (unsigned)(c - '0'); \
      if (res > (k_ ## uintType ## _max) - v) return 0; \
      res += v; }}

--- a/CPP/Common/UTFConvert.cpp
+++ b/CPP/Common/UTFConvert.cpp
@@ -2,11 +2,17 @@

 #include "StdAfx.h"

+// #include <stdio.h>
+
 #include "MyTypes.h"
 #include "UTFConvert.h"

-#ifdef _WIN32
-#define _WCHART_IS_16BIT 1
+
+#ifndef _WCHART_IS_16BIT
+#ifndef __APPLE__
+  // we define it if the system supports files with non-utf8 symbols:
+  #define _UTF8_RAW_NON_UTF8_SUPPORTED
+#endif
 #endif

 /*
@@ -18,78 +24,332 @@
  1 : 0xC0 : 11 :
  2 : 0xE0 : 16 : Basic Multilingual Plane
  3 : 0xF0 : 21 : Unicode space
-  3 : 0xF8 : 26 :
-  5 : 0xFC : 31 : UCS-4
+  4 : 0xF8 : 26 :
+  5 : 0xFC : 31 : UCS-4 : wcstombs() in ubuntu is limited to that value
  6 : 0xFE : 36 : We can use it, if we want to encode any 32-bit value
  7 : 0xFF :
 */

 #define _UTF8_START(n) (0x100 - (1 << (7 - (n))))

-#define _UTF8_HEAD_PARSE2(n) if (c < _UTF8_START((n) + 1)) { numBytes = (n); c -= _UTF8_START(n); }
+#define _UTF8_HEAD_PARSE2(n) \
+    if (c < _UTF8_START((n) + 1)) \
+    { numBytes = (n); val -= _UTF8_START(n); }

+#ifndef _WCHART_IS_16BIT
+
+/*
+   if (wchar_t is 32-bit), we can support large points in long UTF-8 sequence,
+   when we convert wchar_t strings to UTF-8:
+     (_UTF8_NUM_TAIL_BYTES_MAX == 3) : (21-bits points) - Unicode
+     (_UTF8_NUM_TAIL_BYTES_MAX == 5) : (31-bits points) - UCS-4
+     (_UTF8_NUM_TAIL_BYTES_MAX == 6) : (36-bit hack)
+*/
+
+#define _UTF8_NUM_TAIL_BYTES_MAX 5
+#endif
+
+/*
 #define _UTF8_HEAD_PARSE \
+    UInt32 val = c; \
         _UTF8_HEAD_PARSE2(1) \
    else _UTF8_HEAD_PARSE2(2) \
    else _UTF8_HEAD_PARSE2(3) \
    else _UTF8_HEAD_PARSE2(4) \
    else _UTF8_HEAD_PARSE2(5) \
+  #if _UTF8_NUM_TAIL_BYTES_MAX >= 6
+    else _UTF8_HEAD_PARSE2(6)
+  #endif
+*/

-    // else _UTF8_HEAD_PARSE2(6)
+#define _UTF8_HEAD_PARSE_MAX_3_BYTES \
+    UInt32 val = c; \
+         _UTF8_HEAD_PARSE2(1) \
+    else _UTF8_HEAD_PARSE2(2) \
+    else { numBytes = 3; val -= _UTF8_START(3); }

-bool CheckUTF8(const char *src, bool allowReduced) throw()
+
+#define _UTF8_RANGE(n) (((UInt32)1) << ((n) * 5 + 6))
+
+
+#define START_POINT_FOR_SURROGATE 0x10000
+
+
+/* we use 128 bytes block in 16-bit BMP-PLANE to encode non-UTF-8 Escapes
+   Also we can use additional HIGH-PLANE (we use 21-bit points above 0x1f0000)
+   to simplify internal intermediate conversion in Linux:
+   RAW-UTF-8 <-> internal wchar_t utf-16 strings <-> RAW-UTF-8
+*/
+ 
+
+#if defined(_WCHART_IS_16BIT)
+
+#define UTF_ESCAPE_PLANE 0
+
+#else
+
+/*
+we can place 128 ESCAPE chars to
+   ef 80 -    ee be 80 (3-bytes utf-8) : similar to WSL
+   ef ff -    ee bf bf
+
+1f ef 80 - f7 be be 80 (4-bytes utf-8) : last  4-bytes utf-8 plane (out of Unicode)
+1f ef ff - f7 be bf bf (4-bytes utf-8) : last  4-bytes utf-8 plane (out of Unicode)
+*/
+
+// #define UTF_ESCAPE_PLANE_HIGH  (0x1f << 16)
+// #define UTF_ESCAPE_PLANE        UTF_ESCAPE_PLANE_HIGH
+#define UTF_ESCAPE_PLANE 0
+
+/*
+  if (UTF_FLAG__FROM_UTF8__USE_ESCAPE is set)
+  {
+    if (UTF_ESCAPE_PLANE is UTF_ESCAPE_PLANE_HIGH)
+    {
+      we can restore any 8-bit Escape from ESCAPE-PLANE-21 plane.
+      But ESCAPE-PLANE-21 point cannot be stored to utf-16 (7z archive)
+      So we still need a way to extract 8-bit Escapes and BMP-Escapes-8
+      from same BMP-Escapes-16 stored in 7z.
+      And if we want to restore any 8-bit from 7z archive,
+      we still must use UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT for (utf-8 -> utf-16)
+      Also we need additional Conversions to transform from utf-16 to utf-16-With-Escapes-21
+    }
+    else (UTF_ESCAPE_PLANE == 0)
+    {
+      we must convert original 3-bytes utf-8 BMP-Escape point to sequence
+      of 3 BMP-Escape-16 points with UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT
+      so we can extract original RAW-UTF-8 from UTF-16 later.
+    }
+  }
+*/
+
+#endif
+
+
+
+#define UTF_ESCAPE_BASE 0xef00
+
+
+#ifdef UTF_ESCAPE_BASE
+#define IS_ESCAPE_POINT(v, plane) (((v) & (UInt32)0xffffff80) == (plane) + UTF_ESCAPE_BASE + 0x80)
+#endif
+
+#define IS_SURROGATE_POINT(v)     (((v) & (UInt32)0xfffff800) == 0xd800)
+#define IS_LOW_SURROGATE_POINT(v) (((v) & (UInt32)0xfffffC00) == 0xdc00)
+
+
+#define _ERROR_UTF8_CHECK \
+  { NonUtf = true; continue; }
+
+void CUtf8Check::Check_Buf(const char *src, size_t size) throw()
 {
+  Clear();
+  // Byte maxByte = 0;
+  // Scan all (size) bytes of (src) and accumulate status flags; a zero byte doesn't stop the scan.
   for (;;)
   {
-    Byte c = *src++;
+    if (size == 0)
+      break;
+
+    const Byte c = (Byte)(*src++);
+    size--;
+
+    if (c == 0)
+    {
+      ZeroChar = true;
+      continue;
+    }
+
+    /*
+    if (c > maxByte)
+      maxByte = c;
+    */
+
+    if (c < 0x80)
+      continue;
+
+    if (c < 0xc0 + 2) // 0x80..0xbf : stray tail byte; 0xc0, 0xc1 : head of overlong 2-byte code
+      _ERROR_UTF8_CHECK
+
+    unsigned numBytes; // number of tail bytes expected after head byte (c)
+
+    // head byte selects the number of tail bytes (1..5); (val) keeps only its payload bits
+    UInt32 val = c;
+         _UTF8_HEAD_PARSE2(1)
+    else _UTF8_HEAD_PARSE2(2)
+    else _UTF8_HEAD_PARSE2(3) // head bytes 0xf0..0xf7 : 3 tail bytes (points above BMP)
+    else _UTF8_HEAD_PARSE2(4)
+    else _UTF8_HEAD_PARSE2(5)
+    else // 0xfe, 0xff can't be head byte
+      _ERROR_UTF8_CHECK
+
+    unsigned pos = 0; // number of tail bytes accepted so far
+    do
+    {
+      if (pos == size)
+        break;
+      unsigned c2 = (Byte)src[pos];
+      c2 -= 0x80; // valid tail bytes 0x80..0xbf map to 0..0x3f; any other byte gives (c2 >= 0x40)
+      if (c2 >= 0x40)
+        break;
+      val <<= 6;
+      val |= c2;
+      if (pos == 0)
+        if (val < (((unsigned)1 << 7) >> numBytes)) // overlong: point fits in a shorter sequence
+          break;
+      pos++;
+    }
+    while (--numBytes);
+
+    if (numBytes != 0)
+    {
+      if (pos == size)
+        Truncated = true; // buffer ended inside the sequence
+      else
+        _ERROR_UTF8_CHECK
+    }
+
+    #ifdef UTF_ESCAPE_BASE
+      if (IS_ESCAPE_POINT(val, 0))
+        Escape = true;
+    #endif
+
+    if (MaxHighPoint < val)
+      MaxHighPoint = val;
+
+    if (IS_SURROGATE_POINT(val))
+      SingleSurrogate = true;
+
+    src += pos; // skip the tail bytes consumed above
+    size -= pos;
+  }
+
+  // MaxByte = maxByte;
+}
+
+bool Check_UTF8_Buf(const char *src, size_t size, bool allowReduced) throw()
+{
+  CUtf8Check check;
+  check.Check_Buf(src, size);
+  return check.IsOK(allowReduced);
+}
+
+/*
+bool CheckUTF8_chars(const char *src, bool allowReduced) throw()
+{
+  CUtf8Check check;
+  check.CheckBuf(src, strlen(src));
+  return check.IsOK(allowReduced);
+}
+*/
+
+bool CheckUTF8_AString(const AString &s) throw()
+{
+  CUtf8Check check;
+  check.Check_AString(s);
+  return check.IsOK();
+}
+
+
+/*
+bool CheckUTF8(const char *src, bool allowReduced) throw()
+{
+  // return Check_UTF8_Buf(src, strlen(src), allowReduced);
+
+  for (;;)
+  {
+    const Byte c = (Byte)(*src++);
    if (c == 0)
      return true;

    if (c < 0x80)
      continue;
-    if (c < 0xC0)   // (c < 0xC0 + 2) // if we support only optimal encoding chars
+    if (c < 0xC0 + 2 || c >= 0xf5)
      return false;
    
    unsigned numBytes;
    _UTF8_HEAD_PARSE
    else
      return false;
-    
-    UInt32 val = c;

+    unsigned pos = 0;
+    
    do
    {
-      Byte c2 = *src++;
+      Byte c2 = (Byte)(*src++);
      if (c2 < 0x80 || c2 >= 0xC0)
        return allowReduced && c2 == 0;
      val <<= 6;
      val |= (c2 - 0x80);
+      pos++;
    }
    while (--numBytes);
-    
+
+    if (val < _UTF8_RANGE(pos - 1))
+      return false;
+
    if (val >= 0x110000)
      return false;
  }
 }
+*/

+// in case of UTF-8 error we have two ways:
+// 21.01- : old : 0xfffd: REPLACEMENT CHARACTER : old version
+// 21.02+ : new : 0xef00 + (c) : similar to WSL scheme for low symbols
+
+#define UTF_REPLACEMENT_CHAR  0xfffd
+
+
+
+#define UTF_ESCAPE(c) \
+   ((flags & UTF_FLAG__FROM_UTF8__USE_ESCAPE) ? \
+    UTF_ESCAPE_PLANE + UTF_ESCAPE_BASE + (c) : UTF_REPLACEMENT_CHAR)
+
+/*
+#define _HARD_ERROR_UTF8
+  { if (dest) dest[destPos] = (wchar_t)UTF_ESCAPE(c); \
+    destPos++; ok = false; continue; }
+*/
+
+// we ignore utf errors, and don't change (ok) variable!

 #define _ERROR_UTF8 \
-  { if (dest) dest[destPos] = (wchar_t)0xFFFD; destPos++; ok = false; continue; }
+  { if (dest) dest[destPos] = (wchar_t)UTF_ESCAPE(c); \
+    destPos++; continue; }

-static bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, const char *srcLim) throw()
+// we store UTF-16 in wchar_t strings. So we use surrogates for big unicode points:
+
+// for debug purposes only we can store UTF-32 in wchar_t:
+// #define START_POINT_FOR_SURROGATE ((UInt32)0 - 1)
+
+
+/*
+  WIN32 MultiByteToWideChar(CP_UTF8) emits 0xfffd point, if utf-8 error was found.
+  And it can emit single 0xfffd from 2 src bytes.
+  It doesn't emit single 0xfffd from 3-4 src bytes.
+  We can
+    1) emit Escape point for each incorrect byte. So we can recover the data later
+    2) emit 0xfffd for each incorrect byte.
+       That scheme is similar to Escape scheme, but we emit 0xfffd
+       instead of each Escape point.
+    3) emit single 0xfffd from 1-2 incorrect bytes, as WIN32 MultiByteToWideChar scheme
+*/
+
+static bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, const char *srcLim, unsigned flags) throw()
 {
  size_t destPos = 0;
  bool ok = true;

  for (;;)
  {
-    Byte c;
    if (src == srcLim)
    {
      *destLen = destPos;
      return ok;
    }
-    c = *src++;
+    
+    const Byte c = (Byte)(*src++);

    if (c < 0x80)
    {
@@ -98,68 +358,127 @@ static bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, const
      destPos++;
      continue;
    }
-    if (c < 0xC0)
+    
+    if (c < 0xc0 + 2
+      || c >= 0xf5) // it's limit for 0x140000 unicode codes : win32 compatibility
+    {
      _ERROR_UTF8
+    }

    unsigned numBytes;
-    _UTF8_HEAD_PARSE
-    else
-      _ERROR_UTF8
-    
-    UInt32 val = c;

+    _UTF8_HEAD_PARSE_MAX_3_BYTES
+
+    unsigned pos = 0;
    do
    {
-      Byte c2;
-      if (src == srcLim)
+      if (src + pos == srcLim)
        break;
-      c2 = *src;
-      if (c2 < 0x80 || c2 >= 0xC0)
+      unsigned c2 = (Byte)src[pos];
+      c2 -= 0x80;
+      if (c2 >= 0x40)
        break;
-      src++;
      val <<= 6;
-      val |= (c2 - 0x80);
+      val |= c2;
+      pos++;
+      if (pos == 1)
+      {
+        if (val < (((unsigned)1 << 7) >> numBytes))
+          break;
+        if (numBytes == 2)
+        {
+          if (flags & UTF_FLAG__FROM_UTF8__SURROGATE_ERROR)
+            if ((val & (0xF800 >> 6)) == (0xd800 >> 6))
+              break;
+        }
+        else if (numBytes == 3 && val >= (0x110000 >> 12))
+          break;
+      }
    }
    while (--numBytes);

    if (numBytes != 0)
-      _ERROR_UTF8
-
-    if (val < 0x10000)
    {
+      if ((flags & UTF_FLAG__FROM_UTF8__USE_ESCAPE) == 0)
+      {
+        // the following code to emit the 0xfffd chars as win32 Utf8 function.
+        // disable the following line, if you need 0xfffd for each incorrect byte as in Escape mode
+        src += pos;
+      }
+      _ERROR_UTF8
+    }
+
+    /*
+    if (val < _UTF8_RANGE(pos - 1))
+      _ERROR_UTF8
+    */
+
+    #ifdef UTF_ESCAPE_BASE
+    
+      if ((flags & UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT)
+          && IS_ESCAPE_POINT(val, 0))
+      {
+        // We will emit 3 utf16-Escape-16-21 points from one Escape-16 point (3 bytes)
+        _ERROR_UTF8
+      }
+    
+    #endif
+
+    /*
+       We don't expect virtual Escape-21 points in UTF-8 stream.
+       And we don't check for Escape-21.
+       So utf8-Escape-21 will be converted to another 3 utf16-Escape-21 points.
+       Maybe we could convert virtual utf8-Escape-21 to one utf16-Escape-21 point in some cases?
+    */
+    
+    if (val < START_POINT_FOR_SURROGATE)
+    {
+      /*
+      if ((flags & UTF_FLAG__FROM_UTF8__SURROGATE_ERROR)
+          && IS_SURROGATE_POINT(val))
+      {
+        // We will emit 3 utf16-Escape-16-21 points from one Surrogate-16 point (3 bytes)
+        _ERROR_UTF8
+      }
+      */
      if (dest)
        dest[destPos] = (wchar_t)val;
      destPos++;
    }
    else
    {
-      val -= 0x10000;
-      if (val >= 0x100000)
+      /*
+      if (val >= 0x110000)
+      {
+        // We will emit utf16-Escape-16-21 point from each source byte
        _ERROR_UTF8
+      }
+      */
      if (dest)
      {
-        dest[destPos + 0] = (wchar_t)(0xD800 + (val >> 10));
-        dest[destPos + 1] = (wchar_t)(0xDC00 + (val & 0x3FF));
+        dest[destPos + 0] = (wchar_t)(0xd800 - (0x10000 >> 10) + (val >> 10));
+        dest[destPos + 1] = (wchar_t)(0xdc00 + (val & 0x3ff));
      }
      destPos += 2;
    }
+    src += pos;
  }
 }

-#define _UTF8_RANGE(n) (((UInt32)1) << ((n) * 5 + 6))
+

 #define _UTF8_HEAD(n, val) ((char)(_UTF8_START(n) + (val >> (6 * (n)))))
 #define _UTF8_CHAR(n, val) ((char)(0x80 + (((val) >> (6 * (n))) & 0x3F)))

-static size_t Utf16_To_Utf8_Calc(const wchar_t *src, const wchar_t *srcLim)
+static size_t Utf16_To_Utf8_Calc(const wchar_t *src, const wchar_t *srcLim, unsigned flags)
 {
-  size_t size = srcLim - src;
+  size_t size = (size_t)(srcLim - src);
  for (;;)
  {
    if (src == srcLim)
      return size;
    
-    UInt32 val = *src++;
+    UInt32 val = (UInt32)(*src++);
   
    if (val < 0x80)
      continue;
@@ -170,15 +489,32 @@ static size_t Utf16_To_Utf8_Calc(const wchar_t *src, const wchar_t *srcLim)
      continue;
    }

-    if (val >= 0xD800 && val < 0xDC00 && src != srcLim)
-    {
-      UInt32 c2 = *src;
-      if (c2 >= 0xDC00 && c2 < 0xE000)
-      {
-        src++;
-        size += 2;
+    #ifdef UTF_ESCAPE_BASE
+    
+    #if UTF_ESCAPE_PLANE != 0
+    if (flags & UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE)
+      if (IS_ESCAPE_POINT(val, UTF_ESCAPE_PLANE))
        continue;
+    #endif
+    
+    if (flags & UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE)
+      if (IS_ESCAPE_POINT(val, 0))
+        continue;
+    
+    #endif
+
+    if (IS_SURROGATE_POINT(val))
+    {
+      // it's hack to UTF-8 encoding
+
+      if (val < 0xdc00 && src != srcLim)
+      {
+        const UInt32 c2 = (UInt32)*src;
+        if (c2 >= 0xdc00 && c2 < 0xe000)
+          src++;
      }
+      size += 2;
+      continue;
    }

    #ifdef _WCHART_IS_16BIT
@@ -191,20 +527,26 @@ static size_t Utf16_To_Utf8_Calc(const wchar_t *src, const wchar_t *srcLim)
    else if (val < _UTF8_RANGE(3)) size += 3;
    else if (val < _UTF8_RANGE(4)) size += 4;
    else if (val < _UTF8_RANGE(5)) size += 5;
-    else                           size += 6;
+    else
+    #if _UTF8_NUM_TAIL_BYTES_MAX >= 6
+      size += 6;
+    #else
+      size += 3;
+    #endif
    
    #endif
  }
 }

-static char *Utf16_To_Utf8(char *dest, const wchar_t *src, const wchar_t *srcLim)
+
+static char *Utf16_To_Utf8(char *dest, const wchar_t *src, const wchar_t *srcLim, unsigned flags)
 {
  for (;;)
  {
    if (src == srcLim)
      return dest;
    
-    UInt32 val = *src++;
+    UInt32 val = (UInt32)*src++;
    
    if (val < 0x80)
    {
@@ -220,22 +562,57 @@ static char *Utf16_To_Utf8(char *dest, const wchar_t *src, const wchar_t *srcLim
      continue;
    }

-    if (val >= 0xD800 && val < 0xDC00 && src != srcLim)
-    {
-      UInt32 c2 = *src;
-      if (c2 >= 0xDC00 && c2 < 0xE000)
+    #ifdef UTF_ESCAPE_BASE
+    
+    #if UTF_ESCAPE_PLANE != 0
+    /*
+       if (wchar_t is 32-bit)
+            && (UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE is set)
+            && (point is virtual escape plane)
+          we extract 8-bit byte from virtual HIGH-ESCAPE PLANE.
+    */
+    if (flags & UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE)
+      if (IS_ESCAPE_POINT(val, UTF_ESCAPE_PLANE))
      {
-        src++;
-        val = (((val - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000;
-        dest[0] = _UTF8_HEAD(3, val);
-        dest[1] = _UTF8_CHAR(2, val);
-        dest[2] = _UTF8_CHAR(1, val);
-        dest[3] = _UTF8_CHAR(0, val);
-        dest += 4;
+        *dest++ = (char)(val);
+        continue;
+      }
+    #endif // UTF_ESCAPE_PLANE != 0
+
+    /* if (UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE is defined)
+          we extract 8-bit byte from BMP-ESCAPE PLANE. */
+
+    if (flags & UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE)
+      if (IS_ESCAPE_POINT(val, 0))
+      {
+        *dest++ = (char)(val);
        continue;
      }
-    }
    
+    #endif // UTF_ESCAPE_BASE
+
+    if (IS_SURROGATE_POINT(val))
+    {
+      // it's hack to UTF-8 encoding
+      if (val < 0xdc00 && src != srcLim)
+      {
+        const UInt32 c2 = (UInt32)*src;
+        if (IS_LOW_SURROGATE_POINT(c2))
+        {
+          src++;
+          val = (((val - 0xd800) << 10) | (c2 - 0xdc00)) + 0x10000;
+          dest[0] = _UTF8_HEAD(3, val);
+          dest[1] = _UTF8_CHAR(2, val);
+          dest[2] = _UTF8_CHAR(1, val);
+          dest[3] = _UTF8_CHAR(0, val);
+          dest += 4;
+          continue;
+        }
+      }
+      if (flags & UTF_FLAG__TO_UTF8__SURROGATE_ERROR)
+        val = UTF_REPLACEMENT_CHAR; // WIN32 function does it
+    }
+
    #ifndef _WCHART_IS_16BIT
    if (val < _UTF8_RANGE(2))
    #endif
@@ -249,14 +626,25 @@ static char *Utf16_To_Utf8(char *dest, const wchar_t *src, const wchar_t *srcLim
    
    #ifndef _WCHART_IS_16BIT

-    UInt32 b;
+    // we don't expect this case. so we can throw exception
+    // throw 20210407;
+   
+    char b;
    unsigned numBits;
         if (val < _UTF8_RANGE(3)) { numBits = 6 * 3; b = _UTF8_HEAD(3, val); }
    else if (val < _UTF8_RANGE(4)) { numBits = 6 * 4; b = _UTF8_HEAD(4, val); }
    else if (val < _UTF8_RANGE(5)) { numBits = 6 * 5; b = _UTF8_HEAD(5, val); }
-    else                           { numBits = 6 * 6; b = _UTF8_START(6); }
-    
-    *dest++ = (Byte)b;
+    #if _UTF8_NUM_TAIL_BYTES_MAX >= 6
+    else                           { numBits = 6 * 6; b = (char)_UTF8_START(6); }
+    #else
+    else
+    {
+      val = UTF_REPLACEMENT_CHAR;
+                                   { numBits = 6 * 3; b = _UTF8_HEAD(3, val); }
+    }
+    #endif
+
+    *dest++ = b;
    
    do
    {
@@ -269,20 +657,207 @@ static char *Utf16_To_Utf8(char *dest, const wchar_t *src, const wchar_t *srcLim
  }
 }

-bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
+bool Convert_UTF8_Buf_To_Unicode(const char *src, size_t srcSize, UString &dest, unsigned flags)
 {
  dest.Empty();
  size_t destLen = 0;
-  Utf8_To_Utf16(NULL, &destLen, src, src.Ptr(src.Len()));
-  bool res = Utf8_To_Utf16(dest.GetBuf((unsigned)destLen), &destLen, src, src.Ptr(src.Len()));
+  Utf8_To_Utf16(NULL, &destLen, src, src + srcSize, flags);
+  bool res = Utf8_To_Utf16(dest.GetBuf((unsigned)destLen), &destLen, src, src + srcSize, flags);
  dest.ReleaseBuf_SetEnd((unsigned)destLen);
  return res;
 }

+bool ConvertUTF8ToUnicode_Flags(const AString &src, UString &dest, unsigned flags)
+{
+  return Convert_UTF8_Buf_To_Unicode(src, src.Len(), dest,  flags);
+}
+
+
+static
+unsigned g_UTF8_To_Unicode_Flags =
+    UTF_FLAG__FROM_UTF8__USE_ESCAPE
+  #ifndef _WCHART_IS_16BIT
+    | UTF_FLAG__FROM_UTF8__SURROGATE_ERROR
+  #ifdef _UTF8_RAW_NON_UTF8_SUPPORTED
+    | UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT
+  #endif
+  #endif
+    ;
+    
+
+/*
+bool ConvertUTF8ToUnicode_boolRes(const AString &src, UString &dest)
+{
+  return ConvertUTF8ToUnicode_Flags(src, dest, g_UTF8_To_Unicode_Flags);
+}
+*/
+
+bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
+{
+  return ConvertUTF8ToUnicode_Flags(src, dest, g_UTF8_To_Unicode_Flags);
+}
+
+void Print_UString(const UString &a);
+
+void ConvertUnicodeToUTF8_Flags(const UString &src, AString &dest, unsigned flags)
+{ // converts (src) to UTF-8 in (dest); (flags) are passed unchanged to both passes below
+  /*
+  if (src.Len()== 24)
+    throw "202104";
+  */
+  dest.Empty();
+  const size_t destLen = Utf16_To_Utf8_Calc(src, src.Ptr(src.Len()), flags); // pass 1: compute output size
+  char *destStart = dest.GetBuf((unsigned)destLen);
+  const char *destEnd = Utf16_To_Utf8(destStart, src, src.Ptr(src.Len()), flags); // pass 2: write the bytes
+  dest.ReleaseBuf_SetEnd((unsigned)destLen);
+  // printf("\nlen = %d\n", src.Len());
+  if (destLen != (size_t)(destEnd - destStart)) // the two passes must agree on the output size
+  {
+    /*
+    // dest.ReleaseBuf_SetEnd((unsigned)(destEnd - destStart));
+    printf("\nlen = %d\n", (unsigned)destLen);
+    printf("\n(destEnd - destStart) = %d\n", (unsigned)(destEnd - destStart));
+    printf("\n");
+    // Print_UString(src);
+    printf("\n");
+    // printf("\nlen = %d\n", destLen);
+    */
+    throw 20210406; // internal error: size pass and write pass disagree
+  }
+}
+
+
+
+unsigned g_Unicode_To_UTF8_Flags = // default flags for ConvertUnicodeToUTF8() and Convert_Unicode_To_UTF8_Buf()
+      // UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE
+      0
+  #ifndef _WIN32
+    #ifdef _UTF8_RAW_NON_UTF8_SUPPORTED
+      | UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE
+    #else // e.g. __APPLE__ : _UTF8_RAW_NON_UTF8_SUPPORTED is not defined there
+      | UTF_FLAG__TO_UTF8__SURROGATE_ERROR
+    #endif
+  #endif
+    ; // the only terminator of the definition: no branch above may end with its own ';'
+
 void ConvertUnicodeToUTF8(const UString &src, AString &dest)
 {
-  dest.Empty();
-  size_t destLen = Utf16_To_Utf8_Calc(src, src.Ptr(src.Len()));
-  Utf16_To_Utf8(dest.GetBuf((unsigned)destLen), src, src.Ptr(src.Len()));
-  dest.ReleaseBuf_SetEnd((unsigned)destLen);
+  ConvertUnicodeToUTF8_Flags(src, dest, g_Unicode_To_UTF8_Flags);
 }
+
+void Convert_Unicode_To_UTF8_Buf(const UString &src, CByteBuffer &dest)
+{ // converts (src) to UTF-8 bytes in (dest), using the default g_Unicode_To_UTF8_Flags
+  const unsigned flags = g_Unicode_To_UTF8_Flags;
+  dest.Free();
+  const size_t destLen = Utf16_To_Utf8_Calc(src, src.Ptr(src.Len()), flags); // pass 1: compute output size
+  dest.Alloc(destLen); // exactly destLen bytes are requested: no extra byte for a terminator
+  const char *destEnd = Utf16_To_Utf8((char *)(void *)(Byte *)dest, src, src.Ptr(src.Len()), flags); // pass 2: write the bytes
+  if (destLen != (size_t)(destEnd - (char *)(void *)(Byte *)dest)) // the two passes must agree on the output size
+    throw 202104;
+}
+
+/*
+
+#ifndef _WIN32
+void Convert_UTF16_To_UTF32(const UString &src, UString &dest)
+{
+  dest.Empty();
+  for (size_t i = 0; i < src.Len();)
+  {
+    wchar_t c = src[i++];
+    if (c >= 0xd800 && c < 0xdc00 && i < src.Len())
+    {
+      const wchar_t c2 = src[i];
+      if (c2 >= 0xdc00 && c2 < 0x10000)
+      {
+        // printf("\nSurragate [%d]: %4x %4x -> ", i, (int)c, (int)c2);
+        c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
+        // printf("%4x\n", (int)c);
+        i++;
+      }
+    }
+    dest += c;
+  }
+}
+
+void Convert_UTF32_To_UTF16(const UString &src, UString &dest)
+{
+  dest.Empty();
+  for (size_t i = 0; i < src.Len();)
+  {
+    wchar_t w = src[i++];
+    if (w >= 0x10000 && w < 0x110000)
+    {
+      w -= 0x10000;
+      dest += (wchar_t)((unsigned)0xd800 + (((unsigned)w >> 10) & 0x3ff));
+      w = 0xdc00 + (w & 0x3ff);
+    }
+    dest += w;
+  }
+}
+
+bool UTF32_IsThere_BigPoint(const UString &src)
+{
+  for (size_t i = 0; i < src.Len();)
+  {
+    const UInt32 c = (UInt32)src[i++];
+    if (c >= 0x110000)
+      return true;
+  }
+  return false;
+}
+
+bool Unicode_IsThere_BmpEscape(const UString &src)
+{
+  for (size_t i = 0; i < src.Len();)
+  {
+    const UInt32 c = (UInt32)src[i++];
+    if (IS_ESCAPE_POINT(c, 0))
+      return true;
+  }
+  return false;
+}
+
+
+#endif
+
+bool Unicode_IsThere_Utf16SurrogateError(const UString &src)
+{
+  for (size_t i = 0; i < src.Len();)
+  {
+    const UInt32 val = (UInt32)src[i++];
+    if (IS_SURROGATE_POINT(val))
+    {
+      // it's hack to UTF-8 encoding
+      if (val >= 0xdc00 || i == src.Len())
+        return true;
+      const UInt32 c2 = (UInt32)*src;
+      if (!IS_LOW_SURROGATE_POINT(c2))
+        return true;
+    }
+  }
+  return false;
+}
+*/
+
+#ifndef _WCHART_IS_16BIT
+// moves each BMP escape point of (s) to the same offset in UTF_ESCAPE_PLANE, in place
+void Convert_UnicodeEsc16_To_UnicodeEscHigh
+#if UTF_ESCAPE_PLANE == 0
+    (UString &) {} // escape plane is BMP itself: nothing to move
+#else
+    (UString &s)
+{
+  const unsigned len = s.Len();
+  for (unsigned i = 0; i < len; i++)
+  {
+    wchar_t c = s[i];
+    if (IS_ESCAPE_POINT(c, 0)) // (c) is in the 128-point BMP escape block
+    {
+      c += UTF_ESCAPE_PLANE;
+      s.ReplaceOneCharAtPos(i, c);
+    }
+  }
+}
+#endif
+#endif
--- a/CPP/Common/UTFConvert.h
+++ b/CPP/Common/UTFConvert.h
@@ -3,10 +3,382 @@
 #ifndef __COMMON_UTF_CONVERT_H
 #define __COMMON_UTF_CONVERT_H

+#include "MyBuffer.h"
 #include "MyString.h"

-bool CheckUTF8(const char *src, bool allowReduced = false) throw();
-bool ConvertUTF8ToUnicode(const AString &utfString, UString &resultString);
-void ConvertUnicodeToUTF8(const UString &unicodeString, AString &resultString);
+struct CUtf8Check // status flags collected by Check_Buf() while scanning a buffer as UTF-8
+{
+  // Byte MaxByte;     // in original src stream
+  bool NonUtf;          // some byte sequence is not accepted as UTF-8 (bad head or tail byte, overlong code)
+  bool ZeroChar;        // buffer contains a zero byte
+  bool SingleSurrogate; // some decoded point is in surrogate range (0xd800..0xdfff)
+  bool Escape;          // some decoded point is in BMP escape block (see IS_ESCAPE_POINT in UTFConvert.cpp)
+  bool Truncated;       // buffer ends inside a multi-byte sequence
+  UInt32 MaxHighPoint;  // only for points >= 0x80
+
+  CUtf8Check() { Clear(); }
+  // resets all flags and MaxHighPoint
+  void Clear()
+  {
+    // MaxByte = 0;
+    NonUtf = false;
+    ZeroChar = false;
+    SingleSurrogate = false;
+    Escape = false;
+    Truncated = false;
+    MaxHighPoint = 0;
+  }
+  // merges status of (c) into this object: flags are OR-ed, MaxHighPoint becomes the maximum
+  void Update(const CUtf8Check &c)
+  {
+    if (c.NonUtf) NonUtf = true;
+    if (c.ZeroChar) ZeroChar = true;
+    if (c.SingleSurrogate) SingleSurrogate = true;
+    if (c.Escape) Escape = true;
+    if (c.Truncated) Truncated = true;
+    if (MaxHighPoint < c.MaxHighPoint) MaxHighPoint = c.MaxHighPoint;
+  }
+  // writes to (s) the names of the flags that are set, and MaxHighPoint if it is non-zero
+  void PrintStatus(AString &s) const
+  {
+    s.Empty();
+
+    // s.Add_OptSpaced("MaxByte=");
+    // s.Add_UInt32(MaxByte);
+
+    if (NonUtf)          s.Add_OptSpaced("non-UTF8");
+    if (ZeroChar)        s.Add_OptSpaced("ZeroChar");
+    if (SingleSurrogate) s.Add_OptSpaced("SingleSurrogate");
+    if (Escape)          s.Add_OptSpaced("Escape");
+    if (Truncated)       s.Add_OptSpaced("Truncated");
+
+    if (MaxHighPoint != 0)
+    {
+      s.Add_OptSpaced("MaxUnicode=");
+      s.Add_UInt32(MaxHighPoint);
+    }
+  }
+
+  // returns true, if the checked data is acceptable UTF-8; Escape flag doesn't affect the result
+  bool IsOK(bool allowReduced = false) const
+  {
+    if (NonUtf || SingleSurrogate || ZeroChar)
+      return false;
+    if (MaxHighPoint >= 0x110000) // point is above Unicode range
+      return false;
+    if (Truncated && !allowReduced) // truncated last sequence is accepted only with (allowReduced)
+      return false;
+    return true;
+  }
+
+  // it checks full buffer as specified in (size) and it doesn't stop on zero char
+  void Check_Buf(const char *src, size_t size) throw();
+
+  void Check_AString(const AString &s) throw()
+  {
+    Check_Buf(s.Ptr(), s.Len());
+  }
+};
+
+/*
+if (allowReduced == false) - all UTF-8 character sequences must be finished.
+if (allowReduced == true)  - it allows truncated last character-Utf8-sequence
+*/
+
+bool Check_UTF8_Buf(const char *src, size_t size, bool allowReduced) throw();
+bool CheckUTF8_AString(const AString &s) throw();
+
+#define UTF_FLAG__FROM_UTF8__SURROGATE_ERROR    (1 << 0)
+#define UTF_FLAG__FROM_UTF8__USE_ESCAPE         (1 << 1)
+#define UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT (1 << 2)
+
+/*
+UTF_FLAG__FROM_UTF8__SURROGATE_ERROR
+
+   if (flag is NOT set)
+   {
+     it processes SINGLE-SURROGATE-8 as valid Unicode point.
+     it converts  SINGLE-SURROGATE-8 to SINGLE-SURROGATE-16
+     Note: some sequences of two SINGLE-SURROGATE-8 points
+           will generate correct SURROGATE-16-PAIR, and
+           that SURROGATE-16-PAIR later will be converted to correct
+           UTF8-SURROGATE-21 point. So we don't restore original
+           STR-8 sequence in that case.
+   }
+   
+   if (flag is set)
+   {
+     if (UTF_FLAG__FROM_UTF8__USE_ESCAPE is defined)
+        it generates ESCAPE for SINGLE-SURROGATE-8,
+     if (UTF_FLAG__FROM_UTF8__USE_ESCAPE is not defined)
+        it generates U+fffd for SINGLE-SURROGATE-8,
+   }
+
+
+UTF_FLAG__FROM_UTF8__USE_ESCAPE
+
+   if (flag is NOT set)
+     it generates (U+fffd) code for non-UTF-8 (invalid) characters
+
+   if (flag is set)
+   {
+     It generates (ESCAPE) codes for NON-UTF-8 (invalid) characters.
+     And later we can restore original UTF-8-RAW characters from (ESCAPE-16-21) codes.
+   }
+
+UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT
+
+   if (flag is NOT set)
+   {
+     it processes ESCAPE-8 points as another Unicode points.
+     In Linux: ESCAPE-16 will mean two different ESCAPE-8 sequences,
+       so we need HIGH-ESCAPE-PLANE-21 to restore UTF-8-RAW -> UTF-16 -> UTF-8-RAW
+   }
+
+   if (flag is set)
+   {
+     it generates ESCAPE-16-21 for ESCAPE-8 points
+     so we can restore UTF-8-RAW -> UTF-16 -> UTF-8-RAW without HIGH-ESCAPE-PLANE-21.
+   }
+
+
+Main USE CASES with UTF-8 <-> UTF-16 conversions:
+
+ WIN32:   UTF-16-RAW -> UTF-8 (Archive) -> UTF-16-RAW
+   {
+            set UTF_FLAG__FROM_UTF8__USE_ESCAPE
+     Do NOT set UTF_FLAG__FROM_UTF8__SURROGATE_ERROR
+     Do NOT set UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT
+     
+     So we restore original SINGLE-SURROGATE-16 from single SINGLE-SURROGATE-8.
+   }
+
+ Linux:   UTF-8-RAW -> UTF-16 (Intermediate / Archive) -> UTF-8-RAW
+   {
+     we want restore original UTF-8-RAW sequence later from that ESCAPE-16.
+     Set the flags:
+       UTF_FLAG__FROM_UTF8__SURROGATE_ERROR
+       UTF_FLAG__FROM_UTF8__USE_ESCAPE
+       UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT
+   }
+
+ MacOS:   UTF-8-RAW -> UTF-16 (Intermediate / Archive) -> UTF-8-RAW
+   {
+     we want to restore correct UTF-8 without any BMP processing:
+     Set the flags:
+       UTF_FLAG__FROM_UTF8__SURROGATE_ERROR
+       UTF_FLAG__FROM_UTF8__USE_ESCAPE
+   }
+
+*/
+
+// zero char is not allowed in (src) buf
+bool Convert_UTF8_Buf_To_Unicode(const char *src, size_t srcSize, UString &dest, unsigned flags = 0);
+
+bool ConvertUTF8ToUnicode_Flags(const AString &src, UString &dest, unsigned flags = 0);
+bool ConvertUTF8ToUnicode(const AString &src, UString &dest);
+
+#define UTF_FLAG__TO_UTF8__SURROGATE_ERROR    (1 << 8)
+#define UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE (1 << 9)
+// #define UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE  (1 << 10)
+
+/*
+UTF_FLAG__TO_UTF8__SURROGATE_ERROR
+
+  if (flag is NOT set)
+  {
+     we extract SINGLE-SURROGATE as normal UTF-8
+     
+     In Windows : for UTF-16-RAW <-> UTF-8 (archive) <-> UTF-16-RAW.
+     
+     In Linux :
+       use-case-1: UTF-8 -> UTF-16 -> UTF-8  doesn't generate UTF-16 SINGLE-SURROGATE,
+                   if (UTF_FLAG__FROM_UTF8__SURROGATE_ERROR) is used.
+       use-case 2: UTF-16-7z (with SINGLE-SURROGATE from Windows) -> UTF-8 (Linux)
+                   will generate SINGLE-SURROGATE-UTF-8 here.
+  }
+
+  if (flag is set)
+  {
+     we generate UTF_REPLACEMENT_CHAR (0xfffd) for SINGLE_SURROGATE
+     it can be used for compatibility mode with WIN32 UTF function
+     or if we want UTF-8 stream without any errors
+  }
+
+
+UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE
+  
+  if (flag is NOT set) it doesn't extract  raw 8-bit symbol from Escape-Plane-16
+  if (flag is set)     it         extracts raw 8-bit symbol from Escape-Plane-16
+
+  in Linux we need some way to extract NON-UTF8 RAW 8-bits from BMP (UTF-16 7z archive):
+  if (we       use High-Escape-Plane), we can transfer BMP escapes to High-Escape-Plane.
+  if (we don't use High-Escape-Plane), we must use UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE.
+    
+
+UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE
+  // that flag affects the code only if (wchar_t is 32-bit)
+  // that mode with high-escape can be disabled now in UTFConvert.cpp
+  if (flag is NOT set)
+     it doesn't extract raw 8-bit symbol from High-Escape-Plane
+  if (flag is set)
+     it        extracts raw 8-bit symbol from High-Escape-Plane
+
+Main use cases:
+
+WIN32 : UTF-16-RAW -> UTF-8 (archive) -> UTF-16-RAW
+   {
+     Do NOT set UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE.
+     Do NOT set UTF_FLAG__TO_UTF8__SURROGATE_ERROR.
+     So we restore original UTF-16-RAW.
+   }
+
+Linux : UTF-8 with Escapes -> UTF-16 (7z archive) -> UTF-8 with Escapes
+     set UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE to extract non-UTF from 7z archive
+     set UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE for intermediate UTF-16.
+     Note: high escape mode can be ignored now in UTFConvert.cpp
+
+macOS:
+     the system doesn't support incorrect UTF-8 in file names.
+     set UTF_FLAG__TO_UTF8__SURROGATE_ERROR
+*/
+
+extern unsigned g_Unicode_To_UTF8_Flags;
+
+void ConvertUnicodeToUTF8_Flags(const UString &src, AString &dest, unsigned flags = 0);
+void ConvertUnicodeToUTF8(const UString &src, AString &dest);
+
+void Convert_Unicode_To_UTF8_Buf(const UString &src, CByteBuffer &dest);
+
+/*
+#ifndef _WIN32
+void Convert_UTF16_To_UTF32(const UString &src, UString &dest);
+void Convert_UTF32_To_UTF16(const UString &src, UString &dest);
+bool UTF32_IsThere_BigPoint(const UString &src);
+bool Unicode_IsThere_BmpEscape(const UString &src);
+#endif
+
+bool Unicode_IsThere_Utf16SurrogateError(const UString &src);
+*/
+
+#ifdef _WCHART_IS_16BIT
+#define Convert_UnicodeEsc16_To_UnicodeEscHigh(s)
+#else
+void Convert_UnicodeEsc16_To_UnicodeEscHigh(UString &s);
+#endif
+
+/*
+// #include "../../C/CpuArch.h"
+
+// ---------- Utf16 Little endian functions ----------
+
+// We store 16-bit surrogates even in 32-bit WCHARs in Linux.
+// So now we don't use the following code:
+
+#if WCHAR_MAX > 0xffff
+
+// void *p     : pointer to src bytes stream
+// size_t len  : num Utf16 characters : it can include or not include NULL character
+
+inline size_t Utf16LE__Get_Num_WCHARs(const void *p, size_t len)
+{
+  #if WCHAR_MAX > 0xffff
+  size_t num_wchars = 0;
+  for (size_t i = 0; i < len; i++)
+  {
+    wchar_t c = GetUi16(p);
+    p = (const void *)((const Byte *)p + 2);
+    if (c >= 0xd800 && c < 0xdc00 && i + 1 != len)
+    {
+      wchar_t c2 = GetUi16(p);
+      if (c2 >= 0xdc00 && c2 < 0xe000)
+      {
+        c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
+        p = (const void *)((const Byte *)p + 2);
+        i++;
+      }
+    }
+    num_wchars++;
+  }
+  return num_wchars;
+  #else
+  UNUSED_VAR(p)
+  return len;
+  #endif
+}
+
+// #include <stdio.h>
+
+inline wchar_t *Utf16LE__To_WCHARs_Sep(const void *p, size_t len, wchar_t *dest)
+{
+  for (size_t i = 0; i < len; i++)
+  {
+    wchar_t c = GetUi16(p);
+    p = (const void *)((const Byte *)p + 2);
+    
+    #if WCHAR_PATH_SEPARATOR != L'/'
+    if (c == L'/')
+      c = WCHAR_PATH_SEPARATOR;
+    #endif
+    
+    #if WCHAR_MAX > 0xffff
+    
+    if (c >= 0xd800 && c < 0xdc00 && i + 1 != len)
+    {
+      wchar_t c2 = GetUi16(p);
+      if (c2 >= 0xdc00 && c2 < 0xe000)
+      {
+        // printf("\nSurragate : %4x %4x -> ", (int)c, (int)c2);
+        c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
+        p = (const void *)((const Byte *)p + 2);
+        i++;
+        // printf("%4x\n", (int)c);
+      }
+    }
+    
+    #endif
+    
+    *dest++ = c;
+  }
+  return dest;
+}
+
+
+inline size_t Get_Num_Utf16_chars_from_wchar_string(const wchar_t *p)
+{
+  size_t num = 0;
+  for (;;)
+  {
+    wchar_t c = *p++;
+    if (c == 0)
+      return num;
+    num += ((c >= 0x10000 && c < 0x110000) ? 2 : 1);
+  }
+  return num;
+}
+
+inline Byte *wchars_to_Utf16LE(const wchar_t *p, Byte *dest)
+{
+  for (;;)
+  {
+    wchar_t c = *p++;
+    if (c == 0)
+      return dest;
+    if (c >= 0x10000 && c < 0x110000)
+    {
+      SetUi16(dest    , (UInt16)(0xd800 + ((c >> 10) & 0x3FF)));
+      SetUi16(dest + 2, (UInt16)(0xdc00 + ( c        & 0x3FF)));
+      dest += 4;
+    }
+    else
+    {
+      SetUi16(dest, c);
+      dest += 2;
+    }
+  }
+}
+
+#endif
+*/

 #endif
--- a/CPP/Common/Wildcard.cpp
+++ b/CPP/Common/Wildcard.cpp
@@ -4,9 +4,17 @@

 #include "Wildcard.h"

+extern
+bool g_CaseSensitive;
 bool g_CaseSensitive =
   #ifdef _WIN32
     false;
+  #elif defined (__APPLE__)
+    #if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE  // test the value: the macro can be defined as 0 on macOS
+      true;
+    #else
+      false;
+    #endif
   #else
     true;
   #endif
@@ -19,8 +27,16 @@ bool IsPath1PrefixedByPath2(const wchar_t *s1, const wchar_t *s2)
  return IsString1PrefixedByString2_NoCase(s1, s2);
 }

+// #include <stdio.h>
+
 int CompareFileNames(const wchar_t *s1, const wchar_t *s2) STRING_UNICODE_THROW
 {
+  /*
+  printf("\nCompareFileNames");
+  printf("\n S1: %ls", s1);
+  printf("\n S2: %ls", s2);
+  printf("\n");
+  */
  if (g_CaseSensitive)
    return MyStringCompare(s1, s2);
  return MyStringCompareNoCase(s1, s2);
@@ -131,7 +147,7 @@ UString ExtractDirPrefixFromPath(const UString &path)

 UString ExtractFileNameFromPath(const UString &path)
 {
-  return UString(path.Ptr(path.ReverseFind_PathSepar() + 1));
+  return UString(path.Ptr((unsigned)(path.ReverseFind_PathSepar() + 1)));
 }


@@ -229,12 +245,12 @@ bool CItem::CheckPath(const UStringVector &pathParts, bool isFile) const
    {
      if (WildcardMatching)
      {
-        if (!DoesWildcardMatchName(PathParts[i], pathParts[i + d]))
+        if (!DoesWildcardMatchName(PathParts[i], pathParts[i + (unsigned)d]))
          break;
      }
      else
      {
-        if (CompareFileNames(PathParts[i], pathParts[i + d]) != 0)
+        if (CompareFileNames(PathParts[i], pathParts[i + (unsigned)d]) != 0)
          break;
      }
    }
@@ -258,16 +274,14 @@ int CCensorNode::FindSubNode(const UString &name) const
 {
  FOR_VECTOR (i, SubNodes)
    if (CompareFileNames(SubNodes[i].Name, name) == 0)
-      return i;
+      return (int)i;
  return -1;
 }

 void CCensorNode::AddItemSimple(bool include, CItem &item)
 {
-  if (include)
-    IncludeItems.Add(item);
-  else
-    ExcludeItems.Add(item);
+  CObjectVector<CItem> &items = include ? IncludeItems : ExcludeItems;
+  items.Add(item);
 }

 void CCensorNode::AddItem(bool include, CItem &item, int ignoreWildcardIndex)
@@ -282,6 +296,7 @@ void CCensorNode::AddItem(bool include, CItem &item, int ignoreWildcardIndex)
    AddItemSimple(include, item);
    return;
  }
+
  const UString &front = item.PathParts.Front();
  
  // WIN32 doesn't support wildcards in file names
@@ -292,11 +307,9 @@ void CCensorNode::AddItem(bool include, CItem &item, int ignoreWildcardIndex)
    AddItemSimple(include, item);
    return;
  }
-  int index = FindSubNode(front);
-  if (index < 0)
-    index = SubNodes.Add(CCensorNode(front, this));
+  CCensorNode &subNode = Find_SubNode_Or_Add_New(front);
  item.PathParts.Delete(0);
-  SubNodes[index].AddItem(include, item, ignoreWildcardIndex - 1);
+  subNode.AddItem(include, item, ignoreWildcardIndex - 1);
 }

 void CCensorNode::AddItem(bool include, const UString &path, bool recursive, bool forFile, bool forDir, bool wildcardMatching)
@@ -347,18 +360,19 @@ bool CCensorNode::CheckPathVect(const UStringVector &pathParts, bool isFile, boo
    include = false;
    return true;
  }
-  include = true;
-  bool finded = CheckPathCurrent(true, pathParts, isFile);
-  if (pathParts.Size() <= 1)
-    return finded;
-  int index = FindSubNode(pathParts.Front());
-  if (index >= 0)
+  if (pathParts.Size() > 1)
  {
-    UStringVector pathParts2 = pathParts;
-    pathParts2.Delete(0);
-    if (SubNodes[index].CheckPathVect(pathParts2, isFile, include))
-      return true;
+    int index = FindSubNode(pathParts.Front());
+    if (index >= 0)
+    {
+      UStringVector pathParts2 = pathParts;
+      pathParts2.Delete(0);
+      if (SubNodes[(unsigned)index].CheckPathVect(pathParts2, isFile, include))
+        return true;
+    }
  }
+  bool finded = CheckPathCurrent(true, pathParts, isFile);
+  include = finded; // if (!finded), then (true) is allowed also
  return finded;
 }

@@ -394,14 +408,26 @@ bool CCensorNode::CheckPath(bool isAltStream, const UString &path, bool isFile)
 }
 */

-bool CCensorNode::CheckPathToRoot(bool include, UStringVector &pathParts, bool isFile) const
+bool CCensorNode::CheckPathToRoot_Change(bool include, UStringVector &pathParts, bool isFile) const
 {
  if (CheckPathCurrent(include, pathParts, isFile))
    return true;
-  if (Parent == 0)
+  if (!Parent)
    return false;
  pathParts.Insert(0, Name);
-  return Parent->CheckPathToRoot(include, pathParts, isFile);
+  return Parent->CheckPathToRoot_Change(include, pathParts, isFile);
+}
+
+bool CCensorNode::CheckPathToRoot(bool include, const UStringVector &pathParts, bool isFile) const
+{  // same check as CheckPathToRoot_Change(), but the caller's pathParts is left unmodified
+  if (CheckPathCurrent(include, pathParts, isFile))
+    return true;
+  if (!Parent)  // no Parent node: nothing further to check
+    return false;
+  UStringVector pathParts2;  // private copy that the Parent chain is allowed to change
+  pathParts2.Add(Name);  // this node's Name goes first ...
+  pathParts2 += pathParts;  // ... followed by the caller's parts
+  return Parent->CheckPathToRoot_Change(include, pathParts2, isFile);
 }

 /*
@@ -434,18 +460,15 @@ void CCensorNode::ExtendExclude(const CCensorNode &fromNodes)
  FOR_VECTOR (i, fromNodes.SubNodes)
  {
    const CCensorNode &node = fromNodes.SubNodes[i];
-    int subNodeIndex = FindSubNode(node.Name);
-    if (subNodeIndex < 0)
-      subNodeIndex = SubNodes.Add(CCensorNode(node.Name, this));
-    SubNodes[subNodeIndex].ExtendExclude(node);
+    Find_SubNode_Or_Add_New(node.Name).ExtendExclude(node);
  }
 }

-int CCensor::FindPrefix(const UString &prefix) const
+int CCensor::FindPairForPrefix(const UString &prefix) const
 {
  FOR_VECTOR (i, Pairs)
    if (CompareFileNames(Pairs[i].Prefix, prefix) == 0)
-      return i;
+      return (int)i;
  return -1;
 }

@@ -454,7 +477,11 @@ int CCensor::FindPrefix(const UString &prefix) const
 bool IsDriveColonName(const wchar_t *s)
 {
  wchar_t c = s[0];
-  return c != 0 && s[1] == ':' && s[2] == 0 && (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z');
+  return c != 0
+    && s[1] == ':'
+    && s[2] == 0
+    && ((c >= 'a' && c <= 'z')
+     || (c >= 'A' && c <= 'Z'));
 }

 unsigned GetNumPrefixParts_if_DrivePath(UStringVector &pathParts)
@@ -571,14 +598,16 @@ void CCensor::AddItem(ECensorPathMode pathMode, bool include, const UString &pat
      {
        const UString &part = pathParts[i];
        if (part == L".." || part == L".")
-          dotsIndex = i;
+          dotsIndex = (int)i;
      }

      if (dotsIndex >= 0)
+      {
        if (dotsIndex == (int)pathParts.Size() - 1)
          numSkipParts = pathParts.Size();
        else
          numSkipParts = pathParts.Size() - 1;
+      }
    }

    for (unsigned i = 0; i < numSkipParts; i++)
@@ -596,13 +625,16 @@ void CCensor::AddItem(ECensorPathMode pathMode, bool include, const UString &pat
    }
  }

-  int index = FindPrefix(prefix);
+  int index = FindPairForPrefix(prefix);
  if (index < 0)
-    index = Pairs.Add(CPair(prefix));
+  {
+    index = (int)Pairs.Size();
+    Pairs.AddNew().Prefix = prefix;
+  }

  if (pathMode != k_AbsPath)
  {
-    if (pathParts.IsEmpty() || pathParts.Size() == 1 && pathParts[0].IsEmpty())
+    if (pathParts.IsEmpty() || (pathParts.Size() == 1 && pathParts[0].IsEmpty()))
    {
      // we create universal item, if we skip all parts as prefix (like \ or L:\ )
      pathParts.Clear();
@@ -619,7 +651,7 @@ void CCensor::AddItem(ECensorPathMode pathMode, bool include, const UString &pat
  item.ForFile = forFile;
  item.Recursive = recursive;
  item.WildcardMatching = wildcardMatching;
-  Pairs[index].Head.AddItem(include, item, ignoreWildcardIndex);
+  Pairs[(unsigned)index].Head.AddItem(include, item, ignoreWildcardIndex);
 }

 /*
--- a/CPP/Common/Wildcard.h
+++ b/CPP/Common/Wildcard.h
@@ -51,23 +51,34 @@ struct CItem
  bool CheckPath(const UStringVector &pathParts, bool isFile) const;
 };

-class CCensorNode
+
+class CCensorNode  MY_UNCOPYABLE
 {
  CCensorNode *Parent;
  
  bool CheckPathCurrent(bool include, const UStringVector &pathParts, bool isFile) const;
  void AddItemSimple(bool include, CItem &item);
 public:
-  bool CheckPathVect(const UStringVector &pathParts, bool isFile, bool &include) const;
-
-  CCensorNode(): Parent(0) { };
-  CCensorNode(const UString &name, CCensorNode *parent): Name(name), Parent(parent) { };
+  CCensorNode(): Parent(NULL) { };
+  CCensorNode(const UString &name, CCensorNode *parent): Parent(parent), Name(name)  { };

  UString Name; // WIN32 doesn't support wildcards in file names
  CObjectVector<CCensorNode> SubNodes;
  CObjectVector<CItem> IncludeItems;
  CObjectVector<CItem> ExcludeItems;

+  CCensorNode &Find_SubNode_Or_Add_New(const UString &name)
+  {  // returns the sub-node that FindSubNode() finds for (name); appends a new sub-node if there is none
+    int i = FindSubNode(name);  // -1 means that no sub-node matches (name)
+    if (i >= 0)
+      return SubNodes[(unsigned)i];
+    // return SubNodes.Add(CCensorNode(name, this));
+    CCensorNode &node = SubNodes.AddNew();
+    node.Parent = this;  // link the new sub-node back to this node
+    node.Name = name;
+    return node;
+  }
+
  bool AreAllAllowed() const;

  int FindSubNode(const UString &path) const;
@@ -76,25 +87,41 @@ public:
  void AddItem(bool include, const UString &path, bool recursive, bool forFile, bool forDir, bool wildcardMatching);
  void AddItem2(bool include, const UString &path, bool recursive, bool wildcardMatching);

+  // NeedCheckSubDirs() returns true, if there are IncludeItems rules that affect items in subdirs
  bool NeedCheckSubDirs() const;
  bool AreThereIncludeItems() const;

+  /*
+  CheckPathVect() doesn't check path in Parent CCensorNode
+  so use CheckPathVect() for root CCensorNode
+  OUT:
+    returns (true) && (include = false) - file in exclude list
+    returns (true) && (include = true)  - file in include list and is not in exclude list
+    returns (false)  - file is not in (include/exclude) list
+  */
+  bool CheckPathVect(const UStringVector &pathParts, bool isFile, bool &include) const;
+
  // bool CheckPath2(bool isAltStream, const UString &path, bool isFile, bool &include) const;
  // bool CheckPath(bool isAltStream, const UString &path, bool isFile) const;

-  bool CheckPathToRoot(bool include, UStringVector &pathParts, bool isFile) const;
+  // CheckPathToRoot_Change() changes pathParts !!!
+  bool CheckPathToRoot_Change(bool include, UStringVector &pathParts, bool isFile) const;
+  bool CheckPathToRoot(bool include, const UStringVector &pathParts, bool isFile) const;
+
  // bool CheckPathToRoot(const UString &path, bool isFile, bool include) const;
  void ExtendExclude(const CCensorNode &fromNodes);
 };

-struct CPair
+
+struct CPair  MY_UNCOPYABLE
 {
  UString Prefix;
  CCensorNode Head;
  
-  CPair(const UString &prefix): Prefix(prefix) { };
+  // CPair(const UString &prefix): Prefix(prefix) { };
 };

+
 enum ECensorPathMode
 {
  k_RelatPath,  // absolute prefix as Prefix, remain path in Tree
@@ -102,6 +129,7 @@ enum ECensorPathMode
  k_AbsPath     // full path in Tree
 };

+
 struct CCensorPath
 {
  UString Path;
@@ -116,9 +144,10 @@ struct CCensorPath
    {}
 };

-class CCensor
+
+class CCensor  MY_UNCOPYABLE
 {
-  int FindPrefix(const UString &prefix) const;
+  int FindPairForPrefix(const UString &prefix) const;
 public:
  CObjectVector<CPair> Pairs;

@@ -143,7 +172,6 @@ public:
  }
 };

-
 }

 #endif