This commit is contained in:
Igor Pavlov
2021-07-22 23:00:14 +01:00
committed by Kornel
parent 4a960640a3
commit 585698650f
619 changed files with 34904 additions and 10859 deletions

View File

@@ -4,4 +4,4 @@
#include "../../C/7zCrc.h"
struct CCRCTableInit { CCRCTableInit() { CrcGenerateTable(); } } g_CRCTableInit;
static struct CCRCTableInit { CCRCTableInit() { CrcGenerateTable(); } } g_CRCTableInit;

View File

@@ -1,92 +1,3 @@
// Common/C_FileIO.cpp
#include "C_FileIO.h"
#include <fcntl.h>
#ifdef _WIN32
#include <io.h>
#else
#include <unistd.h>
#endif
namespace NC {
namespace NFile {
namespace NIO {
// Opens 'name' with the given open() flags, adding O_BINARY on platforms
// that define it. Any handle currently held is passed to Close() first
// (its result is not checked). Returns true if open() did not return -1.
bool CFileBase::OpenBinary(const char *name, int flags)
{
  #ifdef O_BINARY
  const int openFlags = flags | O_BINARY;
  #else
  const int openFlags = flags;
  #endif
  Close();
  _handle = ::open(name, openFlags, 0666);
  if (_handle == -1)
    return false;
  return true;
}
// Closes the held handle, if any. Returns true when there was nothing to
// close or close() succeeded; on failure the handle value is kept and
// false is returned.
bool CFileBase::Close()
{
  if (_handle != -1)
  {
    if (close(_handle) != 0)
      return false;
    _handle = -1;
  }
  return true;
}
// Stores the file size in 'length' by seeking to the end and then restoring
// the previous position.
// Returns false (leaving 'length' untouched) if any of the three seeks
// fails; previously failures were ignored and (UInt64)-1 could be reported
// as a valid length.
bool CFileBase::GetLength(UInt64 &length) const
{
  const off_t curPos = Seek(0, SEEK_CUR);
  if (curPos == (off_t)-1)
    return false;
  const off_t endPos = Seek(0, SEEK_END);
  if (endPos == (off_t)-1)
    return false;
  // Put the file pointer back where the caller left it.
  if (Seek(curPos, SEEK_SET) == (off_t)-1)
    return false;
  length = (UInt64)endPos;
  return true;
}
// Thin wrapper over lseek() on the held handle; returns lseek()'s result
// unchanged.
off_t CFileBase::Seek(off_t distanceToMove, int moveMethod) const
{
  const off_t newPos = ::lseek(_handle, distanceToMove, moveMethod);
  return newPos;
}
/////////////////////////
// CInFile
// Opens 'name' read-only (O_RDONLY) through CFileBase::OpenBinary.
bool CInFile::Open(const char *name)
{
  const int flags = O_RDONLY;
  return CFileBase::OpenBinary(name, flags);
}
bool CInFile::OpenShared(const char *name, bool)
{
return Open(name);
}
// Forwards to read() on the held handle and returns its result unchanged.
ssize_t CInFile::Read(void *data, size_t size)
{
  const ssize_t numRead = read(_handle, data, size);
  return numRead;
}
/////////////////////////
// COutFile
// Creates 'name' for writing.
//   createAlways == true : closes the current handle and calls creat().
//   createAlways == false: opens with O_CREAT | O_EXCL | O_WRONLY.
// Returns true when a handle was obtained.
bool COutFile::Create(const char *name, bool createAlways)
{
  if (!createAlways)
    return OpenBinary(name, O_CREAT | O_EXCL | O_WRONLY);

  Close();
  _handle = ::creat(name, 0666);
  return _handle != -1;
}
// Equivalent to Create(name, false); 'creationDisposition' is not consulted.
bool COutFile::Open(const char *name, DWORD creationDisposition)
{
  const bool createAlways = false;
  return Create(name, createAlways);
}
// Forwards to write() on the held handle and returns its result unchanged.
ssize_t COutFile::Write(const void *data, size_t size)
{
  const ssize_t numWritten = write(_handle, data, size);
  return numWritten;
}
}}}
#include "StdAfx.h"

View File

@@ -3,51 +3,4 @@
#ifndef __COMMON_C_FILEIO_H
#define __COMMON_C_FILEIO_H
#include <stdio.h>
#include <sys/types.h>
#include "MyTypes.h"
#include "MyWindows.h"
#ifdef _WIN32
#ifdef _MSC_VER
typedef size_t ssize_t;
#endif
#endif
namespace NC {
namespace NFile {
namespace NIO {
// Owner of an integer file handle (-1 means "no file").
// The destructor calls Close(), so the object must not be copied: two copies
// would both try to close the same handle. Copy construction and assignment
// are therefore declared private and left undefined.
class CFileBase
{
  CFileBase(const CFileBase &);
  CFileBase &operator=(const CFileBase &);
protected:
  int _handle;
  // Opens 'name' with the given open() flags and stores the handle.
  bool OpenBinary(const char *name, int flags);
public:
  CFileBase(): _handle(-1) {}
  ~CFileBase() { Close(); }
  // Closes the held handle, if any.
  bool Close();
  // Reports the file size through 'length'.
  bool GetLength(UInt64 &length) const;
  // Moves the file position; 'moveMethod' is passed to lseek() as 'whence'.
  off_t Seek(off_t distanceToMove, int moveMethod) const;
};
// Input file on top of CFileBase: opening and reading.
class CInFile: public CFileBase
{
public:
  // Reads up to 'size' bytes into 'data'.
  ssize_t Read(void *data, size_t size);
  // Opens 'name' for reading.
  bool Open(const char *name);
  // Opens 'name' for reading; takes an extra sharing flag.
  bool OpenShared(const char *name, bool shareForWrite);
};
// Output file on top of CFileBase: creation and writing.
class COutFile: public CFileBase
{
public:
  // Writes up to 'size' bytes from 'data'.
  ssize_t Write(const void *data, size_t size);
  // Creates 'name'; 'createAlways' selects between the two creation modes.
  bool Create(const char *name, bool createAlways);
  // Opens 'name' for writing; takes a creation-disposition value.
  bool Open(const char *name, DWORD creationDisposition);
};
}}}
#endif

View File

@@ -84,7 +84,7 @@ bool CParser::ParseString(const UString &s, const CSwitchForm *switchForms, unsi
if (IsString1PrefixedByString2_NoCase_Ascii((const wchar_t *)s + pos, key))
{
switchIndex = i;
maxLen = switchLen;
maxLen = (int)switchLen;
}
}
@@ -94,7 +94,7 @@ bool CParser::ParseString(const UString &s, const CSwitchForm *switchForms, unsi
return false;
}
pos += maxLen;
pos += (unsigned)maxLen;
CSwitchResult &sw = _switches[switchIndex];
const CSwitchForm &form = switchForms[switchIndex];
@@ -107,7 +107,7 @@ bool CParser::ParseString(const UString &s, const CSwitchForm *switchForms, unsi
sw.ThereIs = true;
int rem = s.Len() - pos;
const unsigned rem = s.Len() - pos;
if (rem < form.MinLen)
{
ErrorMessage = "Too short switch:";
@@ -178,7 +178,7 @@ bool CParser::ParseStrings(const CSwitchForm *switchForms, unsigned numSwitches,
{
if (s.IsEqualTo(kStopSwitchParsing))
{
StopSwitchIndex = NonSwitchStrings.Size();
StopSwitchIndex = (int)NonSwitchStrings.Size();
continue;
}
if (!s.IsEmpty() && IsItSwitchChar(s[0]))

View File

@@ -38,7 +38,7 @@ struct CSwitchResult
int PostCharIndex;
UStringVector PostStrings;
CSwitchResult(): ThereIs(false) {};
CSwitchResult(): ThereIs(false) {}
};
class CParser

View File

@@ -40,4 +40,18 @@ you can change this h file or h files included in this file.
#define MY_ARRAY_NEW(p, T, size) p = new T[size];
#endif
// MY_ATTR_NORETURN: "function does not return" annotation, chosen per compiler:
//   __GNUC__ >= 8          -> __attribute__((noreturn))
//   clang major >= 3       -> [[noreturn]] if cxx_attributes is available,
//                             otherwise __attribute__ ((noreturn))
//   _MSC_VER >= 1900       -> [[noreturn]]
//   anything else          -> expands to nothing
#if (defined(__GNUC__) && (__GNUC__ >= 8))
#define MY_ATTR_NORETURN __attribute__((noreturn))
#elif (defined(__clang__) && (__clang_major__ >= 3))
#if __has_feature(cxx_attributes)
#define MY_ATTR_NORETURN [[noreturn]]
#else
#define MY_ATTR_NORETURN __attribute__ ((noreturn))
#endif
#elif (defined(_MSC_VER) && (_MSC_VER >= 1900))
#define MY_ATTR_NORETURN [[noreturn]]
#else
#define MY_ATTR_NORETURN
#endif
#endif

View File

@@ -16,8 +16,10 @@ typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size,
UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table);
extern CRC_FUNC g_CrcUpdate;
extern CRC_FUNC g_CrcUpdateT8;
extern CRC_FUNC g_CrcUpdateT4;
extern CRC_FUNC g_CrcUpdateT8;
extern CRC_FUNC g_CrcUpdateT0_32;
extern CRC_FUNC g_CrcUpdateT0_64;
EXTERN_C_END
@@ -41,25 +43,20 @@ public:
bool CCrcHasher::SetFunctions(UInt32 tSize)
{
_updateFunc = g_CrcUpdate;
CRC_FUNC f = NULL;
if (tSize == 0) f = g_CrcUpdate;
else if (tSize == 1) f = CrcUpdateT1;
else if (tSize == 4) f = g_CrcUpdateT4;
else if (tSize == 8) f = g_CrcUpdateT8;
else if (tSize == 32) f = g_CrcUpdateT0_32;
else if (tSize == 64) f = g_CrcUpdateT0_64;
if (tSize == 1)
_updateFunc = CrcUpdateT1;
else if (tSize == 4)
if (!f)
{
if (g_CrcUpdateT4)
_updateFunc = g_CrcUpdateT4;
else
return false;
_updateFunc = g_CrcUpdate;
return false;
}
else if (tSize == 8)
{
if (g_CrcUpdateT8)
_updateFunc = g_CrcUpdateT8;
else
return false;
}
_updateFunc = f;
return true;
}

View File

@@ -10,6 +10,7 @@ template <class T> inline int MyCompare(T a, T b)
{ return a == b ? 0 : (a < b ? -1 : 1); }
// Maps false to 0 and true to 1.
inline int BoolToInt(bool v)
{
  if (v)
    return 1;
  return 0;
}
// Maps false to 0u and true to 1u.
inline unsigned BoolToUInt(bool v)
{
  if (v)
    return (unsigned)1;
  return (unsigned)0;
}
// Returns false for 0 and true for every other value.
inline bool IntToBool(int v)
{
  if (v == 0)
    return false;
  return true;
}
#endif

View File

@@ -51,7 +51,7 @@ CDynLimBuf & CDynLimBuf::operator+=(char c) throw()
_chars = newBuf;
_size = n;
}
_chars[_pos++] = c;
_chars[_pos++] = (Byte)c;
return *this;
}

View File

@@ -54,7 +54,7 @@ public:
memcpy(GetCurPtrAndGrow(size), data, size * sizeof(T));
}
const size_t GetPos() const { return _pos; }
size_t GetPos() const { return _pos; }
// void Empty() { _pos = 0; }
};

View File

@@ -10,20 +10,20 @@
unsigned char temp[tempSize]; unsigned i = 0; \
while (val >= 10) { temp[i++] = (unsigned char)('0' + (unsigned)(val % 10)); val /= 10; } \
*s++ = (charType)('0' + (unsigned)val); \
while (i != 0) { i--; *s++ = temp[i]; } \
*s = 0;
while (i != 0) { i--; *s++ = (charType)temp[i]; } \
*s = 0; \
return s;
void ConvertUInt32ToString(UInt32 val, char *s) throw()
char * ConvertUInt32ToString(UInt32 val, char *s) throw()
{
CONVERT_INT_TO_STR(char, 16);
}
void ConvertUInt64ToString(UInt64 val, char *s) throw()
char * ConvertUInt64ToString(UInt64 val, char *s) throw()
{
if (val <= (UInt32)0xFFFFFFFF)
{
ConvertUInt32ToString((UInt32)val, s);
return;
return ConvertUInt32ToString((UInt32)val, s);
}
CONVERT_INT_TO_STR(char, 24);
}
@@ -119,17 +119,16 @@ void ConvertUInt32ToHex8Digits(UInt32 val, wchar_t *s)
}
*/
void ConvertUInt32ToString(UInt32 val, wchar_t *s) throw()
wchar_t * ConvertUInt32ToString(UInt32 val, wchar_t *s) throw()
{
CONVERT_INT_TO_STR(wchar_t, 16);
}
void ConvertUInt64ToString(UInt64 val, wchar_t *s) throw()
wchar_t * ConvertUInt64ToString(UInt64 val, wchar_t *s) throw()
{
if (val <= (UInt32)0xFFFFFFFF)
{
ConvertUInt32ToString((UInt32)val, s);
return;
return ConvertUInt32ToString((UInt32)val, s);
}
CONVERT_INT_TO_STR(wchar_t, 24);
}
@@ -141,7 +140,7 @@ void ConvertInt64ToString(Int64 val, char *s) throw()
*s++ = '-';
val = -val;
}
ConvertUInt64ToString(val, s);
ConvertUInt64ToString((UInt64)val, s);
}
void ConvertInt64ToString(Int64 val, wchar_t *s) throw()
@@ -151,7 +150,7 @@ void ConvertInt64ToString(Int64 val, wchar_t *s) throw()
*s++ = L'-';
val = -val;
}
ConvertUInt64ToString(val, s);
ConvertUInt64ToString((UInt64)val, s);
}

View File

@@ -5,11 +5,13 @@
#include "MyTypes.h"
void ConvertUInt32ToString(UInt32 value, char *s) throw();
void ConvertUInt64ToString(UInt64 value, char *s) throw();
// return: the pointer to the "terminating" null character after written characters
void ConvertUInt32ToString(UInt32 value, wchar_t *s) throw();
void ConvertUInt64ToString(UInt64 value, wchar_t *s) throw();
char * ConvertUInt32ToString(UInt32 value, char *s) throw();
char * ConvertUInt64ToString(UInt64 value, char *s) throw();
wchar_t * ConvertUInt32ToString(UInt32 value, wchar_t *s) throw();
wchar_t * ConvertUInt64ToString(UInt64 value, wchar_t *s) throw();
void ConvertUInt64ToOct(UInt64 value, char *s) throw();

View File

@@ -31,7 +31,7 @@ bool CLang::OpenFromString(const AString &s2)
for (const char *p = kLangSignature;; i++)
{
Byte c = *p++;
Byte c = (Byte)(*p++);
if (c == 0)
break;
if (s[i] != c)
@@ -122,10 +122,10 @@ bool CLang::Open(CFSTR fileName, const char *id)
return false;
AString s;
unsigned len = (unsigned)length;
const unsigned len = (unsigned)length;
char *p = s.GetBuf(len);
UInt32 processed;
if (!file.Read(p, len, processed))
size_t processed;
if (!file.ReadFull(p, len, processed))
return false;
file.Close();
if (len != processed)
@@ -159,5 +159,5 @@ const wchar_t *CLang::Get(UInt32 id) const throw()
int index = _ids.FindInSorted(id);
if (index < 0)
return NULL;
return _text + (size_t)_offsets[index];
return _text + (size_t)_offsets[(unsigned)index];
}

View File

@@ -4,14 +4,19 @@
#include "../../C/CpuArch.h"
#include "../Windows/FileIO.h"
#include "ListFileUtils.h"
#include "MyBuffer.h"
#include "StringConvert.h"
#include "UTFConvert.h"
static const char kQuoteChar = '\"';
#include "../Windows/FileIO.h"
#define CSysInFile NWindows::NFile::NIO::CInFile
#define MY_GET_LAST_ERROR ::GetLastError()
#define kQuoteChar '\"'
static void AddName(UStringVector &strings, UString &s)
{
@@ -25,19 +30,37 @@ static void AddName(UStringVector &strings, UString &s)
strings.Add(s);
}
// Reads exactly 'size' bytes from 'file' into 'data'.
// On a ReadFull() failure 'lastError' receives MY_GET_LAST_ERROR; on a short
// read it receives 1. Returns true only when all 'size' bytes were read.
static bool My_File_Read(CSysInFile &file, void *data, size_t size, DWORD &lastError)
{
  size_t numRead;
  if (file.ReadFull(data, size, numRead))
  {
    if (numRead == size)
      return true;
    lastError = 1; // error: size of listfile was changed
    return false;
  }
  lastError = MY_GET_LAST_ERROR;
  return false;
}
bool ReadNamesFromListFile2(CFSTR fileName, UStringVector &strings, UINT codePage, DWORD &lastError)
{
lastError = 0;
NWindows::NFile::NIO::CInFile file;
CSysInFile file;
if (!file.Open(fileName))
{
lastError = ::GetLastError();
lastError = MY_GET_LAST_ERROR;
return false;
}
UInt64 fileSize;
if (!file.GetLength(fileSize))
{
lastError = ::GetLastError();
lastError = MY_GET_LAST_ERROR;
return false;
}
if (fileSize >= ((UInt32)1 << 31) - 32)
@@ -48,16 +71,12 @@ bool ReadNamesFromListFile2(CFSTR fileName, UStringVector &strings, UINT codePag
if ((fileSize & 1) != 0)
return false;
CByteArr buf((size_t)fileSize);
UInt32 processed;
if (!file.Read(buf, (UInt32)fileSize, processed))
{
lastError = ::GetLastError();
return false;
}
if (processed != fileSize)
if (!My_File_Read(file, buf, (size_t)fileSize, lastError))
return false;
file.Close();
unsigned num = (unsigned)fileSize / 2;
const unsigned num = (unsigned)fileSize / 2;
wchar_t *p = u.GetBuf(num);
if (codePage == MY__CP_UTF16)
for (unsigned i = 0; i < num; i++)
@@ -82,22 +101,21 @@ bool ReadNamesFromListFile2(CFSTR fileName, UStringVector &strings, UINT codePag
{
AString s;
char *p = s.GetBuf((unsigned)fileSize);
UInt32 processed;
if (!file.Read(p, (UInt32)fileSize, processed))
{
lastError = ::GetLastError();
return false;
}
if (processed != fileSize)
if (!My_File_Read(file, p, (size_t)fileSize, lastError))
return false;
file.Close();
s.ReleaseBuf_CalcLen((unsigned)processed);
if (s.Len() != processed)
s.ReleaseBuf_CalcLen((unsigned)fileSize);
if (s.Len() != fileSize)
return false;
// #ifdef CP_UTF8
if (codePage == CP_UTF8)
{
// we must check UTF8 here, if convert function doesn't check
if (!CheckUTF8_AString(s))
return false;
if (!ConvertUTF8ToUnicode(s, u))
return false;
}

View File

@@ -4,6 +4,7 @@
#define __COMMON_MY_BUFFER_H
#include "Defs.h"
#include "MyTypes.h"
/* 7-Zip now uses CBuffer only as CByteBuffer.
So there is no need to use MY_ARRAY_NEW macro in CBuffer code. */
@@ -91,6 +92,12 @@ public:
_size = newSize;
}
// Overwrites the whole buffer (_size items of T) with zero bytes.
// The stores go through a volatile pointer so that an optimizer cannot drop
// them as dead stores when the buffer is released right afterwards (a plain
// memset() in that position may legally be removed).
void Wipe()
{
  volatile unsigned char *p = (volatile unsigned char *)(void *)_items;
  for (size_t n = _size * sizeof(T); n != 0; n--)
    *p++ = 0;
}
CBuffer& operator=(const CBuffer &buffer)
{
if (&buffer != this)
@@ -127,6 +134,17 @@ bool operator!=(const CBuffer<T>& b1, const CBuffer<T>& b2)
typedef CBuffer<unsigned char> CByteBuffer;
class CByteBuffer_Wipe: public CByteBuffer
{
CLASS_NO_COPY(CByteBuffer_Wipe)
public:
// CByteBuffer_Wipe(): CBuffer<unsigned char>() {}
CByteBuffer_Wipe(size_t size): CBuffer<unsigned char>(size) {}
~CByteBuffer_Wipe() { Wipe(); }
};
template <class T> class CObjArray
{
protected:

View File

@@ -57,6 +57,15 @@ public:
ISzAlloc_Free(&g_AlignedAlloc, _data);
}
CAlignedBuffer(size_t size): _size(0)
{
_data = NULL;
_data = (Byte *)ISzAlloc_Alloc(&g_AlignedAlloc, size);
if (!_data)
throw 1;
_size = size;
}
void Free()
{
ISzAlloc_Free(&g_AlignedAlloc, _data);

View File

@@ -4,6 +4,7 @@
#define __MY_COM_H
#include "MyWindows.h"
#include "MyTypes.h"
#ifndef RINOK
#define RINOK(x) { HRESULT __result_ = (x); if (__result_ != S_OK) return __result_; }
@@ -81,7 +82,7 @@ inline HRESULT StringToBstr(LPCOLESTR src, BSTR *bstr)
class CMyComBSTR
{
BSTR m_str;
CLASS_NO_COPY(CMyComBSTR)
public:
CMyComBSTR(): m_str(NULL) {}
~CMyComBSTR() { ::SysFreeString(m_str); }
@@ -89,13 +90,23 @@ public:
operator LPCOLESTR() const { return m_str; }
// operator bool() const { return m_str != NULL; }
// bool operator!() const { return m_str == NULL; }
// Zeroes the characters of the held BSTR (SysStringLen() code units) and then
// calls Empty(). Does nothing when no string is held.
// The zeroing uses volatile stores so that it cannot be optimized away as a
// dead store ahead of the release of the string.
void Wipe_and_Free()
{
  if (m_str)
  {
    volatile unsigned char *p = (volatile unsigned char *)(void *)m_str;
    for (size_t n = (size_t)::SysStringLen(m_str) * sizeof(*m_str); n != 0; n--)
      *p++ = 0;
    Empty();
  }
}
private:
// operator BSTR() const { return m_str; }
CMyComBSTR(LPCOLESTR src) { m_str = ::SysAllocString(src); }
// CMyComBSTR(int nSize) { m_str = ::SysAllocStringLen(NULL, nSize); }
// CMyComBSTR(int nSize, LPCOLESTR sz) { m_str = ::SysAllocStringLen(sz, nSize); }
CMyComBSTR(const CMyComBSTR& src) { m_str = src.MyCopy(); }
// CMyComBSTR(const CMyComBSTR& src) { m_str = src.MyCopy(); }
/*
CMyComBSTR(REFGUID src)
@@ -107,6 +118,7 @@ private:
}
*/
/*
CMyComBSTR& operator=(const CMyComBSTR& src)
{
if (m_str != src.m_str)
@@ -117,6 +129,7 @@ private:
}
return *this;
}
*/
CMyComBSTR& operator=(LPCOLESTR src)
{
@@ -158,6 +171,15 @@ private:
};
// CMyComBSTR variant whose destructor calls Wipe_and_Free(). Not copyable.
class CMyComBSTR_Wipe: public CMyComBSTR
{
  CLASS_NO_COPY(CMyComBSTR_Wipe)
public:
  CMyComBSTR_Wipe() {}
  ~CMyComBSTR_Wipe() { Wipe_and_Free(); }
};
/*
If CMyUnknownImp doesn't use virtual destructor, the code size is smaller.
@@ -168,17 +190,24 @@ private:
virtual ~class_1();
In that case, class_1::Release() calls correct destructor of class_2.
Also you can use virtual ~CMyUnknownImp(), if you want to disable warning
We use virtual ~CMyUnknownImp() to disable warning
"class has virtual functions, but destructor is not virtual".
also we can use virtual ~IUnknown() {} in MyWindows.h
*/
// Base class that carries the reference counter (__m_RefCount, starts at 0).
// Not copyable. The destructor is declared virtual only when building for
// _WIN32 with GCC or Clang.
class CMyUnknownImp
{
CLASS_NO_COPY(CMyUnknownImp)
public:
ULONG __m_RefCount;
CMyUnknownImp(): __m_RefCount(0) {}
// virtual
#ifdef _WIN32
#if defined(__GNUC__) || defined(__clang__)
virtual // to disable GCC/CLANG warnings
#endif
#endif
~CMyUnknownImp() {}
};

View File

@@ -18,6 +18,9 @@ typedef struct {
#define REFGUID const GUID *
#endif
// typedef GUID IID;
typedef GUID CLSID;
#define REFCLSID REFGUID
#define REFIID REFGUID

View File

@@ -19,13 +19,17 @@ Also we need IID_IUnknown that is initialized in some file for linking:
Other: we define IID_IUnknown in this file
*/
#ifdef __clang__
#pragma clang diagnostic ignored "-Wmissing-variable-declarations"
#endif
#ifdef _WIN32
#ifdef UNDER_CE
#include <basetyps.h>
#endif
#include <initguid.h>
#include <InitGuid.h>
#ifdef UNDER_CE
DEFINE_GUID(IID_IUnknown,

View File

@@ -237,11 +237,25 @@ bool UString::IsPrefixedBy_Ascii_NoCase(const char *s) const throw()
}
}
// Returns true when the two null-terminated strings are identical
// (same characters and same length), false otherwise.
bool StringsAreEqual_Ascii(const char *u, const char *a) throw()
{
  while (*u == *a)
  {
    if (*a == 0)
      return true;
    u++;
    a++;
  }
  return false;
}
bool StringsAreEqual_Ascii(const wchar_t *u, const char *a) throw()
{
for (;;)
{
unsigned char c = *a;
unsigned char c = (unsigned char)*a;
if (c != *u)
return false;
if (c == 0)
@@ -632,9 +646,8 @@ AString &AString::operator+=(const AString &s)
void AString::Add_UInt32(UInt32 v)
{
char sz[16];
ConvertUInt32ToString(v, sz);
(*this) += sz;
Grow(10);
_len = (unsigned)(ConvertUInt32ToString(v, _chars + _len) - _chars);
}
void AString::SetFrom(const char *s, unsigned len) // no check
@@ -835,7 +848,7 @@ void AString::Replace(char oldChar, char newChar) throw()
char *chars = _chars;
while ((unsigned)pos < _len)
{
pos = Find(oldChar, pos);
pos = Find(oldChar, (unsigned)pos);
if (pos < 0)
break;
chars[(unsigned)pos] = newChar;
@@ -857,11 +870,11 @@ void AString::Replace(const AString &oldString, const AString &newString)
int pos = 0;
while ((unsigned)pos < _len)
{
pos = Find(oldString, pos);
pos = Find(oldString, (unsigned)pos);
if (pos < 0)
break;
Delete(pos, oldLen);
Insert(pos, newString);
Delete((unsigned)pos, oldLen);
Insert((unsigned)pos, newString);
pos += newLen;
// number++;
}
@@ -1150,9 +1163,31 @@ void UString::SetFrom(const wchar_t *s, unsigned len) // no check
_len = len;
}
void UString::SetFromBstr(BSTR s)
void UString::SetFromBstr(LPCOLESTR s)
{
unsigned len = ::SysStringLen(s);
unsigned len = ::SysStringLen((BSTR)(void *)(s));
/*
#if WCHAR_MAX > 0xffff
size_t num_wchars = 0;
for (size_t i = 0; i < len;)
{
wchar_t c = s[i++];
if (c >= 0xd800 && c < 0xdc00 && i + 1 != len)
{
wchar_t c2 = s[i];
if (c2 >= 0xdc00 && c2 < 0x10000)
{
c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
i++;
}
}
num_wchars++;
}
len = num_wchars;
#endif
*/
if (len > _limit)
{
wchar_t *newBuf = MY_STRING_NEW_wchar_t(len + 1);
@@ -1161,8 +1196,33 @@ void UString::SetFromBstr(BSTR s)
_limit = len;
}
_len = len;
/*
#if WCHAR_MAX > 0xffff
wchar_t *chars = _chars;
for (size_t i = 0; i <= len; i++)
{
wchar_t c = *s++;
if (c >= 0xd800 && c < 0xdc00 && i + 1 != len)
{
wchar_t c2 = *s;
if (c2 >= 0xdc00 && c2 < 0x10000)
{
s++;
c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
}
}
chars[i] = c;
}
#else
*/
// if (s)
wmemcpy(_chars, s, len + 1);
// #endif
}
UString &UString::operator=(const char *s)
@@ -1229,9 +1289,8 @@ UString &UString::operator+=(const char *s)
void UString::Add_UInt32(UInt32 v)
{
char sz[16];
ConvertUInt32ToString(v, sz);
(*this) += sz;
Grow(10);
_len = (unsigned)(ConvertUInt32ToString(v, _chars + _len) - _chars);
}
@@ -1341,7 +1400,7 @@ void UString::InsertAtFront(wchar_t c)
}
/*
void UString::Insert(unsigned index, wchar_t c)
void UString::Insert_wchar_t(unsigned index, wchar_t c)
{
InsertSpace(index, 1);
_chars[index] = c;
@@ -1409,7 +1468,7 @@ void UString::Replace(wchar_t oldChar, wchar_t newChar) throw()
wchar_t *chars = _chars;
while ((unsigned)pos < _len)
{
pos = Find(oldChar, pos);
pos = Find(oldChar, (unsigned)pos);
if (pos < 0)
break;
chars[(unsigned)pos] = newChar;
@@ -1431,11 +1490,11 @@ void UString::Replace(const UString &oldString, const UString &newString)
int pos = 0;
while ((unsigned)pos < _len)
{
pos = Find(oldString, pos);
pos = Find(oldString, (unsigned)pos);
if (pos < 0)
break;
Delete(pos, oldLen);
Insert(pos, newString);
Delete((unsigned)pos, oldLen);
Insert((unsigned)pos, newString);
pos += newLen;
// number++;
}
@@ -1609,6 +1668,8 @@ int MyStringCompareNoCase(const char *s1, const char *s2)
}
*/
#if !defined(USE_UNICODE_FSTRING) || !defined(_UNICODE)
static inline UINT GetCurrentCodePage()
{
#if defined(UNDER_CE) || !defined(_WIN32)
@@ -1618,6 +1679,8 @@ static inline UINT GetCurrentCodePage()
#endif
}
#endif
#ifdef USE_UNICODE_FSTRING
#ifndef _UNICODE
@@ -1637,9 +1700,9 @@ FString fas2fs(const AString &s)
return MultiByteToUnicodeString(s, GetCurrentCodePage());
}
#endif
#endif // _UNICODE
#else
#else // USE_UNICODE_FSTRING
UString fs2us(const FChar *s)
{
@@ -1656,4 +1719,4 @@ FString us2fs(const wchar_t *s)
return UnicodeStringToMultiByte(s, GetCurrentCodePage());
}
#endif
#endif // USE_UNICODE_FSTRING

View File

@@ -159,7 +159,7 @@ inline wchar_t MyCharUpper(wchar_t c) throw()
return (wchar_t)MyCharUpper_WIN(c);
#endif
#else
return (wchar_t)towupper(c);
return (wchar_t)towupper((wint_t)c);
#endif
}
@@ -207,6 +207,7 @@ int MyStringCompareNoCase(const wchar_t *s1, const wchar_t *s2) throw();
// ---------- ASCII ----------
// char values in ASCII strings must be less than 128
bool StringsAreEqual_Ascii(const char *u, const char *a) throw();
bool StringsAreEqual_Ascii(const wchar_t *u, const char *a) throw();
bool StringsAreEqualNoCase_Ascii(const char *s1, const char *s2) throw();
bool StringsAreEqualNoCase_Ascii(const wchar_t *s1, const char *s2) throw();
@@ -231,7 +232,7 @@ bool StringsAreEqualNoCase_Ascii(const wchar_t *s1, const wchar_t *s2) throw();
cls &operator=(const t *); \
cls &operator+=(t); \
cls &operator+=(const t *); \
FORBID_STRING_OPS_2(cls, t); \
FORBID_STRING_OPS_2(cls, t) \
/*
cls &operator+(t); \
@@ -266,7 +267,7 @@ class AString
AString(const AString &s, char c); // it's for String + char
AString(const char *s1, unsigned num1, const char *s2, unsigned num2);
friend AString operator+(const AString &s, char c) { return AString(s, c); } ;
friend AString operator+(const AString &s, char c) { return AString(s, c); }
// friend AString operator+(char c, const AString &s); // is not supported
friend AString operator+(const AString &s1, const AString &s2);
@@ -300,6 +301,7 @@ public:
void Empty() { _len = 0; _chars[0] = 0; }
operator const char *() const { return _chars; }
char *Ptr_non_const() const { return _chars; }
const char *Ptr() const { return _chars; }
const char *Ptr(unsigned pos) const { return _chars + pos; }
const char *RightPtr(unsigned num) const { return _chars + _len - num; }
@@ -438,8 +440,30 @@ public:
_chars[index] = 0;
}
}
// Zeroes the whole character buffer (_limit + 1 chars) and sets the length
// to 0. Does nothing when there is no buffer.
// The zeroing uses volatile stores so that it cannot be optimized away as a
// dead store when the buffer is freed right afterwards.
void Wipe_and_Empty()
{
  if (_chars)
  {
    volatile unsigned char *p = (volatile unsigned char *)(void *)_chars;
    for (size_t n = ((size_t)_limit + 1) * sizeof(*_chars); n != 0; n--)
      *p++ = 0;
    _len = 0;
  }
}
};
// AString variant whose destructor calls Wipe_and_Empty(). Not copyable.
class AString_Wipe: public AString
{
  CLASS_NO_COPY(AString_Wipe)
public:
  AString_Wipe() {}
  // AString_Wipe(const AString &s): AString(s) {}
  // AString_Wipe &operator=(const AString &s) { AString::operator=(s); return *this; }
  // AString_Wipe &operator=(const char *s) { AString::operator=(s); return *this; }
  ~AString_Wipe() { Wipe_and_Empty(); }
};
bool operator<(const AString &s1, const AString &s2);
bool operator>(const AString &s1, const AString &s2);
@@ -500,7 +524,7 @@ class UString
UString(const UString &s, wchar_t c); // it's for String + char
UString(const wchar_t *s1, unsigned num1, const wchar_t *s2, unsigned num2);
friend UString operator+(const UString &s, wchar_t c) { return UString(s, c); } ;
friend UString operator+(const UString &s, wchar_t c) { return UString(s, c); }
// friend UString operator+(wchar_t c, const UString &s); // is not supported
friend UString operator+(const UString &s1, const UString &s2);
@@ -539,6 +563,7 @@ public:
void Empty() { _len = 0; _chars[0] = 0; }
operator const wchar_t *() const { return _chars; }
wchar_t *Ptr_non_const() const { return _chars; }
const wchar_t *Ptr() const { return _chars; }
const wchar_t *Ptr(unsigned pos) const { return _chars + pos; }
const wchar_t *RightPtr(unsigned num) const { return _chars + _len - num; }
@@ -578,7 +603,7 @@ public:
UString &operator=(const wchar_t *s);
UString &operator=(const UString &s);
void SetFrom(const wchar_t *s, unsigned len); // no check
void SetFromBstr(BSTR s);
void SetFromBstr(LPCOLESTR s);
UString &operator=(const char *s);
UString &operator=(const AString &s) { return operator=(s.Ptr()); }
@@ -659,7 +684,7 @@ public:
}
void InsertAtFront(wchar_t c);
// void Insert(unsigned index, wchar_t c);
// void Insert_wchar_t(unsigned index, wchar_t c);
void Insert(unsigned index, const wchar_t *s);
void Insert(unsigned index, const UString &s);
@@ -680,8 +705,30 @@ public:
_chars[index] = 0;
}
}
// Zeroes the whole wide-character buffer (_limit + 1 chars) and sets the
// length to 0. Does nothing when there is no buffer.
// The zeroing uses volatile stores so that it cannot be optimized away as a
// dead store when the buffer is freed right afterwards.
void Wipe_and_Empty()
{
  if (_chars)
  {
    volatile unsigned char *p = (volatile unsigned char *)(void *)_chars;
    for (size_t n = ((size_t)_limit + 1) * sizeof(*_chars); n != 0; n--)
      *p++ = 0;
    _len = 0;
  }
}
};
// UString variant whose destructor calls Wipe_and_Empty(). Not copyable.
class UString_Wipe: public UString
{
  CLASS_NO_COPY(UString_Wipe)
public:
  UString_Wipe() {}
  // UString_Wipe(const UString &s): UString(s) {}
  // UString_Wipe &operator=(const UString &s) { UString::operator=(s); return *this; }
  // UString_Wipe &operator=(const wchar_t *s) { UString::operator=(s); return *this; }
  ~UString_Wipe() { Wipe_and_Empty(); }
};
bool operator<(const UString &s1, const UString &s2);
bool operator>(const UString &s1, const UString &s2);
@@ -866,3 +913,20 @@ typedef const FChar *CFSTR;
typedef CObjectVector<FString> FStringVector;
#endif
#if defined(_WIN32)
// #include <wchar.h>
// WCHAR_MAX is defined as ((wchar_t)-1)
#define _WCHART_IS_16BIT 1
#elif (defined(WCHAR_MAX) && (WCHAR_MAX <= 0xffff)) \
|| (defined(__SIZEOF_WCHAR_T__) && (__SIZEOF_WCHAR_T__ == 2))
#define _WCHART_IS_16BIT 1
#endif
#if WCHAR_PATH_SEPARATOR == L'\\'
// WSL scheme
#define WCHAR_IN_FILE_NAME_BACKSLASH_REPLACEMENT ((wchar_t)((unsigned)(0xF000) + (unsigned)'\\'))
// #define WCHAR_IN_FILE_NAME_BACKSLASH_REPLACEMENT '_'
#endif

View File

@@ -32,4 +32,15 @@ struct CBoolPair
cls(const cls &); \
cls &operator=(const cls &);
// Helper base class: deriving from it (see MY_UNCOPYABLE) gives the derived
// class the copy restrictions declared by CLASS_NO_COPY below.
// The default constructor is protected, so the class is usable only as a base.
class CUncopyable
{
protected:
CUncopyable() {} // allow constructor
// ~CUncopyable() {}
CLASS_NO_COPY(CUncopyable)
};
#define MY_UNCOPYABLE :private CUncopyable
// #define MY_UNCOPYABLE
#endif

View File

@@ -35,7 +35,7 @@ class CRecordVector
public:
CRecordVector(): _items(0), _size(0), _capacity(0) {}
CRecordVector(): _items(NULL), _size(0), _capacity(0) {}
CRecordVector(const CRecordVector &v): _items(0), _size(0), _capacity(0)
{
@@ -257,7 +257,7 @@ public:
unsigned mid = (left + right) / 2;
const T midVal = (*this)[mid];
if (item == midVal)
return mid;
return (int)mid;
if (item < midVal)
right = mid;
else
@@ -274,7 +274,7 @@ public:
const T& midVal = (*this)[mid];
int comp = item.Compare(midVal);
if (comp == 0)
return mid;
return (int)mid;
if (comp < 0)
right = mid;
else
@@ -428,7 +428,7 @@ public:
// void Reserve(unsigned newCapacity) { _v.Reserve(newCapacity); }
void ClearAndReserve(unsigned newCapacity) { Clear(); _v.ClearAndReserve(newCapacity); }
CObjectVector() {};
CObjectVector() {}
CObjectVector(const CObjectVector &v)
{
unsigned size = v.Size();
@@ -568,7 +568,7 @@ public:
const T& midVal = (*this)[mid];
int comp = item.Compare(midVal);
if (comp == 0)
return mid;
return (int)mid;
if (comp < 0)
right = mid;
else
@@ -624,9 +624,9 @@ public:
{ _v.Sort(compare, param); }
static int CompareObjectItems(void *const *a1, void *const *a2, void * /* param */)
{ return (*(*((const T **)a1))).Compare(*(*((const T **)a2))); }
{ return (*(*((const T *const *)a1))).Compare(*(*((const T *const *)a2))); }
void Sort() { _v.Sort(CompareObjectItems, 0); }
void Sort() { _v.Sort(CompareObjectItems, NULL); }
};
#define FOR_VECTOR(_i_, _v_) for (unsigned _i_ = 0; _i_ < (_v_).Size(); _i_++)

View File

@@ -5,6 +5,10 @@
#ifndef _WIN32
#include <stdlib.h>
#include <time.h>
#ifdef __GNUC__
#include <sys/time.h>
#endif
#include "MyWindows.h"
@@ -38,11 +42,11 @@ BSTR SysAllocStringByteLen(LPCSTR s, UINT len)
/* Original SysAllocStringByteLen in Win32 maybe fills only unaligned null OLECHAR at the end.
We provide also aligned null OLECHAR at the end. */
if (len >= (k_BstrSize_Max - sizeof(OLECHAR) - sizeof(OLECHAR) - sizeof(CBstrSizeType)))
if (len >= (k_BstrSize_Max - (UINT)sizeof(OLECHAR) - (UINT)sizeof(OLECHAR) - (UINT)sizeof(CBstrSizeType)))
return NULL;
UINT size = (len + sizeof(OLECHAR) + sizeof(OLECHAR) - 1) & ~(sizeof(OLECHAR) - 1);
void *p = AllocateForBSTR(size + sizeof(CBstrSizeType));
UINT size = (len + (UINT)sizeof(OLECHAR) + (UINT)sizeof(OLECHAR) - 1) & ~((UINT)sizeof(OLECHAR) - 1);
void *p = AllocateForBSTR(size + (UINT)sizeof(CBstrSizeType));
if (!p)
return NULL;
*(CBstrSizeType *)p = (CBstrSizeType)len;
@@ -56,11 +60,11 @@ BSTR SysAllocStringByteLen(LPCSTR s, UINT len)
BSTR SysAllocStringLen(const OLECHAR *s, UINT len)
{
if (len >= (k_BstrSize_Max - sizeof(OLECHAR) - sizeof(CBstrSizeType)) / sizeof(OLECHAR))
if (len >= (k_BstrSize_Max - (UINT)sizeof(OLECHAR) - (UINT)sizeof(CBstrSizeType)) / (UINT)sizeof(OLECHAR))
return NULL;
UINT size = len * sizeof(OLECHAR);
void *p = AllocateForBSTR(size + sizeof(CBstrSizeType) + sizeof(OLECHAR));
UINT size = len * (UINT)sizeof(OLECHAR);
void *p = AllocateForBSTR(size + (UINT)sizeof(CBstrSizeType) + (UINT)sizeof(OLECHAR));
if (!p)
return NULL;
*(CBstrSizeType *)p = (CBstrSizeType)size;
@@ -98,7 +102,7 @@ UINT SysStringLen(BSTR bstr)
{
if (!bstr)
return 0;
return *((CBstrSizeType *)bstr - 1) / sizeof(OLECHAR);
return *((CBstrSizeType *)bstr - 1) / (UINT)sizeof(OLECHAR);
}
@@ -139,7 +143,150 @@ LONG CompareFileTime(const FILETIME* ft1, const FILETIME* ft2)
DWORD GetLastError()
{
return 0;
return (DWORD)errno;
}
// Stores the given code in errno (converted to int).
void SetLastError(DWORD dw)
{
  const int code = (int)dw;
  errno = code;
}
// Returns the difference, in seconds, obtained by re-interpreting the current
// UTC broken-down time as local time (with the local DST flag) and
// subtracting the current time. FileTimeToLocalFileTime() subtracts this bias
// from UTC; LocalFileTimeToFileTime() adds it to local time.
// Returns 0 if localtime(), gmtime() or mktime() fails, instead of
// dereferencing a NULL pointer or returning a garbage bias.
static LONG TIME_GetBias()
{
  const time_t utc = time(NULL);
  const struct tm *local = localtime(&utc);
  if (!local)
    return 0;
  const int localdaylight = local->tm_isdst; /* daylight for local timezone */
  const struct tm *gm = gmtime(&utc);
  if (!gm)
    return 0;
  // Work on a private copy so the library's shared struct tm is not modified.
  struct tm t = *gm;
  t.tm_isdst = localdaylight; /* use local daylight, not that of Greenwich */
  const time_t shifted = mktime(&t);
  if (shifted == (time_t)-1)
    return 0;
  const LONG bias = (int)(shifted - utc);
  return bias;
}
#define TICKS_PER_SEC 10000000
/*
#define SECS_PER_DAY (24 * 60 * 60)
#define SECS_1601_TO_1970 ((369 * 365 + 89) * (UInt64)SECS_PER_DAY)
#define TICKS_1601_TO_1970 (SECS_1601_TO_1970 * TICKS_PER_SEC)
*/
#define GET_TIME_64(pft) ((pft)->dwLowDateTime | ((UInt64)(pft)->dwHighDateTime << 32))
// Stores the low and high 32-bit halves of v64 into the FILETIME-like struct
// pointed to by ft. Use as a statement: SET_FILETIME(p, v);
// Wrapped in do/while(0) so it behaves as one statement (safe in unbraced
// if/else), and v64 is parenthesized so expression arguments are shifted as
// a whole. Note that v64 is evaluated twice.
#define SET_FILETIME(ft, v64) \
  do { \
    (ft)->dwLowDateTime = (DWORD)(v64); \
    (ft)->dwHighDateTime = (DWORD)((v64) >> 32); \
  } while (0)
// Writes to *localFileTime the value of *fileTime minus the current bias
// (TIME_GetBias() seconds scaled by TICKS_PER_SEC). Always returns TRUE.
BOOL WINAPI FileTimeToLocalFileTime(const FILETIME *fileTime, FILETIME *localFileTime)
{
  const Int64 biasTicks = (Int64)TIME_GetBias() * TICKS_PER_SEC;
  const UInt64 local = (UInt64)((Int64)GET_TIME_64(fileTime) - biasTicks);
  SET_FILETIME(localFileTime, local);
  return TRUE;
}
// Writes to *fileTime the value of *localFileTime plus the current bias
// (TIME_GetBias() seconds scaled by TICKS_PER_SEC). Always returns TRUE.
BOOL WINAPI LocalFileTimeToFileTime(const FILETIME *localFileTime, FILETIME *fileTime)
{
  const Int64 biasTicks = (Int64)TIME_GetBias() * TICKS_PER_SEC;
  const UInt64 utc = (UInt64)((Int64)GET_TIME_64(localFileTime) + biasTicks);
  SET_FILETIME(fileTime, utc);
  return TRUE;
}
/*
VOID WINAPI GetSystemTimeAsFileTime(FILETIME *ft)
{
UInt64 t = 0;
timeval tv;
if (gettimeofday(&tv, NULL) == 0)
{
t = tv.tv_sec * (UInt64)TICKS_PER_SEC + TICKS_1601_TO_1970;
t += tv.tv_usec * 10;
}
SET_FILETIME(ft, t);
}
*/
// Returns a millisecond counter truncated to DWORD.
// Outside _WIN32 it is derived from gettimeofday(); if that call fails (or
// under _WIN32) it falls back to time(NULL) converted to DWORD and multiplied
// by 1000.
DWORD WINAPI GetTickCount(VOID)
{
  #ifndef _WIN32
  // gettimeofday() doesn't work in some MINGWs by unknown reason
  timeval now;
  if (gettimeofday(&now, NULL) == 0)
  {
    // tv_sec and tv_usec are (long)
    const UInt64 fromSeconds = (UInt64)(Int64)now.tv_sec * (UInt64)1000;
    const UInt64 fromMicros = (UInt64)(Int64)now.tv_usec / 1000;
    return (DWORD)(fromSeconds + fromMicros);
  }
  #endif
  return (DWORD)time(NULL) * 1000;
}
#define PERIOD_4 (4 * 365 + 1)
#define PERIOD_100 (PERIOD_4 * 25 - 1)
#define PERIOD_400 (PERIOD_100 * 4 + 1)
// Breaks the 64-bit tick count stored in *ft into the calendar and clock
// fields of *st (milliseconds, second, minute, hour, day of week, day, month,
// year). Always returns TRUE.
// The tick count is first divided by 10000 to obtain milliseconds; the
// comments below indicate that day 0 corresponds to 1601-01-01.
BOOL WINAPI FileTimeToSystemTime(const FILETIME *ft, SYSTEMTIME *st)
{
UInt32 v;
UInt64 v64 = GET_TIME_64(ft);
// ticks -> milliseconds
v64 /= 10000;
// peel off milliseconds, seconds and minutes; v64 ends up in whole hours
st->wMilliseconds = (WORD)(v64 % 1000); v64 /= 1000;
st->wSecond = (WORD)(v64 % 60); v64 /= 60;
st->wMinute = (WORD)(v64 % 60); v64 /= 60;
v = (UInt32)v64;
// after this line v is the number of whole days
st->wHour = (WORD)(v % 24); v /= 24;
// 1601-01-01 was Monday
st->wDayOfWeek = (WORD)((v + 1) % 7);
UInt32 leaps, year, day, mon;
leaps = (3 * ((4 * v + (365 - 31 - 28) * 4 + 3) / PERIOD_400) + 3) / 4;
v += 28188 + leaps;
// leaps - the number of exceptions from PERIOD_4 rules starting from 1600-03-01
// (1959 / 64) - converts day from 03-01 to month
year = (20 * v - 2442) / (5 * PERIOD_4);
day = v - (year * PERIOD_4) / 4;
mon = (64 * day) / 1959;
st->wDay = (WORD)(day - (1959 * mon) / 64);
mon -= 1;
year += 1524;
// presumably month numbers above 12 from the March-based computation are
// January/February of the following year - TODO confirm
if (mon > 12)
{
mon -= 12;
year++;
}
st->wMonth = (WORD)mon;
st->wYear = (WORD)year;
// Disabled alternative implementation (table of month lengths), kept for reference.
/*
unsigned year, mon;
unsigned char ms[] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
unsigned t;
year = (WORD)(1601 + v / PERIOD_400 * 400);
v %= PERIOD_400;
t = v / PERIOD_100; if (t == 4) t = 3; year += t * 100; v -= t * PERIOD_100;
t = v / PERIOD_4; if (t == 25) t = 24; year += t * 4; v -= t * PERIOD_4;
t = v / 365; if (t == 4) t = 3; year += t; v -= t * 365;
st->wYear = (WORD)year;
if (year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))
ms[1] = 29;
for (mon = 0;; mon++)
{
unsigned d = ms[mon];
if (v < d)
break;
v -= d;
}
st->wDay = (WORD)(v + 1);
st->wMonth = (WORD)(mon + 1);
*/
return TRUE;
}
#endif

View File

@@ -5,14 +5,14 @@
#ifdef _WIN32
#include <windows.h>
#include <Windows.h>
#ifdef UNDER_CE
#undef VARIANT_TRUE
#define VARIANT_TRUE ((VARIANT_BOOL)-1)
#endif
#else
#else // _WIN32
#include <stddef.h> // for wchar_t
#include <string.h>
@@ -20,7 +20,9 @@
#include "MyGuidDef.h"
// WINAPI is __stdcall in Windows-MSVC in windef.h
#define WINAPI
#define EXTERN_C MY_EXTERN_C
typedef char CHAR;
typedef unsigned char UCHAR;
@@ -35,17 +37,12 @@ typedef unsigned short USHORT;
typedef unsigned short WORD;
typedef short VARIANT_BOOL;
typedef int INT;
typedef Int32 INT32;
typedef unsigned int UINT;
typedef UInt32 UINT32;
typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit
typedef UINT32 ULONG;
#define LOWORD(l) ((WORD)((DWORD_PTR)(l) & 0xffff))
#define HIWORD(l) ((WORD)((DWORD_PTR)(l) >> 16))
#undef DWORD
typedef UINT32 DWORD;
typedef long BOOL;
// MS uses long for BOOL, but long is 32-bit in MS. So we use int.
// typedef long BOOL;
typedef int BOOL;
#ifndef FALSE
#define FALSE 0
@@ -53,7 +50,7 @@ typedef long BOOL;
#endif
// typedef size_t ULONG_PTR;
typedef size_t DWORD_PTR;
// typedef size_t DWORD_PTR;
// typedef uintptr_t UINT_PTR;
// typedef ptrdiff_t UINT_PTR;
@@ -80,28 +77,33 @@ typedef struct _FILETIME
} FILETIME;
#define HRESULT LONG
#define FAILED(Status) ((HRESULT)(Status)<0)
#define SUCCEEDED(hr) ((HRESULT)(hr) >= 0)
#define FAILED(hr) ((HRESULT)(hr) < 0)
typedef ULONG PROPID;
typedef LONG SCODE;
#define ERROR_NEGATIVE_SEEK 131L
#define S_OK ((HRESULT)0x00000000L)
#define S_FALSE ((HRESULT)0x00000001L)
#define E_NOTIMPL ((HRESULT)0x80004001L)
#define E_NOTIMPL ((HRESULT)0x80004001L)
#define E_NOINTERFACE ((HRESULT)0x80004002L)
#define E_ABORT ((HRESULT)0x80004004L)
#define E_FAIL ((HRESULT)0x80004005L)
#define STG_E_INVALIDFUNCTION ((HRESULT)0x80030001L)
#define E_OUTOFMEMORY ((HRESULT)0x8007000EL)
#define E_INVALIDARG ((HRESULT)0x80070057L)
#define E_ABORT ((HRESULT)0x80004004L)
#define E_FAIL ((HRESULT)0x80004005L)
#define STG_E_INVALIDFUNCTION ((HRESULT)0x80030001L)
#define CLASS_E_CLASSNOTAVAILABLE ((HRESULT)0x80040111L)
#ifdef _MSC_VER
#define STDMETHODCALLTYPE __stdcall
#define STDAPICALLTYPE __stdcall
#else
// do we need __export here?
#define STDMETHODCALLTYPE
#define STDAPICALLTYPE
#endif
#define STDAPI EXTERN_C HRESULT STDAPICALLTYPE
#define STDMETHOD_(t, f) virtual t STDMETHODCALLTYPE f
#define STDMETHOD(f) STDMETHOD_(HRESULT, f)
#define STDMETHODIMP_(type) type STDMETHODCALLTYPE
@@ -120,9 +122,8 @@ struct IUnknown
STDMETHOD(QueryInterface) (REFIID iid, void **outObject) PURE;
STDMETHOD_(ULONG, AddRef)() PURE;
STDMETHOD_(ULONG, Release)() PURE;
#ifndef _WIN32
virtual ~IUnknown() {}
#endif
// We use virtual ~IUnknown() here for binary compatibility with 7z.so from p7zip
};
typedef IUnknown *LPUNKNOWN;
@@ -214,8 +215,14 @@ MY_EXTERN_C UINT SysStringByteLen(BSTR bstr);
MY_EXTERN_C UINT SysStringLen(BSTR bstr);
MY_EXTERN_C DWORD GetLastError();
MY_EXTERN_C void SetLastError(DWORD dwCode);
MY_EXTERN_C LONG CompareFileTime(const FILETIME* ft1, const FILETIME* ft2);
MY_EXTERN_C DWORD GetCurrentThreadId();
MY_EXTERN_C DWORD GetCurrentProcessId();
#define MAX_PATH 1024
#define CP_ACP 0
#define CP_OEMCP 1
#define CP_UTF8 65001
@@ -227,5 +234,35 @@ typedef enum tagSTREAM_SEEK
STREAM_SEEK_END = 2
} STREAM_SEEK;
#endif
// Broken-down date/time record filled by FileTimeToSystemTime().
// All fields are WORD values.
typedef struct _SYSTEMTIME
{
WORD wYear;
WORD wMonth;
WORD wDayOfWeek;
WORD wDay;
WORD wHour;
WORD wMinute;
WORD wSecond;
WORD wMilliseconds;
} SYSTEMTIME;
BOOL WINAPI FileTimeToLocalFileTime(const FILETIME *fileTime, FILETIME *localFileTime);
BOOL WINAPI LocalFileTimeToFileTime(const FILETIME *localFileTime, FILETIME *fileTime);
BOOL WINAPI FileTimeToSystemTime(const FILETIME *fileTime, SYSTEMTIME *systemTime);
// VOID WINAPI GetSystemTimeAsFileTime(FILETIME *systemTimeAsFileTime);
DWORD GetTickCount();
#define CREATE_NEW 1
#define CREATE_ALWAYS 2
#define OPEN_EXISTING 3
#define OPEN_ALWAYS 4
#define TRUNCATE_EXISTING 5
#endif // _WIN32
#endif

View File

@@ -7,9 +7,9 @@
static bool IsValidChar(char c)
{
return
c >= 'a' && c <= 'z' ||
c >= 'A' && c <= 'Z' ||
c >= '0' && c <= '9' ||
(c >= 'a' && c <= 'z') ||
(c >= 'A' && c <= 'Z') ||
(c >= '0' && c <= '9') ||
c == '-';
}
@@ -24,7 +24,7 @@ int CXmlItem::FindProp(const char *propName) const throw()
{
FOR_VECTOR (i, Props)
if (Props[i].Name == propName)
return i;
return (int)i;
return -1;
}
@@ -32,7 +32,7 @@ AString CXmlItem::GetPropVal(const char *propName) const
{
int index = FindProp(propName);
if (index >= 0)
return Props[index].Value;
return Props[(unsigned)index].Value;
return AString();
}
@@ -45,7 +45,7 @@ int CXmlItem::FindSubTag(const char *tag) const throw()
{
FOR_VECTOR (i, SubItems)
if (SubItems[i].IsTagged(tag))
return i;
return (int)i;
return -1;
}
@@ -75,7 +75,7 @@ AString CXmlItem::GetSubStringForTag(const char *tag) const
{
int index = FindSubTag(tag);
if (index >= 0)
return SubItems[index].GetSubString();
return SubItems[(unsigned)index].GetSubString();
return AString();
}

View File

@@ -0,0 +1,7 @@
// Sha1Prepare.cpp
#include "StdAfx.h"
#include "../../C/Sha1.h"
// File-local static object: its constructor calls Sha1Prepare()
// when this translation unit's static objects are initialized.
static struct CSha1Prepare
{
  CSha1Prepare()
  {
    Sha1Prepare();
  }
} g_Sha1Prepare;

View File

@@ -4,37 +4,67 @@
#include "../../C/Sha1.h"
#include "../Common/MyBuffer2.h"
#include "../Common/MyCom.h"
#include "../7zip/Common/RegisterCodec.h"
class CSha1Hasher:
public IHasher,
public ICompressSetCoderProperties,
public CMyUnknownImp
{
CSha1 _sha;
CAlignedBuffer _buf;
Byte mtDummy[1 << 7];
CSha1 *Sha() { return (CSha1 *)(void *)(Byte *)_buf; }
public:
CSha1Hasher() { Sha1_Init(&_sha); }
CSha1Hasher():
_buf(sizeof(CSha1))
{
Sha1_SetFunction(Sha(), 0);
Sha1_InitState(Sha());
}
MY_UNKNOWN_IMP1(IHasher)
MY_UNKNOWN_IMP2(IHasher, ICompressSetCoderProperties)
INTERFACE_IHasher(;)
STDMETHOD(SetCoderProperties)(const PROPID *propIDs, const PROPVARIANT *props, UInt32 numProps);
};
STDMETHODIMP_(void) CSha1Hasher::Init() throw()
{
Sha1_Init(&_sha);
Sha1_InitState(Sha());
}
STDMETHODIMP_(void) CSha1Hasher::Update(const void *data, UInt32 size) throw()
{
Sha1_Update(&_sha, (const Byte *)data, size);
Sha1_Update(Sha(), (const Byte *)data, size);
}
STDMETHODIMP_(void) CSha1Hasher::Final(Byte *digest) throw()
{
Sha1_Final(&_sha, digest);
Sha1_Final(Sha(), digest);
}
// Selects the SHA-1 implementation variant.
// Only NCoderPropID::kDefaultProp entries are examined; other property IDs are skipped.
// Returns:
//   E_INVALIDARG - a kDefaultProp value is not VT_UI4
//   E_NOTIMPL    - the value is above 2, or Sha1_SetFunction() rejects it
//   S_OK         - the function was set (variant 0 if no kDefaultProp was given)
STDMETHODIMP CSha1Hasher::SetCoderProperties(const PROPID *propIDs, const PROPVARIANT *coderProps, UInt32 numProps)
{
  unsigned selected = 0;
  for (UInt32 k = 0; k != numProps; k++)
  {
    if (propIDs[k] != NCoderPropID::kDefaultProp)
      continue;
    const PROPVARIANT &value = coderProps[k];
    if (value.vt != VT_UI4)
      return E_INVALIDARG;
    if (value.ulVal > 2)
      return E_NOTIMPL;
    // the last kDefaultProp entry wins
    selected = (unsigned)value.ulVal;
  }
  if (Sha1_SetFunction(Sha(), selected))
    return S_OK;
  return E_NOTIMPL;
}
REGISTER_HASHER(CSha1Hasher, 0x201, "SHA1", SHA1_DIGEST_SIZE)

View File

@@ -0,0 +1,7 @@
// Sha256Prepare.cpp
#include "StdAfx.h"
#include "../../C/Sha256.h"
// File-local static object: its constructor calls Sha256Prepare()
// when this translation unit's static objects are initialized.
static struct CSha256Prepare
{
  CSha256Prepare()
  {
    Sha256Prepare();
  }
} g_Sha256Prepare;

View File

@@ -4,37 +4,67 @@
#include "../../C/Sha256.h"
#include "../Common/MyBuffer2.h"
#include "../Common/MyCom.h"
#include "../7zip/Common/RegisterCodec.h"
class CSha256Hasher:
public IHasher,
public ICompressSetCoderProperties,
public CMyUnknownImp
{
CSha256 _sha;
CAlignedBuffer _buf;
Byte mtDummy[1 << 7];
CSha256 *Sha() { return (CSha256 *)(void *)(Byte *)_buf; }
public:
CSha256Hasher() { Sha256_Init(&_sha); }
CSha256Hasher():
_buf(sizeof(CSha256))
{
Sha256_SetFunction(Sha(), 0);
Sha256_InitState(Sha());
}
MY_UNKNOWN_IMP1(IHasher)
MY_UNKNOWN_IMP2(IHasher, ICompressSetCoderProperties)
INTERFACE_IHasher(;)
STDMETHOD(SetCoderProperties)(const PROPID *propIDs, const PROPVARIANT *props, UInt32 numProps);
};
STDMETHODIMP_(void) CSha256Hasher::Init() throw()
{
Sha256_Init(&_sha);
Sha256_InitState(Sha());
}
STDMETHODIMP_(void) CSha256Hasher::Update(const void *data, UInt32 size) throw()
{
Sha256_Update(&_sha, (const Byte *)data, size);
Sha256_Update(Sha(), (const Byte *)data, size);
}
STDMETHODIMP_(void) CSha256Hasher::Final(Byte *digest) throw()
{
Sha256_Final(&_sha, digest);
Sha256_Final(Sha(), digest);
}
// Selects the SHA-256 implementation variant.
// Only NCoderPropID::kDefaultProp entries are examined; other property IDs are skipped.
// Returns:
//   E_INVALIDARG - a kDefaultProp value is not VT_UI4
//   E_NOTIMPL    - the value is above 2, or Sha256_SetFunction() rejects it
//   S_OK         - the function was set (variant 0 if no kDefaultProp was given)
STDMETHODIMP CSha256Hasher::SetCoderProperties(const PROPID *propIDs, const PROPVARIANT *coderProps, UInt32 numProps)
{
  unsigned selected = 0;
  for (UInt32 k = 0; k != numProps; k++)
  {
    if (propIDs[k] != NCoderPropID::kDefaultProp)
      continue;
    const PROPVARIANT &value = coderProps[k];
    if (value.vt != VT_UI4)
      return E_INVALIDARG;
    if (value.ulVal > 2)
      return E_NOTIMPL;
    // the last kDefaultProp entry wins
    selected = (unsigned)value.ulVal;
  }
  if (Sha256_SetFunction(Sha(), selected))
    return S_OK;
  return E_NOTIMPL;
}
REGISTER_HASHER(CSha256Hasher, 0xA, "SHA256", SHA256_DIGEST_SIZE)

View File

@@ -2,7 +2,9 @@
#include "StdAfx.h"
#ifdef _WIN32
#include <tchar.h>
#endif
#include "StdInStream.h"
#include "StringConvert.h"
@@ -14,14 +16,18 @@
#define kFileOpenMode TEXT("r")
extern int g_CodePage;
CStdInStream g_StdIn(stdin);
bool CStdInStream::Open(LPCTSTR fileName) throw()
{
Close();
_stream = _tfopen(fileName, kFileOpenMode);
_stream =
#ifdef _WIN32
_tfopen
#else
fopen
#endif
(fileName, kFileOpenMode);
_streamIsOpen = (_stream != 0);
return _streamIsOpen;
}
@@ -56,7 +62,7 @@ bool CStdInStream::ScanUStringUntilNewLine(UString &dest)
dest.Empty();
AString s;
bool res = ScanAStringUntilNewLine(s);
int codePage = g_CodePage;
int codePage = CodePage;
if (codePage == -1)
codePage = CP_OEMCP;
if (codePage == CP_UTF8)

View File

@@ -13,8 +13,14 @@ class CStdInStream
FILE *_stream;
bool _streamIsOpen;
public:
CStdInStream(): _stream(0), _streamIsOpen(false) {};
CStdInStream(FILE *stream): _stream(stream), _streamIsOpen(false) {};
int CodePage;
CStdInStream(FILE *stream = NULL):
_stream(stream),
_streamIsOpen(false),
CodePage(-1)
{};
~CStdInStream() { Close(); }
bool Open(LPCTSTR fileName) throw();

View File

@@ -2,7 +2,9 @@
#include "StdAfx.h"
#ifdef _WIN32
#include <tchar.h>
#endif
#include "IntToString.h"
#include "StdOutStream.h"
@@ -11,8 +13,6 @@
#define kFileOpenMode "wt"
extern int g_CodePage;
CStdOutStream g_StdOut(stdout);
CStdOutStream g_StdErr(stderr);
@@ -47,34 +47,29 @@ CStdOutStream & endl(CStdOutStream & outStream) throw()
CStdOutStream & CStdOutStream::operator<<(const wchar_t *s)
{
int codePage = g_CodePage;
if (codePage == -1)
codePage = CP_OEMCP;
AString dest;
if (codePage == CP_UTF8)
ConvertUnicodeToUTF8(s, dest);
else
UnicodeStringToMultiByte2(dest, s, (UINT)codePage);
return operator<<((const char *)dest);
}
void StdOut_Convert_UString_to_AString(const UString &s, AString &temp)
{
int codePage = g_CodePage;
if (codePage == -1)
codePage = CP_OEMCP;
if (codePage == CP_UTF8)
ConvertUnicodeToUTF8(s, temp);
else
UnicodeStringToMultiByte2(temp, s, (UINT)codePage);
AString temp;
UString s2(s);
PrintUString(s2, temp);
return *this;
}
void CStdOutStream::PrintUString(const UString &s, AString &temp)
{
StdOut_Convert_UString_to_AString(s, temp);
Convert_UString_to_AString(s, temp);
*this << (const char *)temp;
}
void CStdOutStream::Convert_UString_to_AString(const UString &src, AString &dest)
{
int codePage = CodePage;
if (codePage == -1)
codePage = CP_OEMCP;
if (codePage == CP_UTF8)
ConvertUnicodeToUTF8(src, dest);
else
UnicodeStringToMultiByte2(dest, src, (UINT)codePage);
}
static const wchar_t kReplaceChar = '_';

View File

@@ -14,9 +14,15 @@ class CStdOutStream
bool _streamIsOpen;
public:
bool IsTerminalMode;
int CodePage;
CStdOutStream(FILE *stream = 0):
_stream(stream),
_streamIsOpen(false),
IsTerminalMode(false),
CodePage(-1)
{};
CStdOutStream(): _stream(0), _streamIsOpen(false), IsTerminalMode(false) {};
CStdOutStream(FILE *stream): _stream(stream), _streamIsOpen(false) {};
~CStdOutStream() { Close(); }
// void AttachStdStream(FILE *stream) { _stream = stream; _streamIsOpen = false; }
@@ -52,6 +58,7 @@ public:
CStdOutStream & operator<<(const wchar_t *s);
void PrintUString(const UString &s, AString &temp);
void Convert_UString_to_AString(const UString &src, AString &dest);
void Normalize_UString__LF_Allowed(UString &s);
void Normalize_UString(UString &s);
@@ -66,6 +73,4 @@ CStdOutStream & endl(CStdOutStream & outStream) throw();
extern CStdOutStream g_StdOut;
extern CStdOutStream g_StdErr;
void StdOut_Convert_UString_to_AString(const UString &s, AString &temp);
#endif

View File

@@ -5,9 +5,18 @@
#include "StringConvert.h"
#ifndef _WIN32
// #include <stdio.h>
#include <stdlib.h>
#endif
#if !defined(_WIN32) || defined(ENV_HAVE_LOCALE)
#include "UTFConvert.h"
#endif
#ifdef ENV_HAVE_LOCALE
#include <locale.h>
#endif
static const char k_DefultChar = '_';
#ifdef _WIN32
@@ -71,7 +80,7 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
d[i] = 0;
dest.ReleaseBuf_SetLen(i);
*/
unsigned len = MultiByteToWideChar(codePage, 0, src, src.Len(), NULL, 0);
unsigned len = (unsigned)MultiByteToWideChar(codePage, 0, src, (int)src.Len(), NULL, 0);
if (len == 0)
{
if (GetLastError() != 0)
@@ -79,7 +88,7 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
}
else
{
len = MultiByteToWideChar(codePage, 0, src, src.Len(), dest.GetBuf(len), len);
len = (unsigned)MultiByteToWideChar(codePage, 0, src, (int)src.Len(), dest.GetBuf(len), (int)len);
if (len == 0)
throw 282228;
dest.ReleaseBuf_SetEnd(len);
@@ -175,7 +184,7 @@ static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT co
}
*/
unsigned len = WideCharToMultiByte(codePage, 0, src, src.Len(), NULL, 0, NULL, NULL);
unsigned len = (unsigned)WideCharToMultiByte(codePage, 0, src, (int)src.Len(), NULL, 0, NULL, NULL);
if (len == 0)
{
if (GetLastError() != 0)
@@ -186,8 +195,8 @@ static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT co
BOOL defUsed = FALSE;
bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
// defaultChar = defaultChar;
len = WideCharToMultiByte(codePage, 0, src, src.Len(),
dest.GetBuf(len), len,
len = (unsigned)WideCharToMultiByte(codePage, 0, src, (int)src.Len(),
dest.GetBuf(len), (int)len,
(isUtf ? NULL : &defaultChar),
(isUtf ? NULL : &defUsed)
);
@@ -213,23 +222,137 @@ AString SystemStringToOemString(const CSysString &src)
#endif
*/
#else
#else // _WIN32
void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePage */)
// #include <stdio.h>
/*
if (wchar_t is 32-bit (#if WCHAR_MAX > 0xffff),
and utf-8 string contains big unicode character > 0xffff),
then we still use 16-bit surrogate pair in UString.
It simplifies another code where utf-16 encoding is used.
So we use surrogate-conversion code only in this file.
*/
/*
mbstowcs() returns error if there is error in utf-8 stream,
mbstowcs() returns error if there is a single surrogate point (d800-dfff) in utf-8 stream
*/
/*
static void MultiByteToUnicodeString2_Native(UString &dest, const AString &src)
{
dest.Empty();
if (src.IsEmpty())
return;
size_t limit = ((size_t)src.Len() + 1) * 2;
const size_t limit = ((size_t)src.Len() + 1) * 2;
wchar_t *d = dest.GetBuf((unsigned)limit);
size_t len = mbstowcs(d, src, limit);
const size_t len = mbstowcs(d, src, limit);
if (len != (size_t)-1)
{
dest.ReleaseBuf_SetEnd((unsigned)len);
return;
}
dest.ReleaseBuf_SetEnd(0);
}
*/
bool g_ForceToUTF8 = true; // false;
void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
{
dest.Empty();
if (src.IsEmpty())
return;
if (codePage == CP_UTF8 || g_ForceToUTF8)
{
ConvertUTF8ToUnicode(src, dest);
return;
}
const size_t limit = ((size_t)src.Len() + 1) * 2;
wchar_t *d = dest.GetBuf((unsigned)limit);
const size_t len = mbstowcs(d, src, limit);
if (len != (size_t)-1)
{
dest.ReleaseBuf_SetEnd((unsigned)len);
#if WCHAR_MAX > 0xffff
d = dest.GetBuf();
for (size_t i = 0;; i++)
{
// wchar_t c = dest[i];
wchar_t c = d[i];
if (c == 0)
break;
if (c >= 0x10000 && c < 0x110000)
{
/*
c -= 0x10000;
unsigned c0 = 0xd800 + ((c >> 10) & 0x3FF);
dest.ReplaceOneCharAtPos(i, c0);
i++;
c = 0xdc00 + (c & 0x3FF);
dest.Insert_wchar_t(i, c);
*/
UString temp = d + i;
for (size_t t = 0;; t++)
{
wchar_t w = temp[t];
if (w == 0)
break;
if (i == limit)
break; // unexpected error
if (w >= 0x10000 && w < 0x110000)
{
if (i + 1 == limit)
break; // unexpected error
w -= 0x10000;
d[i++] = (unsigned)0xd800 + (((unsigned)w >> 10) & 0x3FF);
w = 0xdc00 + (w & 0x3FF);
}
d[i++] = w;
}
dest.ReleaseBuf_SetEnd((unsigned)i);
}
}
#endif
/*
printf("\nMultiByteToUnicodeString2 (%d) %s\n", (int)src.Len(), src.Ptr());
printf("char: ");
for (unsigned i = 0; i < src.Len(); i++)
printf (" %02x", (int)(Byte)src[i]);
printf("\n");
printf("\n-> (%d) %ls\n", (int)dest.Len(), dest.Ptr());
printf("wchar_t: ");
for (unsigned i = 0; i < dest.Len(); i++)
{
printf (" %02x", (int)dest[i]);
}
printf("\n");
*/
return;
}
/* if there is mbstowcs() error, we have two ways:
1) change 0x80+ characters to some character: '_'
in that case we lose data, but we have correct UString()
and that scheme can show errors to user in early stages,
when file converted back to mbs() cannot be found
2) transfer bad characters in some UTF-16 range.
it can be non-original Unicode character.
but later we still can restore original character.
*/
// printf("\nmbstowcs ERROR !!!!!! s=%s\n", src.Ptr());
{
unsigned i;
const char *s = (const char *)src;
@@ -238,6 +361,8 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePa
Byte c = (Byte)s[i];
if (c == 0)
break;
// we can use ascii compatibility character '_'
// if (c > 0x7F) c = '_'; // we replace "bad" character
d[i++] = (wchar_t)c;
}
d[i] = 0;
@@ -245,43 +370,131 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePa
}
}
static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT /* codePage */, char defaultChar, bool &defaultCharWasUsed)
static void UnicodeStringToMultiByte2_Native(AString &dest, const UString &src)
{
dest.Empty();
defaultCharWasUsed = false;
if (src.IsEmpty())
return;
size_t limit = ((size_t)src.Len() + 1) * 6;
const size_t limit = ((size_t)src.Len() + 1) * 6;
char *d = dest.GetBuf((unsigned)limit);
size_t len = wcstombs(d, src, limit);
const size_t len = wcstombs(d, src, limit);
if (len != (size_t)-1)
{
dest.ReleaseBuf_SetEnd((unsigned)len);
return;
}
dest.ReleaseBuf_SetEnd(0);
}
static void UnicodeStringToMultiByte2(AString &dest, const UString &src2, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
{
// if (codePage == 1234567) // for debug purposes
if (codePage == CP_UTF8 || g_ForceToUTF8)
{
defaultCharWasUsed = false;
ConvertUnicodeToUTF8(src2, dest);
return;
}
UString src = src2;
#if WCHAR_MAX > 0xffff
{
src.Empty();
for (unsigned i = 0; i < src2.Len();)
{
wchar_t c = src2[i];
if (c >= 0xd800 && c < 0xdc00 && i + 1 != src2.Len())
{
const wchar_t c2 = src2[i + 1];
if (c2 >= 0xdc00 && c2 < 0x10000)
{
// printf("\nSurragate [%d]: %4x %4x -> ", i, (int)c, (int)c2);
c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
// printf("%4x\n", (int)c);
i++;
}
}
src += c;
i++;
}
}
#endif
dest.Empty();
defaultCharWasUsed = false;
if (src.IsEmpty())
return;
const size_t len = wcstombs(NULL, src, 0);
if (len != (size_t)-1)
{
const unsigned limit = ((unsigned)len);
if (limit == len)
{
char *d = dest.GetBuf(limit);
/*
{
printf("\nwcstombs; len = %d %ls \n", (int)src.Len(), src.Ptr());
for (unsigned i = 0; i < src.Len(); i++)
printf (" %02x", (int)src[i]);
printf("\n");
printf("\ndest Limit = %d \n", limit);
}
*/
const size_t len2 = wcstombs(d, src, len + 1);
if (len2 != (size_t)-1 && len2 <= limit)
{
/*
printf("\nOK : destLen = %d : %s\n", (int)len, dest.Ptr());
for (unsigned i = 0; i < len2; i++)
printf(" %02x", (int)(Byte)dest[i]);
printf("\n");
*/
dest.ReleaseBuf_SetEnd((unsigned)len2);
return;
}
}
}
{
const wchar_t *s = (const wchar_t *)src;
char *d = dest.GetBuf(src.Len());
unsigned i;
for (i = 0;;)
{
wchar_t c = s[i];
if (c == 0)
break;
if (c >= 0x100)
if (c >=
0x100
// 0x80
)
{
c = defaultChar;
defaultCharWasUsed = true;
}
d[i++] = (char)c;
}
d[i] = 0;
dest.ReleaseBuf_SetLen(i);
/*
printf("\nUnicodeStringToMultiByte2; len = %d \n", (int)src.Len());
printf("ERROR: %s\n", dest.Ptr());
*/
}
}
#endif
#endif // _WIN32
UString MultiByteToUnicodeString(const AString &src, UINT codePage)
@@ -317,3 +530,228 @@ AString UnicodeStringToMultiByte(const UString &src, UINT codePage)
UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
return dest;
}
#ifdef _WIN32
#define U_to_A(a, b, c) UnicodeStringToMultiByte2
// #define A_to_U(a, b, c) MultiByteToUnicodeString2
#else
// void MultiByteToUnicodeString2_Native(UString &dest, const AString &src);
#define U_to_A(a, b, c) UnicodeStringToMultiByte2_Native(a, b)
// #define A_to_U(a, b, c) MultiByteToUnicodeString2_Native(a, b)
#endif
#if !defined(_WIN32) || defined(ENV_HAVE_LOCALE)
bool IsNativeUTF8()
{
UString u;
AString a, a2;
// for (unsigned c = 0x80; c < (UInt32)0x10000; c += (c >> 9) + 1)
for (unsigned c = 0x80; c < (UInt32)0xD000; c += (c >> 2) + 1)
{
u.Empty();
u += (wchar_t)c;
/*
if (Unicode_Is_There_Utf16SurrogateError(u))
continue;
#ifndef _WIN32
if (Unicode_Is_There_BmpEscape(u))
continue;
#endif
*/
ConvertUnicodeToUTF8(u, a);
U_to_A(a2, u, CP_OEMCP);
if (a != a2)
return false;
}
return true;
}
#endif
#ifdef ENV_HAVE_LOCALE
// Returns the name of the program's current LC_CTYPE locale.
// With ENV_HAVE_LOCALE it queries setlocale(LC_CTYPE, NULL) and substitutes "C"
// when that call returns NULL; otherwise it returns a fixed name
// ("utf8" if LOCALE_IS_UTF8 is defined, else "C").
const char *GetLocale(void)
{
  #ifdef ENV_HAVE_LOCALE
  // ubuntu returns "C" after program start
  const char * const current = setlocale(LC_CTYPE, NULL);
  return current ? current : "C";
  #elif defined(LOCALE_IS_UTF8)
  return "utf8";
  #else
  return "C";
  #endif
}
// Stores (val) in g_ForceToUTF8 on non-Windows builds; a no-op under _WIN32.
#ifndef _WIN32
static void Set_ForceToUTF8(bool val)
{
  g_ForceToUTF8 = val;
}
#else
static void Set_ForceToUTF8(bool)
{
}
#endif
static bool Is_Default_Basic_Locale(const char *locale)
{
const AString a (locale);
if (a.IsEqualTo_Ascii_NoCase("")
|| a.IsEqualTo_Ascii_NoCase("C")
|| a.IsEqualTo_Ascii_NoCase("POSIX"))
return true;
return false;
}
static bool Is_Default_Basic_Locale()
{
return Is_Default_Basic_Locale(GetLocale());
}
// Chooses the process locale and decides whether string conversions are forced to UTF-8.
// With ENV_HAVE_LOCALE it makes up to three setlocale(LC_ALL, ...) attempts:
//   i == 0 : ""  (locale from the environment)
//   i == 1 : "C.UTF-8" ("en_US.UTF-8" on Apple)
//   i == 2 : ""  again
// After each attempt:
//   - if IsNativeUTF8() is true, UTF-8 is forced and the function returns;
//   - if the resulting locale is not a basic one (""/"C"/"POSIX"), the loop stops.
// After the loop UTF-8 is forced when IsNativeUTF8() is true or the locale is basic;
// otherwise forcing is switched off. Without ENV_HAVE_LOCALE the function does nothing.
void MY_SetLocale()
{
#ifdef ENV_HAVE_LOCALE
/*
{
const char *s = GetLocale();
printf("\nGetLocale() : returned : \"%s\"\n", s);
}
*/
unsigned start = 0;
// unsigned lim = 0;
unsigned lim = 3;
/*
#define MY_SET_LOCALE_FLAGS__FROM_ENV 1
#define MY_SET_LOCALE_FLAGS__TRY_UTF8 2
unsigned flags =
MY_SET_LOCALE_FLAGS__FROM_ENV |
MY_SET_LOCALE_FLAGS__TRY_UTF8
if (flags != 0)
{
if (flags & MY_SET_LOCALE_FLAGS__FROM_ENV)
lim = (flags & MY_SET_LOCALE_FLAGS__TRY_UTF8) ? 3 : 1;
else
{
start = 1;
lim = 2;
}
}
*/
for (unsigned i = start; i < lim; i++)
{
/*
man7: "If locale is an empty string, "", each part of the locale that
should be modified is set according to the environment variables.
for glibc: glibc, first from the user's environment variables:
1) the environment variable LC_ALL,
2) environment variable with the same name as the category (see the
3) the environment variable LANG
The locale "C" or "POSIX" is a portable locale; it exists on all conforming systems.
for WIN32 : MSDN :
Sets the locale to the default, which is the user-default
ANSI code page obtained from the operating system.
The locale name is set to the value returned by GetUserDefaultLocaleName.
The code page is set to the value returned by GetACP
*/
// default for every pass except (i == 1): take the locale from the environment
const char *newLocale = "";
#ifdef __APPLE__
/* look also CFLocale
there is no C.UTF-8 in macos
macos has UTF-8 locale only with some language like en_US.UTF-8
what is best way to set UTF-8 locale in macos? */
if (i == 1)
newLocale = "en_US.UTF-8";
/* file open with non-utf8 sequences return
#define EILSEQ 92 // "Illegal byte sequence"
*/
#else
// newLocale = "C";
if (i == 1)
{
newLocale = "C.UTF-8"; // main UTF-8 locale in ubuntu
// newLocale = ".utf8"; // supported in new Windows 10 build 17134 (April 2018 Update), the Universal C Runtime
// newLocale = "en_US.utf8"; // supported by ubuntu ?
// newLocale = "en_US.UTF-8";
/* setlocale() in ubuntu allows locales with minor character changes in strings
"en_US.UTF-8" / "en_US.utf8" */
}
#endif
// printf("\nsetlocale(LC_ALL, \"%s\") : returned: ", newLocale);
// const char *s =
// the return value is ignored; the outcome is checked via GetLocale() below
setlocale(LC_ALL, newLocale);
/*
if (!s)
printf("NULL: can't set locale");
else
printf("\"%s\"\n", s);
*/
// request current locale of program
const char *locale = GetLocale();
if (locale)
{
// (a) is only consulted by the disabled "utf" name check below
AString a (locale);
a.MakeLower_Ascii();
// if (a.Find("utf") >= 0)
{
if (IsNativeUTF8())
{
Set_ForceToUTF8(true);
return;
}
}
if (!Is_Default_Basic_Locale(locale))
{
// if there is some non-default and non-utf locale, we want to use it
break; // comment it for debug
}
}
}
// final decision for whatever locale is active after the loop
if (IsNativeUTF8())
{
Set_ForceToUTF8(true);
return;
}
if (Is_Default_Basic_Locale())
{
Set_ForceToUTF8(true);
return;
}
Set_ForceToUTF8(false);
#elif defined(LOCALE_IS_UTF8)
// assume LC_CTYPE="utf8"
#else
// assume LC_CTYPE="C"
#endif
}
#endif

View File

@@ -85,4 +85,26 @@ inline AString GetOemString(const UString &u)
AString SystemStringToOemString(const CSysString &src);
#endif
#ifdef _WIN32
/* we don't need locale functions in Windows
but we can define ENV_HAVE_LOCALE here for debug purposes */
// #define ENV_HAVE_LOCALE
#else
#define ENV_HAVE_LOCALE
#endif
#ifdef ENV_HAVE_LOCALE
void MY_SetLocale();
const char *GetLocale(void);
#endif
#if !defined(_WIN32) || defined(ENV_HAVE_LOCALE)
bool IsNativeUTF8();
#endif
#ifndef _WIN32
extern bool g_ForceToUTF8;
#endif
#endif

View File

@@ -17,7 +17,7 @@ static const UInt64 k_UInt64_max = UINT64_CONST(0xFFFFFFFFFFFFFFFF);
if (c < '0' || c > '9') { if (end) *end = s; return res; } \
if (res > (k_ ## uintType ## _max) / 10) return 0; \
res *= 10; \
unsigned v = (c - '0'); \
unsigned v = (unsigned)(c - '0'); \
if (res > (k_ ## uintType ## _max) - v) return 0; \
res += v; }}

View File

@@ -2,11 +2,17 @@
#include "StdAfx.h"
// #include <stdio.h>
#include "MyTypes.h"
#include "UTFConvert.h"
#ifdef _WIN32
#define _WCHART_IS_16BIT 1
#ifndef _WCHART_IS_16BIT
#ifndef __APPLE__
// we define it if the system supports files with non-utf8 symbols:
#define _UTF8_RAW_NON_UTF8_SUPPORTED
#endif
#endif
/*
@@ -18,78 +24,332 @@
1 : 0xC0 : 11 :
2 : 0xE0 : 16 : Basic Multilingual Plane
3 : 0xF0 : 21 : Unicode space
3 : 0xF8 : 26 :
5 : 0xFC : 31 : UCS-4
4 : 0xF8 : 26 :
5 : 0xFC : 31 : UCS-4 : wcstombs() in ubuntu is limited to that value
6 : 0xFE : 36 : We can use it, if we want to encode any 32-bit value
7 : 0xFF :
*/
#define _UTF8_START(n) (0x100 - (1 << (7 - (n))))
#define _UTF8_HEAD_PARSE2(n) if (c < _UTF8_START((n) + 1)) { numBytes = (n); c -= _UTF8_START(n); }
#define _UTF8_HEAD_PARSE2(n) \
if (c < _UTF8_START((n) + 1)) \
{ numBytes = (n); val -= _UTF8_START(n); }
#ifndef _WCHART_IS_16BIT
/*
if (wchar_t is 32-bit), we can support large points in long UTF-8 sequence,
when we convert wchar_t strings to UTF-8:
(_UTF8_NUM_TAIL_BYTES_MAX == 3) : (21-bits points) - Unicode
(_UTF8_NUM_TAIL_BYTES_MAX == 5) : (31-bits points) - UCS-4
(_UTF8_NUM_TAIL_BYTES_MAX == 6) : (36-bit hack)
*/
#define _UTF8_NUM_TAIL_BYTES_MAX 5
#endif
/*
#define _UTF8_HEAD_PARSE \
UInt32 val = c; \
_UTF8_HEAD_PARSE2(1) \
else _UTF8_HEAD_PARSE2(2) \
else _UTF8_HEAD_PARSE2(3) \
else _UTF8_HEAD_PARSE2(4) \
else _UTF8_HEAD_PARSE2(5) \
#if _UTF8_NUM_TAIL_BYTES_MAX >= 6
else _UTF8_HEAD_PARSE2(6)
#endif
*/
// else _UTF8_HEAD_PARSE2(6)
#define _UTF8_HEAD_PARSE_MAX_3_BYTES \
UInt32 val = c; \
_UTF8_HEAD_PARSE2(1) \
else _UTF8_HEAD_PARSE2(2) \
else { numBytes = 3; val -= _UTF8_START(3); }
bool CheckUTF8(const char *src, bool allowReduced) throw()
#define _UTF8_RANGE(n) (((UInt32)1) << ((n) * 5 + 6))
#define START_POINT_FOR_SURROGATE 0x10000
/* we use 128 bytes block in 16-bit BMP-PLANE to encode non-UTF-8 Escapes
Also we can use additional HIGH-PLANE (we use 21-bit points above 0x1f0000)
to simplify internal intermediate conversion in Linux:
RAW-UTF-8 <-> internal wchar_t utf-16 strings <-> RAW-UTF-8
*/
#if defined(_WCHART_IS_16BIT)
#define UTF_ESCAPE_PLANE 0
#else
/*
we can place 128 ESCAPE chars to
ef 80 - ee be 80 (3-bytes utf-8) : similar to WSL
ef ff - ee bf bf
1f ef 80 - f7 be be 80 (4-bytes utf-8) : last 4-bytes utf-8 plane (out of Unicode)
1f ef ff - f7 be bf bf (4-bytes utf-8) : last 4-bytes utf-8 plane (out of Unicode)
*/
// #define UTF_ESCAPE_PLANE_HIGH (0x1f << 16)
// #define UTF_ESCAPE_PLANE UTF_ESCAPE_PLANE_HIGH
#define UTF_ESCAPE_PLANE 0
/*
if (UTF_FLAG__FROM_UTF8__USE_ESCAPE is set)
{
if (UTF_ESCAPE_PLANE is UTF_ESCAPE_PLANE_HIGH)
{
we can restore any 8-bit Escape from ESCAPE-PLANE-21 plane.
But ESCAPE-PLANE-21 point cannot be stored to utf-16 (7z archive)
So we still need a way to extract 8-bit Escapes and BMP-Escapes-8
from same BMP-Escapes-16 stored in 7z.
And if we want to restore any 8-bit from 7z archive,
we still must use UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT for (utf-8 -> utf-16)
Also we need additional Conversions to transform from utf-16 to utf-16-With-Escapes-21
}
else (UTF_ESCAPE_PLANE == 0)
{
we must convert original 3-bytes utf-8 BMP-Escape point to sequence
of 3 BMP-Escape-16 points with UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT
so we can extract original RAW-UTF-8 from UTF-16 later.
}
}
*/
#endif
#define UTF_ESCAPE_BASE 0xef00
#ifdef UTF_ESCAPE_BASE
#define IS_ESCAPE_POINT(v, plane) (((v) & (UInt32)0xffffff80) == (plane) + UTF_ESCAPE_BASE + 0x80)
#endif
#define IS_SURROGATE_POINT(v) (((v) & (UInt32)0xfffff800) == 0xd800)
#define IS_LOW_SURROGATE_POINT(v) (((v) & (UInt32)0xfffffC00) == 0xdc00)
#define _ERROR_UTF8_CHECK \
{ NonUtf = true; continue; }
void CUtf8Check::Check_Buf(const char *src, size_t size) throw()
{
Clear();
// Byte maxByte = 0;
for (;;)
{
Byte c = *src++;
if (size == 0)
break;
const Byte c = (Byte)(*src++);
size--;
if (c == 0)
{
ZeroChar = true;
continue;
}
/*
if (c > maxByte)
maxByte = c;
*/
if (c < 0x80)
continue;
if (c < 0xc0 + 2)// it's limit for 0x140000 unicode codes : win32 compatibility
_ERROR_UTF8_CHECK
unsigned numBytes;
UInt32 val = c;
_UTF8_HEAD_PARSE2(1)
else _UTF8_HEAD_PARSE2(2)
else _UTF8_HEAD_PARSE2(4)
else _UTF8_HEAD_PARSE2(5)
else
{
_ERROR_UTF8_CHECK
}
unsigned pos = 0;
do
{
if (pos == size)
break;
unsigned c2 = (Byte)src[pos];
c2 -= 0x80;
if (c2 >= 0x40)
break;
val <<= 6;
val |= c2;
if (pos == 0)
if (val < (((unsigned)1 << 7) >> numBytes))
break;
pos++;
}
while (--numBytes);
if (numBytes != 0)
{
if (pos == size)
Truncated = true;
else
_ERROR_UTF8_CHECK
}
#ifdef UTF_ESCAPE_BASE
if (IS_ESCAPE_POINT(val, 0))
Escape = true;
#endif
if (MaxHighPoint < val)
MaxHighPoint = val;
if (IS_SURROGATE_POINT(val))
SingleSurrogate = true;
src += pos;
size -= pos;
}
// MaxByte = maxByte;
}
bool Check_UTF8_Buf(const char *src, size_t size, bool allowReduced) throw()
{
CUtf8Check check;
check.Check_Buf(src, size);
return check.IsOK(allowReduced);
}
/*
bool CheckUTF8_chars(const char *src, bool allowReduced) throw()
{
CUtf8Check check;
check.CheckBuf(src, strlen(src));
return check.IsOK(allowReduced);
}
*/
bool CheckUTF8_AString(const AString &s) throw()
{
CUtf8Check check;
check.Check_AString(s);
return check.IsOK();
}
/*
bool CheckUTF8(const char *src, bool allowReduced) throw()
{
// return Check_UTF8_Buf(src, strlen(src), allowReduced);
for (;;)
{
const Byte c = (Byte)(*src++);
if (c == 0)
return true;
if (c < 0x80)
continue;
if (c < 0xC0) // (c < 0xC0 + 2) // if we support only optimal encoding chars
if (c < 0xC0 + 2 || c >= 0xf5)
return false;
unsigned numBytes;
_UTF8_HEAD_PARSE
else
return false;
UInt32 val = c;
unsigned pos = 0;
do
{
Byte c2 = *src++;
Byte c2 = (Byte)(*src++);
if (c2 < 0x80 || c2 >= 0xC0)
return allowReduced && c2 == 0;
val <<= 6;
val |= (c2 - 0x80);
pos++;
}
while (--numBytes);
if (val < _UTF8_RANGE(pos - 1))
return false;
if (val >= 0x110000)
return false;
}
}
*/
// in case of UTF-8 error we have two ways:
// 21.01- : old : 0xfffd: REPLACEMENT CHARACTER : old version
// 21.02+ : new : 0xef00 + (c) : similar to WSL scheme for low symbols
#define UTF_REPLACEMENT_CHAR 0xfffd
#define UTF_ESCAPE(c) \
((flags & UTF_FLAG__FROM_UTF8__USE_ESCAPE) ? \
UTF_ESCAPE_PLANE + UTF_ESCAPE_BASE + (c) : UTF_REPLACEMENT_CHAR)
/*
#define _HARD_ERROR_UTF8
{ if (dest) dest[destPos] = (wchar_t)UTF_ESCAPE(c); \
destPos++; ok = false; continue; }
*/
// we ignore utf errors, and don't change (ok) variable!
#define _ERROR_UTF8 \
{ if (dest) dest[destPos] = (wchar_t)0xFFFD; destPos++; ok = false; continue; }
{ if (dest) dest[destPos] = (wchar_t)UTF_ESCAPE(c); \
destPos++; continue; }
static bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, const char *srcLim) throw()
// we store UTF-16 in wchar_t strings. So we use surrogates for big unicode points:
// for debug purposes only we can store UTF-32 in wchar_t:
// #define START_POINT_FOR_SURROGATE ((UInt32)0 - 1)
/*
WIN32 MultiByteToWideChar(CP_UTF8) emits 0xfffd point, if utf-8 error was found.
And it can emit single 0xfffd from 2 src bytes.
It doesn't emit single 0xfffd from 3-4 src bytes.
We can
1) emit Escape point for each incorrect byte. So we can recover the data later
2) emit 0xfffd for each incorrect byte.
That scheme is similar to Escape scheme, but we emit 0xfffd
instead of each Escape point.
3) emit single 0xfffd from 1-2 incorrect bytes, as WIN32 MultiByteToWideChar scheme
*/
static bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, const char *srcLim, unsigned flags) throw()
{
size_t destPos = 0;
bool ok = true;
for (;;)
{
Byte c;
if (src == srcLim)
{
*destLen = destPos;
return ok;
}
c = *src++;
const Byte c = (Byte)(*src++);
if (c < 0x80)
{
@@ -98,68 +358,127 @@ static bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, const
destPos++;
continue;
}
if (c < 0xC0)
if (c < 0xc0 + 2
|| c >= 0xf5) // it's limit for 0x140000 unicode codes : win32 compatibility
{
_ERROR_UTF8
}
unsigned numBytes;
_UTF8_HEAD_PARSE
else
_ERROR_UTF8
UInt32 val = c;
_UTF8_HEAD_PARSE_MAX_3_BYTES
unsigned pos = 0;
do
{
Byte c2;
if (src == srcLim)
if (src + pos == srcLim)
break;
c2 = *src;
if (c2 < 0x80 || c2 >= 0xC0)
unsigned c2 = (Byte)src[pos];
c2 -= 0x80;
if (c2 >= 0x40)
break;
src++;
val <<= 6;
val |= (c2 - 0x80);
val |= c2;
pos++;
if (pos == 1)
{
if (val < (((unsigned)1 << 7) >> numBytes))
break;
if (numBytes == 2)
{
if (flags & UTF_FLAG__FROM_UTF8__SURROGATE_ERROR)
if ((val & (0xF800 >> 6)) == (0xd800 >> 6))
break;
}
else if (numBytes == 3 && val >= (0x110000 >> 12))
break;
}
}
while (--numBytes);
if (numBytes != 0)
_ERROR_UTF8
if (val < 0x10000)
{
if ((flags & UTF_FLAG__FROM_UTF8__USE_ESCAPE) == 0)
{
// the following code to emit the 0xfffd chars as win32 Utf8 function.
// disable the folling line, if you need 0xfffd for each incorrect byte as in Escape mode
src += pos;
}
_ERROR_UTF8
}
/*
if (val < _UTF8_RANGE(pos - 1))
_ERROR_UTF8
*/
#ifdef UTF_ESCAPE_BASE
if ((flags & UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT)
&& IS_ESCAPE_POINT(val, 0))
{
// We will emit 3 utf16-Escape-16-21 points from one Escape-16 point (3 bytes)
_ERROR_UTF8
}
#endif
/*
We don't expect virtual Escape-21 points in UTF-8 stream.
And we don't check for Escape-21.
So utf8-Escape-21 will be converted to another 3 utf16-Escape-21 points.
Maybe we could convert virtual utf8-Escape-21 to one utf16-Escape-21 point in some cases?
*/
if (val < START_POINT_FOR_SURROGATE)
{
/*
if ((flags & UTF_FLAG__FROM_UTF8__SURROGATE_ERROR)
&& IS_SURROGATE_POINT(val))
{
// We will emit 3 utf16-Escape-16-21 points from one Surrogate-16 point (3 bytes)
_ERROR_UTF8
}
*/
if (dest)
dest[destPos] = (wchar_t)val;
destPos++;
}
else
{
val -= 0x10000;
if (val >= 0x100000)
/*
if (val >= 0x110000)
{
// We will emit utf16-Escape-16-21 point from each source byte
_ERROR_UTF8
}
*/
if (dest)
{
dest[destPos + 0] = (wchar_t)(0xD800 + (val >> 10));
dest[destPos + 1] = (wchar_t)(0xDC00 + (val & 0x3FF));
dest[destPos + 0] = (wchar_t)(0xd800 - (0x10000 >> 10) + (val >> 10));
dest[destPos + 1] = (wchar_t)(0xdc00 + (val & 0x3ff));
}
destPos += 2;
}
src += pos;
}
}
#define _UTF8_RANGE(n) (((UInt32)1) << ((n) * 5 + 6))
#define _UTF8_HEAD(n, val) ((char)(_UTF8_START(n) + (val >> (6 * (n)))))
#define _UTF8_CHAR(n, val) ((char)(0x80 + (((val) >> (6 * (n))) & 0x3F)))
static size_t Utf16_To_Utf8_Calc(const wchar_t *src, const wchar_t *srcLim)
static size_t Utf16_To_Utf8_Calc(const wchar_t *src, const wchar_t *srcLim, unsigned flags)
{
size_t size = srcLim - src;
size_t size = (size_t)(srcLim - src);
for (;;)
{
if (src == srcLim)
return size;
UInt32 val = *src++;
UInt32 val = (UInt32)(*src++);
if (val < 0x80)
continue;
@@ -170,15 +489,32 @@ static size_t Utf16_To_Utf8_Calc(const wchar_t *src, const wchar_t *srcLim)
continue;
}
if (val >= 0xD800 && val < 0xDC00 && src != srcLim)
{
UInt32 c2 = *src;
if (c2 >= 0xDC00 && c2 < 0xE000)
{
src++;
size += 2;
#ifdef UTF_ESCAPE_BASE
#if UTF_ESCAPE_PLANE != 0
if (flags & UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE)
if (IS_ESCAPE_POINT(val, UTF_ESCAPE_PLANE))
continue;
#endif
if (flags & UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE)
if (IS_ESCAPE_POINT(val, 0))
continue;
#endif
if (IS_SURROGATE_POINT(val))
{
// it's hack to UTF-8 encoding
if (val < 0xdc00 && src != srcLim)
{
const UInt32 c2 = (UInt32)*src;
if (c2 >= 0xdc00 && c2 < 0xe000)
src++;
}
size += 2;
continue;
}
#ifdef _WCHART_IS_16BIT
@@ -191,20 +527,26 @@ static size_t Utf16_To_Utf8_Calc(const wchar_t *src, const wchar_t *srcLim)
else if (val < _UTF8_RANGE(3)) size += 3;
else if (val < _UTF8_RANGE(4)) size += 4;
else if (val < _UTF8_RANGE(5)) size += 5;
else size += 6;
else
#if _UTF8_NUM_TAIL_BYTES_MAX >= 6
size += 6;
#else
size += 3;
#endif
#endif
}
}
static char *Utf16_To_Utf8(char *dest, const wchar_t *src, const wchar_t *srcLim)
static char *Utf16_To_Utf8(char *dest, const wchar_t *src, const wchar_t *srcLim, unsigned flags)
{
for (;;)
{
if (src == srcLim)
return dest;
UInt32 val = *src++;
UInt32 val = (UInt32)*src++;
if (val < 0x80)
{
@@ -220,22 +562,57 @@ static char *Utf16_To_Utf8(char *dest, const wchar_t *src, const wchar_t *srcLim
continue;
}
if (val >= 0xD800 && val < 0xDC00 && src != srcLim)
{
UInt32 c2 = *src;
if (c2 >= 0xDC00 && c2 < 0xE000)
#ifdef UTF_ESCAPE_BASE
#if UTF_ESCAPE_PLANE != 0
/*
if (wchar_t is 32-bit)
&& (UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE is set)
&& (point is virtual escape plane)
we extract 8-bit byte from virtual HIGH-ESCAPE PLANE.
*/
if (flags & UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE)
if (IS_ESCAPE_POINT(val, UTF_ESCAPE_PLANE))
{
src++;
val = (((val - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000;
dest[0] = _UTF8_HEAD(3, val);
dest[1] = _UTF8_CHAR(2, val);
dest[2] = _UTF8_CHAR(1, val);
dest[3] = _UTF8_CHAR(0, val);
dest += 4;
*dest++ = (char)(val);
continue;
}
#endif // UTF_ESCAPE_PLANE != 0
/* if (UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE is defined)
we extract 8-bit byte from BMP-ESCAPE PLANE. */
if (flags & UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE)
if (IS_ESCAPE_POINT(val, 0))
{
*dest++ = (char)(val);
continue;
}
}
#endif // UTF_ESCAPE_BASE
if (IS_SURROGATE_POINT(val))
{
// it's hack to UTF-8 encoding
if (val < 0xdc00 && src != srcLim)
{
const UInt32 c2 = (UInt32)*src;
if (IS_LOW_SURROGATE_POINT(c2))
{
src++;
val = (((val - 0xd800) << 10) | (c2 - 0xdc00)) + 0x10000;
dest[0] = _UTF8_HEAD(3, val);
dest[1] = _UTF8_CHAR(2, val);
dest[2] = _UTF8_CHAR(1, val);
dest[3] = _UTF8_CHAR(0, val);
dest += 4;
continue;
}
}
if (flags & UTF_FLAG__TO_UTF8__SURROGATE_ERROR)
val = UTF_REPLACEMENT_CHAR; // WIN32 function does it
}
#ifndef _WCHART_IS_16BIT
if (val < _UTF8_RANGE(2))
#endif
@@ -249,14 +626,25 @@ static char *Utf16_To_Utf8(char *dest, const wchar_t *src, const wchar_t *srcLim
#ifndef _WCHART_IS_16BIT
UInt32 b;
// we don't expect this case. so we can throw exception
// throw 20210407;
char b;
unsigned numBits;
if (val < _UTF8_RANGE(3)) { numBits = 6 * 3; b = _UTF8_HEAD(3, val); }
else if (val < _UTF8_RANGE(4)) { numBits = 6 * 4; b = _UTF8_HEAD(4, val); }
else if (val < _UTF8_RANGE(5)) { numBits = 6 * 5; b = _UTF8_HEAD(5, val); }
else { numBits = 6 * 6; b = _UTF8_START(6); }
*dest++ = (Byte)b;
#if _UTF8_NUM_TAIL_BYTES_MAX >= 6
else { numBits = 6 * 6; b = (char)_UTF8_START(6); }
#else
else
{
val = UTF_REPLACEMENT_CHAR;
{ numBits = 6 * 3; b = _UTF8_HEAD(3, val); }
}
#endif
*dest++ = b;
do
{
@@ -269,20 +657,207 @@ static char *Utf16_To_Utf8(char *dest, const wchar_t *src, const wchar_t *srcLim
}
}
bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
bool Convert_UTF8_Buf_To_Unicode(const char *src, size_t srcSize, UString &dest, unsigned flags)
{
dest.Empty();
size_t destLen = 0;
Utf8_To_Utf16(NULL, &destLen, src, src.Ptr(src.Len()));
bool res = Utf8_To_Utf16(dest.GetBuf((unsigned)destLen), &destLen, src, src.Ptr(src.Len()));
Utf8_To_Utf16(NULL, &destLen, src, src + srcSize, flags);
bool res = Utf8_To_Utf16(dest.GetBuf((unsigned)destLen), &destLen, src, src + srcSize, flags);
dest.ReleaseBuf_SetEnd((unsigned)destLen);
return res;
}
// AString front end for Convert_UTF8_Buf_To_Unicode():
// passes the string data and its length together with (flags)
// and returns that function's result unchanged.
bool ConvertUTF8ToUnicode_Flags(const AString &src, UString &dest, unsigned flags)
{
  const size_t numBytes = src.Len();
  return Convert_UTF8_Buf_To_Unicode(src, numBytes, dest, flags);
}
// Flags that ConvertUTF8ToUnicode() passes to ConvertUTF8ToUnicode_Flags():
//   always                                  : UTF_FLAG__FROM_UTF8__USE_ESCAPE
//   if _WCHART_IS_16BIT is not defined      : + UTF_FLAG__FROM_UTF8__SURROGATE_ERROR
//   ... and _UTF8_RAW_NON_UTF8_SUPPORTED is : + UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT
static unsigned g_UTF8_To_Unicode_Flags =
    UTF_FLAG__FROM_UTF8__USE_ESCAPE
  #if !defined(_WCHART_IS_16BIT)
    | UTF_FLAG__FROM_UTF8__SURROGATE_ERROR
    #if defined(_UTF8_RAW_NON_UTF8_SUPPORTED)
    | UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT
    #endif
  #endif
    ;
/*
bool ConvertUTF8ToUnicode_boolRes(const AString &src, UString &dest)
{
return ConvertUTF8ToUnicode_Flags(src, dest, g_UTF8_To_Unicode_Flags);
}
*/
// Converts UTF-8 (src) to (dest) using the file-level default flags
// (g_UTF8_To_Unicode_Flags); returns the result of ConvertUTF8ToUnicode_Flags().
bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
{
  const unsigned defaultFlags = g_UTF8_To_Unicode_Flags;
  return ConvertUTF8ToUnicode_Flags(src, dest, defaultFlags);
}
void Print_UString(const UString &a);
void ConvertUnicodeToUTF8_Flags(const UString &src, AString &dest, unsigned flags)
{
/*
if (src.Len()== 24)
throw "202104";
*/
dest.Empty();
const size_t destLen = Utf16_To_Utf8_Calc(src, src.Ptr(src.Len()), flags);
char *destStart = dest.GetBuf((unsigned)destLen);
const char *destEnd = Utf16_To_Utf8(destStart, src, src.Ptr(src.Len()), flags);
dest.ReleaseBuf_SetEnd((unsigned)destLen);
// printf("\nlen = %d\n", src.Len());
if (destLen != (size_t)(destEnd - destStart))
{
/*
// dest.ReleaseBuf_SetEnd((unsigned)(destEnd - destStart));
printf("\nlen = %d\n", (unsigned)destLen);
printf("\n(destEnd - destStart) = %d\n", (unsigned)(destEnd - destStart));
printf("\n");
// Print_UString(src);
printf("\n");
// printf("\nlen = %d\n", destLen);
*/
throw 20210406;
}
}
// Default flags used by ConvertUnicodeToUTF8() and Convert_Unicode_To_UTF8_Buf():
//   _WIN32 defined                                   : 0
//   non-_WIN32, _UTF8_RAW_NON_UTF8_SUPPORTED defined : UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE
//   non-_WIN32 otherwise                             : UTF_FLAG__TO_UTF8__SURROGATE_ERROR
// The initializer is terminated only by the single ';' after the last #endif,
// so no preprocessor branch may carry its own ';' (that would leave a stray
// empty declaration behind).
unsigned g_Unicode_To_UTF8_Flags =
    // UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE
    0
  #ifndef _WIN32
    #ifdef _UTF8_RAW_NON_UTF8_SUPPORTED
    | UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE
    #else
    | UTF_FLAG__TO_UTF8__SURROGATE_ERROR
    #endif
  #endif
    ;
void ConvertUnicodeToUTF8(const UString &src, AString &dest)
{
dest.Empty();
size_t destLen = Utf16_To_Utf8_Calc(src, src.Ptr(src.Len()));
Utf16_To_Utf8(dest.GetBuf((unsigned)destLen), src, src.Ptr(src.Len()));
dest.ReleaseBuf_SetEnd((unsigned)destLen);
ConvertUnicodeToUTF8_Flags(src, dest, g_Unicode_To_UTF8_Flags);
}
void Convert_Unicode_To_UTF8_Buf(const UString &src, CByteBuffer &dest)
{
const unsigned flags = g_Unicode_To_UTF8_Flags;
dest.Free();
const size_t destLen = Utf16_To_Utf8_Calc(src, src.Ptr(src.Len()), flags);
dest.Alloc(destLen);
const char *destEnd = Utf16_To_Utf8((char *)(void *)(Byte *)dest, src, src.Ptr(src.Len()), flags);
if (destLen != (size_t)(destEnd - (char *)(void *)(Byte *)dest))
throw 202104;
}
/*
#ifndef _WIN32
void Convert_UTF16_To_UTF32(const UString &src, UString &dest)
{
dest.Empty();
for (size_t i = 0; i < src.Len();)
{
wchar_t c = src[i++];
if (c >= 0xd800 && c < 0xdc00 && i < src.Len())
{
const wchar_t c2 = src[i];
if (c2 >= 0xdc00 && c2 < 0x10000)
{
// printf("\nSurragate [%d]: %4x %4x -> ", i, (int)c, (int)c2);
c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
// printf("%4x\n", (int)c);
i++;
}
}
dest += c;
}
}
void Convert_UTF32_To_UTF16(const UString &src, UString &dest)
{
dest.Empty();
for (size_t i = 0; i < src.Len();)
{
wchar_t w = src[i++];
if (w >= 0x10000 && w < 0x110000)
{
w -= 0x10000;
dest += (wchar_t)((unsigned)0xd800 + (((unsigned)w >> 10) & 0x3ff));
w = 0xdc00 + (w & 0x3ff);
}
dest += w;
}
}
bool UTF32_IsThere_BigPoint(const UString &src)
{
for (size_t i = 0; i < src.Len();)
{
const UInt32 c = (UInt32)src[i++];
if (c >= 0x110000)
return true;
}
return false;
}
bool Unicode_IsThere_BmpEscape(const UString &src)
{
for (size_t i = 0; i < src.Len();)
{
const UInt32 c = (UInt32)src[i++];
if (IS_ESCAPE_POINT(c, 0))
return true;
}
return false;
}
#endif
bool Unicode_IsThere_Utf16SurrogateError(const UString &src)
{
for (size_t i = 0; i < src.Len();)
{
const UInt32 val = (UInt32)src[i++];
if (IS_SURROGATE_POINT(val))
{
// it's hack to UTF-8 encoding
if (val >= 0xdc00 || i == src.Len())
return true;
const UInt32 c2 = (UInt32)*src;
if (!IS_LOW_SURROGATE_POINT(c2))
return true;
}
}
return false;
}
*/
#ifndef _WCHART_IS_16BIT

#if UTF_ESCAPE_PLANE == 0

// UTF_ESCAPE_PLANE is 0: the string is left unchanged.
void Convert_UnicodeEsc16_To_UnicodeEscHigh(UString &) {}

#else

// Adds UTF_ESCAPE_PLANE to every character of (s) for which
// IS_ESCAPE_POINT(c, 0) is true; all other characters are left unchanged.
void Convert_UnicodeEsc16_To_UnicodeEscHigh(UString &s)
{
  const unsigned numChars = s.Len();
  for (unsigned pos = 0; pos < numChars; pos++)
  {
    wchar_t cur = s[pos];
    if (!(IS_ESCAPE_POINT(cur, 0)))
      continue;
    cur += UTF_ESCAPE_PLANE;
    s.ReplaceOneCharAtPos(pos, cur);
  }
}

#endif

#endif

View File

@@ -3,10 +3,382 @@
#ifndef __COMMON_UTF_CONVERT_H
#define __COMMON_UTF_CONVERT_H
#include "MyBuffer.h"
#include "MyString.h"
bool CheckUTF8(const char *src, bool allowReduced = false) throw();
bool ConvertUTF8ToUnicode(const AString &utfString, UString &resultString);
void ConvertUnicodeToUTF8(const UString &unicodeString, AString &resultString);
// Accumulated result of scanning one or more buffers with Check_Buf().
// The flags only ever go from false to true until Clear() is called.
struct CUtf8Check
{
  // Byte MaxByte;     // in original src stream
  bool NonUtf;
  bool ZeroChar;
  bool SingleSurrogate;
  bool Escape;
  bool Truncated;
  UInt32 MaxHighPoint; // only for points >= 0x80

  CUtf8Check() { Clear(); }

  // Resets all flags to false and MaxHighPoint to 0.
  void Clear()
  {
    // MaxByte = 0;
    NonUtf = ZeroChar = SingleSurrogate = Escape = Truncated = false;
    MaxHighPoint = 0;
  }

  // Merges the result (c) into this one: flags are OR-ed, MaxHighPoint is the maximum.
  void Update(const CUtf8Check &c)
  {
    NonUtf = NonUtf || c.NonUtf;
    ZeroChar = ZeroChar || c.ZeroChar;
    SingleSurrogate = SingleSurrogate || c.SingleSurrogate;
    Escape = Escape || c.Escape;
    Truncated = Truncated || c.Truncated;
    if (c.MaxHighPoint > MaxHighPoint)
      MaxHighPoint = c.MaxHighPoint;
  }

  // Replaces (s) with a space-separated list of the flags that are set,
  // followed by "MaxUnicode=<value>" if MaxHighPoint is non-zero.
  void PrintStatus(AString &s) const
  {
    s.Empty();

    // s.Add_OptSpaced("MaxByte=");
    // s.Add_UInt32(MaxByte);

    if (NonUtf)          s.Add_OptSpaced("non-UTF8");
    if (ZeroChar)        s.Add_OptSpaced("ZeroChar");
    if (SingleSurrogate) s.Add_OptSpaced("SingleSurrogate");
    if (Escape)          s.Add_OptSpaced("Escape");
    if (Truncated)       s.Add_OptSpaced("Truncated");

    if (MaxHighPoint == 0)
      return;
    s.Add_OptSpaced("MaxUnicode=");
    s.Add_UInt32(MaxHighPoint);
  }

  // Returns true if none of NonUtf / SingleSurrogate / ZeroChar is set,
  // MaxHighPoint is below 0x110000, and Truncated is either clear
  // or tolerated by (allowReduced).
  bool IsOK(bool allowReduced = false) const
  {
    const bool hasHardError = NonUtf || SingleSurrogate || ZeroChar;
    return !hasHardError
        && MaxHighPoint < 0x110000
        && (allowReduced || !Truncated);
  }

  // it checks full buffer as specified in (size) and it doesn't stop on zero char
  void Check_Buf(const char *src, size_t size) throw();

  // Check_Buf() over the data and length of (s).
  void Check_AString(const AString &s) throw()
  {
    Check_Buf(s.Ptr(), s.Len());
  }
};
/*
if (allowReduced == false) - all UTF-8 character sequences must be finished.
if (allowReduced == true) - it allows truncated last character-Utf8-sequence
*/
bool Check_UTF8_Buf(const char *src, size_t size, bool allowReduced) throw();
bool CheckUTF8_AString(const AString &s) throw();
#define UTF_FLAG__FROM_UTF8__SURROGATE_ERROR (1 << 0)
#define UTF_FLAG__FROM_UTF8__USE_ESCAPE (1 << 1)
#define UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT (1 << 2)
/*
UTF_FLAG__FROM_UTF8__SURROGATE_ERROR
if (flag is NOT set)
{
it processes SINGLE-SURROGATE-8 as valid Unicode point.
it converts SINGLE-SURROGATE-8 to SINGLE-SURROGATE-16
Note: some sequences of two SINGLE-SURROGATE-8 points
will generate correct SURROGATE-16-PAIR, and
that SURROGATE-16-PAIR later will be converted to correct
UTF8-SURROGATE-21 point. So we don't restore original
STR-8 sequence in that case.
}
if (flag is set)
{
if (UTF_FLAG__FROM_UTF8__USE_ESCAPE is defined)
it generates ESCAPE for SINGLE-SURROGATE-8,
if (UTF_FLAG__FROM_UTF8__USE_ESCAPE is not defined)
it generates U+fffd for SINGLE-SURROGATE-8,
}
UTF_FLAG__FROM_UTF8__USE_ESCAPE
if (flag is NOT set)
it generates (U+fffd) code for non-UTF-8 (invalid) characters
if (flag is set)
{
It generates (ESCAPE) codes for NON-UTF-8 (invalid) characters.
And later we can restore original UTF-8-RAW characters from (ESCAPE-16-21) codes.
}
UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT
if (flag is NOT set)
{
it processes ESCAPE-8 points like any other Unicode points.
In Linux: ESCAPE-16 will mean two different ESCAPE-8 sequences,
so we need HIGH-ESCAPE-PLANE-21 to restore UTF-8-RAW -> UTF-16 -> UTF-8-RAW
}
if (flag is set)
{
it generates ESCAPE-16-21 for ESCAPE-8 points
so we can restore UTF-8-RAW -> UTF-16 -> UTF-8-RAW without HIGH-ESCAPE-PLANE-21.
}
Main USE CASES with UTF-8 <-> UTF-16 conversions:
WIN32: UTF-16-RAW -> UTF-8 (Archive) -> UTF-16-RAW
{
set UTF_FLAG__FROM_UTF8__USE_ESCAPE
Do NOT set UTF_FLAG__FROM_UTF8__SURROGATE_ERROR
Do NOT set UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT
So we restore original SINGLE-SURROGATE-16 from single SINGLE-SURROGATE-8.
}
Linux: UTF-8-RAW -> UTF-16 (Intermediate / Archive) -> UTF-8-RAW
{
we want restore original UTF-8-RAW sequence later from that ESCAPE-16.
Set the flags:
UTF_FLAG__FROM_UTF8__SURROGATE_ERROR
UTF_FLAG__FROM_UTF8__USE_ESCAPE
UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT
}
MacOS: UTF-8-RAW -> UTF-16 (Intermediate / Archive) -> UTF-8-RAW
{
we want to restore correct UTF-8 without any BMP processing:
Set the flags:
UTF_FLAG__FROM_UTF8__SURROGATE_ERROR
UTF_FLAG__FROM_UTF8__USE_ESCAPE
}
*/
// zero char is not allowed in (src) buf
bool Convert_UTF8_Buf_To_Unicode(const char *src, size_t srcSize, UString &dest, unsigned flags = 0);
bool ConvertUTF8ToUnicode_Flags(const AString &src, UString &dest, unsigned flags = 0);
bool ConvertUTF8ToUnicode(const AString &src, UString &dest);
#define UTF_FLAG__TO_UTF8__SURROGATE_ERROR (1 << 8)
#define UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE (1 << 9)
// #define UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE (1 << 10)
/*
UTF_FLAG__TO_UTF8__SURROGATE_ERROR
if (flag is NOT set)
{
we extract SINGLE-SURROGATE as normal UTF-8
In Windows : for UTF-16-RAW <-> UTF-8 (archive) <-> UTF-16-RAW.
In Linux :
use-case-1: UTF-8 -> UTF-16 -> UTF-8 doesn't generate UTF-16 SINGLE-SURROGATE,
if (UTF_FLAG__FROM_UTF8__SURROGATE_ERROR) is used.
use-case 2: UTF-16-7z (with SINGLE-SURROGATE from Windows) -> UTF-8 (Linux)
will generate SINGLE-SURROGATE-UTF-8 here.
}
if (flag is set)
{
we generate UTF_REPLACEMENT_CHAR (0xfffd) for SINGLE_SURROGATE
it can be used for compatibility mode with WIN32 UTF function
or if we want UTF-8 stream without any errors
}
UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE
if (flag is NOT set) it doesn't extract raw 8-bit symbol from Escape-Plane-16
if (flag is set) it extracts raw 8-bit symbol from Escape-Plane-16
in Linux we need some way to extract NON-UTF8 RAW 8-bits from BMP (UTF-16 7z archive):
if (we use High-Escape-Plane), we can transfer BMP escapes to High-Escape-Plane.
if (we don't use High-Escape-Plane), we must use UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE.
UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE
// that flag affects the code only if (wchar_t is 32-bit)
// that mode with high-escape can be disabled now in UTFConvert.cpp
if (flag is NOT set)
it doesn't extract raw 8-bit symbol from High-Escape-Plane
if (flag is set)
it extracts raw 8-bit symbol from High-Escape-Plane
Main use cases:
WIN32 : UTF-16-RAW -> UTF-8 (archive) -> UTF-16-RAW
{
Do NOT set UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE.
Do NOT set UTF_FLAG__TO_UTF8__SURROGATE_ERROR.
So we restore original UTF-16-RAW.
}
Linux : UTF-8 with Escapes -> UTF-16 (7z archive) -> UTF-8 with Escapes
set UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE to extract non-UTF from 7z archive
set UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE for intermediate UTF-16.
Note: high escape mode can be ignored now in UTFConvert.cpp
macOS:
the system doesn't support incorrect UTF-8 in file names.
set UTF_FLAG__TO_UTF8__SURROGATE_ERROR
*/
extern unsigned g_Unicode_To_UTF8_Flags;
void ConvertUnicodeToUTF8_Flags(const UString &src, AString &dest, unsigned flags = 0);
void ConvertUnicodeToUTF8(const UString &src, AString &dest);
void Convert_Unicode_To_UTF8_Buf(const UString &src, CByteBuffer &dest);
/*
#ifndef _WIN32
void Convert_UTF16_To_UTF32(const UString &src, UString &dest);
void Convert_UTF32_To_UTF16(const UString &src, UString &dest);
bool UTF32_IsThere_BigPoint(const UString &src);
bool Unicode_IsThere_BmpEscape(const UString &src);
#endif
bool Unicode_IsThere_Utf16SurrogateError(const UString &src);
*/
#ifdef _WCHART_IS_16BIT
#define Convert_UnicodeEsc16_To_UnicodeEscHigh(s)
#else
void Convert_UnicodeEsc16_To_UnicodeEscHigh(UString &s);
#endif
/*
// #include "../../C/CpuArch.h"
// ---------- Utf16 Little endian functions ----------
// We store 16-bit surrogates even in 32-bit WCHARs in Linux.
// So now we don't use the following code:
#if WCHAR_MAX > 0xffff
// void *p : pointer to src bytes stream
// size_t len : num Utf16 characters : it can include or not include NULL character
inline size_t Utf16LE__Get_Num_WCHARs(const void *p, size_t len)
{
#if WCHAR_MAX > 0xffff
size_t num_wchars = 0;
for (size_t i = 0; i < len; i++)
{
wchar_t c = GetUi16(p);
p = (const void *)((const Byte *)p + 2);
if (c >= 0xd800 && c < 0xdc00 && i + 1 != len)
{
wchar_t c2 = GetUi16(p);
if (c2 >= 0xdc00 && c2 < 0xe000)
{
c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
p = (const void *)((const Byte *)p + 2);
i++;
}
}
num_wchars++;
}
return num_wchars;
#else
UNUSED_VAR(p)
return len;
#endif
}
// #include <stdio.h>
inline wchar_t *Utf16LE__To_WCHARs_Sep(const void *p, size_t len, wchar_t *dest)
{
for (size_t i = 0; i < len; i++)
{
wchar_t c = GetUi16(p);
p = (const void *)((const Byte *)p + 2);
#if WCHAR_PATH_SEPARATOR != L'/'
if (c == L'/')
c = WCHAR_PATH_SEPARATOR;
#endif
#if WCHAR_MAX > 0xffff
if (c >= 0xd800 && c < 0xdc00 && i + 1 != len)
{
wchar_t c2 = GetUi16(p);
if (c2 >= 0xdc00 && c2 < 0xe000)
{
// printf("\nSurragate : %4x %4x -> ", (int)c, (int)c2);
c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
p = (const void *)((const Byte *)p + 2);
i++;
// printf("%4x\n", (int)c);
}
}
#endif
*dest++ = c;
}
return dest;
}
inline size_t Get_Num_Utf16_chars_from_wchar_string(const wchar_t *p)
{
size_t num = 0;
for (;;)
{
wchar_t c = *p++;
if (c == 0)
return num;
num += ((c >= 0x10000 && c < 0x110000) ? 2 : 1);
}
return num;
}
inline Byte *wchars_to_Utf16LE(const wchar_t *p, Byte *dest)
{
for (;;)
{
wchar_t c = *p++;
if (c == 0)
return dest;
if (c >= 0x10000 && c < 0x110000)
{
SetUi16(dest , (UInt16)(0xd800 + ((c >> 10) & 0x3FF)));
SetUi16(dest + 2, (UInt16)(0xdc00 + ( c & 0x3FF)));
dest += 4;
}
else
{
SetUi16(dest, c);
dest += 2;
}
}
}
#endif
*/
#endif

View File

@@ -4,9 +4,17 @@
#include "Wildcard.h"
// Default case sensitivity for file-name comparison; CompareFileNames() reads it
// to choose between MyStringCompare() and MyStringCompareNoCase().
//   _WIN32    : false
//   __APPLE__ : true if TARGET_OS_IPHONE is defined, otherwise false
//   otherwise : true
// The 'extern' declaration is given separately, right before the definition.
// NOTE(review): the Apple branch uses #ifdef, so it depends only on whether
// TARGET_OS_IPHONE is defined, not on its value - confirm that this matches
// how the Apple SDK headers define that macro on macOS builds.
extern
bool g_CaseSensitive;
bool g_CaseSensitive =
#ifdef _WIN32
false;
#elif defined (__APPLE__)
#ifdef TARGET_OS_IPHONE
true;
#else
false;
#endif
#else
true;
#endif
@@ -19,8 +27,16 @@ bool IsPath1PrefixedByPath2(const wchar_t *s1, const wchar_t *s2)
return IsString1PrefixedByString2_NoCase(s1, s2);
}
// #include <stdio.h>
int CompareFileNames(const wchar_t *s1, const wchar_t *s2) STRING_UNICODE_THROW
{
/*
printf("\nCompareFileNames");
printf("\n S1: %ls", s1);
printf("\n S2: %ls", s2);
printf("\n");
*/
if (g_CaseSensitive)
return MyStringCompare(s1, s2);
return MyStringCompareNoCase(s1, s2);
@@ -131,7 +147,7 @@ UString ExtractDirPrefixFromPath(const UString &path)
UString ExtractFileNameFromPath(const UString &path)
{
return UString(path.Ptr(path.ReverseFind_PathSepar() + 1));
return UString(path.Ptr((unsigned)(path.ReverseFind_PathSepar() + 1)));
}
@@ -229,12 +245,12 @@ bool CItem::CheckPath(const UStringVector &pathParts, bool isFile) const
{
if (WildcardMatching)
{
if (!DoesWildcardMatchName(PathParts[i], pathParts[i + d]))
if (!DoesWildcardMatchName(PathParts[i], pathParts[i + (unsigned)d]))
break;
}
else
{
if (CompareFileNames(PathParts[i], pathParts[i + d]) != 0)
if (CompareFileNames(PathParts[i], pathParts[i + (unsigned)d]) != 0)
break;
}
}
@@ -258,16 +274,14 @@ int CCensorNode::FindSubNode(const UString &name) const
{
FOR_VECTOR (i, SubNodes)
if (CompareFileNames(SubNodes[i].Name, name) == 0)
return i;
return (int)i;
return -1;
}
void CCensorNode::AddItemSimple(bool include, CItem &item)
{
if (include)
IncludeItems.Add(item);
else
ExcludeItems.Add(item);
CObjectVector<CItem> &items = include ? IncludeItems : ExcludeItems;
items.Add(item);
}
void CCensorNode::AddItem(bool include, CItem &item, int ignoreWildcardIndex)
@@ -282,6 +296,7 @@ void CCensorNode::AddItem(bool include, CItem &item, int ignoreWildcardIndex)
AddItemSimple(include, item);
return;
}
const UString &front = item.PathParts.Front();
// WIN32 doesn't support wildcards in file names
@@ -292,11 +307,9 @@ void CCensorNode::AddItem(bool include, CItem &item, int ignoreWildcardIndex)
AddItemSimple(include, item);
return;
}
int index = FindSubNode(front);
if (index < 0)
index = SubNodes.Add(CCensorNode(front, this));
CCensorNode &subNode = Find_SubNode_Or_Add_New(front);
item.PathParts.Delete(0);
SubNodes[index].AddItem(include, item, ignoreWildcardIndex - 1);
subNode.AddItem(include, item, ignoreWildcardIndex - 1);
}
void CCensorNode::AddItem(bool include, const UString &path, bool recursive, bool forFile, bool forDir, bool wildcardMatching)
@@ -347,18 +360,19 @@ bool CCensorNode::CheckPathVect(const UStringVector &pathParts, bool isFile, boo
include = false;
return true;
}
include = true;
bool finded = CheckPathCurrent(true, pathParts, isFile);
if (pathParts.Size() <= 1)
return finded;
int index = FindSubNode(pathParts.Front());
if (index >= 0)
if (pathParts.Size() > 1)
{
UStringVector pathParts2 = pathParts;
pathParts2.Delete(0);
if (SubNodes[index].CheckPathVect(pathParts2, isFile, include))
return true;
int index = FindSubNode(pathParts.Front());
if (index >= 0)
{
UStringVector pathParts2 = pathParts;
pathParts2.Delete(0);
if (SubNodes[(unsigned)index].CheckPathVect(pathParts2, isFile, include))
return true;
}
}
bool finded = CheckPathCurrent(true, pathParts, isFile);
include = finded; // if (!finded), then (true) is allowed also
return finded;
}
@@ -394,14 +408,26 @@ bool CCensorNode::CheckPath(bool isAltStream, const UString &path, bool isFile)
}
*/
bool CCensorNode::CheckPathToRoot(bool include, UStringVector &pathParts, bool isFile) const
bool CCensorNode::CheckPathToRoot_Change(bool include, UStringVector &pathParts, bool isFile) const
{
if (CheckPathCurrent(include, pathParts, isFile))
return true;
if (Parent == 0)
if (!Parent)
return false;
pathParts.Insert(0, Name);
return Parent->CheckPathToRoot(include, pathParts, isFile);
return Parent->CheckPathToRoot_Change(include, pathParts, isFile);
}
bool CCensorNode::CheckPathToRoot(bool include, const UStringVector &pathParts, bool isFile) const
{
if (CheckPathCurrent(include, pathParts, isFile))
return true;
if (!Parent)
return false;
UStringVector pathParts2;
pathParts2.Add(Name);
pathParts2 += pathParts;
return Parent->CheckPathToRoot_Change(include, pathParts2, isFile);
}
/*
@@ -434,18 +460,15 @@ void CCensorNode::ExtendExclude(const CCensorNode &fromNodes)
FOR_VECTOR (i, fromNodes.SubNodes)
{
const CCensorNode &node = fromNodes.SubNodes[i];
int subNodeIndex = FindSubNode(node.Name);
if (subNodeIndex < 0)
subNodeIndex = SubNodes.Add(CCensorNode(node.Name, this));
SubNodes[subNodeIndex].ExtendExclude(node);
Find_SubNode_Or_Add_New(node.Name).ExtendExclude(node);
}
}
int CCensor::FindPrefix(const UString &prefix) const
int CCensor::FindPairForPrefix(const UString &prefix) const
{
FOR_VECTOR (i, Pairs)
if (CompareFileNames(Pairs[i].Prefix, prefix) == 0)
return i;
return (int)i;
return -1;
}
@@ -454,7 +477,11 @@ int CCensor::FindPrefix(const UString &prefix) const
bool IsDriveColonName(const wchar_t *s)
{
wchar_t c = s[0];
return c != 0 && s[1] == ':' && s[2] == 0 && (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z');
return c != 0
&& s[1] == ':'
&& s[2] == 0
&& ((c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z'));
}
unsigned GetNumPrefixParts_if_DrivePath(UStringVector &pathParts)
@@ -571,14 +598,16 @@ void CCensor::AddItem(ECensorPathMode pathMode, bool include, const UString &pat
{
const UString &part = pathParts[i];
if (part == L".." || part == L".")
dotsIndex = i;
dotsIndex = (int)i;
}
if (dotsIndex >= 0)
{
if (dotsIndex == (int)pathParts.Size() - 1)
numSkipParts = pathParts.Size();
else
numSkipParts = pathParts.Size() - 1;
}
}
for (unsigned i = 0; i < numSkipParts; i++)
@@ -596,13 +625,16 @@ void CCensor::AddItem(ECensorPathMode pathMode, bool include, const UString &pat
}
}
int index = FindPrefix(prefix);
int index = FindPairForPrefix(prefix);
if (index < 0)
index = Pairs.Add(CPair(prefix));
{
index = (int)Pairs.Size();
Pairs.AddNew().Prefix = prefix;
}
if (pathMode != k_AbsPath)
{
if (pathParts.IsEmpty() || pathParts.Size() == 1 && pathParts[0].IsEmpty())
if (pathParts.IsEmpty() || (pathParts.Size() == 1 && pathParts[0].IsEmpty()))
{
// we create universal item, if we skip all parts as prefix (like \ or L:\ )
pathParts.Clear();
@@ -619,7 +651,7 @@ void CCensor::AddItem(ECensorPathMode pathMode, bool include, const UString &pat
item.ForFile = forFile;
item.Recursive = recursive;
item.WildcardMatching = wildcardMatching;
Pairs[index].Head.AddItem(include, item, ignoreWildcardIndex);
Pairs[(unsigned)index].Head.AddItem(include, item, ignoreWildcardIndex);
}
/*

View File

@@ -51,23 +51,34 @@ struct CItem
bool CheckPath(const UStringVector &pathParts, bool isFile) const;
};
class CCensorNode
class CCensorNode MY_UNCOPYABLE
{
CCensorNode *Parent;
bool CheckPathCurrent(bool include, const UStringVector &pathParts, bool isFile) const;
void AddItemSimple(bool include, CItem &item);
public:
bool CheckPathVect(const UStringVector &pathParts, bool isFile, bool &include) const;
CCensorNode(): Parent(0) { };
CCensorNode(const UString &name, CCensorNode *parent): Name(name), Parent(parent) { };
CCensorNode(): Parent(NULL) { };
CCensorNode(const UString &name, CCensorNode *parent): Parent(parent), Name(name) { };
UString Name; // WIN32 doesn't support wildcards in file names
CObjectVector<CCensorNode> SubNodes;
CObjectVector<CItem> IncludeItems;
CObjectVector<CItem> ExcludeItems;
// Returns the sub-node that FindSubNode() finds for (name).
// If there is none, appends a new sub-node with Parent = this and
// Name = name, and returns a reference to it.
CCensorNode &Find_SubNode_Or_Add_New(const UString &name)
{
  const int index = FindSubNode(name);
  if (index < 0)
  {
    // return SubNodes.Add(CCensorNode(name, this));
    CCensorNode &added = SubNodes.AddNew();
    added.Parent = this;
    added.Name = name;
    return added;
  }
  return SubNodes[(unsigned)index];
}
bool AreAllAllowed() const;
int FindSubNode(const UString &path) const;
@@ -76,25 +87,41 @@ public:
void AddItem(bool include, const UString &path, bool recursive, bool forFile, bool forDir, bool wildcardMatching);
void AddItem2(bool include, const UString &path, bool recursive, bool wildcardMatching);
// NeedCheckSubDirs() returns true, if there are IncludeItems rules that affect items in subdirs
bool NeedCheckSubDirs() const;
bool AreThereIncludeItems() const;
/*
CheckPathVect() doesn't check path in Parent CCensorNode
so use CheckPathVect() for root CCensorNode
OUT:
returns (true) && (include = false) - file in exclude list
returns (true) && (include = true) - file in include list and is not in exclude list
returns (false) - file is not in (include/exclude) list
*/
bool CheckPathVect(const UStringVector &pathParts, bool isFile, bool &include) const;
// bool CheckPath2(bool isAltStream, const UString &path, bool isFile, bool &include) const;
// bool CheckPath(bool isAltStream, const UString &path, bool isFile) const;
bool CheckPathToRoot(bool include, UStringVector &pathParts, bool isFile) const;
// CheckPathToRoot_Change() changes pathParts !!!
bool CheckPathToRoot_Change(bool include, UStringVector &pathParts, bool isFile) const;
bool CheckPathToRoot(bool include, const UStringVector &pathParts, bool isFile) const;
// bool CheckPathToRoot(const UString &path, bool isFile, bool include) const;
void ExtendExclude(const CCensorNode &fromNodes);
};
struct CPair
struct CPair MY_UNCOPYABLE
{
UString Prefix;
CCensorNode Head;
CPair(const UString &prefix): Prefix(prefix) { };
// CPair(const UString &prefix): Prefix(prefix) { };
};
enum ECensorPathMode
{
k_RelatPath, // absolute prefix as Prefix, remain path in Tree
@@ -102,6 +129,7 @@ enum ECensorPathMode
k_AbsPath // full path in Tree
};
struct CCensorPath
{
UString Path;
@@ -116,9 +144,10 @@ struct CCensorPath
{}
};
class CCensor
class CCensor MY_UNCOPYABLE
{
int FindPrefix(const UString &prefix) const;
int FindPairForPrefix(const UString &prefix) const;
public:
CObjectVector<CPair> Pairs;
@@ -143,7 +172,6 @@ public:
}
};
}
#endif