mirror of
https://github.com/Xevion/easy7zip.git
synced 2025-12-06 17:15:00 -06:00
Update to 7-Zip Version 21.02
This commit is contained in:
@@ -5,9 +5,18 @@
|
||||
#include "StringConvert.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
// #include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_WIN32) || defined(ENV_HAVE_LOCALE)
|
||||
#include "UTFConvert.h"
|
||||
#endif
|
||||
|
||||
#ifdef ENV_HAVE_LOCALE
|
||||
#include <locale.h>
|
||||
#endif
|
||||
|
||||
static const char k_DefultChar = '_';
|
||||
|
||||
#ifdef _WIN32
|
||||
@@ -71,7 +80,7 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
|
||||
d[i] = 0;
|
||||
dest.ReleaseBuf_SetLen(i);
|
||||
*/
|
||||
unsigned len = MultiByteToWideChar(codePage, 0, src, src.Len(), NULL, 0);
|
||||
unsigned len = (unsigned)MultiByteToWideChar(codePage, 0, src, (int)src.Len(), NULL, 0);
|
||||
if (len == 0)
|
||||
{
|
||||
if (GetLastError() != 0)
|
||||
@@ -79,7 +88,7 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
|
||||
}
|
||||
else
|
||||
{
|
||||
len = MultiByteToWideChar(codePage, 0, src, src.Len(), dest.GetBuf(len), len);
|
||||
len = (unsigned)MultiByteToWideChar(codePage, 0, src, (int)src.Len(), dest.GetBuf(len), (int)len);
|
||||
if (len == 0)
|
||||
throw 282228;
|
||||
dest.ReleaseBuf_SetEnd(len);
|
||||
@@ -175,7 +184,7 @@ static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT co
|
||||
}
|
||||
*/
|
||||
|
||||
unsigned len = WideCharToMultiByte(codePage, 0, src, src.Len(), NULL, 0, NULL, NULL);
|
||||
unsigned len = (unsigned)WideCharToMultiByte(codePage, 0, src, (int)src.Len(), NULL, 0, NULL, NULL);
|
||||
if (len == 0)
|
||||
{
|
||||
if (GetLastError() != 0)
|
||||
@@ -186,8 +195,8 @@ static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT co
|
||||
BOOL defUsed = FALSE;
|
||||
bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
|
||||
// defaultChar = defaultChar;
|
||||
len = WideCharToMultiByte(codePage, 0, src, src.Len(),
|
||||
dest.GetBuf(len), len,
|
||||
len = (unsigned)WideCharToMultiByte(codePage, 0, src, (int)src.Len(),
|
||||
dest.GetBuf(len), (int)len,
|
||||
(isUtf ? NULL : &defaultChar),
|
||||
(isUtf ? NULL : &defUsed)
|
||||
);
|
||||
@@ -213,23 +222,137 @@ AString SystemStringToOemString(const CSysString &src)
|
||||
#endif
|
||||
*/
|
||||
|
||||
#else
|
||||
#else // _WIN32
|
||||
|
||||
void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePage */)
|
||||
// #include <stdio.h>
|
||||
/*
|
||||
if (wchar_t is 32-bit (#if WCHAR_MAX > 0xffff),
|
||||
and utf-8 string contains big unicode character > 0xffff),
|
||||
then we still use 16-bit surrogate pair in UString.
|
||||
It simplifies another code where utf-16 encoding is used.
|
||||
So we use surrogate-conversion code only in this file.
|
||||
*/
|
||||
|
||||
/*
|
||||
mbstowcs() returns error if there is error in utf-8 stream,
|
||||
mbstowcs() returns error if there is a single surrogate code point (d800-dfff) in utf-8 stream
|
||||
*/
|
||||
|
||||
/*
|
||||
static void MultiByteToUnicodeString2_Native(UString &dest, const AString &src)
|
||||
{
|
||||
dest.Empty();
|
||||
if (src.IsEmpty())
|
||||
return;
|
||||
|
||||
size_t limit = ((size_t)src.Len() + 1) * 2;
|
||||
const size_t limit = ((size_t)src.Len() + 1) * 2;
|
||||
wchar_t *d = dest.GetBuf((unsigned)limit);
|
||||
size_t len = mbstowcs(d, src, limit);
|
||||
const size_t len = mbstowcs(d, src, limit);
|
||||
if (len != (size_t)-1)
|
||||
{
|
||||
dest.ReleaseBuf_SetEnd((unsigned)len);
|
||||
return;
|
||||
}
|
||||
dest.ReleaseBuf_SetEnd(0);
|
||||
}
|
||||
*/
|
||||
|
||||
bool g_ForceToUTF8 = true; // false;
|
||||
|
||||
void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
|
||||
{
|
||||
dest.Empty();
|
||||
if (src.IsEmpty())
|
||||
return;
|
||||
|
||||
if (codePage == CP_UTF8 || g_ForceToUTF8)
|
||||
{
|
||||
ConvertUTF8ToUnicode(src, dest);
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t limit = ((size_t)src.Len() + 1) * 2;
|
||||
wchar_t *d = dest.GetBuf((unsigned)limit);
|
||||
const size_t len = mbstowcs(d, src, limit);
|
||||
if (len != (size_t)-1)
|
||||
{
|
||||
dest.ReleaseBuf_SetEnd((unsigned)len);
|
||||
|
||||
#if WCHAR_MAX > 0xffff
|
||||
d = dest.GetBuf();
|
||||
for (size_t i = 0;; i++)
|
||||
{
|
||||
// wchar_t c = dest[i];
|
||||
wchar_t c = d[i];
|
||||
if (c == 0)
|
||||
break;
|
||||
if (c >= 0x10000 && c < 0x110000)
|
||||
{
|
||||
/*
|
||||
c -= 0x10000;
|
||||
unsigned c0 = 0xd800 + ((c >> 10) & 0x3FF);
|
||||
dest.ReplaceOneCharAtPos(i, c0);
|
||||
i++;
|
||||
c = 0xdc00 + (c & 0x3FF);
|
||||
dest.Insert_wchar_t(i, c);
|
||||
*/
|
||||
UString temp = d + i;
|
||||
|
||||
for (size_t t = 0;; t++)
|
||||
{
|
||||
wchar_t w = temp[t];
|
||||
if (w == 0)
|
||||
break;
|
||||
if (i == limit)
|
||||
break; // unexpected error
|
||||
if (w >= 0x10000 && w < 0x110000)
|
||||
{
|
||||
if (i + 1 == limit)
|
||||
break; // unexpected error
|
||||
w -= 0x10000;
|
||||
d[i++] = (unsigned)0xd800 + (((unsigned)w >> 10) & 0x3FF);
|
||||
w = 0xdc00 + (w & 0x3FF);
|
||||
}
|
||||
d[i++] = w;
|
||||
}
|
||||
dest.ReleaseBuf_SetEnd((unsigned)i);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
printf("\nMultiByteToUnicodeString2 (%d) %s\n", (int)src.Len(), src.Ptr());
|
||||
printf("char: ");
|
||||
for (unsigned i = 0; i < src.Len(); i++)
|
||||
printf (" %02x", (int)(Byte)src[i]);
|
||||
printf("\n");
|
||||
printf("\n-> (%d) %ls\n", (int)dest.Len(), dest.Ptr());
|
||||
printf("wchar_t: ");
|
||||
for (unsigned i = 0; i < dest.Len(); i++)
|
||||
{
|
||||
printf (" %02x", (int)dest[i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* if there is mbstowcs() error, we have two ways:
|
||||
|
||||
1) change 0x80+ characters to some character: '_'
|
||||
in that case we lose data, but we have correct UString()
|
||||
and that scheme can show errors to user in early stages,
|
||||
when file converted back to mbs() cannot be found
|
||||
|
||||
2) transfer bad characters in some UTF-16 range.
|
||||
it can be non-original Unicode character.
|
||||
but later we still can restore original character.
|
||||
*/
|
||||
|
||||
|
||||
// printf("\nmbstowcs ERROR !!!!!! s=%s\n", src.Ptr());
|
||||
{
|
||||
unsigned i;
|
||||
const char *s = (const char *)src;
|
||||
@@ -238,6 +361,8 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePa
|
||||
Byte c = (Byte)s[i];
|
||||
if (c == 0)
|
||||
break;
|
||||
// we can use ascii compatibility character '_'
|
||||
// if (c > 0x7F) c = '_'; // we replace "bad" character
|
||||
d[i++] = (wchar_t)c;
|
||||
}
|
||||
d[i] = 0;
|
||||
@@ -245,43 +370,131 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePa
|
||||
}
|
||||
}
|
||||
|
||||
static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT /* codePage */, char defaultChar, bool &defaultCharWasUsed)
|
||||
/*
  UnicodeStringToMultiByte2_Native()
  Converts (src) to a multibyte string with the C library wcstombs(),
  i.e. according to the current locale of the program.
    dest - receives the converted string; it is left empty if (src) is
           empty or if wcstombs() reports a conversion error.
    src  - wide string to convert.
*/
static void UnicodeStringToMultiByte2_Native(AString &dest, const UString &src)
{
  dest.Empty();
  if (!src.IsEmpty())
  {
    // 6 bytes are reserved per wide character (including the terminator);
    // presumably an upper bound for any multibyte encoding - TODO confirm
    const size_t capacity = ((size_t)src.Len() + 1) * 6;
    char *buf = dest.GetBuf((unsigned)capacity);
    const size_t written = wcstombs(buf, src, capacity);
    // (size_t)-1 is the wcstombs() error result: keep (dest) empty in that case
    dest.ReleaseBuf_SetEnd(written == (size_t)-1 ? 0 : (unsigned)written);
  }
}
|
||||
|
||||
|
||||
/*
  UnicodeStringToMultiByte2() : non-Windows version
  Converts the wide string (src2) to a multibyte string in (dest).

    dest               - receives the result; previous contents are discarded.
    src2               - source string. If wchar_t is 32-bit, characters above
                         0xffff may be stored as 16-bit surrogate pairs.
    codePage           - CP_UTF8 selects the built-in UTF-8 encoder.
                         Any other value uses wcstombs() (current locale),
                         unless g_ForceToUTF8 is set.
    defaultChar        - replacement for characters >= 0x100 in fallback path.
    defaultCharWasUsed - set to true only if the fallback path replaced at
                         least one character with (defaultChar); else false.

  Order of attempts:
    1) built-in UTF-8 encoder (CP_UTF8 or g_ForceToUTF8),
    2) wcstombs(),
    3) if wcstombs() fails: one char per wide character, where characters
       >= 0x100 are replaced with (defaultChar).
*/
static void UnicodeStringToMultiByte2(AString &dest, const UString &src2, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
{
  if (codePage == CP_UTF8 || g_ForceToUTF8)
  {
    defaultCharWasUsed = false;
    ConvertUnicodeToUTF8(src2, dest);
    return;
  }

  UString src = src2;
  #if WCHAR_MAX > 0xffff
  {
    // wchar_t is 32-bit here: merge each surrogate pair of (src2) into one
    // wchar_t value before the string is passed to wcstombs().
    src.Empty();
    for (unsigned i = 0; i < src2.Len();)
    {
      wchar_t c = src2[i];
      if (c >= 0xd800 && c < 0xdc00 && i + 1 != src2.Len())
      {
        const wchar_t c2 = src2[i + 1];
        // low surrogates are dc00-dfff only. Characters e000-ffff are
        // ordinary BMP characters and must not be merged with (c).
        if (c2 >= 0xdc00 && c2 < 0xe000)
        {
          c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
          i++;
        }
      }
      src += c;
      i++;
    }
  }
  #endif

  dest.Empty();
  defaultCharWasUsed = false;
  if (src.IsEmpty())
    return;

  // first call only measures the required size (without terminating zero)
  const size_t len = wcstombs(NULL, src, 0);

  if (len != (size_t)-1)
  {
    const unsigned limit = ((unsigned)len);
    if (limit == len) // the size must fit in (unsigned)
    {
      char *d = dest.GetBuf(limit);
      const size_t len2 = wcstombs(d, src, len + 1);
      if (len2 != (size_t)-1 && len2 <= limit)
      {
        dest.ReleaseBuf_SetEnd((unsigned)len2);
        return;
      }
    }
  }

  // wcstombs() error: fallback conversion, one char per wide character
  {
    const wchar_t *s = (const wchar_t *)src;
    char *d = dest.GetBuf(src.Len());

    unsigned i;
    for (i = 0;;)
    {
      wchar_t c = s[i];
      if (c == 0)
        break;
      if (c >= 0x100)
      {
        c = defaultChar;
        defaultCharWasUsed = true;
      }
      d[i++] = (char)c;
    }
    d[i] = 0;
    dest.ReleaseBuf_SetLen(i);
  }
}
|
||||
|
||||
#endif
|
||||
#endif // _WIN32
|
||||
|
||||
|
||||
UString MultiByteToUnicodeString(const AString &src, UINT codePage)
|
||||
@@ -317,3 +530,228 @@ AString UnicodeStringToMultiByte(const UString &src, UINT codePage)
|
||||
UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
|
||||
return dest;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
  U_to_A(a, b, c) : converts UString (b) to AString (a) with the native
  converter of the platform.
    _WIN32 : UnicodeStringToMultiByte2() is called for code page (c);
             k_DefultChar is passed as the default character.
    other  : (c) is ignored; UnicodeStringToMultiByte2_Native() is called.
*/
#ifdef _WIN32
// The macro must forward its arguments. If it expands to the function name
// only, then U_to_A(a, b, c); is a statement without effect and (a) is
// never written.
#define U_to_A(a, b, c) \
  do { \
    bool defaultCharWasUsed_; \
    UnicodeStringToMultiByte2(a, b, c, k_DefultChar, defaultCharWasUsed_); \
  } while (0)
// #define A_to_U(a, b, c) MultiByteToUnicodeString2
#else
// void MultiByteToUnicodeString2_Native(UString &dest, const AString &src);
#define U_to_A(a, b, c) UnicodeStringToMultiByte2_Native(a, b)
// #define A_to_U(a, b, c) MultiByteToUnicodeString2_Native(a, b)
#endif
|
||||
|
||||
#if !defined(_WIN32) || defined(ENV_HAVE_LOCALE)
|
||||
|
||||
bool IsNativeUTF8()
|
||||
{
|
||||
UString u;
|
||||
AString a, a2;
|
||||
// for (unsigned c = 0x80; c < (UInt32)0x10000; c += (c >> 9) + 1)
|
||||
for (unsigned c = 0x80; c < (UInt32)0xD000; c += (c >> 2) + 1)
|
||||
{
|
||||
u.Empty();
|
||||
u += (wchar_t)c;
|
||||
/*
|
||||
if (Unicode_Is_There_Utf16SurrogateError(u))
|
||||
continue;
|
||||
#ifndef _WIN32
|
||||
if (Unicode_Is_There_BmpEscape(u))
|
||||
continue;
|
||||
#endif
|
||||
*/
|
||||
ConvertUnicodeToUTF8(u, a);
|
||||
U_to_A(a2, u, CP_OEMCP);
|
||||
if (a != a2)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef ENV_HAVE_LOCALE
|
||||
|
||||
/*
  GetLocale()
  Returns the name of the current LC_CTYPE locale of the program.
    ENV_HAVE_LOCALE : the name reported by setlocale(), or "C" if
                      setlocale() returns NULL.
    LOCALE_IS_UTF8  : always "utf8".
    otherwise       : always "C".
*/
const char *GetLocale(void)
{
  #ifdef ENV_HAVE_LOCALE
    // NULL as locale argument: the locale is only queried, not changed.
    // ubuntu returns "C" after program start
    const char *name = setlocale(LC_CTYPE, NULL);
    return name ? name : "C";
  #elif defined(LOCALE_IS_UTF8)
    return "utf8";
  #else
    return "C";
  #endif
}
|
||||
|
||||
#ifdef _WIN32
|
||||
static void Set_ForceToUTF8(bool) {}
|
||||
#else
|
||||
static void Set_ForceToUTF8(bool val) { g_ForceToUTF8 = val; }
|
||||
#endif
|
||||
|
||||
static bool Is_Default_Basic_Locale(const char *locale)
|
||||
{
|
||||
const AString a (locale);
|
||||
if (a.IsEqualTo_Ascii_NoCase("")
|
||||
|| a.IsEqualTo_Ascii_NoCase("C")
|
||||
|| a.IsEqualTo_Ascii_NoCase("POSIX"))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool Is_Default_Basic_Locale()
|
||||
{
|
||||
return Is_Default_Basic_Locale(GetLocale());
|
||||
}
|
||||
|
||||
|
||||
/*
  MY_SetLocale()
  Selects the locale of the program and decides via Set_ForceToUTF8(),
  whether the converters of this file must always use the built-in UTF-8
  code instead of the C library functions.

  Up to 3 calls of setlocale(LC_ALL, ...) are made:
    pass 0 : ""  : locale from the environment
    pass 1 : explicit UTF-8 locale: "C.UTF-8" ("en_US.UTF-8" for __APPLE__)
    pass 2 : ""  : locale from the environment again
  After each call:
    - if IsNativeUTF8() is true, UTF-8 is forced and the function returns.
    - if the current locale is not basic (""/"C"/"POSIX"),
      that locale is kept and no more passes are made.
  After the passes: UTF-8 is forced, if IsNativeUTF8() is true or if the
  current locale is basic. Otherwise the native conversion will be used.

  If ENV_HAVE_LOCALE is not defined, the function does nothing.
*/
void MY_SetLocale()
{
  #ifdef ENV_HAVE_LOCALE

  const unsigned start = 0;
  const unsigned lim = 3;

  for (unsigned i = start; i < lim; i++)
  {
    /*
      man7: "If locale is an empty string, "", each part of the locale that
      should be modified is set according to the environment variables."
      for glibc: first from the user's environment variables:
        1) the environment variable LC_ALL,
        2) environment variable with the same name as the category,
        3) the environment variable LANG
      The locale "C" or "POSIX" is a portable locale; it exists on all conforming systems.

      for WIN32 : MSDN :
        Sets the locale to the default, which is the user-default
        ANSI code page obtained from the operating system.
        The locale name is set to the value returned by GetUserDefaultLocaleName.
        The code page is set to the value returned by GetACP
    */
    const char *newLocale = "";

    #ifdef __APPLE__

    /* look also CFLocale
       there is no C.UTF-8 in macos
       macos has UTF-8 locale only with some language like en_US.UTF-8
       what is best way to set UTF-8 locale in macos? */
    if (i == 1)
      newLocale = "en_US.UTF-8";

    /* file open with non-utf8 sequences returns
       #define EILSEQ 92 // "Illegal byte sequence"
    */
    #else

    if (i == 1)
    {
      newLocale = "C.UTF-8"; // main UTF-8 locale in ubuntu
      // newLocale = ".utf8"; // supported in new Windows 10 build 17134 (April 2018 Update), the Universal C Runtime
      /* setlocale() in ubuntu allows locales with minor character changes in strings
         "en_US.UTF-8" / "en_US.utf8" */
    }

    #endif

    // the result of setlocale() is not checked here:
    // the locale that is really active is requested with GetLocale() below
    setlocale(LC_ALL, newLocale);

    // request current locale of program
    const char *locale = GetLocale();
    if (locale)
    {
      if (IsNativeUTF8())
      {
        Set_ForceToUTF8(true);
        return;
      }
      if (!Is_Default_Basic_Locale(locale))
      {
        // if there is some non-default and non-utf locale, we want to use it
        break; // comment it for debug
      }
    }
  }

  if (IsNativeUTF8() || Is_Default_Basic_Locale())
  {
    Set_ForceToUTF8(true);
    return;
  }

  Set_ForceToUTF8(false);

  #elif defined(LOCALE_IS_UTF8)
    // assume LC_CTYPE="utf8"
  #else
    // assume LC_CTYPE="C"
  #endif
}
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user