1
0

filestr.cpp 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. #include "rar.hpp"
  2. bool ReadTextFile(
  3. const wchar *Name,
  4. StringList *List,
  5. bool Config,
  6. bool AbortOnError,
  7. RAR_CHARSET SrcCharset,
  8. bool Unquote,
  9. bool SkipComments,
  10. bool ExpandEnvStr)
  11. {
  12. wchar FileName[NM];
  13. *FileName=0;
  14. if (Name!=NULL)
  15. if (Config)
  16. GetConfigName(Name,FileName,ASIZE(FileName),true,false);
  17. else
  18. wcsncpyz(FileName,Name,ASIZE(FileName));
  19. File SrcFile;
  20. if (*FileName!=0)
  21. {
  22. bool OpenCode=AbortOnError ? SrcFile.WOpen(FileName):SrcFile.Open(FileName,0);
  23. if (!OpenCode)
  24. {
  25. if (AbortOnError)
  26. ErrHandler.Exit(RARX_OPEN);
  27. return false;
  28. }
  29. }
  30. else
  31. SrcFile.SetHandleType(FILE_HANDLESTD);
  32. uint DataSize=0,ReadSize;
  33. const int ReadBlock=4096;
  34. Array<byte> Data(ReadBlock);
  35. while ((ReadSize=SrcFile.Read(&Data[DataSize],ReadBlock))!=0)
  36. {
  37. DataSize+=ReadSize;
  38. Data.Add(ReadSize); // Always have ReadBlock available for next data.
  39. }
  40. // Set to really read size, so we can zero terminate it correctly.
  41. Data.Alloc(DataSize);
  42. int LittleEndian=DataSize>=2 && Data[0]==255 && Data[1]==254 ? 1:0;
  43. int BigEndian=DataSize>=2 && Data[0]==254 && Data[1]==255 ? 1:0;
  44. bool Utf8=DataSize>=3 && Data[0]==0xef && Data[1]==0xbb && Data[2]==0xbf;
  45. if (SrcCharset==RCH_DEFAULT)
  46. SrcCharset=DetectTextEncoding(&Data[0],DataSize);
  47. Array<wchar> DataW;
  48. if (SrcCharset==RCH_DEFAULT || SrcCharset==RCH_OEM || SrcCharset==RCH_ANSI)
  49. {
  50. Data.Push(0); // Zero terminate.
  51. #if defined(_WIN_ALL)
  52. if (SrcCharset==RCH_OEM)
  53. OemToCharA((char *)&Data[0],(char *)&Data[0]);
  54. #endif
  55. DataW.Alloc(Data.Size());
  56. CharToWide((char *)&Data[0],&DataW[0],DataW.Size());
  57. }
  58. if (SrcCharset==RCH_UNICODE)
  59. {
  60. size_t Start=2; // Skip byte order mark.
  61. if (!LittleEndian && !BigEndian) // No byte order mask.
  62. {
  63. Start=0;
  64. LittleEndian=1;
  65. }
  66. DataW.Alloc(Data.Size()/2+1);
  67. size_t End=Data.Size() & ~1; // We need even bytes number for UTF-16.
  68. for (size_t I=Start;I<End;I+=2)
  69. DataW[(I-Start)/2]=Data[I+BigEndian]+Data[I+LittleEndian]*256;
  70. DataW[(End-Start)/2]=0;
  71. }
  72. if (SrcCharset==RCH_UTF8)
  73. {
  74. Data.Push(0); // Zero terminate data.
  75. DataW.Alloc(Data.Size());
  76. UtfToWide((const char *)(Data+(Utf8 ? 3:0)),&DataW[0],DataW.Size());
  77. }
  78. wchar *CurStr=&DataW[0];
  79. while (*CurStr!=0)
  80. {
  81. wchar *NextStr=CurStr,*CmtPtr=NULL;
  82. while (*NextStr!='\r' && *NextStr!='\n' && *NextStr!=0)
  83. {
  84. if (SkipComments && NextStr[0]=='/' && NextStr[1]=='/')
  85. {
  86. *NextStr=0;
  87. CmtPtr=NextStr;
  88. }
  89. NextStr++;
  90. }
  91. bool Done=*NextStr==0;
  92. *NextStr=0;
  93. for (wchar *SpacePtr=(CmtPtr!=NULL ? CmtPtr:NextStr)-1;SpacePtr>=CurStr;SpacePtr--)
  94. {
  95. if (*SpacePtr!=' ' && *SpacePtr!='\t')
  96. break;
  97. *SpacePtr=0;
  98. }
  99. if (Unquote && *CurStr=='\"')
  100. {
  101. size_t Length=wcslen(CurStr);
  102. if (CurStr[Length-1]=='\"')
  103. {
  104. CurStr[Length-1]=0;
  105. CurStr++;
  106. }
  107. }
  108. bool Expanded=false;
  109. #if defined(_WIN_ALL)
  110. if (ExpandEnvStr && *CurStr=='%') // Expand environment variables in Windows.
  111. {
  112. wchar ExpName[NM];
  113. *ExpName=0;
  114. DWORD Result=ExpandEnvironmentStrings(CurStr,ExpName,ASIZE(ExpName));
  115. Expanded=Result!=0 && Result<ASIZE(ExpName);
  116. if (Expanded && *ExpName!=0)
  117. List->AddString(ExpName);
  118. }
  119. #endif
  120. if (!Expanded && *CurStr!=0)
  121. List->AddString(CurStr);
  122. if (Done)
  123. break;
  124. CurStr=NextStr+1;
  125. while (*CurStr=='\r' || *CurStr=='\n')
  126. CurStr++;
  127. }
  128. return true;
  129. }
  130. RAR_CHARSET DetectTextEncoding(const byte *Data,size_t DataSize)
  131. {
  132. if (DataSize>3 && Data[0]==0xef && Data[1]==0xbb && Data[2]==0xbf &&
  133. IsTextUtf8(Data+3,DataSize-3))
  134. return RCH_UTF8;
  135. bool LittleEndian=DataSize>2 && Data[0]==255 && Data[1]==254;
  136. bool BigEndian=DataSize>2 && Data[0]==254 && Data[1]==255;
  137. if (LittleEndian || BigEndian)
  138. for (size_t I=LittleEndian ? 3 : 2;I<DataSize;I+=2)
  139. if (Data[I]<32 && Data[I]!='\r' && Data[I]!='\n')
  140. return RCH_UNICODE; // High byte in UTF-16 char is found.
  141. return RCH_DEFAULT;
  142. }