textsubfile.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. #include "subfiles/textsubfile.h"
  2. #include "EasyLogging++/easylogging++.h"
  3. #include <codecvt>
  // Renders a number as decimal UTF-16 text.
  //
  // x      - value to convert.
  // return - decimal digits of x, most significant first; u"0" for zero.
  //
  // Negative values produce an empty string: the exported text format uses
  // '-' as the separator between argument references, so a sign character
  // could not be told apart from a separator.
  std::u16string to_utf16(long long x) {
      // Without this case the digit loop never runs and zero would come out
      // as an empty string, which the importer treats as "no value".
      if (x == 0)
          return u"0";

      std::u16string res;
      // Digits are produced least significant first...
      while (x > 0) {
          res += char16_t(u'0' + x % 10);
          x /= 10ll;
      }
      // ...so flip them into reading order.
      std::reverse(res.begin(), res.end());
      return res;
  }
  // Parses a run of decimal digits stored as UTF-16 text.
  //
  // num    - text to parse; every code unit is taken as a digit relative to u'0'
  //          (no validation is performed).
  // return - the accumulated value, 0 for an empty string.
  long long from_utf16(const std::u16string &num) {
      long long value = 0;
      for (auto it = num.cbegin(); it != num.cend(); ++it) {
          const auto digit = *it - u'0';
          value = value * 10ll + digit;
      }
      return value;
  }
  20. std::string argumentsFromUtf16(const std::u16string &args) {
  21. std::string res;
  22. size_t pointer = 0;
  23. while (pointer < args.length()) {
  24. size_t pointer1 = args.find(u'-', pointer);
  25. if (pointer1 == std::u16string::npos)
  26. pointer1 = args.length();
  27. if (!res.empty())
  28. res += "-";
  29. res += std::to_string(from_utf16(args.substr(pointer, pointer1 - pointer)));
  30. pointer = pointer1 + 1;
  31. }
  32. return res;
  33. }
  34. namespace LOTRO_DAT {
  35. namespace Subfiles {
  36. SubfileData Subfile<TEXT>::BuildForExport(const BinaryData &file_data) {
  37. SubfileData result;
  38. long long offset = 9; // first 4 bytes - file_id, then 4 bytes - unknown, then 1 byte - unknown
  39. auto text_fragment_num = unsigned(file_data.ToNumber<1>(offset));
  40. if ((text_fragment_num & 0x80u) != 0) {
  41. text_fragment_num = (((text_fragment_num ^ 0x80u) << 8u) | unsigned(file_data.ToNumber<1>(offset + 1)));
  42. offset += 1;
  43. }
  44. offset += 1;
  45. for (long long i = 0; i < text_fragment_num; i++) {
  46. long long fragment_id = file_data.ToNumber<8>(offset);
  47. offset += 8;
  48. // Making pieces
  49. auto pieces = MakePieces(file_data, offset);
  50. std::u16string text = u"[";
  51. for (size_t j = 0; j + 1 < pieces.size(); j++)
  52. text += pieces[j] + u"<--DO_NOT_TOUCH!-->";
  53. text += pieces[pieces.size() - 1] + u"]";
  54. // Making argument references
  55. auto arg_refs = MakeArgumentReferences(file_data, offset);
  56. std::u16string arguments;
  57. for (size_t j = 0; j + 1 < arg_refs.size(); j++)
  58. arguments += to_utf16(arg_refs[j]) + u"-";
  59. if (!arg_refs.empty())
  60. arguments += to_utf16(arg_refs[arg_refs.size() - 1]);
  61. // Through argument strings are not used, we need to call this function to correctly move offset
  62. MakeArgumentStrings(file_data, offset);
  63. if (result.text_data.length() > 0)
  64. result.text_data += u"|||";
  65. result.text_data += to_utf16(fragment_id) + u":::";
  66. result.text_data += arguments + u":::";
  67. result.text_data += text;
  68. }
  69. result.options["ext"] = ".txt";
  70. return result;
  71. }
  72. BinaryData Subfile<TEXT>::BuildForImport(const BinaryData &old_data, const SubfileData &data) {
  73. BinaryData new_file_data;
  74. long long offset = 9; // first 8 bytes - file_info. After them:
  75. // first 4 bytes - file_id, then 4 bytes - unknown, then 1 byte - unknown
  76. auto text_fragment_num = unsigned(old_data.ToNumber<1>(offset));
  77. if ((text_fragment_num & 0x80u) != 0) {
  78. text_fragment_num = (((text_fragment_num ^ 0x80u) << 8u) | unsigned(old_data.ToNumber<1>(offset + 1)));
  79. offset += 1;
  80. }
  81. offset += 1;
  82. new_file_data = new_file_data + old_data.CutData(0, offset); // Adding file info
  83. auto patch_fragments = ParsePatchFragments(data);
  84. for (long long i = 0; i < text_fragment_num; i++) {
  85. long long fragment_id = old_data.ToNumber<8>(offset);
  86. offset += 8;
  87. new_file_data = new_file_data + BinaryData::FromNumber<8>(fragment_id);
  88. TextFragment id_comp;
  89. id_comp.fragment_id = fragment_id;
  90. auto fragment_iterator = std::lower_bound(patch_fragments.begin(), patch_fragments.end(), id_comp);
  91. if (fragment_iterator == patch_fragments.end() || fragment_iterator->fragment_id != id_comp.fragment_id) {
  92. // Retrieving old pieces
  93. new_file_data = new_file_data + GetPieceData(old_data, offset);
  94. // Retrieving old references
  95. new_file_data = new_file_data + GetArgumentReferenceData(old_data, offset);
  96. // Retrieving old ref_strings
  97. new_file_data = new_file_data + GetArgumentStringsData(old_data, offset);
  98. } else {
  99. // Making and adding new pieces
  100. new_file_data = new_file_data + BuildPieces(old_data, *fragment_iterator, offset);
  101. // Making and adding new references
  102. new_file_data = new_file_data + BuildArgumentReferences(old_data, *fragment_iterator, offset);
  103. // Making and adding new strings
  104. new_file_data = new_file_data + BuildArgumentStrings(old_data, *fragment_iterator, offset);
  105. }
  106. }
  107. new_file_data = new_file_data + old_data.CutData(offset); // Adding elapsed file data
  108. return new_file_data;
  109. }
  110. std::vector<TextFragment> Subfile<TEXT>::ParsePatchFragments(const SubfileData &data) {
  111. std::vector<TextFragment> result;
  112. size_t pointer = 0;
  113. while (pointer < data.text_data.length()) {
  114. // Parsing fragment_id
  115. size_t pointer1 = data.text_data.find(u":::", pointer);
  116. long long fragment_id = from_utf16(data.text_data.substr(pointer, pointer1 - pointer));
  117. pointer = pointer1 + 3;
  118. TextFragment fragment;
  119. fragment.fragment_id = fragment_id;
  120. // Parsing arguments
  121. pointer1 = data.text_data.find(u":::", pointer);
  122. std::u16string arguments = data.text_data.substr(pointer, pointer1 - pointer);
  123. pointer = pointer1 + 3;
  124. if (arguments.length() > 0) {
  125. fragment.args = argumentsFromUtf16(arguments);
  126. }
  127. // Parsing text
  128. pointer1 = data.text_data.find(u"|||", pointer);
  129. if (pointer1 == std::u16string::npos)
  130. pointer1 = data.text_data.length();
  131. fragment.text = data.text_data.substr(pointer, pointer1 - pointer);
  132. pointer = pointer1 + 3;
  133. result.push_back(fragment);
  134. }
  135. std::sort(result.begin(), result.end());
  136. return result;
  137. }
  138. std::vector<std::u16string> Subfile<TEXT>::MakePieces(const BinaryData &data, long long &offset) {
  139. std::vector<std::u16string> result;
  140. auto num_pieces = unsigned(data.ToNumber<4>(offset));
  141. offset += 4;
  142. result.resize(num_pieces);
  143. for (long long j = 0; j < num_pieces; j++) {
  144. auto piece_size = unsigned(data.ToNumber<1>(offset));
  145. if ((piece_size & 128u) != 0) {
  146. piece_size = (((piece_size ^ 128u) << 8u) | unsigned(data.ToNumber<1>(offset + 1)));
  147. offset += 1;
  148. }
  149. offset += 1;
  150. BinaryData piece_data = data.CutData(offset, offset + piece_size * 2);
  151. std::u16string piece;
  152. for (unsigned k = 0; k < piece_size; k++) {
  153. auto c = char16_t((unsigned(piece_data[2u * k + 1u])) << 8u); // First byte
  154. c |= (short(piece_data[2u * k])); // Second byte
  155. piece += c;
  156. }
  157. result[j] = piece;
  158. offset += piece_size * 2;
  159. }
  160. return result;
  161. }
  162. std::vector<long long> Subfile<TEXT>::MakeArgumentReferences(const BinaryData &data, long long &offset) {
  163. std::vector<long long> result;
  164. auto num_references = unsigned(data.ToNumber<4>(offset));
  165. offset += 4;
  166. result.resize(num_references);
  167. for (long long j = 0; j < num_references; j++) {
  168. result[j] = data.ToNumber<4>(offset);
  169. offset += 4;
  170. }
  171. return result;
  172. }
  173. std::vector<std::vector<BinaryData>> Subfile<TEXT>::MakeArgumentStrings(const BinaryData &data, long long &offset) {
  174. std::vector<std::vector<BinaryData>> result;
  175. auto num_arg_strings = unsigned(data.ToNumber<1>(offset));
  176. offset += 1;
  177. result.resize(num_arg_strings);
  178. for (long long j = 0; j < num_arg_strings; j++) {
  179. auto num_args = unsigned(data.ToNumber<4>(offset));
  180. offset += 4;
  181. result[j].resize(num_args);
  182. for (long long k = 0; k < num_args; k++) {
  183. auto string_size = unsigned(data.ToNumber<1>(offset));
  184. if ((string_size & 0x80u) != 0) {
  185. string_size = (((string_size ^ 0x80u) << 8u) | unsigned(data.ToNumber<1>(offset + 1)));
  186. offset += 1;
  187. }
  188. offset += 1;
  189. result[j][k] = data.CutData(offset, offset + string_size * 2);
  190. offset += string_size * 2;
  191. }
  192. }
  193. return result;
  194. }
  195. BinaryData Subfile<TEXT>::BuildPieces(const BinaryData& data, const TextFragment& new_data, long long& offset) {
  196. // Moving &offset pointer in &data
  197. GetPieceData(data, offset);
  198. // Deleting '[' and ']' brackets
  199. std::u16string text_data = new_data.text.substr(1, new_data.text.size() - 2);
  200. std::vector<std::u16string> text_pieces;
  201. const std::u16string DNT = u"<--DO_NOT_TOUCH!-->";
  202. size_t prev = 0;
  203. size_t next = text_data.find(DNT, prev);
  204. while (next != std::string::npos) {
  205. std::u16string piece = (next - prev == 0) ? u"" : text_data.substr(prev, next - prev);
  206. text_pieces.push_back(piece);
  207. prev = next + DNT.length();
  208. next = text_data.find(DNT, prev);
  209. }
  210. text_pieces.push_back(text_data.substr(prev));
  211. // Building BinaryData from pieces
  212. BinaryData result;
  213. result = result + BinaryData::FromNumber<4>(text_pieces.size());
  214. for (const std::u16string &piece : text_pieces) {
  215. unsigned piece_size = piece.length();
  216. if (piece_size < 128) {
  217. result = result + BinaryData::FromNumber<1>(piece_size);
  218. } else {
  219. result = result + BinaryData::FromNumberRAW<2>((piece_size | 32768u));
  220. }
  221. for (long long j = 0; j < piece_size; j++) {
  222. result = result + BinaryData::FromNumber<2>(short(piece[j]));
  223. }
  224. }
  225. return result;
  226. }
  227. BinaryData Subfile<TEXT>::BuildArgumentReferences(const BinaryData& data, const TextFragment& new_data,
  228. long long &offset) {
  229. // Moving &offset pointer in &data
  230. GetArgumentReferenceData(data, offset);
  231. // If there are no args - making 4 null-bytes and return;
  232. if (new_data.args.empty()) {
  233. BinaryData result = BinaryData::FromNumber<4>(0);
  234. return result;
  235. }
  236. // Parsing arguments from list in options["args"]
  237. std::string args_list = new_data.args;
  238. std::vector<long long> argument_references;
  239. size_t prev = 0;
  240. size_t next = args_list.find('-', prev);
  241. while (next != std::string::npos) {
  242. std::string argument = args_list.substr(prev, next - prev);
  243. argument_references.push_back(std::stoll(argument));
  244. prev = next + 1;
  245. next = args_list.find('-', prev);
  246. }
  247. std::string argument = args_list.substr(prev);
  248. argument_references.push_back(std::stoll(argument));
  249. BinaryData result;
  250. result = result + BinaryData::FromNumber<4>(argument_references.size());
  251. for (const long long &arg_reference : argument_references) {
  252. result = result + BinaryData::FromNumber<4>(arg_reference);
  253. }
  254. return result;
  255. }
  256. BinaryData Subfile<TEXT>::BuildArgumentStrings(const BinaryData& data, const TextFragment&, long long& offset) {
  257. // TODO: IMPLEMENT (never user)
  258. GetArgumentStringsData(data, offset);
  259. return BinaryData::FromNumber<1>(0);
  260. }
  261. // Get BinaryData contents of pieces/arguments/argument strings
  262. BinaryData Subfile<TEXT>::GetPieceData(const BinaryData& data, long long& offset) {
  263. long long old_offset = offset;
  264. long long num_pieces = data.ToNumber<4>(offset);
  265. offset += 4;
  266. for (long long j = 0; j < num_pieces; j++) {
  267. auto piece_size = unsigned(data.ToNumber<1>(offset));
  268. if ((piece_size & 128u) != 0) {
  269. piece_size = (((piece_size ^ 128u) << 8u) | unsigned(data.ToNumber<1>(offset + 1)));
  270. offset += 1;
  271. }
  272. offset += 1;
  273. offset += piece_size * 2;
  274. }
  275. return data.CutData(old_offset, offset);
  276. }
  277. BinaryData Subfile<TEXT>::GetArgumentReferenceData(const BinaryData& data, long long& offset) {
  278. long long old_offset = offset;
  279. long long num_references = data.ToNumber<4>(offset);
  280. offset += 4;
  281. offset += 4 * num_references;
  282. return data.CutData(old_offset, offset);
  283. }
  284. BinaryData Subfile<TEXT>::GetArgumentStringsData(const BinaryData& data, long long& offset) {
  285. long long old_offset = offset;
  286. long long num_arg_strings = data.ToNumber<1>(offset);
  287. offset += 1;
  288. for (long long j = 0; j < num_arg_strings; j++) {
  289. long long num_args = data.ToNumber<4>(offset);
  290. offset += 4;
  291. for (long long k = 0; k < num_args; k++) {
  292. auto string_size = unsigned(data.ToNumber<1>(offset));
  293. if ((string_size & 0x80u) != 0) {
  294. string_size = (((string_size ^ 0x80u) << 8u) | unsigned(data.ToNumber<1>(offset + 1)));
  295. offset += 1;
  296. }
  297. offset += 1;
  298. offset += string_size * 2;
  299. }
  300. }
  301. return data.CutData(old_offset, offset);
  302. }
  303. }; // namespace Subfiles
  304. }; // namespace LOTRO_DAT