TextSubFile.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. //
  2. // Created by Иван_Архипов on 24.11.2017.
  3. //
  4. #include "Subfiles/TextSubFile.h"
  5. #include "BinaryData.h"
  6. #include "DatFile.h"
  7. #include "SubfileData.h"
  8. #include "EasyLogging++/easylogging++.h"
  9. #include <codecvt>
  10. #include <locale>
  // Converts an integer to its decimal representation as a UTF-16 string.
  //
  // Zero is rendered as u"0" (an empty string would make a single zero-valued
  // argument reference indistinguishable from "no arguments" in the exported text),
  // and negative values are rendered with a leading u'-'.
  std::u16string to_utf16(long long x) {
      const bool negative = x < 0;
      // Work on the unsigned magnitude so that the most negative value can be negated
      // without signed overflow.
      unsigned long long magnitude = static_cast<unsigned long long>(x);
      if (negative)
          magnitude = 0ull - magnitude;

      // Digits are produced least-significant first and reversed at the end.
      std::u16string res;
      do {
          res += char16_t(u'0' + magnitude % 10);
          magnitude /= 10;
      } while (magnitude > 0);
      if (negative)
          res += u'-';

      std::reverse(res.begin(), res.end());
      return res;
  }
  // Parses a decimal integer from a UTF-16 string.
  //
  // Leading spaces, tabs and line breaks are skipped, an optional '+' or '-' sign is
  // honoured, and parsing stops at the first character that is not a decimal digit
  // (instead of folding that character's code into the result).
  // An empty string, or one without any digits, yields 0.
  long long from_utf16(const std::u16string &num) {
      size_t pos = 0;
      while (pos < num.size() &&
             (num[pos] == u' ' || num[pos] == u'\t' || num[pos] == u'\r' || num[pos] == u'\n')) {
          ++pos;
      }

      bool negative = false;
      if (pos < num.size() && (num[pos] == u'-' || num[pos] == u'+')) {
          negative = (num[pos] == u'-');
          ++pos;
      }

      long long res = 0;
      for (; pos < num.size(); ++pos) {
          const char16_t c = num[pos];
          if (c < u'0' || c > u'9')
              break;
          res = res * 10ll + (c - u'0');
      }
      return negative ? -res : res;
  }
  27. std::string argumentsFromUtf16(const std::u16string &args) {
  28. std::string res;
  29. size_t pointer = 0;
  30. while (pointer < args.length()) {
  31. size_t pointer1 = args.find(u'-', pointer);
  32. if (pointer1 == std::u16string::npos)
  33. pointer1 = args.length();
  34. if (!res.empty())
  35. res += "-";
  36. res += std::to_string(from_utf16(args.substr(pointer, pointer1 - pointer)));
  37. pointer = pointer1 + 1;
  38. }
  39. return res;
  40. }
  41. namespace LOTRO_DAT {
  42. BinaryData TextSubFile::buffer_ = BinaryData(10 * 1024 * 1024);
  43. TextSubFile::TextSubFile(DatFile &dat, long long dictionary_offset, long long unknown1,
  44. long long file_id, long long file_offset, long long file_size,
  45. long long timestamp, long long version, long long block_size, long long unknown2)
  46. : SubFile(dat, dictionary_offset, unknown1, file_id, file_offset, file_size,
  47. timestamp, version, block_size, unknown2) {
  48. }
  49. FILE_TYPE TextSubFile::FileType() const {
  50. return TEXT;
  51. }
  52. std::string TextSubFile::Extension() const {
  53. return std::string(".txt");
  54. }
  55. SubfileData TextSubFile::PrepareForExport(const BinaryData &file_data) {
  56. if (file_data.Empty()) {
  57. patch_fragments_.clear();
  58. text_pieces_.clear();
  59. argument_references_.clear();
  60. argument_strings_.clear();
  61. return SubfileData();
  62. }
  63. SubfileData result;
  64. long long offset = 9; // first 4 bytes - file_id, then 4 bytes - unknown, then 1 byte - unknown
  65. long long text_fragment_num = file_data.ToNumber<1>(offset);
  66. if ((text_fragment_num & 0x80) != 0) {
  67. text_fragment_num = (((text_fragment_num ^ 0x80) << 8) | file_data.ToNumber<1>(offset + 1));
  68. offset += 1;
  69. }
  70. offset += 1;
  71. for (long long i = 0; i < text_fragment_num; i++) {
  72. long long fragment_id = file_data.ToNumber<8>(offset);
  73. offset += 8;
  74. MakePieces(file_data, offset);
  75. MakeArgumentReferences(file_data, offset);
  76. MakeArgumentStrings(file_data, offset);
  77. std::u16string text = u"[";
  78. for (size_t j = 0; j + 1 < text_pieces_.size(); j++)
  79. text += text_pieces_[j] + u"<--DO_NOT_TOUCH!-->";
  80. text += text_pieces_[text_pieces_.size() - 1] + u"]";
  81. std::u16string arguments;
  82. for (size_t j = 0; j + 1 < argument_references_.size(); j++)
  83. arguments += to_utf16(argument_references_[j]) + u"-";
  84. if (!argument_references_.empty())
  85. arguments += to_utf16(argument_references_[argument_references_.size() - 1]);
  86. if (result.text_data.length() > 0)
  87. result.text_data += u"|||";
  88. result.text_data += to_utf16(fragment_id) + u":::";
  89. result.text_data += arguments + u":::";
  90. result.text_data += text;
  91. }
  92. result.options["fid"] = file_id();
  93. result.options["ext"] = Extension();
  94. patch_fragments_.clear();
  95. text_pieces_.clear();
  96. argument_references_.clear();
  97. argument_strings_.clear();
  98. return result;
  99. }
  100. BinaryData TextSubFile::MakeForImport(const BinaryData &old_data, const SubfileData &data) {
  101. LOG(DEBUG) << "Preparing text file " << file_id() << " for import.";
  102. ParsePatchFragments(data);
  103. if (file_size() <= 10 + 8) {// File is empty, nothing to do;
  104. patch_fragments_.clear();
  105. text_pieces_.clear();
  106. argument_references_.clear();
  107. argument_strings_.clear();
  108. return old_data;
  109. }
  110. BinaryData new_data;
  111. long long offset = 9 + 8; // first 8 bytes - file_info. After them:
  112. // first 4 bytes - file_id, then 4 bytes - unknown, then 1 byte - unknown
  113. long long text_fragment_num = old_data.ToNumber<1>(offset);
  114. if ((text_fragment_num & 0x80) != 0) {
  115. text_fragment_num = (((text_fragment_num ^ 0x80) << 8) | old_data.ToNumber<1>(offset + 1));
  116. offset += 1;
  117. }
  118. offset += 1;
  119. new_data = new_data + old_data.CutData(0, offset); // Adding file info
  120. for (long long i = 0; i < text_fragment_num; i++) {
  121. long long fragment_id = old_data.ToNumber<8>(offset);
  122. offset += 8;
  123. new_data = new_data + old_data.CutData(offset - 8, offset);
  124. TextFragment id_comp;
  125. id_comp.fragment_id = fragment_id;
  126. auto fragment_iterator = std::lower_bound(patch_fragments_.begin(), patch_fragments_.end(), id_comp);
  127. if (fragment_iterator == patch_fragments_.end()) {
  128. // Retrieving old pieces
  129. new_data = new_data + GetPieceData(old_data, offset);
  130. // Retrieving old references
  131. new_data = new_data + GetArgumentReferenceData(old_data, offset);
  132. // Retrieving old ref_strings
  133. new_data = new_data + GetArgumentStringsData(old_data, offset);
  134. } else {
  135. // Making and adding new pieces
  136. new_data = new_data + BuildPieces(old_data, *fragment_iterator, offset);
  137. // Making and adding new references
  138. new_data = new_data + BuildArgumentReferences(old_data, *fragment_iterator, offset);
  139. // Making and adding new strings
  140. new_data = new_data + BuildArgumentStrings(old_data, *fragment_iterator, offset);
  141. }
  142. }
  143. new_data = new_data + old_data.CutData(offset); // Adding elapsed file data
  144. patch_fragments_.clear();
  145. text_pieces_.clear();
  146. argument_references_.clear();
  147. argument_strings_.clear();
  148. return new_data;
  149. }
  150. void TextSubFile::ParsePatchFragments(const SubfileData &data) {
  151. LOG(DEBUG) << "Started parsing patch fragments";
  152. size_t pointer = 0;
  153. while (pointer < data.text_data.length()) {
  154. // Parsing fragment_id
  155. size_t pointer1 = data.text_data.find(u":::", pointer);
  156. if (pointer1 == std::u16string::npos) {
  157. LOG(ERROR) << "Unable to parse fragment id! Cannot find '...' divider. File_id = " << file_id_;
  158. return;
  159. }
  160. long long fragment_id = from_utf16(data.text_data.substr(pointer, pointer1 - pointer));
  161. pointer = pointer1 + 3;
  162. TextFragment fragment;
  163. fragment.fragment_id = fragment_id;
  164. // Parsing arguments
  165. pointer1 = data.text_data.find(u":::", pointer);
  166. if (pointer1 == std::u16string::npos) {
  167. LOG(ERROR) << "Unable to parse arguments! Cannot find '...' divider. File_id = " << file_id_;
  168. return;
  169. }
  170. std::u16string arguments = data.text_data.substr(pointer, pointer1 - pointer);
  171. pointer = pointer1 + 3;
  172. if (arguments.length() > 0) {
  173. fragment.args = argumentsFromUtf16(arguments);
  174. }
  175. // Parsing text
  176. pointer1 = data.text_data.find(u"|||", pointer);
  177. if (pointer1 == std::u16string::npos)
  178. pointer1 = data.text_data.length();
  179. fragment.text = data.text_data.substr(pointer, pointer1 - pointer);
  180. pointer = pointer1 + 3;
  181. patch_fragments_.push_back(fragment);
  182. }
  183. std::sort(patch_fragments_.begin(), patch_fragments_.end());
  184. LOG(DEBUG) << "Finished parsing text patch fragments";
  185. }
  186. // Make pieces/arguments/argument strings functions
  187. void TextSubFile::MakePieces(const BinaryData &data, long long &offset) {
  188. LOG(DEBUG) << "Started making pieces";
  189. long long num_pieces = data.ToNumber<4>(offset);
  190. offset += 4;
  191. text_pieces_.resize(num_pieces);
  192. for (long long j = 0; j < num_pieces; j++) {
  193. long long piece_size = data.ToNumber<1>(offset);
  194. if ((piece_size & 128) != 0) {
  195. piece_size = (((piece_size ^ 128) << 8) | data.ToNumber<1>(offset + 1));
  196. offset += 1;
  197. }
  198. offset += 1;
  199. BinaryData piece_data = data.CutData(offset, offset + piece_size * 2);
  200. std::u16string piece;
  201. for (long long k = 0; k < piece_size; k++) {
  202. char16_t c = char16_t(
  203. ((short(piece_data[2 * unsigned(k) + 1])) << 8) | (short(piece_data[2 * unsigned(k)])));
  204. piece += c;
  205. }
  206. text_pieces_[j] = piece;
  207. offset += piece_size * 2;
  208. }
  209. LOG(DEBUG) << "Finished making pieces";
  210. }
  211. void TextSubFile::MakeArgumentReferences(const BinaryData &data, long long &offset) {
  212. LOG(DEBUG) << "Started making argument references";
  213. long long num_references = data.ToNumber<4>(offset);
  214. offset += 4;
  215. argument_references_.resize(num_references);
  216. for (long long j = 0; j < num_references; j++) {
  217. argument_references_[j] = data.ToNumber<4>(offset);
  218. offset += 4;
  219. }
  220. LOG(DEBUG) << "Finished making argument references";
  221. }
  222. void TextSubFile::MakeArgumentStrings(const BinaryData &data, long long &offset) {
  223. LOG(DEBUG) << "Started making argument strings";
  224. long long num_arg_strings = data.ToNumber<1>(offset);
  225. offset += 1;
  226. argument_strings_.resize(num_arg_strings);
  227. for (long long j = 0; j < num_arg_strings; j++) {
  228. long long num_args = data.ToNumber<4>(offset);
  229. offset += 4;
  230. argument_strings_[j].resize(num_args);
  231. for (long long k = 0; k < num_args; k++) {
  232. long long string_size = data.ToNumber<1>(offset);
  233. if ((string_size & 0x80) != 0) {
  234. string_size = (((string_size ^ 0x80) << 8) | data.ToNumber<1>(offset + 1));
  235. offset += 1;
  236. }
  237. offset += 1;
  238. argument_strings_[j][k] = data.CutData(offset, offset + string_size * 2);
  239. offset += string_size * 2;
  240. }
  241. }
  242. LOG(DEBUG) << "Finished making argument strings";
  243. }
  244. // Build pieces/arguments/argument strings functions from fragment SubfileData
  245. BinaryData TextSubFile::BuildPieces(const BinaryData &data, const TextFragment &new_data, long long &offset) {
  246. LOG(DEBUG) << "Started building pieces";
  247. // Moving &offset pointer in &data
  248. GetPieceData(data, offset);
  249. std::u16string file_data = u" fid:" + to_utf16(file_id_)
  250. + u" gid:" + to_utf16(new_data.fragment_id);
  251. // Deleting '[' and ']' brackets
  252. std::u16string text_data = new_data.text.substr(1, new_data.text.size() - 2) + file_data;
  253. text_pieces_.clear();
  254. const std::u16string DNT = u"<--DO_NOT_TOUCH!-->";
  255. size_t prev = 0;
  256. size_t next = text_data.find(DNT, prev);
  257. while (next != std::string::npos) {
  258. std::u16string piece = text_data.substr(prev, next - prev);
  259. text_pieces_.push_back(piece);
  260. prev = next + DNT.length();
  261. next = text_data.find(DNT, prev);
  262. }
  263. text_pieces_.push_back(text_data.substr(prev));
  264. // Building BinaryData from pieces
  265. unsigned buffer_offset = 0;
  266. buffer_.Append(BinaryData::FromNumber<4>(text_pieces_.size()), buffer_offset);
  267. buffer_offset += 4;
  268. for (const std::u16string &piece : text_pieces_) {
  269. long long piece_size = piece.length();
  270. if (piece_size < 128) {
  271. buffer_.Append(BinaryData::FromNumber<1>(piece_size), buffer_offset);
  272. buffer_offset += 1;
  273. } else {
  274. buffer_.Append(BinaryData::FromNumberRAW<2>((piece_size | 32768)), buffer_offset);
  275. buffer_offset += 2;
  276. }
  277. for (long long j = 0; j < piece_size; j++) {
  278. buffer_.Append(BinaryData::FromNumber<2>(short(piece[j])), buffer_offset);
  279. buffer_offset += 2;
  280. }
  281. }
  282. LOG(DEBUG) << "Pieces built successfully";
  283. return buffer_.CutData(0, buffer_offset);
  284. }
  285. BinaryData TextSubFile::BuildArgumentReferences(const BinaryData &data, const TextFragment &new_data,
  286. long long &offset) {
  287. LOG(DEBUG) << "Started building argument refs";
  288. // Moving &offset pointer in &data
  289. GetArgumentReferenceData(data, offset);
  290. // If there are no args - making 4 null-bytes and return;
  291. if (new_data.args.empty()) {
  292. BinaryData result = BinaryData::FromNumber<4>(0);
  293. return result;
  294. }
  295. // Parsing arguments from list in options["args"]
  296. std::string args_list = new_data.args;
  297. argument_references_.clear();
  298. size_t prev = 0;
  299. size_t next = args_list.find('-', prev);
  300. while (next != std::string::npos) {
  301. std::string argument = args_list.substr(prev, next - prev);
  302. argument_references_.push_back(std::stoll(argument));
  303. prev = next + 1;
  304. next = args_list.find('-', prev);
  305. }
  306. std::string argument = args_list.substr(prev);
  307. argument_references_.push_back(std::stoll(argument));
  308. BinaryData result;
  309. BinaryData temp_data = BinaryData::FromNumber<4>(argument_references_.size());
  310. result = result + temp_data;
  311. for (const long long &arg_reference : argument_references_) {
  312. temp_data = BinaryData::FromNumber<4>(arg_reference);
  313. result = result + temp_data;
  314. }
  315. LOG(DEBUG) << "Argument refs built successfully";
  316. return result;
  317. }
  318. BinaryData TextSubFile::BuildArgumentStrings(const BinaryData &data, const TextFragment &, long long &offset) {
  319. LOG(DEBUG) << "Started building argument strings";
  320. LOG(DEBUG) << "Built arg strings successfully";
  321. return GetArgumentStringsData(data, offset);
  322. }
  323. // Get BinaryData contents of pieces/arguments/argument strings
  324. BinaryData TextSubFile::GetPieceData(const BinaryData &data, long long &offset) const {
  325. LOG(DEBUG) << "Started getting piece data";
  326. long long old_offset = offset;
  327. long long num_pieces = data.ToNumber<4>(offset);
  328. offset += 4;
  329. for (long long j = 0; j < num_pieces; j++) {
  330. long long piece_size = data.ToNumber<1>(offset);
  331. if ((piece_size & 128) != 0) {
  332. piece_size = (((piece_size ^ 128) << 8) | data.ToNumber<1>(offset + 1));
  333. offset += 1;
  334. }
  335. offset += 1;
  336. offset += piece_size * 2;
  337. }
  338. LOG(DEBUG) << "Got piece data";
  339. return data.CutData(old_offset, offset);
  340. }
  341. BinaryData TextSubFile::GetArgumentReferenceData(const BinaryData &data, long long &offset) const {
  342. LOG(DEBUG) << "Started getting arg refs data";
  343. long long old_offset = offset;
  344. long long num_references = data.ToNumber<4>(offset);
  345. offset += 4;
  346. offset += 4 * num_references;
  347. LOG(DEBUG) << "Finished getting arg refs data";
  348. return data.CutData(old_offset, offset);
  349. }
  350. BinaryData TextSubFile::GetArgumentStringsData(const BinaryData &data, long long &offset) const {
  351. LOG(DEBUG) << "Started getting arg strings data";
  352. long long old_offset = offset;
  353. long long num_arg_strings = data.ToNumber<1>(offset);
  354. offset += 1;
  355. for (long long j = 0; j < num_arg_strings; j++) {
  356. long long num_args = data.ToNumber<4>(offset);
  357. offset += 4;
  358. for (long long k = 0; k < num_args; k++) {
  359. long long string_size = data.ToNumber<1>(offset);
  360. if ((string_size & 0x80) != 0) {
  361. string_size = (((string_size ^ 0x80) << 8) | data.ToNumber<1>(offset + 1));
  362. offset += 1;
  363. }
  364. offset += 1;
  365. offset += string_size * 2;
  366. }
  367. }
  368. LOG(DEBUG) << "Finished getting arg strings data";
  369. return data.CutData(old_offset, offset);
  370. }
  371. };