Browse Source

added toUnicode script

Ivan Arkhipov 6 years ago
parent
commit
6ec7f61b87
1 changed file with 17 additions and 0 deletions
  1. 17 0
      assets/toUnicode.py

+ 17 - 0
assets/toUnicode.py

@@ -0,0 +1,17 @@
+import glob
+import os
+import sys
+import codecs 
+
+rootdir = "./"
+
+source_encoding = "us-ascii"
+target_encoding = "utf-8"
+
+for root, subdirs, files in os.walk(rootdir):
+	for filename in files:
+		if filename[-4:] == ".txt":
+			print(os.path.join(root, filename))
+			source = codecs.open(os.path.join(root, filename), 'r', source_encoding).read()
+			print(source)
+