gachiemchiep · jinmingteo · Jan 3, 2018 · Jan 3, 2018 · Jan 17, 2018 · Sep 4, 2019
diff --git a/.idea/SynthText.iml b/.idea/SynthText.iml
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,20 @@
+FROM python:3.7-buster
+
+# System packages (MeCab and its dictionaries are needed for Japanese text)
+RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git libmecab2 libmecab-dev mecab mecab-ipadic mecab-ipadic-utf8 mecab-utils vim \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# jupyter notebook libs
+RUN pip install jupyterlab
+RUN pip install traitlets==5.1.1
+RUN pip install "ipykernel<5.5.2"
+
+WORKDIR /workspace
+COPY ./ /workspace/
+RUN python -m pip install -r /workspace/requirements.txt
+
+ENV TZ=Asia/Singapore
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+WORKDIR /workspace
diff --git a/README.md b/README.md
@@ -28,7 +28,9 @@ Add support for chinese
 ![Japanese example 4](results/sample4.png "Synthetic Japanese Text Samples 4")
 
 
-The library is written in Python. The main dependencies are:
+The code in the `master` branch is for Python 2. Python 3 is supported in the `python3` branch.
+
+The main dependencies are:
 
 ```
 pygame, opencv (version 3.3), PIL (Image), numpy, matplotlib, h5py, scipy
@@ -119,20 +121,29 @@ For an explanation of the fields in `dset.h5` (e.g.: `seg`,`area`,`label`), plea
 
 ### Pre-processed Background Images
 The 8,000 background images used in the paper, along with their segmentation and depth masks, have been uploaded here:
-`http://zeus.robots.ox.ac.uk/textspot/static/db/<filename>`, where, `<filename>` can be:
+`http://www.robots.ox.ac.uk/~vgg/data/scenetext/preproc/<filename>`, where, `<filename>` can be:
+
+|    filenames    | size |                      description                     |             md5 hash             |
+|:--------------- | ----:|:---------------------------------------------------- |:-------------------------------- |
+| `imnames.cp`    | 180K | names of images which do not contain background text |                                  |
+| `bg_img.tar.gz` | 8.9G | images (filter these using `imnames.cp`)             | 3eac26af5f731792c9d95838a23b5047 |
+| `depth.h5`      |  15G | depth maps                                           | af97f6e6c9651af4efb7b1ff12a5dc1b |
+| `seg.h5`        | 6.9G | segmentation maps                                    | 1605f6e629b2524a3902a5ea729e86b2 |
+
+Note: due to its large size, `depth.h5` is also available for download as three split files of 5G each.
+These part files are named `depth.h5-00`, `depth.h5-01` and `depth.h5-02`. Download them using the path above, and join them using `cat depth.h5-0* > depth.h5`.
 
-- `imnames.cp` [180K]: names of filtered files, i.e., those files which do not contain text
-- `bg_img.tar.gz` [8.9G]: compressed image files (more than 8000, so only use the filtered ones in imnames.cp)
-- `depth.h5` [15G]: depth maps
-- `seg.h5` [6.9G]: segmentation maps
+[`use_preproc_bg.py`](https://github.com/ankush-me/SynthText/blob/master/use_preproc_bg.py) provides sample code for reading this data.
 
 Note: I do not own the copyright to these images.
 
 ### Generating Samples with Text in non-Latin (English) Scripts
-@JarveeLee has modified the pipeline for generating samples with Chinese text [here](https://github.com/JarveeLee/SynthText_Chinese_version).
-@gachiemchiep has modified the pipeline for generating samples with Japanese text [here](https://github.com/gachiemchiep/SynthText).
-
+- @JarveeLee has modified the pipeline for generating samples with Chinese text [here](https://github.com/JarveeLee/SynthText_Chinese_version).
+- @adavoudi has modified it for Arabic/Persian script, which flows from right to left, [here](https://github.com/adavoudi/SynthText).
+- @MichalBusta has adapted it for a number of languages (e.g. Bangla, Arabic, Chinese, Japanese, Korean) [here](https://github.com/MichalBusta/E2E-MLT).
+- @gachiemchiep has adapted it for Japanese [here](https://github.com/gachiemchiep/SynthText).
+- @gungui98 has adapted it for Vietnamese [here](https://github.com/gungui98/SynthText).
+- @youngkyung has adapted it for Korean [here](https://github.com/youngkyung/SynthText_kr).
 
 ### Further Information
 Please refer to the paper for more information, or contact me (email address in the paper).
-
diff --git a/colorize3_poisson.py b/colorize3_poisson.py
@@ -6,11 +6,12 @@
 import scipy.ndimage.interpolation as sii
 import os
 import os.path as osp
-import cPickle as cp
+#import cPickle as cp
+import _pickle as cp
 #import Image
 from PIL import Image
 from poisson_reconstruct import blit_images
-
+import pickle
 
 def sample_weighted(p_dict):
     ps = p_dict.keys()
@@ -38,14 +39,18 @@ def __init__(self,alpha,color):
         elif color.ndim==3: #rgb image
             self.color = color.copy().astype('uint8')
         else:
-            print color.shape
+            print (color.shape)
             raise Exception("color datatype not understood")
 
 class FontColor(object):
 
     def __init__(self, col_file):
-        with open(col_file,'r') as f:
-            self.colorsRGB = cp.load(f)
+        with open(col_file,'rb') as f:
+            #self.colorsRGB = cp.load(f)
+            u = pickle._Unpickler(f)
+            u.encoding = 'latin1'
+            p = u.load()
+            self.colorsRGB = p
         self.ncol = self.colorsRGB.shape[0]
 
         # convert color-means from RGB to LAB for better nearest neighbour
@@ -402,7 +407,7 @@ def check_perceptible(self, txt_mask, bg, txt_bg):
 
         diff = np.linalg.norm(bg_px-txt_px,ord=None,axis=1)
         diff = np.percentile(diff,[10,30,50,70,90])
-        print "color diff percentile :", diff
+        print ("color diff percentile :", diff)
         return diff, (bgo,txto)
 
     def color(self, bg_arr, text_arr, hs, place_order=None, pad=20):
@@ -425,7 +430,7 @@ def color(self, bg_arr, text_arr, hs, place_order=None, pad=20):
 
         # initialize the placement order:
         if place_order is None:
-            place_order = np.array(xrange(len(text_arr)))
+            place_order = np.array(range(len(text_arr)))
 
         rendered = []
         for i in place_order[::-1]:

diff --git a/common.py b/common.py
@@ -25,17 +25,17 @@ def colorprint(colorcode, text, o=sys.stdout, bold=False):
     o.write(colorize(colorcode, text, bold=bold))
 
 def warn(msg):
-    print colorize(Color.YELLOW, msg)
+    print (colorize(Color.YELLOW, msg))
 
 def error(msg):
-    print colorize(Color.RED, msg)
+    print (colorize(Color.RED, msg))
 
 # http://stackoverflow.com/questions/366682/how-to-limit-execution-time-of-a-function-call-in-python
 class TimeoutException(Exception): pass
 @contextmanager
 def time_limit(seconds):
     def signal_handler(signum, frame):
-        raise TimeoutException, colorize(Color.RED, "   *** Timed out!", highlight=True)
+        raise TimeoutException(colorize(Color.RED, "   *** Timed out!", highlight=True))
     signal.signal(signal.SIGALRM, signal_handler)
     signal.alarm(seconds)
     try:

diff --git a/gen.py b/gen.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-#-*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
 
 # Author: Ankush Gupta
 # Date: 2015
@@ -37,26 +37,26 @@
 OUT_FILE = 'results/SynthText.h5'
 OUT_DIR = 'results'
 
+
 def get_data():
     """
-    Download the image,depth and segmentation data:
-    Returns, the h5 database.
-    """
+  Download the image,depth and segmentation data:
+  Returns, the h5 database.
+  """
     if not osp.exists(DB_FNAME):
         try:
-            colorprint(Color.BLUE, '\tdownloading data (56 M) from: ' + DATA_URL, bold=True)
-            print
+            print('\tdownloading data (56 M) from: ' + DATA_URL)
             sys.stdout.flush()
             out_fname = 'data.tar.gz'
             wget.download(DATA_URL, out=out_fname)
             tar = tarfile.open(out_fname)
             tar.extractall()
             tar.close()
             os.remove(out_fname)
-            colorprint(Color.BLUE, '\n\tdata saved at:' + DB_FNAME, bold=True)
+            print('\n\tdata saved at:' + DB_FNAME)
             sys.stdout.flush()
         except:
-            print colorize(Color.RED, 'Data not found and have problems downloading.', bold=True)
+            print('Data not found and have problems downloading.')
             sys.stdout.flush()
             sys.exit(-1)
     # open the h5 file and return:
@@ -65,43 +65,41 @@ def get_data():
 
 def add_res_to_db(imgname, res, db):
     """
-    Add the synthetically generated text image instance
-    and other metadata to the dataset.
-    """
+  Add the synthetically generated text image instance
+  and other metadata to the dataset.
+  """
     ninstance = len(res)
-    for i in xrange(ninstance):
+    for i in range(ninstance):
         dname = "%s_%d" % (imgname, i)
         db['data'].create_dataset(dname, data=res[i]['img'])
         db['data'][dname].attrs['charBB'] = res[i]['charBB']
         db['data'][dname].attrs['wordBB'] = res[i]['wordBB']
-
+        db['data'][dname].attrs['txt'] = res[i]['txt']
         text_utf8 = [char.encode('utf8') for char in res[i]['txt']]
-        db['data'][dname].attrs['txt'] = text_utf8
-
+        db['data'][dname].attrs['txt_utf8'] = text_utf8
 
 def save_res_to_imgs(imgname, res):
     """
     Add the synthetically generated text image instance
     and other metadata to the dataset.
     """
     ninstance = len(res)
-    for i in xrange(ninstance):
+    for i in range(ninstance):
         filename = "{}/{}_{}.png".format(OUT_DIR, imgname, i)
         # Swap bgr to rgb so we can save into image file
         img = res[i]['img'][..., [2, 1, 0]]
         cv2.imwrite(filename, img)
 
-
 def main(viz=False):
     # open databases:
-    print colorize(Color.BLUE, 'getting data..', bold=True)
+    print('getting data..')
     db = get_data()
-    print colorize(Color.BLUE, '\t-> done', bold=True)
+    print('\t-> done')
 
     # open the output h5 file:
-    out_db = h5py.File(OUT_FILE,'w')
+    out_db = h5py.File(OUT_FILE, 'w')
     out_db.create_group('/data')
-    print colorize(Color.GREEN,'Storing the output in: '+OUT_FILE, bold=True)
+    print('Storing the output in: ' + OUT_FILE)
 
     # get the names of the image files in the dataset:
     imnames = sorted(db['image'].keys())
@@ -112,7 +110,7 @@ def main(viz=False):
     start_idx, end_idx = 0, min(NUM_IMG, N)
 
     RV3 = RendererV3(DATA_PATH, max_time=SECS_PER_IMG, lang=args.lang)
-    for i in xrange(start_idx, end_idx):
+    for i in range(start_idx, end_idx):
         imname = imnames[i]
         try:
             # get the image:
@@ -130,23 +128,23 @@ def main(viz=False):
 
             # re-size uniformly:
             sz = depth.shape[:2][::-1]
-            img = np.array(img.resize(sz, Image.ANTIALIAS))
-            seg = np.array(Image.fromarray(seg).resize(sz, Image.NEAREST))
+            img = np.array(img.resize(sz, Image.Resampling.LANCZOS))
+            seg = np.array(Image.fromarray(seg).resize(sz, Image.Resampling.NEAREST))
 
-            print colorize(Color.RED, '%d of %d' % (i, end_idx - 1), bold=True)
+            print('%d of %d' % (i, end_idx - 1))
             res = RV3.render_text(img, depth, seg, area, label,
                                   ninstance=INSTANCE_PER_IMAGE, viz=viz)
             if len(res) > 0:
                 # non-empty : successful in placing text:
-                add_res_to_db(imname,res,out_db)
+                add_res_to_db(imname, res, out_db)
             # visualize the output:
             if viz:
                 save_res_to_imgs(imname, res)
-                if 'q' in raw_input(colorize(Color.RED, 'continue? (enter to continue, q to exit): ', True)):
+                if 'q' in input('continue? (enter to continue, q to exit): '):
                     break
         except:
             traceback.print_exc()
-            print colorize(Color.GREEN, '>>>> CONTINUING....', bold=True)
+            print('>>>> CONTINUING....')
             continue
     db.close()
     out_db.close()
@@ -155,10 +153,10 @@ def main(viz=False):
 if __name__ == '__main__':
     import argparse
 
-    parser = argparse.ArgumentParser(description='Genereate Synthetic Scene-Text Images')
+    parser = argparse.ArgumentParser(description='Generate Synthetic Scene-Text Images')
     parser.add_argument('--viz', action='store_true', dest='viz', default=False,
                         help='flag for turning on visualizations')
     parser.add_argument('--lang', default='ENG',
-                        help='Select language : ENG/JPN')
+                        help='Select language : ENG or JPN')
     args = parser.parse_args()
     main(args.viz)
diff --git a/invert_font_size.py b/invert_font_size.py
@@ -18,9 +18,8 @@
 models = {}  # linear model
 
 FS = FontState()
-# plt.figure()
-# plt.hold(True)
-for i in xrange(len(FS.fonts)):
+
+for i in range(len(FS.fonts)):
     font = freetype.Font(FS.fonts[i], size=12)
     h = []
     for y in ys:

diff --git a/poisson_reconstruct.py b/poisson_reconstruct.py
@@ -99,7 +99,7 @@ def blit_images(im_top,im_back,scale_grad=1.0,mode='max'):
     im_res = np.zeros_like(im_top)
 
     # frac of gradients which come from source:
-    for ch in xrange(im_top.shape[2]):
+    for ch in range(im_top.shape[2]):
         ims = im_top[:,:,ch]
         imd = im_back[:,:,ch]
 
@@ -203,7 +203,7 @@ def contiguous_regions(mask):
 
     # plt.imshow(im_alpha_L)
     # plt.show()
-    for i in xrange(500,im_alpha_L.shape[1],5):
+    for i in range(500,im_alpha_L.shape[1],5):
         l_actual = im_actual_L[i,:]#-im_actual_L[i,:-1]
         l_alpha = im_alpha_L[i,:]#-im_alpha_L[i,:-1]
         l_poisson = im_poisson_L[i,:]#-im_poisson_L[i,:-1]
@@ -212,7 +212,6 @@ def contiguous_regions(mask):
         with sns.axes_style("darkgrid"):
             plt.subplot(2,1,2)
             plt.plot(l_alpha,label='alpha')
-            plt.hold(True)
             plt.plot(l_poisson,label='poisson')
             plt.plot(l_actual,label='actual')
             plt.legend()
@@ -227,7 +226,6 @@ def contiguous_regions(mask):
         with sns.axes_style("white"):
             plt.subplot(2,1,1)
             plt.imshow(im_alpha[:,:,::-1].astype('uint8'))
-            plt.hold(True)
             plt.plot([0,im_alpha_L.shape[0]-1],[i,i],'r')
             plt.axis('image')
             plt.show()