Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions .idea/SynthText.iml

This file was deleted.

4 changes: 0 additions & 4 deletions .idea/misc.xml

This file was deleted.

8 changes: 0 additions & 8 deletions .idea/modules.xml

This file was deleted.

6 changes: 0 additions & 6 deletions .idea/vcs.xml

This file was deleted.

20 changes: 20 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Image for running SynthText on the python:3.7-buster base image.
FROM python:3.7-buster

# System packages, including MeCab and its IPA dictionaries for Japanese text.
# NOTE(review): ffmpeg/libsm6/libxext6 are presumably OpenCV runtime deps - confirm.
RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git libmecab2 libmecab-dev mecab mecab-ipadic mecab-ipadic-utf8 mecab-utils vim \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# JupyterLab, with traitlets and ipykernel pinned to specific versions.
RUN pip install jupyterlab
RUN pip install traitlets==5.1.1
RUN pip install "ipykernel<5.5.2"

# Copy the build context into /workspace and install its Python requirements.
WORKDIR /workspace
COPY ./ /workspace/
RUN python -m pip install -r /workspace/requirements.txt

# Set the container time zone to Asia/Singapore.
ENV TZ=Asia/Singapore
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

# NOTE(review): repeats the WORKDIR already set above; appears redundant.
WORKDIR /workspace
31 changes: 21 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ Add support for chinese
![Japanese example 4](results/sample4.png "Synthetic Japanese Text Samples 4")


The library is written in Python. The main dependencies are:
The code in the `master` branch is for Python 2. Python 3 is supported in the `python3` branch.

The main dependencies are:

```
pygame, opencv (version 3.3), PIL (Image), numpy, matplotlib, h5py, scipy
Expand Down Expand Up @@ -119,20 +121,29 @@ For an explanation of the fields in `dset.h5` (e.g.: `seg`,`area`,`label`), plea

### Pre-processed Background Images
The 8,000 background images used in the paper, along with their segmentation and depth masks, have been uploaded here:
`http://zeus.robots.ox.ac.uk/textspot/static/db/<filename>`, where, `<filename>` can be:
`http://www.robots.ox.ac.uk/~vgg/data/scenetext/preproc/<filename>`, where `<filename>` can be:

| filenames | size | description | md5 hash |
|:--------------- | ----:|:---------------------------------------------------- |:-------------------------------- |
| `imnames.cp` | 180K | names of images which do not contain background text | |
| `bg_img.tar.gz` | 8.9G | images (filter these using `imnames.cp`) | 3eac26af5f731792c9d95838a23b5047 |
| `depth.h5` | 15G | depth maps | af97f6e6c9651af4efb7b1ff12a5dc1b |
| `seg.h5` | 6.9G | segmentation maps | 1605f6e629b2524a3902a5ea729e86b2 |

Note: due to large size, `depth.h5` is also available for download as 3-part split-files of 5G each.
These part files are named: `depth.h5-00, depth.h5-01, depth.h5-02`. Download using the path above, and put them together using `cat depth.h5-0* > depth.h5`.

- `imnames.cp` [180K]: names of filtered files, i.e., those files which do not contain text
- `bg_img.tar.gz` [8.9G]: compressed image files (more than 8000, so only use the filtered ones in imnames.cp)
- `depth.h5` [15G]: depth maps
- `seg.h5` [6.9G]: segmentation maps
[`use_preproc_bg.py`](https://github.com/ankush-me/SynthText/blob/master/use_preproc_bg.py) provides sample code for reading this data.

Note: I do not own the copyright to these images.

### Generating Samples with Text in non-Latin (English) Scripts
@JarveeLee has modified the pipeline for generating samples with Chinese text [here](https://github.com/JarveeLee/SynthText_Chinese_version).
@gachiemchiep has modified the pipeline for generating samples with Japanese text [here](https://github.com/gachiemchiep/SynthText).

- @JarveeLee has modified the pipeline for generating samples with Chinese text [here](https://github.com/JarveeLee/SynthText_Chinese_version).
- @adavoudi has modified it for Arabic/Persian script, which flows from right to left, [here](https://github.com/adavoudi/SynthText).
- @MichalBusta has adapted it for a number of languages (e.g. Bangla, Arabic, Chinese, Japanese, Korean) [here](https://github.com/MichalBusta/E2E-MLT).
- @gachiemchiep has adapted it for Japanese [here](https://github.com/gachiemchiep/SynthText).
- @gungui98 has adapted it for Vietnamese [here](https://github.com/gungui98/SynthText).
- @youngkyung has adapted it for Korean [here](https://github.com/youngkyung/SynthText_kr).

### Further Information
Please refer to the paper for more information, or contact me (email address in the paper).

19 changes: 12 additions & 7 deletions colorize3_poisson.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
import scipy.ndimage.interpolation as sii
import os
import os.path as osp
import cPickle as cp
#import cPickle as cp
import _pickle as cp
#import Image
from PIL import Image
from poisson_reconstruct import blit_images

import pickle

def sample_weighted(p_dict):
ps = p_dict.keys()
Expand Down Expand Up @@ -38,14 +39,18 @@ def __init__(self,alpha,color):
elif color.ndim==3: #rgb image
self.color = color.copy().astype('uint8')
else:
print color.shape
print (color.shape)
raise Exception("color datatype not understood")

class FontColor(object):

def __init__(self, col_file):
with open(col_file,'r') as f:
self.colorsRGB = cp.load(f)
with open(col_file,'rb') as f:
#self.colorsRGB = cp.load(f)
u = pickle._Unpickler(f)
u.encoding = 'latin1'
p = u.load()
self.colorsRGB = p
self.ncol = self.colorsRGB.shape[0]

# convert color-means from RGB to LAB for better nearest neighbour
Expand Down Expand Up @@ -402,7 +407,7 @@ def check_perceptible(self, txt_mask, bg, txt_bg):

diff = np.linalg.norm(bg_px-txt_px,ord=None,axis=1)
diff = np.percentile(diff,[10,30,50,70,90])
print "color diff percentile :", diff
print ("color diff percentile :", diff)
return diff, (bgo,txto)

def color(self, bg_arr, text_arr, hs, place_order=None, pad=20):
Expand All @@ -425,7 +430,7 @@ def color(self, bg_arr, text_arr, hs, place_order=None, pad=20):

# initialize the placement order:
if place_order is None:
place_order = np.array(xrange(len(text_arr)))
place_order = np.array(range(len(text_arr)))

rendered = []
for i in place_order[::-1]:
Expand Down
6 changes: 3 additions & 3 deletions common.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,17 @@ def colorprint(colorcode, text, o=sys.stdout, bold=False):
o.write(colorize(colorcode, text, bold=bold))

def warn(msg):
print colorize(Color.YELLOW, msg)
print (colorize(Color.YELLOW, msg))

def error(msg):
print colorize(Color.RED, msg)
print (colorize(Color.RED, msg))

# http://stackoverflow.com/questions/366682/how-to-limit-execution-time-of-a-function-call-in-python
class TimeoutException(Exception): pass
@contextmanager
def time_limit(seconds):
def signal_handler(signum, frame):
raise TimeoutException, colorize(Color.RED, " *** Timed out!", highlight=True)
raise TimeoutException(colorize(Color.RED, " *** Timed out!", highlight=True))
signal.signal(signal.SIGALRM, signal_handler)
signal.alarm(seconds)
try:
Expand Down
58 changes: 28 additions & 30 deletions gen.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
#-*- coding: utf-8 -*-
# -*- coding: utf-8 -*-

# Author: Ankush Gupta
# Date: 2015
Expand Down Expand Up @@ -37,26 +37,26 @@
OUT_FILE = 'results/SynthText.h5'
OUT_DIR = 'results'


def get_data():
"""
Download the image,depth and segmentation data:
Returns, the h5 database.
"""
Download the image,depth and segmentation data:
Returns, the h5 database.
"""
if not osp.exists(DB_FNAME):
try:
colorprint(Color.BLUE, '\tdownloading data (56 M) from: ' + DATA_URL, bold=True)
print
print('\tdownloading data (56 M) from: ' + DATA_URL)
sys.stdout.flush()
out_fname = 'data.tar.gz'
wget.download(DATA_URL, out=out_fname)
tar = tarfile.open(out_fname)
tar.extractall()
tar.close()
os.remove(out_fname)
colorprint(Color.BLUE, '\n\tdata saved at:' + DB_FNAME, bold=True)
print('\n\tdata saved at:' + DB_FNAME)
sys.stdout.flush()
except:
print colorize(Color.RED, 'Data not found and have problems downloading.', bold=True)
print('Data not found and have problems downloading.')
sys.stdout.flush()
sys.exit(-1)
# open the h5 file and return:
Expand All @@ -65,43 +65,41 @@ def get_data():

def add_res_to_db(imgname, res, db):
"""
Add the synthetically generated text image instance
and other metadata to the dataset.
"""
Add the synthetically generated text image instance
and other metadata to the dataset.
"""
ninstance = len(res)
for i in xrange(ninstance):
for i in range(ninstance):
dname = "%s_%d" % (imgname, i)
db['data'].create_dataset(dname, data=res[i]['img'])
db['data'][dname].attrs['charBB'] = res[i]['charBB']
db['data'][dname].attrs['wordBB'] = res[i]['wordBB']

db['data'][dname].attrs['txt'] = res[i]['txt']
text_utf8 = [char.encode('utf8') for char in res[i]['txt']]
db['data'][dname].attrs['txt'] = text_utf8

db['data'][dname].attrs['txt_utf8'] = text_utf8

def save_res_to_imgs(imgname, res):
"""
Add the synthetically generated text image instance
and other metadata to the dataset.
"""
ninstance = len(res)
for i in xrange(ninstance):
for i in range(ninstance):
filename = "{}/{}_{}.png".format(OUT_DIR, imgname, i)
# Swap bgr to rgb so we can save into image file
img = res[i]['img'][..., [2, 1, 0]]
cv2.imwrite(filename, img)


def main(viz=False):
# open databases:
print colorize(Color.BLUE, 'getting data..', bold=True)
print('getting data..')
db = get_data()
print colorize(Color.BLUE, '\t-> done', bold=True)
print('\t-> done')

# open the output h5 file:
out_db = h5py.File(OUT_FILE,'w')
out_db = h5py.File(OUT_FILE, 'w')
out_db.create_group('/data')
print colorize(Color.GREEN,'Storing the output in: '+OUT_FILE, bold=True)
print('Storing the output in: ' + OUT_FILE)

# get the names of the image files in the dataset:
imnames = sorted(db['image'].keys())
Expand All @@ -112,7 +110,7 @@ def main(viz=False):
start_idx, end_idx = 0, min(NUM_IMG, N)

RV3 = RendererV3(DATA_PATH, max_time=SECS_PER_IMG, lang=args.lang)
for i in xrange(start_idx, end_idx):
for i in range(start_idx, end_idx):
imname = imnames[i]
try:
# get the image:
Expand All @@ -130,23 +128,23 @@ def main(viz=False):

# re-size uniformly:
sz = depth.shape[:2][::-1]
img = np.array(img.resize(sz, Image.ANTIALIAS))
seg = np.array(Image.fromarray(seg).resize(sz, Image.NEAREST))
img = np.array(img.resize(sz, Image.Resampling.LANCZOS))
seg = np.array(Image.fromarray(seg).resize(sz, Image.Resampling.NEAREST))

print colorize(Color.RED, '%d of %d' % (i, end_idx - 1), bold=True)
print('%d of %d' % (i, end_idx - 1))
res = RV3.render_text(img, depth, seg, area, label,
ninstance=INSTANCE_PER_IMAGE, viz=viz)
if len(res) > 0:
# non-empty : successful in placing text:
add_res_to_db(imname,res,out_db)
add_res_to_db(imname, res, out_db)
# visualize the output:
if viz:
save_res_to_imgs(imname, res)
if 'q' in raw_input(colorize(Color.RED, 'continue? (enter to continue, q to exit): ', True)):
if 'q' in input('continue? (enter to continue, q to exit): '):
break
except:
traceback.print_exc()
print colorize(Color.GREEN, '>>>> CONTINUING....', bold=True)
print('>>>> CONTINUING....')
continue
db.close()
out_db.close()
Expand All @@ -155,10 +153,10 @@ def main(viz=False):
if __name__ == '__main__':
import argparse

parser = argparse.ArgumentParser(description='Genereate Synthetic Scene-Text Images')
parser = argparse.ArgumentParser(description='Generate Synthetic Scene-Text Images')
parser.add_argument('--viz', action='store_true', dest='viz', default=False,
help='flag for turning on visualizations')
parser.add_argument('--lang', default='ENG',
help='Select language : ENG/JPN')
help='Select language : ENG or JPN')
args = parser.parse_args()
main(args.viz)
5 changes: 2 additions & 3 deletions invert_font_size.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@
models = {} # linear model

FS = FontState()
# plt.figure()
# plt.hold(True)
for i in xrange(len(FS.fonts)):

for i in range(len(FS.fonts)):
font = freetype.Font(FS.fonts[i], size=12)
h = []
for y in ys:
Expand Down
6 changes: 2 additions & 4 deletions poisson_reconstruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def blit_images(im_top,im_back,scale_grad=1.0,mode='max'):
im_res = np.zeros_like(im_top)

# frac of gradients which come from source:
for ch in xrange(im_top.shape[2]):
for ch in range(im_top.shape[2]):
ims = im_top[:,:,ch]
imd = im_back[:,:,ch]

Expand Down Expand Up @@ -203,7 +203,7 @@ def contiguous_regions(mask):

# plt.imshow(im_alpha_L)
# plt.show()
for i in xrange(500,im_alpha_L.shape[1],5):
for i in range(500,im_alpha_L.shape[1],5):
l_actual = im_actual_L[i,:]#-im_actual_L[i,:-1]
l_alpha = im_alpha_L[i,:]#-im_alpha_L[i,:-1]
l_poisson = im_poisson_L[i,:]#-im_poisson_L[i,:-1]
Expand All @@ -212,7 +212,6 @@ def contiguous_regions(mask):
with sns.axes_style("darkgrid"):
plt.subplot(2,1,2)
plt.plot(l_alpha,label='alpha')
plt.hold(True)
plt.plot(l_poisson,label='poisson')
plt.plot(l_actual,label='actual')
plt.legend()
Expand All @@ -227,7 +226,6 @@ def contiguous_regions(mask):
with sns.axes_style("white"):
plt.subplot(2,1,1)
plt.imshow(im_alpha[:,:,::-1].astype('uint8'))
plt.hold(True)
plt.plot([0,im_alpha_L.shape[0]-1],[i,i],'r')
plt.axis('image')
plt.show()
Expand Down
Loading