Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ dist
*.o
*.pyc
*.so
venv
34 changes: 20 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,23 @@ brew install pkg-config poppler python

### Windows

Currently tested only when using conda:

- Install the Microsoft Visual C++ Build Tools
- Install poppler through conda:
```
conda install -c conda-forge poppler
```


## Install

```
pip install pdftotext
```
- Install using `conda`. Make sure you have Anaconda installed and that it's working.

- Install the Microsoft Visual C++ Build Tools
- Install poppler through conda:
```
conda install -c conda-forge poppler
```
- Install with pip
```
pip install pdftotext
```
- Install using `pip`. If you have installed the official Python binary and want to
install `pdftotext`, make sure you have `Visual Studio` installed along with
the C/C++ build tools. You will also need to download the `poppler` binaries and
configure your system. You can download `poppler` Windows binaries from
[https://github.com/oschwartz10612/poppler-windows](https://github.com/oschwartz10612/poppler-windows). Download a release zip file and extract it somewhere. Set the full path of the extracted folder (the one containing the `bin`, `include`
and `lib` directories) as the `POPPLER_PREFIX` environment variable,
and add `%POPPLER_PREFIX%\bin` to your `PATH` variable. Then
install via `pip`, or download the source code of this repo
and run `python setup.py install` to install it.
26 changes: 22 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,24 @@
from setuptools import Extension
from setuptools import setup

include_dirs = None
library_dirs = None


def poppler_cpp_at_least(version):
# Return True if the platform is Windows and the include and library dirs
# are non-empty; pkg-config may not be available on Windows systems.
if platform.system() == "Windows":
if include_dirs and library_dirs:
return True
else:
print(" On windows platform, install via conda or make sure")
print(" POPPLER_PREFIX environmental variable points to the")
print(" folder where you've put the poppler build containing")
print(" bin, include and library dirs. In the later case add")
print(" %POPPLER_PREFIX%\\bin to your environmantal PATH")
print(" variable.")
return False
try:
subprocess.check_call(
["pkg-config", "--exists", "poppler-cpp >= {}".format(version)]
Expand All @@ -16,12 +32,9 @@ def poppler_cpp_at_least(version):
except OSError:
print("WARNING: pkg-config not found--guessing at poppler version.")
print(" If the build fails, install pkg-config and try again.")
return True
return False


include_dirs = None
library_dirs = None

# On some BSDs, poppler is in /usr/local, which is not searched by default
if platform.system() in ["Darwin", "FreeBSD", "OpenBSD"]:
include_dirs = ["/usr/local/include"]
Expand All @@ -30,9 +43,14 @@ def poppler_cpp_at_least(version):
# On Windows, only building with conda is tested so far
if platform.system() == "Windows":
conda_prefix = os.getenv("CONDA_PREFIX")
poppler_prefix = os.getenv("POPPLER_PREFIX")
if conda_prefix is not None:
include_dirs = [os.path.join(conda_prefix, r"Library\include")]
library_dirs = [os.path.join(conda_prefix, r"Library\lib")]
elif poppler_prefix is not None:
include_dirs = [os.path.join(poppler_prefix, r"include")]
library_dirs = [os.path.join(poppler_prefix, r"lib")]


extra_compile_args = ["-Wall"]
extra_link_args = []
Expand Down