-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinstall.py
More file actions
135 lines (105 loc) · 3.85 KB
/
Copy pathinstall.py
File metadata and controls
135 lines (105 loc) · 3.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/env python3
"""
Installation script for PDF Parser dependencies.
This script helps install the required packages with proper error handling.
"""
import subprocess
import sys
import os
from pathlib import Path
def run_command(command, description):
    """Run a command, print a status report, and return whether it succeeded.

    Args:
        command: Either a shell command line (``str``/``bytes``), which is
            executed through the shell, or a sequence of program arguments
            (e.g. a ``list``), which is executed directly without a shell so
            that arguments containing spaces or shell metacharacters need no
            quoting.
        description: Human-readable label used in the progress messages.

    Returns:
        True if the command exited with status 0. False if it exited with a
        non-zero status or could not be started at all.
    """
    print(f"\n{description}...")
    # Only command-line strings need a shell; argument sequences are handed
    # to the OS as-is, which sidesteps quoting problems entirely.
    use_shell = isinstance(command, (str, bytes))
    try:
        subprocess.run(command, shell=use_shell, check=True,
                       capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"✗ {description} failed:")
        print(f"Error: {e}")
        if e.stdout:
            print(f"Output: {e.stdout}")
        if e.stderr:
            print(f"Error output: {e.stderr}")
        return False
    except OSError as e:
        # The program (or the shell) is missing or not executable; report it
        # as a failed step instead of crashing the installer.
        print(f"✗ {description} failed:")
        print(f"Error: {e}")
        return False
    print(f"✓ {description} completed successfully")
    return True
def check_python_version():
    """Report whether the running interpreter is Python 3.8 or newer.

    Prints a ✓ line with the full version when the interpreter is supported,
    or a ✗ line plus an upgrade hint when it is not.

    Returns:
        True for Python 3.8+, False for anything older.
    """
    info = sys.version_info
    # Tuple comparison covers both "major too old" and "3.x with x < 8".
    if (info.major, info.minor) >= (3, 8):
        print(f"✓ Python {info.major}.{info.minor}.{info.micro} is compatible")
        return True

    print(f"✗ Python {info.major}.{info.minor} is not supported.")
    print("Please use Python 3.8 or higher.")
    return False
def check_java():
    """Probe for a working ``java`` executable by running ``java -version``.

    Java is treated as optional here: when the probe fails, only a warning
    about possible table-extraction issues is printed.

    Returns:
        True if ``java -version`` ran and exited with status 0, otherwise
        False (executable not found, or non-zero exit status).
    """
    try:
        exit_status = subprocess.run(
            ["java", "-version"], capture_output=True, text=True
        ).returncode
    except FileNotFoundError:
        # No ``java`` on PATH; fall through to the warning below.
        exit_status = None

    if exit_status == 0:
        print("✓ Java is available")
        return True

    print("⚠ Java not found. This may cause issues with table extraction.")
    print("Consider installing Java for full functionality.")
    return False
def install_packages():
    """Install the dependencies listed in ``requirements.txt``.

    The requirements file is looked up relative to the current working
    directory. pip itself is upgraded first on a best-effort basis: a failed
    upgrade only produces a warning and the installation still proceeds.

    Returns:
        True if ``pip install -r requirements.txt`` succeeded. False if the
        requirements file is missing or the install command failed.
    """
    import shlex  # local import: only needed to quote the interpreter path

    requirements_file = Path("requirements.txt")
    if not requirements_file.exists():
        print("✗ requirements.txt not found")
        return False

    # The commands below are shell strings, and the interpreter path may
    # contain spaces (common on Windows and macOS), so it must be quoted or
    # the shell would split it into several words.
    if os.name == "nt":
        # Windows paths cannot contain double quotes, so wrapping is enough.
        python = f'"{sys.executable}"'
    else:
        python = shlex.quote(sys.executable)

    # Upgrade pip first
    if not run_command(f"{python} -m pip install --upgrade pip",
                       "Upgrading pip"):
        print("⚠ Pip upgrade failed, continuing anyway...")

    # Install packages
    return run_command(f"{python} -m pip install -r requirements.txt",
                       "Installing required packages")
def test_imports():
    """Try importing each critical package and report the result per package.

    Every package is attempted even after an earlier failure, so the output
    lists all broken imports in one run.

    Returns:
        True if every package imported, False if at least one raised
        ImportError.
    """
    # (importable module name, name shown to the user)
    required = (
        ("fitz", "PyMuPDF"),
        ("pdfplumber", "pdfplumber"),
        ("pandas", "pandas"),
        ("camelot", "camelot-py"),
    )

    print("\nTesting package imports...")
    failure_count = 0
    for module_name, label in required:
        try:
            __import__(module_name)
        except ImportError as exc:
            failure_count += 1
            print(f"✗ {label} import failed: {exc}")
        else:
            print(f"✓ {label} imported successfully")
    return failure_count == 0
def main():
    """Run the installation steps in order and return a process exit code.

    Steps: verify the Python version, probe for Java (advisory only; its
    result is ignored), install the packages, then verify that they import.

    Returns:
        0 when every mandatory step succeeded, 1 as soon as one of them
        fails.
    """
    banner = "=" * 50
    print("PDF Parser Installation Script")
    print(banner)

    # Mandatory: a supported interpreter.
    if not check_python_version():
        return 1

    # Optional but recommended: the outcome does not affect the exit code.
    check_java()

    # Mandatory: dependency installation.
    print("\nInstalling packages from requirements.txt...")
    if not install_packages():
        print("\n✗ Package installation failed!")
        return 1

    # Mandatory: the installed packages must actually be importable.
    if not test_imports():
        print("\n⚠ Some packages failed to import. Check the error messages above.")
        print("You may need to install additional system dependencies.")
        return 1

    closing_lines = (
        "\n" + banner,
        "✓ Installation completed successfully!",
        "\nYou can now run the PDF parser:",
        " python pdf_parser.py your_file.pdf",
        "\nOr run the test:",
        " python test_parser.py",
    )
    for line in closing_lines:
        print(line)
    return 0
if __name__ == "__main__":
    # Hand main()'s status code to the shell as the process exit code.
    raise SystemExit(main())