# Build dependencies for lxml, which python-docx depends on:
# the Python, libxml2 and libxslt development packages.
python-dev
libxml2-dev
libxslt1-dev

# parse Word documents
antiword

# parse image files
tesseract-ocr

# parse PDFs
poppler-utils

# parse PostScript files
pstotext
