Better PDF extraction
This commit is contained in:
parent
840a3030f7
commit
2aa39e86d7
@ -1347,7 +1347,7 @@ Generate a title following these rules:
|
|||||||
if len(reader.pages) == 0: return None
|
if len(reader.pages) == 0: return None
|
||||||
text = ""
|
text = ""
|
||||||
for i, page in enumerate(reader.pages):
|
for i, page in enumerate(reader.pages):
|
||||||
text += f"\n- Page {i}\n{page.extract_text()}\n"
|
text += f"\n- Page {i}\n{page.extract_text(extraction_mode='layout', layout_mode_space_vertically=False)}\n"
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def remove_attached_file(self, name):
|
def remove_attached_file(self, name):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user