Skip to content
Snippets Groups Projects
Commit 45302b55 authored by Marc-Philipp Knechtle's avatar Marc-Philipp Knechtle
Browse files

add get_table_coordinates_from_cells

span_polygon
parent e2adcfe0
Branches
No related tags found
No related merge requests found
from typing import Tuple
def span_polygon(point1: Tuple, point2: Tuple) -> list:
    """
    Expand two opposite bounding box corners into the four corners of the rectangle.

    The sci tsr bounding boxes only provide the lower left and the upper right corner, which is not the
    coordinate structure the shared-file-format expects. Both missing corners can be derived by mixing
    the x and y values of the two given points.
    The order of the returned points matters, because the shared-file-format assumes that the last
    coordinate is connected back to the first one.

    :param point1: lower left coordinate, indexed as (x, y)
    :param point2: upper right coordinate, indexed as (x, y)
    :return: a list of four coordinates with index:
        0 = lower left (point1 itself)
        1 = lower right
        2 = upper right (point2 itself)
        3 = upper left
    """
    # x of the upper right corner combined with y of the lower left corner
    lower_right = (point2[0], point1[1])
    # x of the lower left corner combined with y of the upper right corner
    upper_left = (point1[0], point2[1])
    return [point1, lower_right, point2, upper_left]
from typing import List
from docrecjson.elements import Cell
import polygon_creations
def get_table_coordinates_from_cells(cells: List[Cell]) -> list:
    """
    Compute the table bounding box from the bounding boxes of its already extracted cells.

    Only the lower left and the upper right corner of the table have to be determined here; the two
    remaining corners are derived by polygon_creations.span_polygon.

    :param cells: all cells of the table. The corner points of a cell are read from
        cell.bounding_box.polygon, every point being indexed as point[0] = x and point[1] = y.
    :return: all four rectangle coordinates of the table bounding box, in the order produced by
        polygon_creations.span_polygon
    :raises ValueError: if cells is empty or none of the cells contributes a polygon point
    """
    points = [point for cell in cells for point in cell.bounding_box.polygon]
    if not points:
        # Without this guard min()/max() would fail with a message that hides the actual cause.
        raise ValueError("cannot compute table coordinates: cells must contain at least one polygon point")

    x_values = [point[0] for point in points]
    y_values = [point[1] for point in points]

    # lower left coordinate = (min x, max y), upper right coordinate = (max x, min y)
    # NOTE(review): this pairing presumably reflects a y axis that grows downwards - confirm with the data source
    lower_left = (min(x_values), max(y_values))
    upper_right = (max(x_values), min(y_values))
    return polygon_creations.span_polygon(lower_left, upper_right)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment