flowchart TD
    A(Separate each work order into its own screenshot) --> B
    B(Identify the cells of the scanned image) --> C
    C(OCR text and get positional data) --> D
    D(Do math to assign text to cell) --> E
    E(Use location to transform to csv)
flowchart TD
    A(Separate each work order into its own screenshot) --> B
    B(Identify the cells of the scanned image) --> C
    C(OCR text and get positional data) --> D
    D(Do math to assign text to cell) --> E
    E(Use location to transform to csv)
import cv2
import json
import os
import glob
from table_parsing.image import convert_bounding_box
def make_screenshots(image_dir, json_dir, screenshot_output_path):
    """Cut scanned pages into one image per work order.

    Every ``*jpg`` in ``image_dir`` is paired, in sorted order, with the
    ``*json`` at the same position in ``json_dir``.  An OCR observation whose
    text contains ``"for:"`` together with ``"WorkTask"`` or ``"Work Task"``
    marks the start of a work order; the cut is placed 30 pixels above that
    observation (clamped to the top of the page).  The region between two
    consecutive cuts is written to ``screenshot_output_path`` as ``NN.jpg``.
    The region after the last cut on a page is held back and stitched to the
    top of the following page(s) until the next cut is found.

    Args:
        image_dir: Directory containing the page images.
        json_dir: Directory containing the OCR results, one per page.
        screenshot_output_path: Directory the numbered crops are written to.
            It is created if missing, because ``cv2.imwrite`` only returns
            ``False`` (no exception) when the directory does not exist.

    Raises:
        IndexError: If there are fewer JSON files than images.
    """
    image_paths = sorted(glob.glob(f"{image_dir}/*jpg"))
    json_paths = sorted(glob.glob(f"{json_dir}/*json"))
    if len(json_paths) < len(image_paths):
        # Fail before writing anything instead of half-way through the run.
        raise IndexError(
            f"found {len(image_paths)} images but only {len(json_paths)} JSON files"
        )
    if screenshot_output_path:
        os.makedirs(screenshot_output_path, exist_ok=True)

    # Tail of the previous page that still waits for the rest of its work order.
    first_half = None
    counter = 0

    for image_path, json_path in zip(image_paths, json_paths):
        image = cv2.imread(image_path)
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Collect the y position of every cut on this page.
        work_task_lines = []
        for box in data['observations']:
            observation = box["observation"]
            check_text = observation["text"]
            if "for:" in check_text and ("WorkTask" in check_text or "Work Task" in check_text):
                _, y1, _, _ = convert_bounding_box(image, observation["bounds"])
                work_task_lines.append(max(y1 - 30, 0))
        work_task_lines.sort()

        if not work_task_lines:
            # A page without any header belongs entirely to the pending work
            # order; keep it instead of silently dropping it.
            if first_half is not None:
                first_half = cv2.vconcat([first_half, image])
            continue

        # The cut lines are drawn onto the page itself, so they are visible in
        # the crops taken below (the crops are views of this array).
        for line in work_task_lines:
            cv2.line(image, (0, line), (image.shape[1], line), (0, 255, 0), 2)

        for i, start_y in enumerate(work_task_lines):
            if first_half is not None:
                # Finish the work order that started on an earlier page.
                counter += 1
                print(str(counter) + "--" + str(len(work_task_lines)))
                second_half = image[:start_y, :]
                combined_image = cv2.vconcat([first_half, second_half])
                output_path = os.path.join(screenshot_output_path, f"{counter:02}.jpg")
                cv2.imwrite(output_path, combined_image)
                first_half = None

            if i + 1 >= len(work_task_lines):
                # Last cut on the page: hold everything below it back.
                first_half = image[start_y:, :]
            else:
                end_y = work_task_lines[i + 1]
                counter += 1
                output_path = os.path.join(screenshot_output_path, f"{counter:02}.jpg")
                cv2.imwrite(output_path, image[start_y:end_y, :])

    # Flush the last pending work order, if there is one.  Without the guard
    # cv2.imwrite would be handed None when no header was ever found.
    if first_half is not None:
        counter += 1
        output_path = os.path.join(screenshot_output_path, f"{counter:02}.jpg")
        cv2.imwrite(output_path, first_half)
import cv2
import json
import numpy as np
from sklearn.cluster import DBSCAN
import glob
from tqdm import tqdm
import os
import re
from table_parsing.utils import stringify_keys
# Handle input data
# ==================================================
def process_image(image_path):
    """Read an image and build the binary mask used for line detection.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A ``(image, binary)`` tuple: the image as returned by ``cv2.imread``
        and the adaptive-threshold result computed on its inverted grayscale
        version.
    """
    colour = cv2.imread(image_path)
    inverted_gray = ~cv2.cvtColor(colour, cv2.COLOR_BGR2GRAY)
    # Mean adaptive threshold, block size 15, constant -10, on the inverted image.
    mask = cv2.adaptiveThreshold(
        inverted_gray,
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY,
        15,
        -10,
    )
    return colour, mask
def load_data(json_path):
    """Parse the JSON file at ``json_path`` and return the decoded object.

    The file is read as UTF-8 explicitly so that non-ASCII OCR text decodes
    the same way on every platform, independent of the locale's default
    encoding.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file content.
    """
    with open(json_path, 'r', encoding='utf-8') as file:
        return json.load(file)
# Handle bounding boxes
# ============================================
def draw_bounding_box(image, bounds, color=(0, 255, 0), thickness=2):
    """Draw the rectangle described by ``bounds`` onto ``image`` in place.

    Args:
        image: Array whose ``shape`` unpacks to ``(height, width, channels)``.
        bounds: Mapping with ``x1``, ``y1``, ``x2`` and ``y2``; the values are
            multiplied by the image size, and y is flipped (``1 - y``) before
            scaling.
        color: Colour tuple passed to ``cv2.rectangle``.
        thickness: Line thickness passed to ``cv2.rectangle``.
    """
    img_h, img_w, _ = image.shape
    left = int(bounds['x1'] * img_w)
    right = int(bounds['x2'] * img_w)
    y_from_y2 = int((1 - bounds['y2']) * img_h)
    y_from_y1 = int((1 - bounds['y1']) * img_h)
    cv2.rectangle(image, (left, y_from_y2), (right, y_from_y1), color, thickness)
def convert_bounding_box(image, bounds):
    """Convert fractional OCR bounds into integer pixel coordinates.

    The x values are scaled by the image width.  The y values are flipped
    (``1 - y``) before being scaled by the image height, so the input is
    presumably measured from the bottom edge of the page -- confirm against
    the OCR tool's output format.

    Only the first two entries of ``image.shape`` are used, so both colour
    (H, W, C) and grayscale (H, W) images are accepted.

    Args:
        image: Array-like object exposing ``shape`` as ``(height, width, ...)``.
        bounds: Mapping with the keys ``x1``, ``y1``, ``x2`` and ``y2``.

    Returns:
        ``(x1, y1, x2, y2)`` in pixels, where each output is derived from the
        input key of the same name.
    """
    height, width = image.shape[:2]

    x1 = int(bounds['x1'] * width)
    x2 = int(bounds['x2'] * width)
    # Flip the vertical axis so that y grows downwards like pixel rows do.
    y1 = int((1 - bounds['y1']) * height)
    y2 = int((1 - bounds['y2']) * height)

    return x1, y1, x2, y2
def draw_all_word_bounding_boxes(image, data):
    """Outline every OCR observation in ``data`` on ``image``.

    Args:
        image: Image that is drawn on in place.
        data: Mapping with an ``observations`` list; each entry carries its
            box under ``["observation"]["bounds"]``.

    Returns:
        The same ``image`` object, after drawing.
    """
    for entry in data['observations']:
        phrase_bounds = entry["observation"]['bounds']
        # One box per whole observation, drawn with colour (255, 0, 0).
        draw_bounding_box(image, phrase_bounds, color=(255, 0, 0))
    return image
# Draw bounding boxes for each word
# for subBound in item["observation"]['subBounds']:
# draw_bounding_box(image, subBound['bounds'], color=(0, 255, 0))
# # Display the image
# cv2.imshow('Image with Bounding Boxes', image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Cluster points
# ======================================
def find_horizontal_vertical_lines(binary, horizontal_scale_factor, vertical_scale_factor):
    """Extract horizontal and vertical line masks from a binary image.

    Each mask is produced by eroding and then dilating ``binary`` with a
    one-pixel-thick rectangular structuring element.  The element length is
    the image width (or height) integer-divided by the matching scale factor,
    so a larger factor gives a shorter element.

    Args:
        binary: Binary image to search for lines.
        horizontal_scale_factor: Divisor applied to the image width.
        vertical_scale_factor: Divisor applied to the image height.

    Returns:
        A ``(horizontal, vertical)`` tuple of masks.
    """
    n_rows, n_cols = binary.shape[0], binary.shape[1]

    def _erode_then_dilate(kernel_size):
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
        eroded = cv2.erode(binary.copy(), kernel)
        return cv2.dilate(eroded, kernel)

    horizontal_mask = _erode_then_dilate((n_cols // horizontal_scale_factor, 1))
    vertical_mask = _erode_then_dilate((1, n_rows // vertical_scale_factor))
    return (horizontal_mask, vertical_mask)
# Find touches (where lines touch but don't intersect)
def find_touches(image, horizontal, vertical, dilation_size):
    """Locate the points where horizontal and vertical lines meet or touch.

    Both masks are dilated with a ``dilation_size`` x ``dilation_size``
    kernel so that lines which merely end next to each other still overlap.
    Pixels equal to 255 in the AND of the dilated masks are then grouped with
    DBSCAN (``eps=10``, ``min_samples=1``) and each group is reduced to its
    mean position.

    Args:
        image: Unused; kept so existing callers keep working.
        horizontal: Mask of horizontal lines.
        vertical: Mask of vertical lines.
        dilation_size: Side length of the square dilation kernel.

    Returns:
        A list with one integer ``(x, y)`` array per cluster.  The list is
        empty when the masks never overlap (DBSCAN cannot be fitted on zero
        samples, so that case is handled before clustering).
    """
    kernel = np.ones((dilation_size, dilation_size), np.uint8)
    horizontal_dilated = cv2.dilate(horizontal, kernel)
    vertical_dilated = cv2.dilate(vertical, kernel)
    touch_points_img = cv2.bitwise_and(horizontal_dilated, vertical_dilated)

    # argwhere yields (row, col); reverse the columns to get (x, y).
    touch_points = np.argwhere(touch_points_img == 255)[:, ::-1]
    if len(touch_points) == 0:
        return []

    labels = DBSCAN(eps=10, min_samples=1).fit(touch_points).labels_
    return [
        np.mean(touch_points[labels == label], axis=0).astype(int)
        for label in np.unique(labels)
    ]
def draw_clustered_touch_points(image, clustered_touch_points):
    """Mark every clustered touch point on ``image`` with a filled circle.

    Args:
        image: Image that is drawn on in place.
        clustered_touch_points: Iterable of ``(x, y)`` points.

    Returns:
        The same ``image`` object, after drawing.
    """
    radius = 5
    marker_color = (255, 0, 0)
    filled = -1  # negative thickness makes cv2.circle fill the disc
    for centre in clustered_touch_points:
        cv2.circle(image, tuple(centre), radius, marker_color, filled)
    return image
# Final grid lines
# ========================================
# Identify and draw the outermost vertical and horizontal lines
def make_final_lines(image, clustered_touch_points):
    """Derive the table grid from the touch points and draw it on ``image``.

    The outer frame is given by the extreme x and y values of the points; the
    median x is used as a middle vertical line.  Points within 5 pixels of the
    top or bottom edge are ignored, and the y values of the remaining points
    are clustered with DBSCAN (``eps=10``, ``min_samples=1``); the mean of
    each cluster becomes an inner horizontal line.

    Args:
        image: Image that is drawn on in place.
        clustered_touch_points: Sequence of ``(x, y)`` points.

    Returns:
        ``(image, horizontal_lines, vertical_lines)`` where
        ``horizontal_lines`` is ``[top, *inner rows, bottom]`` and
        ``vertical_lines`` is ``[left, middle, right]``.  When there are no
        touch points both lists are empty; the tuple shape is kept so callers
        can always unpack three values.
    """
    if len(clustered_touch_points) == 0:
        return image, [], []

    points = np.array(clustered_touch_points)
    xs, ys = points[:, 0], points[:, 1]

    left_most, right_most = np.min(xs), np.max(xs)
    middle = np.median(xs)
    top_most, bottom_most = np.min(ys), np.max(ys)

    green = (0, 255, 0)
    height, width = image.shape[0], image.shape[1]

    # Full-height vertical lines, then full-width top and bottom lines.
    for x in (left_most, middle, right_most):
        cv2.line(image, (int(x), 0), (int(x), height), green, 2)
    for y in (top_most, bottom_most):
        cv2.line(image, (0, int(y)), (width, int(y)), green, 2)

    # Drop points that sit on the outer frame; only inner rows are clustered.
    on_frame = (np.abs(ys - top_most) <= 5) | (np.abs(ys - bottom_most) <= 5)
    inner_ys = np.unique(ys[~on_frame]).reshape(-1, 1)

    clustered_y_coords = []
    if inner_ys.size > 0:
        # DBSCAN cannot be fitted on zero samples, hence the guard above.
        labels = DBSCAN(eps=10, min_samples=1).fit(inner_ys).labels_
        clustered_y_coords = sorted(
            np.mean(inner_ys[labels == label]) for label in np.unique(labels)
        )

    for y in clustered_y_coords:
        cv2.line(image, (int(left_most), int(y)), (int(right_most), int(y)), green, 2)

    return image, [top_most] + clustered_y_coords + [bottom_most], [left_most, middle, right_most]
# Assigning data
# ============================================
# Function to calculate the intersection area of two rectangles
def intersection_area(box1, box2):
    """Return the overlap area of two ``(x1, y1, x2, y2)`` rectangles.

    The result is 0 when the rectangles are disjoint or only share an edge.
    """
    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])
    overlap_w = max(0, right - left)
    overlap_h = max(0, bottom - top)
    return overlap_w * overlap_h
def assign_data(image, data, horizontal_lines, vertical_lines):
    """Assign every OCR observation to the grid cell it overlaps most.

    Cells are spanned by consecutive entries of ``horizontal_lines`` (rows)
    and ``vertical_lines`` (columns).  An observation that overlaps no cell
    at all falls back to cell ``(0, 0)``.  Texts that land in the same cell
    are joined with a single space, in the order they appear in ``data``.

    Args:
        image: Image used to scale the fractional OCR bounds to pixels.
        data: Mapping with an ``observations`` list.
        horizontal_lines: y coordinates of the row boundaries.
        vertical_lines: x coordinates of the column boundaries.

    Returns:
        Dict mapping ``(row_index, column_index)`` to the collected text.
    """
    row_result = {}
    row_spans = list(zip(horizontal_lines, horizontal_lines[1:]))
    col_spans = list(zip(vertical_lines, vertical_lines[1:]))

    for item in data['observations']:
        observation = item["observation"]
        text = observation['text']
        text_box = tuple(convert_bounding_box(image, observation['bounds']))

        best_area = 0
        best_cell = (0, 0)
        for row, (y_top, y_bottom) in enumerate(row_spans):
            for col, (x_left, x_right) in enumerate(col_spans):
                area = intersection_area(text_box, (x_left, y_top, x_right, y_bottom))
                # Strict comparison: on a tie the first cell visited wins.
                if area > best_area:
                    best_area, best_cell = area, (row, col)

        if best_cell in row_result:
            row_result[best_cell] += " " + text
        else:
            row_result[best_cell] = text

    return row_result
def natural_sort_key(s):
    """Build a sort key that orders embedded numbers by value.

    The string is split on runs of digits; digit runs become ``int`` and the
    text in between stays ``str``, e.g. ``"img10.jpg"`` -> ``["img", 10,
    ".jpg"]``, so ``"2.jpg"`` sorts before ``"10.jpg"``.

    Args:
        s: String to build the key for.

    Returns:
        List alternating ``str`` and ``int`` parts, always starting with a
        ``str`` (possibly empty).
    """
    parts = re.split(r'(\d+)', s)
    # With one capturing group, re.split puts the digit runs at the odd
    # indices.  Deciding by position instead of str.isdigit() avoids calling
    # int() on characters such as "²", which isdigit() accepts but int()
    # rejects.
    return [int(part) if index % 2 else part for index, part in enumerate(parts)]
def make_result(image_dir, json_dir, jsonl_output_path, annotated_output_path):
    """Run table extraction on every image/JSON pair and store the results.

    The ``*jpg`` files in ``image_dir`` and the ``*json`` files in
    ``json_dir`` are each put in natural order and paired by position.  For
    every pair the grid is detected, the OCR text is assigned to cells, the
    annotated image is written to ``annotated_output_path`` under the
    original file name, and one JSON line is appended to
    ``jsonl_output_path``.

    Args:
        image_dir: Directory containing the images.
        json_dir: Directory containing the OCR results.
        jsonl_output_path: File that receives one JSON object per image.
        annotated_output_path: Directory for the annotated images.  It is
            created if missing, because ``cv2.imwrite`` only returns
            ``False`` (no exception) when the directory does not exist.

    Raises:
        IndexError: If there are fewer JSON files than images.
    """
    image_paths = sorted(glob.glob(f"{image_dir}/*jpg"), key=natural_sort_key)
    json_paths = sorted(glob.glob(f"{json_dir}/*json"), key=natural_sort_key)
    if len(json_paths) < len(image_paths):
        # Fail before the output file is truncated, not half-way through.
        raise IndexError(
            f"found {len(image_paths)} images but only {len(json_paths)} JSON files"
        )
    if annotated_output_path:
        os.makedirs(annotated_output_path, exist_ok=True)

    with open(jsonl_output_path, 'w') as jsonl_file:
        for image_path, json_path in zip(image_paths, json_paths):
            file_name = os.path.basename(image_path)
            image, binary = process_image(image_path)
            data = load_data(json_path)

            # Detect the grid.
            horizontal_lines, vertical_lines = find_horizontal_vertical_lines(
                binary, horizontal_scale_factor=15, vertical_scale_factor=30
            )
            clustered_touch_points = find_touches(
                image, horizontal_lines, vertical_lines, dilation_size=6
            )
            image = draw_clustered_touch_points(image, clustered_touch_points)
            # From here on the names hold line coordinates, not masks.
            image_with_lines, horizontal_lines, vertical_lines = make_final_lines(
                image, clustered_touch_points
            )

            result = assign_data(image, data, horizontal_lines, vertical_lines)

            # Save results
            cv2.imwrite(os.path.join(annotated_output_path, file_name), image_with_lines)
            jsonl_file.write(json.dumps(stringify_keys(result)) + '\n')
{
"info" : {
"program" : "textra",
"version" : "0.2.1"
},
"observations" : [
{
"observation" : {
"bounds" : {
"x2" : 0.16144200584639501,
"y2" : 0.95008912650623889,
"x1" : 0.11912225663009407,
"y1" : 0.98039215680926917
},
"text" : "36.",
"confidence" : 1,
"subBounds" : [
{
"offset" : [
0,
3
],
"bounds" : {
"y1" : 0.9803921565062389,
"x2" : 0.1614420054231975,
"x1" : 0.11912225705329153,
"y2" : 0.95008912680926916
},
"text" : "36."
}
]
}
},
{
"observation" : {
"confidence" : 0.5,
"subBounds" : [
{
"text" : "Work",
"offset" : [
0,
4
],
"bounds" : {
"x2" : 0.23667711598746083,
"y2" : 0.946524064171123,
"x1" : 0.18652037617554859,
"y1" : 0.97504456292335118
}
},
{
"text" : "Task",
"bounds" : {
"y2" : 0.946524064171123,
"y1" : 0.97504456327985745,
"x1" : 0.23981191222570533,
"x2" : 0.28056426332288403
},
"offset" : [
5,
9
]
},
{
"bounds" : {
"x2" : 0.31504702194357365,
"x1" : 0.28369905956112851,
"y1" : 0.97504456327985745,
"y2" : 0.946524064171123
},
"text" : "for:",
"offset" : [
10,
14
]
},
{
"bounds" : {
"y2" : 0.946524064171123,
"y1" : 0.97504456327985745,
"x1" : 0.31818181818181818,
"x2" : 0.34952978056426331
},
"offset" : [
15,
18
],
"text" : "(b)"
},
{
"text" : "(6)",
"offset" : [
19,
22
],
"bounds" : {
"x1" : 0.35266457680250785,
"y1" : 0.97504456327985745,
"y2" : 0.94652406438502679,
"x2" : 0.38871472949843267
}
}
],
"bounds" : {
"x1" : 0.18652037415360501,
"y1" : 0.97504456320855615,
"x2" : 0.38871473152037617,
"y2" : 0.9465240640998217
},
"text" : "Work Task for: (b) (6)"
}
},
{
"observation" : {
"bounds" : {
"x1" : 0.44200626929467085,
"y2" : 0.95008912675579327,
"y1" : 0.9696969698930481,
"x2" : 0.49686520346394986
},
"text" : "Direct",
"confidence" : 1,
"subBounds" : [
{
"text" : "Direct",
"bounds" : {
"x1" : 0.44200626984326019,
"y1" : 0.96969696969696972,
"y2" : 0.95008912695187164,
"x2" : 0.49686520291536052
},
"offset" : [
0,
6
]
}
]
}
},
{
"observation" : {
"bounds" : {
"y2" : 0.91257735462201994,
"y1" : 0.9305599004304822,
"x1" : 0.18810392744810853,
"x2" : 0.25546974122659416
},
"text" : "Control",
"confidence" : 1,
"subBounds" : [
{
"text" : "Control",
"bounds" : {
"x1" : 0.18810392586520183,
"y2" : 0.91257735442674492,
"y1" : 0.93055990062575722,
"x2" : 0.25546974280950085
},
"offset" : [
0,
7
]
}
]
}
},
{
"observation" : {
"text" : "Edit Sampling Disposition",
"confidence" : 1,
"subBounds" : [
{
"bounds" : {
"x1" : 0.29780564742946702,
"x2" : 0.34247648902821315,
"y1" : 0.93048128327985735,
"y2" : 0.90909090909090906
},
"offset" : [
0,
4
],
"text" : "Edit"
},
{
"offset" : [
5,
13
],
"text" : "Sampling",
"bounds" : {
"x2" : 0.43652037617554856,
"x1" : 0.34482758620689657,
"y2" : 0.90909090909090906,
"y1" : 0.93048128342245984
}
},
{
"offset" : [
14,
25
],
"bounds" : {
"x2" : 0.53761755485893414,
"y2" : 0.90909090937611414,
"x1" : 0.43887147335423199,
"y1" : 0.93048128342245984
},
"text" : "Disposition"
}
],
"bounds" : {
"x2" : 0.53761755725705318,
"y2" : 0.90909090916221036,
"x1" : 0.29780564503134793,
"y1" : 0.93048128349376114
}
}
},
{
"observation" : {
"subBounds" : [
{
"bounds" : {
"y1" : 0.90552584623885923,
"y2" : 0.88235294117647056,
"x1" : 0.18808777733542315,
"x2" : 0.31798588388765869
},
"offset" : [
0,
14
],
"text" : "Entered\/Edited"
},
{
"text" : "by",
"bounds" : {
"x2" : 0.34345610145491118,
"x1" : 0.32053290564438391,
"y1" : 0.90552584670231728,
"y2" : 0.88235294117647056
},
"offset" : [
15,
17
]
},
{
"offset" : [
18,
22
],
"bounds" : {
"x1" : 0.34600312321163645,
"x2" : 0.38930249307596571,
"y2" : 0.88235294117647056,
"y1" : 0.90552584670231728
},
"text" : "John"
},
{
"bounds" : {
"x2" : 0.45768024843260191,
"y2" : 0.88235294117647056,
"x1" : 0.39184951483269098,
"y1" : 0.90552584670231728
},
"text" : "Steuber",
"offset" : [
23,
30
]
}
],
"text" : "Entered\/Edited by John Steuber",
"confidence" : 1,
"bounds" : {
"y1" : 0.9055258464705882,
"x1" : 0.18808777463949844,
"x2" : 0.45768025112852667,
"y2" : 0.88235294094474159
}
}
},
{
"observation" : {
"subBounds" : [
{
"bounds" : {
"y2" : 0.90730837834224598,
"x2" : 0.61128526624869384,
"y1" : 0.93404634581105173,
"x1" : 0.56112852727272722
},
"text" : "Flag?",
"offset" : [
0,
5
]
}
],
"bounds" : {
"x2" : 0.6112852666666666,
"y2" : 0.90730837811942955,
"y1" : 0.93404634603386805,
"x1" : 0.56112852685475434
},
"confidence" : 1,
"text" : "Flag?"
}
},
{
"observation" : {
"confidence" : 1,
"bounds" : {
"x1" : 0.18815321844609226,
"y2" : 0.84093111714321089,
"x2" : 0.28833580851217094,
"y1" : 0.86138617326400346
},
"subBounds" : [
{
"offset" : [
0,
4
],
"text" : "Work",
"bounds" : {
"y1" : 0.86138617345490087,
"x1" : 0.18815321967438162,
"x2" : 0.23759269482473663,
"y2" : 0.84135996157891957
}
},
{
"bounds" : {
"x2" : 0.28833580728388153,
"y1" : 0.86094905011880474,
"x1" : 0.23987593017825068,
"y2" : 0.8409311169523136
},
"text" : "Date:",
"offset" : [
5,
10
]
}
],
"text" : "Work Date:"
}
},
{
"observation" : {
"subBounds" : [
{
"offset" : [
0,
10
],
"bounds" : {
"y2" : 0.79251246073001735,
"x1" : 0.18664027946636474,
"y1" : 0.81533067647150159,
"x2" : 0.28828134807988426
},
"text" : "Agreement:"
}
],
"text" : "Agreement:",
"confidence" : 1,
"bounds" : {
"y2" : 0.7925124601617406,
"x1" : 0.1866402807620976,
"y1" : 0.81533067703977824,
"x2" : 0.2882813467841514
}
}
},
{
"observation" : {
"subBounds" : [
{
"offset" : [
0,
10
],
"bounds" : {
"x2" : 0.51567398056426339,
"y2" : 0.84135472370766484,
"x1" : 0.42476489147335422,
"y1" : 0.86096256645276292
},
"text" : "06\/18\/2019"
}
],
"confidence" : 1,
"bounds" : {
"x1" : 0.42476489056426336,
"x2" : 0.5156739814733543,
"y1" : 0.86096256664884141,
"y2" : 0.84135472351158647
},
"text" : "06\/18\/2019"
}
},
{
"observation" : {
"confidence" : 1,
"bounds" : {
"x2" : 0.75391849447492165,
"y2" : 0.83778966112596553,
"x1" : 0.55172413710815049,
"y1" : 0.86096256665181226
},
"text" : "(Entry Date: 07\/22\/2019)",
"subBounds" : [
{
"text" : "(Entry",
"offset" : [
0,
6
],
"bounds" : {
"x2" : 0.60521159482226472,
"y2" : 0.83778966131907306,
"x1" : 0.55172413879310345,
"y1" : 0.86096256645870473
}
},
{
"offset" : [
7,
12
],
"bounds" : {
"y1" : 0.86096256684491979,
"x2" : 0.65360500820004452,
"x1" : 0.60775861657899,
"y2" : 0.83778966131907306
},
"text" : "Date:"
},
{
"text" : "07\/22\/2019)",
"offset" : [
13,
24
],
"bounds" : {
"x1" : 0.6561520299567698,
"y1" : 0.86096256684491979,
"x2" : 0.7539184927899687,
"y2" : 0.83778966131907306
}
}
]
}
},
{
"observation" : {
"subBounds" : [
{
"offset" : [
0,
3
],
"text" : "(b)",
"bounds" : {
"y1" : 0.82709447350713017,
"y2" : 0.77540106951871657,
"x1" : 0.39655172568573666,
"x2" : 0.4476880877742947
}
},
{
"text" : "(6),",
"offset" : [
4,
8
],
"bounds" : {
"x2" : 0.52155172413793105,
"y2" : 0.77540106951871657,
"x1" : 0.45336990595611287,
"y1" : 0.82709447415329773
}
},
{
"text" : "(b)",
"offset" : [
9,
12
],
"bounds" : {
"x2" : 0.58405172413793105,
"y1" : 0.82709447415329773,
"x1" : 0.52723354231974917,
"y2" : 0.77540106951871657
}
},
{
"offset" : [
13,
16
],
"bounds" : {
"x2" : 0.64655172413793105,
"y1" : 0.82709447415329773,
"y2" : 0.77540106951871657,
"x1" : 0.58973354231974917
},
"text" : "(3)"
},
{
"text" : "(A)",
"offset" : [
17,
20
],
"bounds" : {
"y2" : 0.77540106951871657,
"x2" : 0.71003134559169279,
"x1" : 0.65223354231974917,
"y1" : 0.82709447415329773
}
}
],
"confidence" : 0.5,
"text" : "(b) (6), (b) (3) (A)",
"bounds" : {
"x2" : 0.71003134755094044,
"x1" : 0.396551723726489,
"y2" : 0.77540106919563279,
"y1" : 0.82709447383021395
}
}
},
{
"observation" : {
"bounds" : {
"y2" : 0.74674913099502405,
"x1" : 0.18811629417125428,
"y1" : 0.77018491574121795,
"x2" : 0.27113135340022909
},
"confidence" : 1,
"subBounds" : [
{
"text" : "Property:",
"bounds" : {
"x1" : 0.18811629376148015,
"y2" : 0.74674913140219401,
"y1" : 0.77018491533404809,
"x2" : 0.27113135381000325
},
"offset" : [
0,
9
]
}
],
"text" : "Property:"
}
},
{
"observation" : {
"bounds" : {
"y2" : 0.70231729026292333,
"y1" : 0.72549019578877005,
"x2" : 0.38401253832288407,
"x1" : 0.18652037531347965
},
"confidence" : 1,
"subBounds" : [
{
"offset" : [
0,
9
],
"text" : "Activity:",
"bounds" : {
"x1" : 0.18652037778213162,
"x2" : 0.26293102887730613,
"y2" : 0.70231729055258474,
"y1" : 0.72549019549910865
}
},
{
"text" : "232167123126",
"offset" : [
10,
22
],
"bounds" : {
"x1" : 0.2654780506340314,
"y1" : 0.72549019607843135,
"x2" : 0.38401253585423206,
"y2" : 0.70231729055258474
}
}
],
"text" : "Activity: 232167123126"
}
},
{
"observation" : {
"text" : "Activity",
"bounds" : {
"x1" : 0.18808777498432602,
"x2" : 0.25705329222570533,
"y1" : 0.67914438516934039,
"y2" : 0.65775401083778973
},
"confidence" : 1,
"subBounds" : [
{
"bounds" : {
"y2" : 0.6577540110516934,
"y1" : 0.67914438495543672,
"x1" : 0.18808777567398116,
"x2" : 0.25705329153605017
},
"text" : "Activity",
"offset" : [
0,
8
]
}
]
}
},
{
"observation" : {
"text" : "Measurements:",
"subBounds" : [
{
"text" : "Measurements:",
"bounds" : {
"x2" : 0.31818181603970747,
"y2" : 0.63458110549613789,
"x1" : 0.18652037622779519,
"y1" : 0.65418894830659535
},
"offset" : [
0,
13
]
}
],
"confidence" : 1,
"bounds" : {
"x1" : 0.18652037513061656,
"x2" : 0.31818181713688615,
"y1" : 0.65418894846999409,
"y2" : 0.63458110533273915
}
}
},
{
"observation" : {
"text" : "Conflict",
"bounds" : {
"x1" : 0.18804212707963844,
"x2" : 0.26023373619021656,
"y1" : 0.59872947198931992,
"y2" : 0.57774111643092119
},
"subBounds" : [
{
"text" : "Conflict",
"offset" : [
0,
8
],
"bounds" : {
"x1" : 0.18804212830892908,
"y1" : 0.59872947233735208,
"x2" : 0.26023373496092594,
"y2" : 0.57774111608288903
}
}
],
"confidence" : 1
}
},
{
"observation" : {
"text" : "& Loss:",
"confidence" : 1,
"bounds" : {
"y1" : 0.57230287497679577,
"x1" : 0.18811501012243048,
"x2" : 0.25545865919438998,
"y2" : 0.55069177603962349
},
"subBounds" : [
{
"text" : "&",
"offset" : [
0,
1
],
"bounds" : {
"x1" : 0.18811500922711974,
"y1" : 0.57230287532172841,
"y2" : 0.55085867016349144,
"x2" : 0.20451843678489462
}
},
{
"offset" : [
2,
7
],
"text" : "Loss:",
"bounds" : {
"y1" : 0.57224125242535107,
"x1" : 0.20692370952325456,
"x2" : 0.25545866008970075,
"y2" : 0.55069177654466794
}
}
]
}
},
{
"observation" : {
"text" : "Components",
"confidence" : 1,
"subBounds" : [
{
"text" : "Components",
"offset" : [
0,
10
],
"bounds" : {
"y2" : 0.43950755706575395,
"x1" : 0.18821297698626274,
"y1" : 0.46245322762887275,
"x2" : 0.29454564203566597
}
}
],
"bounds" : {
"x2" : 0.29454564217437784,
"x1" : 0.18821297684755084,
"y1" : 0.46245322782008669,
"y2" : 0.43950755687454002
}
}
},
{
"observation" : {
"subBounds" : [
{
"offset" : [
0,
1
],
"text" : "&",
"bounds" : {
"y2" : 0.41354723707664887,
"x1" : 0.18808777622779516,
"y1" : 0.43672014221628053,
"x2" : 0.2059169265917476
}
},
{
"offset" : [
2,
15
],
"bounds" : {
"y1" : 0.43672014260249559,
"x2" : 0.33385579887669803,
"y2" : 0.41354723707664887,
"x1" : 0.20846394834847287
},
"text" : "Take\/Samples:"
}
],
"confidence" : 1,
"text" : "& Take\/Samples:",
"bounds" : {
"x1" : 0.18808777501306162,
"y1" : 0.43672014240938806,
"y2" : 0.41354723688354134,
"x2" : 0.33385580009143151
}
}
},
{
"observation" : {
"subBounds" : [
{
"offset" : [
0,
8
],
"text" : "Remarks:",
"bounds" : {
"y2" : 0.30783199789780724,
"x2" : 0.27106815845026078,
"y1" : 0.32674910715079897,
"x1" : 0.18661209063933379
}
}
],
"text" : "Remarks:",
"bounds" : {
"x2" : 0.27106815660011696,
"x1" : 0.1866120924894776,
"y1" : 0.3267491071440769,
"y2" : 0.30783199790452931
},
"confidence" : 1
}
},
{
"observation" : {
"confidence" : 1,
"bounds" : {
"y2" : 0.21595561891730042,
"x1" : 0.18802405371508266,
"y1" : 0.23680730495637337,
"x2" : 0.25711701054505059
},
"text" : "Project:",
"subBounds" : [
{
"text" : "Project:",
"offset" : [
0,
8
],
"bounds" : {
"y1" : 0.23680730463596333,
"x2" : 0.25711700959380346,
"y2" : 0.21595561923771056,
"x1" : 0.18802405466632979
}
}
]
}
},
{
"observation" : {
"confidence" : 0.5,
"subBounds" : [
{
"bounds" : {
"x1" : 0.42476489028213166,
"y1" : 0.72727272704354462,
"x2" : 0.48334639068681245,
"y2" : 0.70409982174688057
},
"offset" : [
0,
5
],
"text" : "FIELD"
},
{
"text" : "WRK",
"offset" : [
6,
9
],
"bounds" : {
"x1" : 0.48589341244353768,
"x2" : 0.5317398040645922,
"y1" : 0.72727272727272729,
"y2" : 0.70409982174688057
}
},
{
"text" : "(PERFORMED)",
"bounds" : {
"x1" : 0.53428682582131748,
"x2" : 0.67084639146887592,
"y2" : 0.70409982184873954,
"y1" : 0.72727272727272729
},
"offset" : [
10,
21
]
}
],
"text" : "FIELD WRK (PERFORMED)",
"bounds" : {
"x2" : 0.670846393226601,
"y1" : 0.72727272720906555,
"y2" : 0.70409982168321883,
"x1" : 0.42476488852440664
}
}
},
{
"observation" : {
"text" : "6 HOURS",
"confidence" : 1,
"bounds" : {
"y2" : 0.64884135481283423,
"x1" : 0.42476489075235113,
"y1" : 0.66844919795008917,
"x2" : 0.50940438918495301
},
"subBounds" : [
{
"offset" : [
0,
1
],
"bounds" : {
"x1" : 0.4247648912225705,
"y1" : 0.66844919784115664,
"x2" : 0.43769592476489028,
"y2" : 0.64884135472370774
},
"text" : "6"
},
{
"bounds" : {
"y1" : 0.66844919786096257,
"x2" : 0.50940438871473359,
"x1" : 0.43985109717868337,
"y2" : 0.64884135492176664
},
"text" : "HOURS",
"offset" : [
2,
7
]
}
]
}
},
{
"observation" : {
"confidence" : 1,
"subBounds" : [
{
"offset" : [
0,
6
],
"text" : "BEARS.",
"bounds" : {
"x1" : 0.42633228840125392,
"y1" : 0.60962566822001529,
"y2" : 0.58823529411764708,
"x2" : 0.4945141065830721
}
},
{
"offset" : [
7,
12
],
"bounds" : {
"y1" : 0.60962566844919786,
"x2" : 0.5650470219435737,
"y2" : 0.58823529411764708,
"x1" : 0.49686520376175547
},
"text" : "BLACK"
},
{
"bounds" : {
"y1" : 0.60962566844919786,
"x1" : 0.56739811912225702,
"x2" : 0.63087774294670851,
"y2" : 0.58823529411764708
},
"text" : "damage",
"offset" : [
13,
19
]
},
{
"text" : "threat",
"offset" : [
20,
26
],
"bounds" : {
"y1" : 0.60962566844919786,
"x2" : 0.68260188087774298,
"y2" : 0.58823529411764708,
"x1" : 0.63322884012539182
}
},
{
"offset" : [
27,
29
],
"bounds" : {
"y2" : 0.58823529411764708,
"y1" : 0.60962566844919786,
"x2" : 0.70376175548589337,
"x1" : 0.6849529780564263
},
"text" : "of"
},
{
"text" : "EGGS",
"offset" : [
30,
34
],
"bounds" : {
"x2" : 0.76175548110165703,
"y2" : 0.58823529419404119,
"y1" : 0.60962566844919786,
"x1" : 0.7061128526645768
}
}
],
"bounds" : {
"y2" : 0.58823529404125285,
"x2" : 0.76175548349753708,
"x1" : 0.42633228600537404,
"y1" : 0.60962566837280363
},
"text" : "BEARS. BLACK damage threat of EGGS"
}
},
{
"observation" : {
"confidence" : 1,
"subBounds" : [
{
"text" : "BEARS,",
"bounds" : {
"x2" : 0.4945141065830721,
"x1" : 0.42633229334975364,
"y2" : 0.56149732620320858,
"y1" : 0.58288770040743576
},
"offset" : [
0,
6
]
},
{
"offset" : [
7,
12
],
"bounds" : {
"y1" : 0.58288770053475936,
"x1" : 0.49686520376175547,
"x2" : 0.5650470219435737,
"y2" : 0.56149732620320858
},
"text" : "BLACK"
},
{
"text" : "damage",
"offset" : [
13,
19
],
"bounds" : {
"x2" : 0.63087774294670851,
"x1" : 0.56739811912225702,
"y1" : 0.58288770053475936,
"y2" : 0.56149732620320858
}
},
{
"offset" : [
20,
26
],
"text" : "threat",
"bounds" : {
"y2" : 0.56149732620320858,
"x1" : 0.63322884012539182,
"y1" : 0.58288770053475936,
"x2" : 0.68260188087774298
}
},
{
"text" : "of",
"bounds" : {
"x2" : 0.70376175548589337,
"y2" : 0.56149732620320858,
"x1" : 0.6849529780564263,
"y1" : 0.58288770053475936
},
"offset" : [
27,
29
]
},
{
"bounds" : {
"x1" : 0.7061128526645768,
"y1" : 0.58288770053475936,
"x2" : 0.77272727272727271,
"y2" : 0.56149732638146166
},
"offset" : [
30,
35
],
"text" : "FOWL."
}
],
"bounds" : {
"y1" : 0.5828877005602241,
"x2" : 0.77272727520152262,
"x1" : 0.42633229087550378,
"y2" : 0.56149732622867332
},
"text" : "BEARS, BLACK damage threat of FOWL."
}
},
{
"observation" : {
"text" : "CHICKENS (OTHER)",
"confidence" : 1,
"subBounds" : [
{
"text" : "CHICKENS",
"offset" : [
0,
8
],
"bounds" : {
"x2" : 0.5329153605015674,
"x1" : 0.42476489028213166,
"y1" : 0.55793226352941172,
"y2" : 0.53654188948306603
}
},
{
"offset" : [
9,
16
],
"text" : "(OTHER)",
"bounds" : {
"y1" : 0.55793226381461669,
"x1" : 0.53526645768025083,
"x2" : 0.62068965125391862,
"y2" : 0.5365418896256684
}
}
],
"bounds" : {
"y1" : 0.5579322637433155,
"x2" : 0.62068965321316616,
"x1" : 0.42476488832288406,
"y2" : 0.53654188941176473
}
}
},
{
"observation" : {
"subBounds" : [
{
"offset" : [
0,
4
],
"bounds" : {
"y2" : 0.49019607843137258,
"y1" : 0.51336898349376114,
"x1" : 0.42633228912225701,
"x2" : 0.46963165826558317
},
"text" : "Cmp:"
},
{
"text" : "PHYSICAL",
"bounds" : {
"x1" : 0.47217868002230845,
"y2" : 0.49019607843137258,
"x2" : 0.57151252853459322,
"y1" : 0.5133689839572193
},
"offset" : [
5,
13
]
},
{
"offset" : [
14,
21
],
"text" : "ACTIONS",
"bounds" : {
"x1" : 0.5740595502913185,
"y2" : 0.49019607843137258,
"y1" : 0.5133689839572193,
"x2" : 0.660658290019977
}
},
{
"bounds" : {
"y1" : 0.5133689839572193,
"y2" : 0.49019607843137258,
"x2" : 0.80094043210031363,
"x1" : 0.66320531177670228
},
"offset" : [
22,
34
],
"text" : "(HAND\/VOICE)"
}
],
"bounds" : {
"y1" : 0.51336898372549022,
"x2" : 0.80094043584639507,
"x1" : 0.42633228537617562,
"y2" : 0.4901960781996435
},
"text" : "Cmp: PHYSICAL ACTIONS (HAND\/VOICE)",
"confidence" : 1
}
},
{
"observation" : {
"text" : "APPLIED\/USED 1 IN",
"bounds" : {
"x1" : 0.4263322896663681,
"x2" : 0.60344827712718319,
"y1" : 0.48663101614463966,
"y2" : 0.46880570420168066
},
"subBounds" : [
{
"offset" : [
0,
12
],
"bounds" : {
"x1" : 0.42633229093148228,
"x2" : 0.56543887147335425,
"y1" : 0.48663101601731606,
"y2" : 0.4688057040998217
},
"text" : "APPLIED\/USED"
},
{
"text" : "1",
"offset" : [
13,
14
],
"bounds" : {
"x1" : 0.56739811912225702,
"y1" : 0.4866310160427807,
"x2" : 0.57915360501567403,
"y2" : 0.4688057040998217
}
},
{
"bounds" : {
"x2" : 0.60344827586206895,
"y2" : 0.46880570432900426,
"x1" : 0.5811128526645768,
"y1" : 0.4866310160427807
},
"text" : "IN",
"offset" : [
15,
17
]
}
],
"confidence" : 1
}
},
{
"observation" : {
"bounds" : {
"x2" : 0.85109717625391856,
"x1" : 0.48432601637931039,
"y2" : 0.44028520481283429,
"y1" : 0.46167557914438506
},
"subBounds" : [
{
"bounds" : {
"y1" : 0.46167557896613198,
"x1" : 0.48432601943573667,
"x2" : 0.52429467084639503,
"y2" : 0.44028520499108736
},
"offset" : [
0,
3
],
"text" : "Cmp"
},
{
"bounds" : {
"x2" : 0.57366771159874608,
"x1" : 0.52664576802507834,
"y2" : 0.44028520499108736,
"y1" : 0.46167557932263814
},
"text" : "Take:",
"offset" : [
4,
9
]
},
{
"text" : "1",
"bounds" : {
"x1" : 0.5760188087774295,
"y2" : 0.44028520499108736,
"y1" : 0.46167557932263814,
"x2" : 0.58777429467084641
},
"offset" : [
10,
11
]
},
{
"bounds" : {
"y2" : 0.44028520499108736,
"y1" : 0.46167557932263814,
"x1" : 0.59012539184952983,
"x2" : 0.61833855799373039
},
"offset" : [
12,
14
],
"text" : "EA"
},
{
"bounds" : {
"y1" : 0.46167557932263814,
"x1" : 0.62068965517241381,
"y2" : 0.44028520499108736,
"x2" : 0.74294670846394983
},
"text" : "BEARBLACK",
"offset" : [
15,
24
]
},
{
"bounds" : {
"x1" : 0.74529780564263326,
"y1" : 0.46167557932263814,
"x2" : 0.85109717319749223,
"y2" : 0.44028520499108736
},
"text" : "DISPERSED",
"offset" : [
25,
34
]
}
],
"text" : "Cmp Take: 1 EA BEARBLACK DISPERSED",
"confidence" : 0.5
}
},
{
"observation" : {
"confidence" : 0.30000001192092896,
"bounds" : {
"y1" : 0.43493761126814368,
"x2" : 0.54702194394312587,
"y2" : 0.41532976813088873,
"x1" : 0.48275862105911332
},
"subBounds" : [
{
"bounds" : {
"y1" : 0.4349376111280876,
"x2" : 0.50862068965517238,
"y2" : 0.4153297682709447,
"x1" : 0.48275862151813703
},
"text" : "Int",
"offset" : [
0,
3
]
},
{
"bounds" : {
"y1" : 0.43493761140819964,
"y2" : 0.4153297682709447,
"x2" : 0.54702194348410205,
"x1" : 0.51077586206896552
},
"text" : "Trgt",
"offset" : [
4,
8
]
}
],
"text" : "Int Trgt"
}
},
{
"observation" : {
"bounds" : {
"x1" : 0.42476488655172423,
"y2" : 0.38859180024955453,
"x2" : 0.79780563890282141,
"y1" : 0.41354723696969709
},
"confidence" : 1,
"text" : "Cmp: BARRIERS, FENCING (PERMANENT",
"subBounds" : [
{
"bounds" : {
"x2" : 0.46865203462798022,
"x1" : 0.42476489028213166,
"y1" : 0.4135472367201426,
"y2" : 0.3885918003565062
},
"offset" : [
0,
4
],
"text" : "Cmp:"
},
{
"text" : "BARRIERS,",
"offset" : [
5,
14
],
"bounds" : {
"x1" : 0.47139498114959572,
"y2" : 0.3885918003565062,
"y1" : 0.41354723707664887,
"x2" : 0.57562694897098599
}
},
{
"text" : "FENCING",
"offset" : [
15,
22
],
"bounds" : {
"y2" : 0.3885918003565062,
"x2" : 0.66614418418429866,
"y1" : 0.41354723707664887,
"x1" : 0.57836989549260154
}
},
{
"offset" : [
23,
33
],
"bounds" : {
"y2" : 0.3885918004991088,
"x1" : 0.6688871307059141,
"y1" : 0.41354723707664887,
"x2" : 0.79780563517241398
},
"text" : "(PERMANENT"
}
]
}
},
{
"observation" : {
"bounds" : {
"x1" : 0.4247648866300941,
"x2" : 0.78996864838558001,
"y2" : 0.36541889474153288,
"y1" : 0.38502673787878783
},
"confidence" : 1,
"subBounds" : [
{
"offset" : [
0,
10
],
"bounds" : {
"x1" : 0.42476489028213166,
"y2" : 0.36541889483065959,
"y1" : 0.38502673768270945,
"x2" : 0.5540752351097179
},
"text" : "ELECTRICAL"
},
{
"offset" : [
11,
23
],
"bounds" : {
"x1" : 0.55623040752351094,
"x2" : 0.69631661442006265,
"y1" : 0.38502673796791442,
"y2" : 0.36541889483065959
},
"text" : "APPLIED\/USED"
},
{
"text" : "12",
"offset" : [
24,
26
],
"bounds" : {
"y2" : 0.36541889483065959,
"x1" : 0.6984717868338558,
"y1" : 0.38502673796791442,
"x2" : 0.72002351097178685
}
},
{
"offset" : [
27,
30
],
"text" : "LIN",
"bounds" : {
"x1" : 0.72217868338557989,
"y1" : 0.38502673796791442,
"x2" : 0.75881661442006265,
"y2" : 0.36541889483065959
}
},
{
"offset" : [
31,
33
],
"text" : "YD",
"bounds" : {
"x1" : 0.7609717868338558,
"y1" : 0.38502673796791442,
"x2" : 0.78996864473354245,
"y2" : 0.36541889493761137
}
}
],
"text" : "ELECTRICAL APPLIED\/USED 12 LIN YD"
}
},
{
"observation" : {
"subBounds" : [
{
"text" : "YOUNG",
"offset" : [
0,
5
],
"bounds" : {
"x2" : 0.49745297805642635,
"y1" : 0.34046345809071099,
"x1" : 0.42633229292929287,
"y2" : 0.32085561497326198
}
},
{
"offset" : [
6,
11
],
"bounds" : {
"x1" : 0.49960815047021945,
"y1" : 0.34046345811051693,
"y2" : 0.32085561497326198,
"x2" : 0.56857366771159878
},
"text" : "BLACK"
},
{
"text" : "BEAR",
"offset" : [
12,
16
],
"bounds" : {
"x2" : 0.62676332288401249,
"y1" : 0.34046345811051693,
"y2" : 0.32085561497326198,
"x1" : 0.57072884012539182
}
},
{
"offset" : [
17,
21
],
"text" : "CAME",
"bounds" : {
"x1" : 0.62891849529780564,
"y1" : 0.34046345811051693,
"y2" : 0.32085561497326198,
"x2" : 0.68926332288401249
}
},
{
"text" : "INTO",
"offset" : [
22,
26
],
"bounds" : {
"y1" : 0.34046345811051693,
"x2" : 0.74098746081504707,
"y2" : 0.32085561497326198,
"x1" : 0.69141849529780564
}
},
{
"bounds" : {
"x1" : 0.74314263322884011,
"y1" : 0.34046345811051693,
"y2" : 0.32085561497326198,
"x2" : 0.76469435736677116
},
"text" : "50",
"offset" : [
27,
29
]
},
{
"offset" : [
30,
35
],
"bounds" : {
"y1" : 0.34046345811051693,
"x2" : 0.83385579937304077,
"x1" : 0.76684952978056431,
"y2" : 0.320855615171321
},
"text" : "YARDS"
}
],
"text" : "YOUNG BLACK BEAR CAME INTO 50 YARDS",
"confidence" : 1,
"bounds" : {
"x1" : 0.42633229066527339,
"y1" : 0.34046345819964341,
"y2" : 0.32085561506238858,
"x2" : 0.8338558016370603
}
}
},
{
"observation" : {
"subBounds" : [
{
"bounds" : {
"x1" : 0.42633229851097154,
"y1" : 0.31372548966131908,
"x2" : 0.4898119122257053,
"y2" : 0.29233511586452765
},
"text" : "WHILE",
"offset" : [
0,
5
]
},
{
"text" : "I",
"bounds" : {
"y1" : 0.31372549019607843,
"y2" : 0.29233511586452765,
"x1" : 0.49216300940438873,
"x2" : 0.50156739811912221
},
"offset" : [
6,
7
]
},
{
"offset" : [
8,
11
],
"bounds" : {
"x1" : 0.50391849529780564,
"y2" : 0.29233511586452765,
"y1" : 0.31372549019607843,
"x2" : 0.55094043887147337
},
"text" : "WAS"
},
{
"text" : "WORKING.",
"bounds" : {
"x1" : 0.55329153605015668,
"y1" : 0.31372549019607843,
"y2" : 0.29233511586452765,
"x2" : 0.65438871473354232
},
"offset" : [
12,
20
]
},
{
"bounds" : {
"x1" : 0.65673981191222575,
"y1" : 0.31372549019607843,
"x2" : 0.66614420062695923,
"y2" : 0.29233511586452765
},
"offset" : [
21,
22
],
"text" : "I"
},
{
"offset" : [
23,
28
],
"text" : "HAZED",
"bounds" : {
"x1" : 0.66849529780564265,
"y1" : 0.31372549019607843,
"x2" : 0.73667711598746077,
"y2" : 0.29233511586452765
}
},
{
"bounds" : {
"y2" : 0.29233511586452765,
"x2" : 0.7601880877742947,
"x1" : 0.7390282131661442,
"y1" : 0.31372549019607843
},
"text" : "IT",
"offset" : [
29,
31
]
},
{
"bounds" : {
"y2" : 0.29233511586452765,
"y1" : 0.31372549019607843,
"x2" : 0.83228840121473358,
"x1" : 0.76253918495297801
},
"text" : "AWAY.",
"offset" : [
32,
37
]
}
],
"text" : "WHILE I WAS WORKING. I HAZED IT AWAY.",
"confidence" : 1,
"bounds" : {
"y2" : 0.29233511559714798,
"x1" : 0.42633229343652024,
"x2" : 0.83228840628918477,
"y1" : 0.31372548992869875
}
}
},
{
"observation" : {
"text" : "OPERATIONAL NONLETHAL PREDATION",
"confidence" : 1,
"bounds" : {
"x2" : 0.80721002818704279,
"y1" : 0.26737967905525839,
"x1" : 0.42789968335945666,
"y2" : 0.24598930472370761
},
"subBounds" : [
{
"text" : "OPERATIONAL",
"bounds" : {
"x1" : 0.42789968652037619,
"x2" : 0.56661442006269591,
"y2" : 0.24598930481283421,
"y1" : 0.26737967887700531
},
"offset" : [
0,
11
]
},
{
"text" : "NONLETHAL",
"offset" : [
12,
21
],
"bounds" : {
"x2" : 0.69122257053291536,
"y1" : 0.26737967914438499,
"y2" : 0.24598930481283421,
"x1" : 0.56896551724137934
}
},
{
"text" : "PREDATION",
"offset" : [
22,
31
],
"bounds" : {
"x1" : 0.69357366771159878,
"x2" : 0.80721002502612338,
"y2" : 0.2459893049019608,
"y1" : 0.26737967914438499
}
}
]
}
},
{
"observation" : {
"confidence" : 1,
"text" : "DAMAGE MANAGEMENT",
"subBounds" : [
{
"text" : "DAMAGE",
"offset" : [
0,
6
],
"bounds" : {
"y2" : 0.22103386809269165,
"y1" : 0.23885917998472117,
"x2" : 0.51763322884012541,
"x1" : 0.42946708802060007
}
},
{
"bounds" : {
"y1" : 0.23885918003565065,
"y2" : 0.22103386829640947,
"x2" : 0.66614420062695923,
"x1" : 0.51959247648902818
},
"offset" : [
7,
17
],
"text" : "MANAGEMENT"
}
],
"bounds" : {
"y1" : 0.23885918011204488,
"x2" : 0.66614420231751004,
"y2" : 0.22103386816908588,
"x1" : 0.42946708633004926
}
}
},
{
"observation" : {
"confidence" : 1,
"bounds" : {
"x2" : 0.38871473156739816,
"y2" : 0.13190730832442066,
"x1" : 0.19122256855799377,
"y1" : 0.15508021385026738
},
"subBounds" : [
{
"offset" : [
0,
8
],
"text" : "FlaggedX",
"bounds" : {
"x1" : 0.19122257053291536,
"y1" : 0.1550802136185383,
"x2" : 0.27272726674812342,
"y2" : 0.13190730837789666
}
},
{
"text" : "by:",
"bounds" : {
"y1" : 0.15508021390374327,
"x1" : 0.27527428850484864,
"x2" : 0.3032915278288264,
"y2" : 0.13190730837789666
},
"offset" : [
9,
12
]
},
{
"offset" : [
13,
22
],
"bounds" : {
"x2" : 0.38871472959247655,
"x1" : 0.30583854958555168,
"y1" : 0.15508021390374327,
"y2" : 0.13190730855614974
},
"text" : "Alexandra"
}
],
"text" : "FlaggedX by: Alexandra"
}
},
{
"observation" : {
"text" : "Few on 07\/18\/19",
"confidence" : 1,
"bounds" : {
"x2" : 0.32445141098484848,
"y2" : 0.11051693420974451,
"y1" : 0.13012477734699945,
"x1" : 0.19122257085945663
},
"subBounds" : [
{
"text" : "Few",
"bounds" : {
"x2" : 0.22786050156739812,
"y1" : 0.13012477718360071,
"y2" : 0.11051693404634577,
"x1" : 0.19122257196969694
},
"offset" : [
0,
3
]
},
{
"text" : "on",
"offset" : [
4,
6
],
"bounds" : {
"y2" : 0.11051693404634577,
"y1" : 0.13012477718360071,
"x1" : 0.23001567398119122,
"x2" : 0.25156739811912227
}
},
{
"text" : "07\/18\/19",
"bounds" : {
"y1" : 0.13012477718360071,
"x1" : 0.25372257053291536,
"y2" : 0.11051693437314314,
"x2" : 0.32445140987460813
},
"offset" : [
7,
15
]
}
]
}
},
{
"observation" : {
"subBounds" : [
{
"offset" : [
0,
9
],
"bounds" : {
"y2" : 0.074866310160427774,
"y1" : 0.10516934046345816,
"x2" : 0.26782915360501569,
"x1" : 0.19122257339341686
},
"text" : "Corrected"
},
{
"text" : "by",
"offset" : [
10,
12
],
"bounds" : {
"y1" : 0.10516934046345816,
"x2" : 0.28781347962382448,
"y2" : 0.074866310160427774,
"x1" : 0.2711598746081505
}
},
{
"text" : "°",
"offset" : [
13,
14
],
"bounds" : {
"x2" : 0.3077978056426332,
"y1" : 0.10516934046345816,
"x1" : 0.29114420062695923,
"y2" : 0.074866310160427774
}
},
{
"text" : "0\/",
"offset" : [
15,
17
],
"bounds" : {
"y1" : 0.10516934046345816,
"x1" : 0.31112852664576801,
"x2" : 0.33777429467084641,
"y2" : 0.074866310160427774
}
},
{
"text" : "a",
"bounds" : {
"y2" : 0.074866310160427774,
"y1" : 0.10516934046345816,
"x2" : 0.34443573667711597,
"x1" : 0.34110501567398122
},
"offset" : [
18,
19
]
},
{
"text" : "va",
"offset" : [
20,
22
],
"bounds" : {
"y2" : 0.074866310918003443,
"x1" : 0.34776645768025077,
"y1" : 0.10516934046345816,
"x2" : 0.3526645756269593
}
}
],
"bounds" : {
"x2" : 0.35266457764498432,
"y2" : 0.074866310539215775,
"y1" : 0.10516934084224594,
"x1" : 0.19122257137539184
},
"confidence" : 0.30000001192092896,
"text" : "Corrected by ° 0\/ a va"
}
},
{
"observation" : {
"confidence" : 1,
"bounds" : {
"y1" : 0.074769308303107351,
"x2" : 0.33387747120885075,
"y2" : 0.053572937846799396,
"x1" : 0.26016641675669006
},
"text" : "07\/19\/19",
"subBounds" : [
{
"text" : "07\/19\/19",
"offset" : [
0,
8
],
"bounds" : {
"y1" : 0.074769308397636292,
"y2" : 0.053572937752270122,
"x2" : 0.33387747148615882,
"x1" : 0.26016641647938205
}
}
]
}
},
{
"observation" : {
"text" : "Please add the component hazing to this work task and",
"confidence" : 1,
"bounds" : {
"x1" : 0.41692789699255495,
"y1" : 0.15508021384803916,
"x2" : 0.84796237975117572,
"y2" : 0.13190730832219244
},
"subBounds" : [
{
"text" : "Please",
"offset" : [
0,
6
],
"bounds" : {
"x2" : 0.46786833482102541,
"x1" : 0.41692789968652039,
"y1" : 0.15508021370320857,
"y2" : 0.13190730837789666
}
},
{
"offset" : [
7,
10
],
"bounds" : {
"y2" : 0.13190730837789666,
"x1" : 0.47041535657775063,
"y1" : 0.15508021390374327,
"x2" : 0.50097961765845367
},
"text" : "add"
},
{
"offset" : [
11,
14
],
"text" : "the",
"bounds" : {
"y1" : 0.15508021390374327,
"y2" : 0.13190730837789666,
"x1" : 0.50352663941517894,
"x2" : 0.52899685698243148
}
},
{
"bounds" : {
"y1" : 0.15508021390374327,
"x2" : 0.6232366619812657,
"x1" : 0.53154387873915665,
"y2" : 0.13190730837789666
},
"offset" : [
15,
24
],
"text" : "component"
},
{
"bounds" : {
"y2" : 0.13190730837789666,
"x2" : 0.67927114062922123,
"y1" : 0.15508021390374327,
"x1" : 0.62578368373799098
},
"offset" : [
25,
31
],
"text" : "hazing"
},
{
"text" : "to",
"offset" : [
32,
34
],
"bounds" : {
"y2" : 0.13190730837789666,
"x1" : 0.6818181623859465,
"y1" : 0.15508021390374327,
"x2" : 0.69964731468302332
}
},
{
"offset" : [
35,
39
],
"bounds" : {
"x1" : 0.70219433643974849,
"y1" : 0.15508021390374327,
"x2" : 0.73275859752045158,
"y2" : 0.13190730837789666
},
"text" : "this"
},
{
"offset" : [
40,
44
],
"bounds" : {
"y1" : 0.15508021390374327,
"x2" : 0.77860498914150611,
"x1" : 0.73530561927717686,
"y2" : 0.13190730837789666
},
"text" : "work"
},
{
"offset" : [
45,
49
],
"bounds" : {
"y2" : 0.13190730837789666,
"x1" : 0.78115201089823139,
"x2" : 0.81426329373565964,
"y1" : 0.15508021390374327
},
"text" : "task"
},
{
"offset" : [
50,
53
],
"bounds" : {
"y2" : 0.13190730846702314,
"x2" : 0.84796237705721011,
"x1" : 0.81681031549238481,
"y1" : 0.15508021390374327
},
"text" : "and"
}
]
}
},
{
"observation" : {
"subBounds" : [
{
"offset" : [
0,
8
],
"bounds" : {
"y1" : 0.13198302617439839,
"x1" : 0.41537440775719958,
"y2" : 0.10641790388710937,
"x2" : 0.4782731557767686
},
"text" : "indicate"
},
{
"bounds" : {
"x1" : 0.48120274996350854,
"x2" : 0.51941586255074168,
"y1" : 0.13134588885603504,
"y2" : 0.10601969325351657
},
"text" : "how",
"offset" : [
9,
12
]
},
{
"offset" : [
13,
16
],
"bounds" : {
"y2" : 0.10570112474664217,
"x2" : 0.55233002796992026,
"x1" : 0.52234545673748167,
"y1" : 0.13094767822244213
},
"text" : "you"
},
{
"bounds" : {
"x2" : 0.60170127609868795,
"y2" : 0.10522327198633075,
"y1" : 0.13062910971556796,
"x1" : 0.55525962215666014
},
"text" : "hazed",
"offset" : [
17,
22
]
},
{
"offset" : [
23,
26
],
"bounds" : {
"x1" : 0.60463087028542783,
"y2" : 0.1049312508550293,
"x2" : 0.63187259439960153,
"y1" : 0.13015125695525642
},
"text" : "the"
},
{
"text" : "bear",
"offset" : [
27,
31
],
"bounds" : {
"y1" : 0.12985923582395498,
"x2" : 0.67027245405530977,
"y2" : 0.10455958759700934,
"x1" : 0.63480218858634141
}
},
{
"bounds" : {
"y2" : 0.10405518746112496,
"x1" : 0.67320204824204966,
"x2" : 0.72238654930234236,
"y1" : 0.12948757256593491
},
"offset" : [
32,
37
],
"text" : "away."
},
{
"bounds" : {
"y1" : 0.12898317243005064,
"x1" : 0.72531614348908224,
"x2" : 0.79148995940014022,
"y2" : 0.1025756349286161
},
"offset" : [
38,
45
],
"text" : "Thanks!"
}
],
"confidence" : 1,
"bounds" : {
"y1" : 0.13198302676332385,
"x2" : 0.79148995450205717,
"y2" : 0.10257563433969075,
"x1" : 0.41537441265528263
},
"text" : "indicate how you hazed the bear away. Thanks!"
}
},
{
"observation" : {
"bounds" : {
"x1" : 0.41692790228578885,
"y1" : 0.085561497400475361,
"x2" : 0.72884012799111808,
"y2" : 0.062388591874628641
},
"confidence" : 1,
"text" : "Added Components and hazing activity",
"subBounds" : [
{
"text" : "Added",
"offset" : [
0,
5
],
"bounds" : {
"y2" : 0.062388591800356497,
"x2" : 0.47041535657775063,
"x1" : 0.41692790488505738,
"y1" : 0.085561497207367831
}
},
{
"text" : "Components",
"offset" : [
6,
16
],
"bounds" : {
"x1" : 0.47296237833447591,
"y1" : 0.085561497326203217,
"x2" : 0.57484324860348601,
"y2" : 0.062388591800356497
}
},
{
"text" : "and",
"offset" : [
17,
20
],
"bounds" : {
"y1" : 0.085561497326203217,
"x2" : 0.60795453144091427,
"y2" : 0.062388591800356497,
"x1" : 0.57739027036021118
}
},
{
"bounds" : {
"y1" : 0.085561497326203217,
"y2" : 0.062388591800356497,
"x2" : 0.66653603184559507,
"x1" : 0.61050155319763943
},
"text" : "hazing",
"offset" : [
21,
27
]
},
{
"bounds" : {
"y1" : 0.085561497326203217,
"x2" : 0.7288401253918495,
"y2" : 0.06238859206773617,
"x1" : 0.66908305360232023
},
"offset" : [
28,
36
],
"text" : "activity"
}
]
}
}
]
}
import cv2
import json
import numpy as np
from sklearn.cluster import DBSCAN
import glob
from tqdm import tqdm
import os
import re
from table_parsing.utils import stringify_keys
# Handle input data
# ==================================================
def process_image(image_path):
    """Load an image and build the binary mask used for line detection.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        A ``(image, binary)`` tuple: the image as loaded by OpenCV and the
        result of adaptive mean thresholding applied to its inverted
        grayscale version.

    Raises:
        FileNotFoundError: If OpenCV could not read ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising, so
    # fail here with the offending path rather than inside cvtColor.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # The grayscale image is bitwise-inverted before thresholding
    # (block size 15, constant -10).
    binary = cv2.adaptiveThreshold(
        ~gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 15, -10
    )
    return image, binary
def load_data(json_path):
    """Read a JSON file and return the decoded object.

    The file is opened explicitly as UTF-8 so that non-ASCII OCR text
    decodes the same way regardless of the platform's default encoding.

    Args:
        json_path: Path of the JSON file to load.

    Returns:
        Whatever ``json.load`` produces for the file's contents.
    """
    with open(json_path, 'r', encoding='utf-8') as file:
        return json.load(file)
# Handle bounding boxes
# ============================================
def draw_bounding_box(image, bounds, color=(0, 255, 0), thickness=2):
    """Draw a rectangle for a normalized bounding box onto ``image`` in place.

    Args:
        image: Image array; only its first two dimensions (height, width)
            are read, so both color and grayscale arrays are accepted.
        bounds: Mapping with ``x1``, ``y1``, ``x2`` and ``y2`` entries that
            are multiplied by the image size (presumably fractions in
            [0, 1] -- confirm against the OCR producer).
        color: Color tuple passed to ``cv2.rectangle``.
        thickness: Line thickness passed to ``cv2.rectangle``.
    """
    height, width = image.shape[:2]
    # The y values are flipped (1 - y) before scaling to pixel rows.
    corner_a = (int(bounds['x1'] * width), int((1 - bounds['y2']) * height))
    corner_b = (int(bounds['x2'] * width), int((1 - bounds['y1']) * height))
    cv2.rectangle(image, corner_a, corner_b, color, thickness)
def convert_bounding_box(image, bounds):
    """Convert a normalized bounding box into integer pixel coordinates.

    Args:
        image: Image array; only its first two dimensions (height, width)
            are read, so both color and grayscale arrays are accepted.
        bounds: Mapping with ``x1``, ``y1``, ``x2`` and ``y2`` entries that
            are multiplied by the image size (presumably fractions in
            [0, 1] -- confirm against the OCR producer).

    Returns:
        ``(x1, y1, x2, y2)`` where each x is ``int(x * width)`` and each y
        is ``int((1 - y) * height)``, i.e. the y axis is flipped.
    """
    height, width = image.shape[:2]
    x1 = int(bounds['x1'] * width)
    x2 = int(bounds['x2'] * width)
    # Flip the y axis: the input y is subtracted from 1 before scaling.
    y1 = int((1 - bounds['y1']) * height)
    y2 = int((1 - bounds['y2']) * height)
    return x1, y1, x2, y2
def draw_all_word_bounding_boxes(image,data):
    """Outline every observation's phrase-level bounding box on ``image``.

    Args:
        image: Image array that is drawn on in place.
        data: Mapping whose ``'observations'`` entry is iterated; each item
            must provide ``item["observation"]['bounds']``.

    Returns:
        The same ``image`` object that was passed in.
    """
    phrase_color = (255, 0, 0)
    for entry in data['observations']:
        observation = entry["observation"]
        # One rectangle per phrase; per-word subBounds are not drawn.
        draw_bounding_box(image, observation['bounds'], color=phrase_color)
    return image
# Draw bounding boxes for each word
# for subBound in item["observation"]['subBounds']:
# draw_bounding_box(image, subBound['bounds'], color=(0, 255, 0))
# # Display the image
# cv2.imshow('Image with Bounding Boxes', image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Cluster points
# ======================================
def find_horizontal_vertical_lines(binary, horizontal_scale_factor, vertical_scale_factor):
    """Extract horizontal and vertical line masks from a binary image.

    Each mask is produced by eroding and then dilating ``binary`` with a
    one-pixel-thick rectangular structuring element.

    Args:
        binary: Binary image; its first dimension is treated as rows and
            its second as columns.
        horizontal_scale_factor: The column count is integer-divided by
            this value to get the horizontal element's width.
        vertical_scale_factor: The row count is integer-divided by this
            value to get the vertical element's height.

    Returns:
        A ``(horizontal, vertical)`` tuple of images.
    """
    rows, cols = binary.shape[:2]

    # Clamp to 1 so a small image or a large scale factor cannot yield a
    # zero-sized structuring element.
    horizontal_size = max(1, cols // horizontal_scale_factor)
    horizontal_structure = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontal_size, 1))
    horizontal = cv2.erode(binary, horizontal_structure)
    horizontal = cv2.dilate(horizontal, horizontal_structure)

    vertical_size = max(1, rows // vertical_scale_factor)
    vertical_structure = cv2.getStructuringElement(cv2.MORPH_RECT, (1, vertical_size))
    vertical = cv2.erode(binary, vertical_structure)
    vertical = cv2.dilate(vertical, vertical_structure)

    return horizontal, vertical
# Find touches (where lines touch but don't intersect)
def find_touches(image, horizontal, vertical, dilation_size):
    """Locate points where the horizontal and vertical line masks meet.

    Both masks are dilated with a square kernel, AND-ed together, and the
    pixels equal to 255 are grouped with DBSCAN (``eps=10``,
    ``min_samples=1``). Each group is reduced to its mean position.

    Args:
        image: Unused; kept so existing callers keep working.
        horizontal: Horizontal line mask.
        vertical: Vertical line mask.
        dilation_size: Side length of the square dilation kernel.

    Returns:
        A list of integer NumPy arrays ``[x, y]``, one per cluster. The
        list is empty when the masks never overlap.
    """
    kernel = np.ones((dilation_size, dilation_size), np.uint8)
    horizontal_dilated = cv2.dilate(horizontal, kernel)
    vertical_dilated = cv2.dilate(vertical, kernel)
    touch_mask = cv2.bitwise_and(horizontal_dilated, vertical_dilated)

    # argwhere yields (row, col); reorder the columns to (x, y).
    touch_points = np.argwhere(touch_mask == 255)[:, [1, 0]]

    # DBSCAN cannot be fit on zero samples, so report "no touches" directly.
    if touch_points.shape[0] == 0:
        return []

    labels = DBSCAN(eps=10, min_samples=1).fit(touch_points).labels_
    return [
        np.mean(touch_points[labels == label], axis=0).astype(int)
        for label in np.unique(labels)
    ]
def draw_clustered_touch_points(image, clustered_touch_points):
    """Mark each clustered touch point on ``image`` with a filled circle.

    Args:
        image: Image array that is drawn on in place.
        clustered_touch_points: Iterable of points; each is converted with
            ``tuple(...)`` and used as a circle center.

    Returns:
        The same ``image`` object that was passed in.
    """
    marker_radius = 5
    marker_color = (255, 0, 0)
    for center in clustered_touch_points:
        # A thickness of -1 asks OpenCV for a filled circle.
        cv2.circle(image, tuple(center), marker_radius, marker_color, -1)
    return image
# Final grid lines
# ========================================
# Identify and draw the outermost vertical and horizontal lines
def make_final_lines(image, clustered_touch_points):
    """Derive grid lines from clustered touch points and draw them.

    Vertical lines are placed at the minimum, median and maximum x of the
    points. Horizontal lines are placed at the minimum and maximum y, plus
    one per DBSCAN cluster (``eps=10``, ``min_samples=1``) of the y values
    lying more than 5 pixels from both of those extremes.

    Args:
        image: Image array that is drawn on in place.
        clustered_touch_points: Sequence of ``[x, y]`` points.

    Returns:
        A tuple ``(image, horizontal_lines, vertical_lines)`` where
        ``horizontal_lines`` is ``[top, *interior_rows, bottom]`` and
        ``vertical_lines`` is ``[left, median_x, right]``. Both lists are
        empty when no touch points were supplied, so callers can always
        unpack three values.
    """
    if len(clustered_touch_points) == 0:
        return image, [], []

    points = np.array(clustered_touch_points)
    xs, ys = points[:, 0], points[:, 1]

    # Plain ints keep the drawing calls independent of NumPy scalar types.
    left_most, right_most = int(xs.min()), int(xs.max())
    top_most, bottom_most = int(ys.min()), int(ys.max())
    middle = np.median(xs)

    line_color = (0, 255, 0)
    image_height, image_width = image.shape[:2]
    for x in (left_most, int(middle), right_most):
        cv2.line(image, (x, 0), (x, image_height), line_color, 2)
    for y in (top_most, bottom_most):
        cv2.line(image, (0, y), (image_width, y), line_color, 2)

    # Keep only y values that are more than 5 px from both outer rows.
    interior_ys = sorted({
        int(y) for y in ys
        if abs(y - top_most) > 5 and abs(y - bottom_most) > 5
    })

    clustered_y_coords = []
    # With no interior rows there is nothing to cluster (and DBSCAN
    # cannot be fit on zero samples).
    if interior_ys:
        unique_y_coords = np.array(interior_ys).reshape(-1, 1)
        labels = DBSCAN(eps=10, min_samples=1).fit(unique_y_coords).labels_
        clustered_y_coords = sorted(
            np.mean(unique_y_coords[labels == label])
            for label in np.unique(labels)
        )

    for y in clustered_y_coords:
        row = int(y)
        cv2.line(image, (left_most, row), (right_most, row), line_color, 2)

    horizontal_lines = [top_most] + clustered_y_coords + [bottom_most]
    vertical_lines = [left_most, middle, right_most]
    return image, horizontal_lines, vertical_lines
# Assigning data
# ============================================
# Function to calculate the intersection area of two rectangles
def intersection_area(box1, box2):
    """Return the overlap area of two boxes given as ``(x1, y1, x2, y2)``.

    The result is 0 when the boxes do not overlap on either axis.
    """
    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])
    # Negative extents mean the boxes are disjoint on that axis.
    overlap_width = max(0, right - left)
    overlap_height = max(0, bottom - top)
    return overlap_width * overlap_height
def assign_data(image, data, horizontal_lines, vertical_lines):
    """Group observation texts by the grid cell they overlap the most.

    Consecutive entries of ``horizontal_lines`` and ``vertical_lines``
    delimit the cells. Each observation goes to the cell with the largest
    intersection area; an observation that overlaps no cell falls back to
    cell ``(0, 0)``.

    Args:
        image: Image array passed to ``convert_bounding_box``.
        data: Mapping whose ``'observations'`` items provide
            ``["observation"]['bounds']`` and ``["observation"]['text']``.
        horizontal_lines: Sequence of row boundary coordinates.
        vertical_lines: Sequence of column boundary coordinates.

    Returns:
        Dict mapping ``(row_index, column_index)`` to the texts assigned to
        that cell, joined with single spaces in observation order.
    """
    row_result = {}
    for entry in data['observations']:
        observation = entry["observation"]
        text_box = convert_bounding_box(image, observation['bounds'])
        text = observation['text']

        best_cell, best_overlap = (0, 0), 0
        for row, (top, bottom) in enumerate(zip(horizontal_lines, horizontal_lines[1:])):
            for col, (left, right) in enumerate(zip(vertical_lines, vertical_lines[1:])):
                overlap = intersection_area(text_box, (left, top, right, bottom))
                # Strict comparison: the first cell reaching the maximum wins.
                if overlap > best_overlap:
                    best_overlap, best_cell = overlap, (row, col)

        if best_cell in row_result:
            row_result[best_cell] += " " + text
        else:
            row_result[best_cell] = text
    return row_result
def natural_sort_key(s):
    """Build a sort key that orders embedded numbers by numeric value.

    The string is split on runs of digits; digit runs become ``int`` and
    everything else stays ``str``, so ``"p2"`` sorts before ``"p10"``.

    Args:
        s: String to derive the key from.

    Returns:
        A list alternating ``str`` and ``int`` items, always starting with
        a (possibly empty) ``str``.
    """
    parts = re.split(r'(\d+)', s)
    # With a single capture group, re.split puts the captured digit runs at
    # the odd indices. Using the position instead of str.isdigit() avoids
    # calling int() on characters such as a superscript two, which isdigit()
    # accepts but int() rejects.
    return [int(part) if index % 2 else part for index, part in enumerate(parts)]
def make_result(image_dir, json_dir, jsonl_output_path, annotated_output_path):
    """Run the table-parsing pipeline over a directory of images.

    Images (``*jpg``) and OCR files (``*json``) are sorted in natural order
    and paired by position. For each pair the grid is detected, OCR text is
    assigned to cells, an annotated image is written to
    ``annotated_output_path`` and one JSON line is appended to
    ``jsonl_output_path``.

    Args:
        image_dir: Directory searched for ``*jpg`` files.
        json_dir: Directory searched for ``*json`` files.
        jsonl_output_path: Path of the JSONL file to (over)write.
        annotated_output_path: Directory for the annotated images; created
            if it does not exist.

    Raises:
        IndexError: If there are fewer JSON files than images.
    """
    image_paths = sorted(glob.glob(f"{image_dir}/*jpg"), key=natural_sort_key)
    json_paths = sorted(glob.glob(f"{json_dir}/*json"), key=natural_sort_key)

    # cv2.imwrite returns False instead of raising when the target directory
    # is missing, so make sure it exists up front.
    if annotated_output_path:
        os.makedirs(annotated_output_path, exist_ok=True)

    with open(jsonl_output_path, 'w') as jsonl_file:
        for index, image_path in enumerate(image_paths):
            json_path = json_paths[index]
            file_name = os.path.basename(image_path)

            image, binary = process_image(image_path)
            data = load_data(json_path)

            horizontal_mask, vertical_mask = find_horizontal_vertical_lines(
                binary, horizontal_scale_factor=15, vertical_scale_factor=30
            )
            clustered_touch_points = find_touches(
                image, horizontal_mask, vertical_mask, dilation_size=6
            )
            image = draw_clustered_touch_points(image, clustered_touch_points)

            final = make_final_lines(image, clustered_touch_points)
            if isinstance(final, tuple):
                image_with_lines, row_lines, column_lines = final
            else:
                # make_final_lines may hand back just the image when there
                # are no touch points; treat that as "no grid lines".
                image_with_lines, row_lines, column_lines = final, [], []

            result = assign_data(image, data, row_lines, column_lines)

            cv2.imwrite(os.path.join(annotated_output_path, file_name), image_with_lines)
            jsonl_file.write(json.dumps(stringify_keys(result)) + '\n')