rectangle annotations

This commit is contained in:
q
2025-06-25 14:40:04 +03:00
parent 195045ff98
commit 233a0292d6
2 changed files with 331 additions and 104 deletions

View File

@@ -24,6 +24,22 @@ def get_options():
required=False,
help="Save timestamps to a CSV file",
)
parser.add_argument(
"--op",
action="store",
dest="output_points",
default=None,
required=False,
help="Save points to a JSON file",
)
parser.add_argument(
"--ip",
action="store",
dest="input_points",
default=None,
required=False,
help="Load points from a JSON file",
)
parser.add_argument(
"--fps",
action="store",
@@ -42,6 +58,23 @@ def get_options():
type=str,
help="predefined crop. Syntax: 'w:h:x:y' example: 1280:720:30:20",
)
parser.add_argument(
"--ss",
action="store",
dest="start_time",
default=None,
required=False,
type=str,
help="Starting position as frame (int) or HH:MM:SS.ss",
)
parser.add_argument(
"--max-res",
action="store",
dest="max_res",
default="1280x720",
type=str,
help="Max resolution of video viewer: %(default)s",
)
parser.add_argument(
"--ffmpeg-copy",
action="store_true",

View File

@@ -1,3 +1,4 @@
import json
import os
import shlex
import subprocess
@@ -29,8 +30,9 @@ class Marker:
self.auto_step = True
self.font = cv2.FONT_HERSHEY_SIMPLEX
self.frame_visu = []
self.max_res = (1280, 720)
self.max_res = tuple([int(x) for x in self.opts.max_res.split("x")])
self.min_res = (512, None)
self.mouse_position = (0, 0)
self.crop = [(None, None), (None, None), None]
self.crop_click = 0
self.point_click = 0
@@ -44,6 +46,11 @@ class Marker:
self.open()
self.calculate_res()
self.parse_timestamps()
if self.opts.start_time:
try:
self.nr = int(self.opts.start_time)
except ValueError:
self.nr = self.parse_time(self.opts.start_time)
self.loop()
except Exception as e:
exc_type, exc_obj, exc_tb = sys.exc_info()
@@ -89,14 +96,8 @@ class Marker:
if self.opts.crop:
w, h, x, y = [int(c) for c in self.opts.crop.split(":")]
self.crop = [
(
int(self.video_res[0] * x / self.video_res_original[0]),
int(self.video_res[1] * y / self.video_res_original[1]),
),
(
int(self.video_res[0] * w / self.video_res_original[0]),
int(self.video_res[1] * h / self.video_res_original[1]),
),
self.original_to_visual((x, y)),
self.original_to_visual((w, h)),
True,
]
self.bar_start = int(self.video_res[0] * 0.05)
@@ -196,10 +197,8 @@ class Marker:
1,
)
if self.crop_click == 1:
x, y = [
int(self.video_res_original[0] * self.crop[0][0] / self.video_res[0]),
int(self.video_res_original[1] * self.crop[0][1] / self.video_res[1]),
]
x, y = self.visual_to_original((self.crop[0][0], self.crop[0][1]))
self.shadow_text(
frame,
f"{x},{y}",
@@ -209,14 +208,8 @@ class Marker:
(0, 192, 192),
)
if self.crop_click == 2:
x, y = [
int(self.video_res_original[0] * self.crop[0][0] / self.video_res[0]),
int(self.video_res_original[1] * self.crop[0][1] / self.video_res[1]),
]
w, h = [
abs(int(self.video_res_original[0] * self.crop[1][0] / self.video_res[0])),
abs(int(self.video_res_original[1] * self.crop[1][1] / self.video_res[1])),
]
x, y = self.visual_to_original((self.crop[0][0], self.crop[0][1]))
w, h = self.visual_to_original((self.crop[1][0], self.crop[1][1]))
self.shadow_text(
frame,
f"{w}x{h}",
@@ -228,53 +221,95 @@ class Marker:
def draw_points(self, frame):
if self.point_click == 0:
if self.opts.output_points is None:
return
x, y = [self.video_res[0] - 120, 50]
self.shadow_text(
frame,
"Points: " + str(self.point_index),
(x, y),
0.5,
1,
(0, 192, 192),
)
try:
current = self.points_interpolated[self.point_index][self.nr]
for index in self.points:
if index == self.point_index and self.point_click == 1:
continue
current = self.get_interpolated_point(index=index)
if current[5] == 0:
continue
color = (0, 192, 192)
if current[3] == 2:
if current[5] == 2:
color = (60, 205, 60)
if current[3] == 1:
if current[5] == 1:
color = (192, 0, 192)
cv2.circle(frame, (current[1], current[2]), 10, color, 1)
history = list(range(max(1, int(self.nr - self.viewer_fps)), self.nr + 1))
for p in history:
self.points_interpolated[self.point_index][p - 1][1:2]
cv2.line(
cv2.circle(frame, (current[6], current[7]), 10, (0, 0, 0), 2)
cv2.circle(frame, (current[6], current[7]), 10, color, 1)
self.shadow_text(
frame,
index,
(current[6], current[7]),
0.5,
1,
color,
)
if self.point_click == 1:
# Draw crosshair
cv2.line(
frame, (self.mouse_position[0], 0), (self.mouse_position[0], self.video_res[1]), (128, 128, 128), 1
)
cv2.line(
frame, (0, self.mouse_position[1]), (self.video_res[0], self.mouse_position[1]), (128, 128, 128), 1
)
# Show current track
x, y = [self.video_res[0] - 120, 50]
self.shadow_text(
frame,
"Points: " + str(self.point_index),
(x, y),
0.5,
1,
(0, 192, 192),
)
try:
current = self.get_interpolated_point() # self.points_interpolated[self.point_index][self.nr]
color = (0, 192, 192)
if current[5] == 2:
color = (60, 205, 60)
if current[5] == 1:
color = (192, 0, 192)
cv2.rectangle(
frame,
tuple(self.points_interpolated[self.point_index][p - 1][1:3]),
tuple(self.points_interpolated[self.point_index][p][1:3]),
(192, 0, 192),
(current[1], current[2]),
(current[3], current[4]),
color,
1,
)
cv2.circle(frame, (current[6], current[7]), 10, color, 1)
except KeyError:
pass
except IndexError:
print(current, self.nr)
pass
try:
# ~ point_keys = list(sorted(self.points[self.point_index].keys()))
current = self.points[self.point_index][self.nr]
color = (60, 205, 60)
cv2.circle(frame, (current[0], current[1]), 13, color, 2)
except KeyError:
pass
except IndexError:
print(self.points[self.point_index])
print(self.nr)
pass
history = list(range(max(1, int(self.nr - self.viewer_fps)), self.nr + 1))
for p in history:
current = self.get_interpolated_point(p)
past = self.get_interpolated_point(p - 1)
cv2.line(
frame,
(past[6], past[7]),
(current[6], current[7]),
(192, 0, 192),
1,
)
except KeyError:
pass
except IndexError:
print(current, self.nr)
pass
try:
# ~ point_keys = list(sorted(self.points[self.point_index].keys()))
# current = self.points[self.point_index][self.nr]
current = self.get_point() # self.points_interpolated[self.point_index][self.nr]
color = (60, 205, 60)
cv2.circle(frame, (current[4], current[5]), 13, color, 2)
except KeyError:
pass
except IndexError:
# ~ print(self.points[self.point_index])
# ~ print(self.nr)
pass
def scan_point(self, direction):
try:
@@ -302,6 +337,115 @@ class Marker:
except Exception:
pass
def get_point(self, nr=None, index=None):
    """Return the user-set box for a frame, extended with its center.

    nr defaults to the current frame (self.nr) and index to the current
    track (self.point_index).

    Returns the stored box values followed by the center x and center y
    (mean of elements 0/2 and 1/3, truncated to int). When the track or
    the frame has no stored box, a list of six None values is returned.
    """
    frame = self.nr if nr is None else nr
    track = self.point_index if index is None else index
    if track not in self.points or frame not in self.points[track]:
        return [None] * 6
    box = self.points[track][frame]
    center_x = int((box[0] + box[2]) / 2)
    center_y = int((box[1] + box[3]) / 2)
    return [*box, center_x, center_y]
def get_interpolated_point(self, nr=None, index=None):
    """Return the interpolated entry for a frame, extended with its center.

    nr defaults to the current frame (self.nr) and index to the current
    track (self.point_index).

    Returns the stored interpolated entry followed by the center x and
    center y (mean of elements 1/3 and 2/4, truncated to int). When the
    track is unknown, has not been interpolated yet, or has no entry for
    the frame, a list of eight None values is returned.
    """
    if nr is None:
        nr = self.nr
    if index is None:
        index = self.point_index
    entry = None
    if index in self.points:
        # A track can exist in self.points before interpolate_points()
        # has filled self.points_interpolated for it; treat that as
        # "no data" instead of raising KeyError.
        entry = self.points_interpolated.get(index, {}).get(nr)
    if entry is None:
        return [None] * 8
    return [
        *entry,
        int((entry[1] + entry[3]) / 2),
        int((entry[2] + entry[4]) / 2),
    ]
def modify_point(self, position, x, y):
    """Create or move the current track's box at the current frame.

    position selects which part of the box is placed at (x, y):
    "tl" top-left corner, "br" bottom-right corner, "c" center.

    When the frame has no box yet, a new one is created. Its size is
    copied from the closest earlier key frame of the track (or from the
    first key frame when the current frame precedes all of them), and is
    50x50 when the track is empty. Derived corners are clamped to the
    range 0 .. self.video_res - 1.

    When the frame already has a box, "c" re-centers it keeping its size,
    while "tl"/"br" move only that corner. Corners are then swapped if
    needed so that element 0 <= element 2 and element 1 <= element 3.
    Finally self.interpolate_points() is called.

    Raises:
        ValueError: if position is not "tl", "br" or "c".
    """
    if position not in ("tl", "br", "c"):
        raise ValueError(f"Unknown position: {position!r}")

    max_x = self.video_res[0] - 1
    max_y = self.video_res[1] - 1
    track = self.points.setdefault(self.point_index, {})
    box = track.get(self.nr)

    if box is None:
        # New key frame: inherit the box size from a neighbouring key frame.
        if track:
            earlier = [key for key in track if key < self.nr]
            ref = track[max(earlier) if earlier else min(track)]
            w = abs(ref[2] - ref[0])
            h = abs(ref[3] - ref[1])
        else:
            w = 50
            h = 50
        if position == "tl":
            box = [x, y, min(max_x, x + w), min(max_y, y + h)]
        elif position == "br":
            box = [max(0, x - w), max(0, y - h), x, y]
        else:
            box = [
                max(0, int(x - w / 2)),
                max(0, int(y - h / 2)),
                min(max_x, int(x + w / 2)),
                min(max_y, int(y + h / 2)),
            ]
        track[self.nr] = box
    elif position == "c":
        # Existing box: keep its size, move its center.
        w = abs(box[2] - box[0])
        h = abs(box[3] - box[1])
        box = [
            max(0, int(x - w / 2)),
            max(0, int(y - h / 2)),
            min(max_x, int(x + w / 2)),
            min(max_y, int(y + h / 2)),
        ]
        track[self.nr] = box
    else:
        # Existing box: move only the selected corner.
        ix, iy = (0, 1) if position == "tl" else (2, 3)
        box[ix] = x
        box[iy] = y

    # A moved corner may have crossed the opposite one; restore ordering.
    if box[0] > box[2]:
        box[0], box[2] = box[2], box[0]
    if box[1] > box[3]:
        box[1], box[3] = box[3], box[1]

    self.interpolate_points()
def interpolate_points(self):
if not self.point_index in self.points_interpolated:
@@ -309,16 +453,16 @@ class Marker:
if len(self.points[self.point_index]) == 1:
key = list(self.points[self.point_index].keys())[0]
x, y = self.points[self.point_index][key]
x, y, x2, y2 = self.points[self.point_index][key]
for key in range(self.frames):
self.points_interpolated[self.point_index][key] = [False, int(x), int(y), 0]
self.points_interpolated[self.point_index][self.nr][3] = 2
self.points_interpolated[self.point_index][key] = [False, int(x), int(y), x2, y2, 0]
self.points_interpolated[self.point_index][self.nr][5] = 2
else: # more points
point_keys = list(sorted(list(self.points[self.point_index].keys())))
point_values = [self.points[self.point_index][k] for k in point_keys]
xy = np.array(point_values).T
spline = PchipInterpolator(point_keys, xy, axis=1)
xyxy = np.array(point_values).T
spline = PchipInterpolator(point_keys, xyxy, axis=1)
start_key = min(point_keys)
end_key = max(point_keys) + 1
t2 = np.arange(start_key, end_key)
@@ -327,16 +471,32 @@ class Marker:
False,
self.points[self.point_index][start_key][0],
self.points[self.point_index][start_key][1],
self.points[self.point_index][start_key][2],
self.points[self.point_index][start_key][3],
0,
]
# interpolated points
for row in np.vstack((t2, spline(t2))).T:
self.points_interpolated[self.point_index][row[0]] = [True, int(row[1]), int(row[2]), 1]
self.points_interpolated[self.point_index][row[0]] = [
True,
int(row[1]),
int(row[2]),
int(row[3]),
int(row[4]),
1,
]
for key in range(end_key, self.frames + 1):
self.points_interpolated[self.point_index][key] = [False, int(row[1]), int(row[2]), 0]
self.points_interpolated[self.point_index][key] = [
False,
int(row[1]),
int(row[2]),
int(row[3]),
int(row[4]),
3,
]
# clicked points (not necessary, could determine at draw time!)
for key in point_keys:
self.points_interpolated[self.point_index][key][3] = 2
self.points_interpolated[self.point_index][key][5] = 2
def draw_help(self, frame):
@@ -362,7 +522,7 @@ class Marker:
formatted = "{} {}".format(
self.format_time(self.nr),
"||" if self.paused else "",
f"|| ({self.nr})" if self.paused else "",
)
self.shadow_text(frame, formatted, (left, bottom), 1.1, 2, (255, 255, 255))
@@ -387,7 +547,8 @@ class Marker:
space or click video
pause
a and s modify crop offset or size
f toggle 0.5x 1x or 2x FPS
p toggle bounding box drawing. follow with any key as index. left/middle/right mouse sets box position
f toggle 0.25x 1x or 4x FPS
v toggle HUD
h toggle help
q or esc quit
@@ -403,6 +564,7 @@ class Marker:
y > self.bar_top,
)
)
self.mouse_position = (x, y)
if self.crop_click == 1:
self.crop[0] = (x, y)
if event == cv2.EVENT_LBUTTONDOWN:
@@ -414,15 +576,16 @@ class Marker:
self.crop_click = 0
return
if self.point_click == 1:
if event == cv2.EVENT_LBUTTONDOWN:
self.modify_point("tl", int(x), int(y))
if event == cv2.EVENT_RBUTTONDOWN:
self.modify_point("br", int(x), int(y))
if event == cv2.EVENT_MBUTTONDOWN:
self.modify_point("c", int(x), int(y))
return
if event == cv2.EVENT_LBUTTONDOWN:
if self.point_click == 1:
if not self.point_index in self.points:
self.points[self.point_index] = {}
self.points[self.point_index][self.nr] = (int(x), int(y))
self.interpolate_points()
return
if in_bar:
click_relative = (x - self.bar_start) / (self.bar_end - self.bar_start)
self.nr = int(click_relative * self.frames)
@@ -431,13 +594,6 @@ class Marker:
self.paused = not self.paused
if event == cv2.EVENT_LBUTTONDBLCLK:
if self.point_click == 1:
if not self.point_index in self.points:
return
if self.nr in self.points[self.point_index]:
del self.points[self.point_index][self.nr]
return
if not in_bar:
self.toggle_stamp()
# doubleclick (toggle?)
@@ -485,6 +641,22 @@ class Marker:
else:
self.stamps = []
self.nr = 0
# Read bounding boxes from JSON
if self.opts.input_points:
if os.path.exists(self.opts.input_points):
with open(self.opts.input_points, "rt") as fp:
self.points = json.load(fp)
for index in self.points:
self.point_index = index
self.points[index] = {int(k): v for k, v in self.points[index].items()}
for key in self.points[index]:
self.points[index][key] = [
*self.original_to_visual((self.points[index][key][0], self.points[index][key][1])),
*self.original_to_visual((self.points[index][key][2], self.points[index][key][3])),
]
self.interpolate_points()
print(f"Loaded points with index: {index}")
self.point_index = None
def print_help(self):
print(self.get_help())
@@ -495,14 +667,9 @@ class Marker:
cropstr = []
else:
self.opts.ffmpeg_copy = False
x, y = [
int(self.video_res_original[0] * self.crop[0][0] / self.video_res[0]),
int(self.video_res_original[1] * self.crop[0][1] / self.video_res[1]),
]
w, h = [
int(self.video_res_original[0] * self.crop[1][0] / self.video_res[0]),
int(self.video_res_original[1] * self.crop[1][1] / self.video_res[1]),
]
x, y = self.visual_to_original((self.crop[0][0], self.crop[0][1]))
w, h = self.visual_to_original((self.crop[1][0], self.crop[1][1]))
if w < 0:
x = x + w
w = -w
@@ -555,11 +722,23 @@ class Marker:
def save_timestamps(self):
if self.opts.output == None:
return
with open(self.opts.output, "wt") as fp:
for i, ts in enumerate(self.stamps):
fp.write("{},{},{}\n".format(self.format_time(ts), i + 1, ts))
if self.opts.output is not None:
with open(self.opts.output, "wt") as fp:
for i, ts in enumerate(self.stamps):
fp.write("{},{},{}\n".format(self.format_time(ts), i + 1, ts))
if self.opts.output_points is not None:
points = {}
for index in self.points.keys():
points[index] = {}
for key in sorted(self.points[index].keys()):
points[index][key] = [
*self.visual_to_original((self.points[index][key][0], self.points[index][key][1])),
*self.visual_to_original((self.points[index][key][2], self.points[index][key][3])),
]
with open(self.opts.output_points, "wt") as fp:
json.dump(points, fp, indent=2)
def shadow_text(self, frame, text, pos, size, thicc, color):
@@ -591,6 +770,20 @@ class Marker:
self.stamps.append(self.nr)
self.stamps.sort()
def original_to_visual(self, t):
    """Scale (x, y) from original video resolution to display resolution.

    t is a pair in self.video_res_original coordinates; the result is the
    matching pair in self.video_res coordinates, truncated to int.
    """
    return (
        int(self.video_res[0] * t[0] / self.video_res_original[0]),
        int(self.video_res[1] * t[1] / self.video_res_original[1]),
    )
def visual_to_original(self, t):
    """Scale (x, y) from display resolution to original video resolution.

    t is a pair in self.video_res coordinates; the result is the matching
    pair in self.video_res_original coordinates, truncated to int.
    """
    return (
        int(self.video_res_original[0] * t[0] / self.video_res[0]),
        int(self.video_res_original[1] * t[1] / self.video_res[1]),
    )
def loop(self):
self.step = 1
@@ -601,20 +794,19 @@ class Marker:
self.video_reader.set(cv2.CAP_PROP_POS_FRAMES, self.nr)
self.print_help()
cv2.namedWindow("tsmark")
cv2.namedWindow("tsmark", flags=cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO | cv2.WINDOW_GUI_NORMAL)
cv2.setMouseCallback("tsmark", self.mouse_click)
digits_ords = [ord(str(x)) for x in range(10)]
FPS_modifier = 1
FPS_modifiers = [0.5, 1, 2]
FPS_modifiers = [0.25, 1, 4]
while self.video_reader.isOpened():
show_time = time.time()
if (not self.paused) or self.read_next:
ret, frame = self.video_reader.read()
if ret == True:
if self.paused:
draw_wait = 200
else:
draw_wait = 1
draw_wait = 200 if self.paused or self.point_click == 0 else 1
if (not self.paused) or self.read_next:
self.read_next = False
frame_visu = cv2.resize(frame.copy(), self.video_res)
@@ -719,7 +911,9 @@ class Marker:
self.crop_click = 0 if self.crop_click == 2 else 2
self.crop[2] = True
elif k & 0xFF == ord("p"): # toggle points
self.point_click = 0 if self.point_click == 1 else 1
if self.opts.output_points is None:
continue
self.point_click = 1 - self.point_click
if self.point_click == 1:
self.shadow_text(
frame_visu,