add more ways to modify boxes

This commit is contained in:
q
2025-08-26 20:00:20 +03:00
parent c35a3e90bd
commit 68f060b204
2 changed files with 144 additions and 30 deletions

View File

@@ -2,7 +2,7 @@ import argparse
from tsmark.video_annotator import Marker from tsmark.video_annotator import Marker
VERSION = "0.7.11" VERSION = "0.7.12"
class SmartFormatter(argparse.HelpFormatter): class SmartFormatter(argparse.HelpFormatter):

View File

@@ -40,8 +40,10 @@ class Marker:
self.max_res = tuple([int(x) for x in self.opts.max_res.split("x")]) self.max_res = tuple([int(x) for x in self.opts.max_res.split("x")])
self.min_res = (512, None) self.min_res = (512, None)
self.mouse_position = (0, 0) self.mouse_position = (0, 0)
self.mouse_flags = {"shift": False, "ctrl": False, "alt": False}
self.crop = [(None, None), (None, None), None] self.crop = [(None, None), (None, None), None]
self.crop_click = 0 self.crop_click = 0
self.button_r_down = None
self.point_click = 0 self.point_click = 0
self.point_tracking = 0 self.point_tracking = 0
self.point_tracking_length = float(self.opts.max_track) self.point_tracking_length = float(self.opts.max_track)
@@ -180,13 +182,31 @@ class Marker:
if self.point_click == 1 and self.point_index in self.points: if self.point_click == 1 and self.point_index in self.points:
bar_middle = int((self.bar_top + self.bar_bottom) / 2) bar_middle = int((self.bar_top + self.bar_bottom) / 2)
point_y_map = {
key: int(self.bar_top + (i + 1) * (self.bar_bottom - self.bar_top) / (1 + len(POINT_VISIBILITY)))
for i, key in enumerate(POINT_VISIBILITY)
}
for ts in self.points[self.point_index]: for ts in self.points[self.point_index]:
p_pos = int(self.bar_start + ts / self.frames * (self.bar_end - self.bar_start)) p_pos = int(self.bar_start + ts / self.frames * (self.bar_end - self.bar_start))
cv2.circle(frame, (p_pos, bar_middle), 3, (32, 32, 32), -1) y_pos = point_y_map.get(self.points[self.point_index][ts]["visible"], bar_middle)
cv2.circle(
frame,
(p_pos, y_pos),
3,
(32, 32, 32),
-1,
)
for ts in self.points[self.point_index]: for ts in self.points[self.point_index]:
p_pos = int(self.bar_start + ts / self.frames * (self.bar_end - self.bar_start)) p_pos = int(self.bar_start + ts / self.frames * (self.bar_end - self.bar_start))
y_pos = point_y_map.get(self.points[self.point_index][ts]["visible"], bar_middle)
color = self.get_point_color(self.points[self.point_index][ts]) color = self.get_point_color(self.points[self.point_index][ts])
cv2.circle(frame, (p_pos, bar_middle), 1, color, -1) cv2.circle(
frame,
(p_pos, y_pos),
1,
color,
-1,
)
cv2.line( cv2.line(
frame, frame,
@@ -313,6 +333,27 @@ class Marker:
cv2.circle(frame, (current["cx"], current["cy"]), 10, color, 1) cv2.circle(frame, (current["cx"], current["cy"]), 10, color, 1)
if self.mouse_flags["ctrl"] and current["type"] == "key":
nearest_wall = self.get_snap(current, self.mouse_position)
thick_x0 = current[nearest_wall] if "x" in nearest_wall else current["x0"]
thick_x1 = current[nearest_wall] if "x" in nearest_wall else current["x1"]
thick_y0 = current[nearest_wall] if "y" in nearest_wall else current["y0"]
thick_y1 = current[nearest_wall] if "y" in nearest_wall else current["y1"]
cv2.line(frame, (thick_x0, thick_y0), (thick_x1, thick_y1), color, 5)
if self.mouse_flags["shift"]:
key_combos = (("x0", "y0"), ("x0", "y1"), ("x1", "y1"), ("x1", "y0"))
dists = [
(
(px, py),
abs(self.mouse_position[0] - current[px]) + abs(self.mouse_position[1] - current[py]),
)
for i, (px, py) in enumerate(key_combos)
]
dists.sort(key=lambda d: d[1])
cv2.circle(frame, (current[dists[0][0][0]], current[dists[0][0][1]]), 5, color, -1)
except (KeyError, IndexError, TypeError): except (KeyError, IndexError, TypeError):
# print(self.get_interpolated_point(), self.nr) # print(self.get_interpolated_point(), self.nr)
pass pass
@@ -437,6 +478,7 @@ class Marker:
if index in self.points_interpolated: if index in self.points_interpolated:
if nr in self.points_interpolated[index]: if nr in self.points_interpolated[index]:
try:
value = self.points_interpolated[index][nr].copy() value = self.points_interpolated[index][nr].copy()
value.update( value.update(
{ {
@@ -445,6 +487,8 @@ class Marker:
} }
) )
return value return value
except KeyError:
pass
return { return {
"x0": None, "x0": None,
@@ -458,6 +502,15 @@ class Marker:
"age": None, "age": None,
} }
def get_snap(self, bbox, xy):
    """Choose one side of ``bbox`` from where ``xy`` lies relative to the box center.

    The offsets ``dx = xy[0] - bbox["cx"]`` and ``dy = xy[1] - bbox["cy"]``
    are compared against the two diagonals through the center:

    * ``dx > dy`` and ``dx < -dy``   -> ``"y1"``
    * ``dx > dy`` and ``dx >= -dy``  -> ``"x0"``
    * ``dx <= dy`` and ``dx < -dy``  -> ``"x1"``
    * ``dx <= dy`` and ``dx >= -dy`` -> ``"y0"`` (includes the exact center)

    For a box with ``x0 < x1`` and ``y0 < y1`` this is the side on the
    opposite half from ``xy``, i.e. the farthest side.
    NOTE(review): callers store the result as ``nearest_wall`` -- confirm
    which of the two is intended.

    bbox: mapping with numeric ``"cx"`` and ``"cy"`` entries.
    xy: ``(x, y)`` coordinate pair.
    Returns one of the strings ``"x0"``, ``"x1"``, ``"y0"``, ``"y1"``.
    """
    dx = xy[0] - bbox["cx"]
    dy = xy[1] - bbox["cy"]
    past_main_diagonal = dx > dy
    before_anti_diagonal = dx < -dy
    if past_main_diagonal:
        return "y1" if before_anti_diagonal else "x0"
    return "x1" if before_anti_diagonal else "y0"
def convert_interpolated_points(self): def convert_interpolated_points(self):
if self.point_click == 1 and self.point_index in self.points: if self.point_click == 1 and self.point_index in self.points:
@@ -475,12 +528,32 @@ class Marker:
} }
def modify_point(self, position, x, y): def modify_point(self, position, x, y):
"""position: tl topleft, br bottomright, c center""" """position: tl topleft, br bottomright, c center, n nearest, snap to nearest wall"""
def get_points_by_nearest(last_p, x, y, w=None, h=None):
    """Return a copy of box ``last_p`` with its corner nearest to (x, y) moved there.

    last_p: mapping with numeric ``"x0"``, ``"y0"``, ``"x1"``, ``"y1"`` entries;
        any other entries (e.g. ``"visible"``) are carried over unchanged.
    x, y: target coordinates for the nearest corner.
    w, h: when given, the diagonally opposite corner is re-placed so that the
        box is ``w`` wide / ``h`` high measured from (x, y); when ``None`` the
        opposite corner keeps its current coordinate on that axis.

    The input mapping is NOT modified. The caller may pass a stored key frame
    (for a new point it passes the previous key frame), so editing it in place
    would also change that frame and make two frames share one dict.
    """
    # Corners in the order nw, sw, se, ne; index + 2 (mod 4) is the diagonal opposite.
    key_combos = (("x0", "y0"), ("x0", "y1"), ("x1", "y1"), ("x1", "y0"))
    # Nearest corner by Manhattan distance; on ties the first corner in the
    # order above wins (same result as a stable sort by distance).
    nearest = min(
        range(len(key_combos)),
        key=lambda i: abs(x - last_p[key_combos[i][0]]) + abs(y - last_p[key_combos[i][1]]),
    )
    near_x, near_y = key_combos[nearest]
    far_x, far_y = key_combos[(nearest + 2) % len(key_combos)]

    moved = dict(last_p)
    moved[near_x] = x
    moved[near_y] = y
    if w is not None:
        # The opposite corner lies to the right if it owns x1, otherwise to the left.
        moved[far_x] = x + w if far_x == "x1" else x - w
    if h is not None:
        # The opposite corner lies below if it owns y1, otherwise above.
        moved[far_y] = y + h if far_y == "y1" else y - h
    return moved
if not self.point_index in self.points: if not self.point_index in self.points:
self.points[self.point_index] = {} self.points[self.point_index] = {}
if not self.nr in self.points[self.point_index]: if not self.nr in self.points[self.point_index]:
# new point
if len(self.points[self.point_index]) > 0: if len(self.points[self.point_index]) > 0:
# find previous point
keys = sorted(list(self.points[self.point_index].keys())) keys = sorted(list(self.points[self.point_index].keys()))
if self.nr > keys[-1]: # last point if at end of track if self.nr > keys[-1]: # last point if at end of track
last_p = self.points[self.point_index][keys[-1]] last_p = self.points[self.point_index][keys[-1]]
@@ -488,11 +561,17 @@ class Marker:
last_p = self.points[self.point_index][keys[0]] last_p = self.points[self.point_index][keys[0]]
else: # previous point if in the middle of track else: # previous point if in the middle of track
prev_key = keys[0] prev_key = keys[0]
for key in keys: prev_key = self.nr - 1
if key > self.nr: while True:
if prev_key < 0:
break
if prev_key > self.frames:
break
if prev_key in self.points[self.point_index]:
last_p = self.points[self.point_index][prev_key] last_p = self.points[self.point_index][prev_key]
break break
prev_key = key prev_key -= 1
w = abs(last_p["x1"] - last_p["x0"]) w = abs(last_p["x1"] - last_p["x0"])
h = abs(last_p["y1"] - last_p["y0"]) h = abs(last_p["y1"] - last_p["y0"])
visibility = last_p["visible"] visibility = last_p["visible"]
@@ -500,6 +579,13 @@ class Marker:
w = 50 w = 50
h = 50 h = 50
visibility = POINT_VISIBILITY[0] visibility = POINT_VISIBILITY[0]
last_p = {
"x0": int((self.video_res[1] - w) / 2),
"y0": int((self.video_res[0] - h) / 2),
"x1": int((self.video_res[1] + w) / 2),
"y1": int((self.video_res[0] + h) / 2),
"visible": POINT_VISIBILITY[0],
}
if position == "tl": if position == "tl":
self.points[self.point_index][self.nr] = { self.points[self.point_index][self.nr] = {
@@ -525,6 +611,8 @@ class Marker:
"y1": min(self.video_res[1] - 1, int(y + h / 2)), "y1": min(self.video_res[1] - 1, int(y + h / 2)),
"visible": visibility, "visible": visibility,
} }
if position == "n":
self.points[self.point_index][self.nr] = get_points_by_nearest(last_p, x, y, w, h)
else: else:
# not a new point # not a new point
@@ -548,6 +636,22 @@ class Marker:
self.points[self.point_index][self.nr]["x1"] = x self.points[self.point_index][self.nr]["x1"] = x
self.points[self.point_index][self.nr]["y1"] = y self.points[self.point_index][self.nr]["y1"] = y
elif position == "n":
self.points[self.point_index][self.nr] = get_points_by_nearest(
self.points[self.point_index][self.nr], x, y
)
elif position == "snap":
nearest_wall = self.get_snap(self.get_point(index=self.point_index, nr=self.nr), (x, y))
if nearest_wall in ("x0", "y0"):
self.points[self.point_index][self.nr][nearest_wall] = 0
elif nearest_wall == "x1":
self.points[self.point_index][self.nr][nearest_wall] = self.video_res[0]
elif nearest_wall == "y1":
self.points[self.point_index][self.nr][nearest_wall] = self.video_res[1]
if self.nr in self.points[self.point_index]:
if self.points[self.point_index][self.nr]["x0"] > self.points[self.point_index][self.nr]["x1"]: if self.points[self.point_index][self.nr]["x0"] > self.points[self.point_index][self.nr]["x1"]:
self.points[self.point_index][self.nr]["x1"], self.points[self.point_index][self.nr]["x0"] = ( self.points[self.point_index][self.nr]["x1"], self.points[self.point_index][self.nr]["x0"] = (
self.points[self.point_index][self.nr]["x0"], self.points[self.point_index][self.nr]["x0"],
@@ -919,7 +1023,7 @@ class World:
x toggle (delete) key frame x toggle (delete) key frame
r convert interpolated points to points (no undo!) r convert interpolated points to points (no undo!)
u toggle automatic interpolation u toggle automatic interpolation
mouse left: set top-left corner of box mouse left: set top-left corner of box. shift: modify nearest corner, ctrl: side to image edge
mouse middle: set center of box mouse middle: set center of box
mouse right: set lower right corner of box mouse right: set lower right corner of box
e set width/height of box symmetric around center e set width/height of box symmetric around center
@@ -946,6 +1050,10 @@ class World:
) )
) )
self.mouse_position = (x, y) self.mouse_position = (x, y)
self.mouse_flags["shift"] = flags & cv2.EVENT_FLAG_SHIFTKEY
self.mouse_flags["ctrl"] = flags & cv2.EVENT_FLAG_CTRLKEY
self.mouse_flags["alt"] = flags & cv2.EVENT_FLAG_ALTKEY
if self.crop_click == 1: if self.crop_click == 1:
self.crop[0] = (x, y) self.crop[0] = (x, y)
if event == cv2.EVENT_LBUTTONDOWN: if event == cv2.EVENT_LBUTTONDOWN:
@@ -959,11 +1067,17 @@ class World:
if self.point_click == 1: if self.point_click == 1:
if event == cv2.EVENT_LBUTTONDOWN: if event == cv2.EVENT_LBUTTONDOWN:
if self.mouse_flags["ctrl"]:
self.modify_point("snap", int(x), int(y))
elif self.mouse_flags["shift"]:
self.modify_point("n", int(x), int(y))
else:
self.modify_point("tl", int(x), int(y)) self.modify_point("tl", int(x), int(y))
if event == cv2.EVENT_RBUTTONDOWN: elif event == cv2.EVENT_RBUTTONDOWN:
self.modify_point("br", int(x), int(y)) self.modify_point("br", int(x), int(y))
if event == cv2.EVENT_MBUTTONDOWN: elif event == cv2.EVENT_MBUTTONDOWN:
self.modify_point("c", int(x), int(y)) self.modify_point("c", int(x), int(y))
return return
if event == cv2.EVENT_LBUTTONDOWN: if event == cv2.EVENT_LBUTTONDOWN: