rectangle annotations

2025-06-25 14:40:04 +03:00
parent 195045ff98
commit 233a0292d6
2 changed files with 331 additions and 104 deletions
--- a/tsmark/init.py
+++ b/tsmark/init.py
@@ -24,6 +24,22 @@ def get_options():
        required=False,
        help="Save timestamps to a CSV file",
    )
+    parser.add_argument(
+        "--op",
+        action="store",
+        dest="output_points",
+        default=None,
+        required=False,
+        help="Save points to a JSON file",
+    )
+    parser.add_argument(
+        "--ip",
+        action="store",
+        dest="input_points",
+        default=None,
+        required=False,
+        help="Load points from a JSON file",
+    )
    parser.add_argument(
        "--fps",
        action="store",
@@ -42,6 +58,23 @@ def get_options():
        type=str,
        help="predefined crop. Syntax: 'w:h:x:y' example: 1280:720:30:20",
    )
+    parser.add_argument(
+        "--ss",
+        action="store",
+        dest="start_time",
+        default=None,
+        required=False,
+        type=str,
+        help="Starting position as frame (int) or HH:MM:SS.ss",
+    )
+    parser.add_argument(
+        "--max-res",
+        action="store",
+        dest="max_res",
+        default="1280x720",
+        type=str,
+        help="Max resolution of video viewer: %(default)s",
+    )
    parser.add_argument(
        "--ffmpeg-copy",
        action="store_true",
--- a/tsmark/video_annotator.py
+++ b/tsmark/video_annotator.py
@@ -1,3 +1,4 @@
+import json
 import os
 import shlex
 import subprocess
@@ -29,8 +30,9 @@ class Marker:
        self.auto_step = True
        self.font = cv2.FONT_HERSHEY_SIMPLEX
        self.frame_visu = []
-        self.max_res = (1280, 720)
+        self.max_res = tuple([int(x) for x in self.opts.max_res.split("x")])
        self.min_res = (512, None)
+        self.mouse_position = (0, 0)
        self.crop = [(None, None), (None, None), None]
        self.crop_click = 0
        self.point_click = 0
@@ -44,6 +46,11 @@ class Marker:
            self.open()
            self.calculate_res()
            self.parse_timestamps()
+            if self.opts.start_time:
+                try:
+                    self.nr = int(self.opts.start_time)
+                except ValueError:
+                    self.nr = self.parse_time(self.opts.start_time)
            self.loop()
        except Exception as e:
            exc_type, exc_obj, exc_tb = sys.exc_info()
@@ -89,14 +96,8 @@ class Marker:
        if self.opts.crop:
            w, h, x, y = [int(c) for c in self.opts.crop.split(":")]
            self.crop = [
-                (
-                    int(self.video_res[0] * x / self.video_res_original[0]),
-                    int(self.video_res[1] * y / self.video_res_original[1]),
-                ),
-                (
-                    int(self.video_res[0] * w / self.video_res_original[0]),
-                    int(self.video_res[1] * h / self.video_res_original[1]),
-                ),
+                self.original_to_visual((x, y)),
+                self.original_to_visual((w, h)),
                True,
            ]
        self.bar_start = int(self.video_res[0] * 0.05)
@@ -196,10 +197,8 @@ class Marker:
            1,
        )
        if self.crop_click == 1:
-            x, y = [
-                int(self.video_res_original[0] * self.crop[0][0] / self.video_res[0]),
-                int(self.video_res_original[1] * self.crop[0][1] / self.video_res[1]),
-            ]
+            x, y = self.visual_to_original((self.crop[0][0], self.crop[0][1]))
+
            self.shadow_text(
                frame,
                f"{x},{y}",
@@ -209,14 +208,8 @@ class Marker:
                (0, 192, 192),
            )
        if self.crop_click == 2:
-            x, y = [
-                int(self.video_res_original[0] * self.crop[0][0] / self.video_res[0]),
-                int(self.video_res_original[1] * self.crop[0][1] / self.video_res[1]),
-            ]
-            w, h = [
-                abs(int(self.video_res_original[0] * self.crop[1][0] / self.video_res[0])),
-                abs(int(self.video_res_original[1] * self.crop[1][1] / self.video_res[1])),
-            ]
+            x, y = self.visual_to_original((self.crop[0][0], self.crop[0][1]))
+            w, h = self.visual_to_original((self.crop[1][0], self.crop[1][1]))
            self.shadow_text(
                frame,
                f"{w}x{h}",
@@ -228,53 +221,95 @@ class Marker:

    def draw_points(self, frame):

-        if self.point_click == 0:
+        if self.opts.output_points is None:
            return

-        x, y = [self.video_res[0] - 120, 50]
-        self.shadow_text(
-            frame,
-            "Points: " + str(self.point_index),
-            (x, y),
-            0.5,
-            1,
-            (0, 192, 192),
-        )
-        try:
-            current = self.points_interpolated[self.point_index][self.nr]
+        for index in self.points:
+            if index == self.point_index and self.point_click == 1:
+                continue
+
+            current = self.get_interpolated_point(index=index)
+            if current[5] == 0:
+                continue
            color = (0, 192, 192)
-            if current[3] == 2:
+            if current[5] == 2:
                color = (60, 205, 60)
-            if current[3] == 1:
+            if current[5] == 1:
                color = (192, 0, 192)
-            cv2.circle(frame, (current[1], current[2]), 10, color, 1)
-            history = list(range(max(1, int(self.nr - self.viewer_fps)), self.nr + 1))
-            for p in history:
-                self.points_interpolated[self.point_index][p - 1][1:2]
-                cv2.line(
+            cv2.circle(frame, (current[6], current[7]), 10, (0, 0, 0), 2)
+            cv2.circle(frame, (current[6], current[7]), 10, color, 1)
+            self.shadow_text(
+                frame,
+                index,
+                (current[6], current[7]),
+                0.5,
+                1,
+                color,
+            )
+
+        if self.point_click == 1:
+            # Draw crosshair
+            cv2.line(
+                frame, (self.mouse_position[0], 0), (self.mouse_position[0], self.video_res[1]), (128, 128, 128), 1
+            )
+            cv2.line(
+                frame, (0, self.mouse_position[1]), (self.video_res[0], self.mouse_position[1]), (128, 128, 128), 1
+            )
+            # Show current track
+            x, y = [self.video_res[0] - 120, 50]
+            self.shadow_text(
+                frame,
+                "Points: " + str(self.point_index),
+                (x, y),
+                0.5,
+                1,
+                (0, 192, 192),
+            )
+            try:
+                current = self.get_interpolated_point()  # self.points_interpolated[self.point_index][self.nr]
+                color = (0, 192, 192)
+                if current[5] == 2:
+                    color = (60, 205, 60)
+                if current[5] == 1:
+                    color = (192, 0, 192)
+                cv2.rectangle(
                    frame,
-                    tuple(self.points_interpolated[self.point_index][p - 1][1:3]),
-                    tuple(self.points_interpolated[self.point_index][p][1:3]),
-                    (192, 0, 192),
+                    (current[1], current[2]),
+                    (current[3], current[4]),
+                    color,
                    1,
                )
+                cv2.circle(frame, (current[6], current[7]), 10, color, 1)

-        except KeyError:
-            pass
-        except IndexError:
-            print(current, self.nr)
-            pass
-        try:
-            # ~ point_keys = list(sorted(self.points[self.point_index].keys()))
-            current = self.points[self.point_index][self.nr]
-            color = (60, 205, 60)
-            cv2.circle(frame, (current[0], current[1]), 13, color, 2)
-        except KeyError:
-            pass
-        except IndexError:
-            print(self.points[self.point_index])
-            print(self.nr)
-            pass
+                history = list(range(max(1, int(self.nr - self.viewer_fps)), self.nr + 1))
+                for p in history:
+                    current = self.get_interpolated_point(p)
+                    past = self.get_interpolated_point(p - 1)
+                    cv2.line(
+                        frame,
+                        (past[6], past[7]),
+                        (current[6], current[7]),
+                        (192, 0, 192),
+                        1,
+                    )
+
+            except KeyError:
+                pass
+            except IndexError:
+                print(current, self.nr)
+                pass
+            try:
+                # ~ point_keys = list(sorted(self.points[self.point_index].keys()))
+                # current = self.points[self.point_index][self.nr]
+                current = self.get_point()  # self.points_interpolated[self.point_index][self.nr]
+                color = (60, 205, 60)
+                cv2.circle(frame, (current[4], current[5]), 13, color, 2)
+            except KeyError:
+                pass
+            except IndexError:
+                # ~ print(self.points[self.point_index])
+                # ~ print(self.nr)
+                pass

    def scan_point(self, direction):
        try:
@@ -302,6 +337,115 @@ class Marker:
        except Exception:
            pass

+    def get_point(self, nr=None, index=None):
+        if nr is None:
+            nr = self.nr
+        if index is None:
+            index = self.point_index
+        if index in self.points:
+
+            if nr in self.points[index]:
+                return [
+                    *self.points[index][nr],
+                    int((self.points[index][nr][0] + self.points[index][nr][2]) / 2),
+                    int((self.points[index][nr][1] + self.points[index][nr][3]) / 2),
+                ]
+
+        return [None, None, None, None, None, None]
+
+    def get_interpolated_point(self, nr=None, index=None):
+        if nr is None:
+            nr = self.nr
+        if index is None:
+            index = self.point_index
+
+        if index in self.points:
+            if nr in self.points_interpolated[index]:
+                return [
+                    *self.points_interpolated[index][nr],
+                    int((self.points_interpolated[index][nr][1] + self.points_interpolated[index][nr][3]) / 2),
+                    int((self.points_interpolated[index][nr][2] + self.points_interpolated[index][nr][4]) / 2),
+                ]
+
+        return [None, None, None, None, None, None, None, None]
+
+    def modify_point(self, position, x, y):
+        """position:  tl topleft, br bottomright, c center"""
+        if position == "tl":
+            ix = 0
+            iy = 1
+        if position == "br":
+            ix = 2
+            iy = 3
+
+        if not self.point_index in self.points:
+            self.points[self.point_index] = {}
+        if not self.nr in self.points[self.point_index]:
+            if len(self.points[self.point_index]) > 0:
+                keys = sorted(list(self.points[self.point_index].keys()))
+                if self.nr > keys[-1]:
+                    last_p = self.points[self.point_index][keys[-1]]
+                elif self.nr < keys[0]:
+                    last_p = self.points[self.point_index][keys[0]]
+                else:
+                    prev_key = keys[0]
+                    for key in keys:
+                        if key > self.nr:
+                            last_p = self.points[self.point_index][prev_key]
+                            break
+                        prev_key = key
+                w = abs(last_p[2] - last_p[0])
+                h = abs(last_p[3] - last_p[1])
+            else:
+                w = 50
+                h = 50
+
+            if position == "tl":
+                self.points[self.point_index][self.nr] = [
+                    x,
+                    y,
+                    min(self.video_res[0] - 1, x + w),
+                    min(self.video_res[1] - 1, y + h),
+                ]
+            if position == "br":
+                self.points[self.point_index][self.nr] = [max(0, x - w), max(0, y - h), x, y]
+            if position == "c":
+                self.points[self.point_index][self.nr] = [
+                    max(0, int(x - w / 2)),
+                    max(0, int(y - h / 2)),
+                    min(self.video_res[0] - 1, int(x + w / 2)),
+                    min(self.video_res[1] - 1, int(y + h / 2)),
+                ]
+
+        else:
+            # not a new point
+            if position == "c":
+                current = self.points[self.point_index][self.nr]
+                w = abs(current[2] - current[0])
+                h = abs(current[3] - current[1])
+                self.points[self.point_index][self.nr] = [
+                    max(0, int(x - w / 2)),
+                    max(0, int(y - h / 2)),
+                    min(self.video_res[0] - 1, int(x + w / 2)),
+                    min(self.video_res[1] - 1, int(y + h / 2)),
+                ]
+            else:
+                self.points[self.point_index][self.nr][ix] = x
+                self.points[self.point_index][self.nr][iy] = y
+
+        if self.points[self.point_index][self.nr][0] > self.points[self.point_index][self.nr][2]:
+            self.points[self.point_index][self.nr][2], self.points[self.point_index][self.nr][0] = (
+                self.points[self.point_index][self.nr][0],
+                self.points[self.point_index][self.nr][2],
+            )
+        if self.points[self.point_index][self.nr][1] > self.points[self.point_index][self.nr][3]:
+            self.points[self.point_index][self.nr][3], self.points[self.point_index][self.nr][1] = (
+                self.points[self.point_index][self.nr][1],
+                self.points[self.point_index][self.nr][3],
+            )
+
+        self.interpolate_points()
+
    def interpolate_points(self):

        if not self.point_index in self.points_interpolated:
@@ -309,16 +453,16 @@ class Marker:

        if len(self.points[self.point_index]) == 1:
            key = list(self.points[self.point_index].keys())[0]
-            x, y = self.points[self.point_index][key]
+            x, y, x2, y2 = self.points[self.point_index][key]
            for key in range(self.frames):
-                self.points_interpolated[self.point_index][key] = [False, int(x), int(y), 0]
-            self.points_interpolated[self.point_index][self.nr][3] = 2
+                self.points_interpolated[self.point_index][key] = [False, int(x), int(y), x2, y2, 0]
+            self.points_interpolated[self.point_index][self.nr][5] = 2

        else:  # more points
            point_keys = list(sorted(list(self.points[self.point_index].keys())))
            point_values = [self.points[self.point_index][k] for k in point_keys]
-            xy = np.array(point_values).T
-            spline = PchipInterpolator(point_keys, xy, axis=1)
+            xyxy = np.array(point_values).T
+            spline = PchipInterpolator(point_keys, xyxy, axis=1)
            start_key = min(point_keys)
            end_key = max(point_keys) + 1
            t2 = np.arange(start_key, end_key)
@@ -327,16 +471,32 @@ class Marker:
                    False,
                    self.points[self.point_index][start_key][0],
                    self.points[self.point_index][start_key][1],
+                    self.points[self.point_index][start_key][2],
+                    self.points[self.point_index][start_key][3],
                    0,
                ]
            # interpolated points
            for row in np.vstack((t2, spline(t2))).T:
-                self.points_interpolated[self.point_index][row[0]] = [True, int(row[1]), int(row[2]), 1]
+                self.points_interpolated[self.point_index][row[0]] = [
+                    True,
+                    int(row[1]),
+                    int(row[2]),
+                    int(row[3]),
+                    int(row[4]),
+                    1,
+                ]
            for key in range(end_key, self.frames + 1):
-                self.points_interpolated[self.point_index][key] = [False, int(row[1]), int(row[2]), 0]
+                self.points_interpolated[self.point_index][key] = [
+                    False,
+                    int(row[1]),
+                    int(row[2]),
+                    int(row[3]),
+                    int(row[4]),
+                    3,
+                ]
            # clicked points (not necessary, could determine at draw time!)
            for key in point_keys:
-                self.points_interpolated[self.point_index][key][3] = 2
+                self.points_interpolated[self.point_index][key][5] = 2

    def draw_help(self, frame):

@@ -362,7 +522,7 @@ class Marker:

        formatted = "{} {}".format(
            self.format_time(self.nr),
-            "||" if self.paused else "",
+            f"|| ({self.nr})" if self.paused else "",
        )
        self.shadow_text(frame, formatted, (left, bottom), 1.1, 2, (255, 255, 255))

@@ -387,7 +547,8 @@ class Marker:
    space or click video
              pause
    a and s   modify crop offset or size
-    f         toggle 0.5x 1x or 2x FPS
+    p         toggle bounding box drawing. follow with any key as index.  left/middle/right mouse sets box position
+    f         toggle 0.25x 1x or 4x FPS
    v         toggle HUD
    h         toggle help
    q or esc  quit
@@ -403,6 +564,7 @@ class Marker:
                y > self.bar_top,
            )
        )
+        self.mouse_position = (x, y)
        if self.crop_click == 1:
            self.crop[0] = (x, y)
            if event == cv2.EVENT_LBUTTONDOWN:
@@ -414,15 +576,16 @@ class Marker:
                self.crop_click = 0
            return

+        if self.point_click == 1:
+            if event == cv2.EVENT_LBUTTONDOWN:
+                self.modify_point("tl", int(x), int(y))
+            if event == cv2.EVENT_RBUTTONDOWN:
+                self.modify_point("br", int(x), int(y))
+            if event == cv2.EVENT_MBUTTONDOWN:
+                self.modify_point("c", int(x), int(y))
+            return
+
        if event == cv2.EVENT_LBUTTONDOWN:
-            if self.point_click == 1:
-                if not self.point_index in self.points:
-                    self.points[self.point_index] = {}
-                self.points[self.point_index][self.nr] = (int(x), int(y))
-                self.interpolate_points()
-
-                return
-
            if in_bar:
                click_relative = (x - self.bar_start) / (self.bar_end - self.bar_start)
                self.nr = int(click_relative * self.frames)
@@ -431,13 +594,6 @@ class Marker:
                self.paused = not self.paused

        if event == cv2.EVENT_LBUTTONDBLCLK:
-            if self.point_click == 1:
-                if not self.point_index in self.points:
-                    return
-                if self.nr in self.points[self.point_index]:
-                    del self.points[self.point_index][self.nr]
-                return
-
            if not in_bar:
                self.toggle_stamp()
            # doubleclick (toggle?)
@@ -485,6 +641,22 @@ class Marker:
        else:
            self.stamps = []
            self.nr = 0
+        # Read bounding boxes from JSON
+        if self.opts.input_points:
+            if os.path.exists(self.opts.input_points):
+                with open(self.opts.input_points, "rt") as fp:
+                    self.points = json.load(fp)
+                for index in self.points:
+                    self.point_index = index
+                    self.points[index] = {int(k): v for k, v in self.points[index].items()}
+                    for key in self.points[index]:
+                        self.points[index][key] = [
+                            *self.original_to_visual((self.points[index][key][0], self.points[index][key][1])),
+                            *self.original_to_visual((self.points[index][key][2], self.points[index][key][3])),
+                        ]
+                    self.interpolate_points()
+                    print(f"Loaded points with index: {index}")
+                self.point_index = None

    def print_help(self):
        print(self.get_help())
@@ -495,14 +667,9 @@ class Marker:
            cropstr = []
        else:
            self.opts.ffmpeg_copy = False
-            x, y = [
-                int(self.video_res_original[0] * self.crop[0][0] / self.video_res[0]),
-                int(self.video_res_original[1] * self.crop[0][1] / self.video_res[1]),
-            ]
-            w, h = [
-                int(self.video_res_original[0] * self.crop[1][0] / self.video_res[0]),
-                int(self.video_res_original[1] * self.crop[1][1] / self.video_res[1]),
-            ]
+            x, y = self.visual_to_original((self.crop[0][0], self.crop[0][1]))
+            w, h = self.visual_to_original((self.crop[1][0], self.crop[1][1]))
+
            if w < 0:
                x = x + w
                w = -w
@@ -555,11 +722,23 @@ class Marker:

    def save_timestamps(self):

-        if self.opts.output == None:
-            return
-        with open(self.opts.output, "wt") as fp:
-            for i, ts in enumerate(self.stamps):
-                fp.write("{},{},{}\n".format(self.format_time(ts), i + 1, ts))
+        if self.opts.output is not None:
+            with open(self.opts.output, "wt") as fp:
+                for i, ts in enumerate(self.stamps):
+                    fp.write("{},{},{}\n".format(self.format_time(ts), i + 1, ts))
+
+        if self.opts.output_points is not None:
+            points = {}
+            for index in self.points.keys():
+                points[index] = {}
+                for key in sorted(self.points[index].keys()):
+                    points[index][key] = [
+                        *self.visual_to_original((self.points[index][key][0], self.points[index][key][1])),
+                        *self.visual_to_original((self.points[index][key][2], self.points[index][key][3])),
+                    ]
+
+            with open(self.opts.output_points, "wt") as fp:
+                json.dump(points, fp, indent=2)

    def shadow_text(self, frame, text, pos, size, thicc, color):

@@ -591,6 +770,20 @@ class Marker:
            self.stamps.append(self.nr)
        self.stamps.sort()

+    def original_to_visual(self, t):
+        """original video resolution (x,y) to display (x,y)"""
+        return (
+            int(self.video_res[0] * t[0] / self.video_res_original[0]),
+            int(self.video_res[1] * t[1] / self.video_res_original[1]),
+        )
+
+    def visual_to_original(self, t):
+        """display (x,y) to original video resolution (x,y)"""
+        return (
+            int(self.video_res_original[0] * t[0] / self.video_res[0]),
+            int(self.video_res_original[1] * t[1] / self.video_res[1]),
+        )
+
    def loop(self):

        self.step = 1
@@ -601,20 +794,19 @@ class Marker:
        self.video_reader.set(cv2.CAP_PROP_POS_FRAMES, self.nr)

        self.print_help()
-        cv2.namedWindow("tsmark")
+        cv2.namedWindow("tsmark", flags=cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO | cv2.WINDOW_GUI_NORMAL)
        cv2.setMouseCallback("tsmark", self.mouse_click)
        digits_ords = [ord(str(x)) for x in range(10)]
        FPS_modifier = 1
-        FPS_modifiers = [0.5, 1, 2]
+        FPS_modifiers = [0.25, 1, 4]
        while self.video_reader.isOpened():
            show_time = time.time()
            if (not self.paused) or self.read_next:
                ret, frame = self.video_reader.read()
            if ret == True:
-                if self.paused:
-                    draw_wait = 200
-                else:
-                    draw_wait = 1
+
+                draw_wait = 200 if self.paused or self.point_click == 0 else 1
+
                if (not self.paused) or self.read_next:
                    self.read_next = False
                frame_visu = cv2.resize(frame.copy(), self.video_res)
@@ -719,7 +911,9 @@ class Marker:
                    self.crop_click = 0 if self.crop_click == 2 else 2
                    self.crop[2] = True
                elif k & 0xFF == ord("p"):  # toggle points
-                    self.point_click = 0 if self.point_click == 1 else 1
+                    if self.opts.output_points is None:
+                        continue
+                    self.point_click = 1 - self.point_click
                    if self.point_click == 1:
                        self.shadow_text(
                            frame_visu,