|
28 | 28 | # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29 | 29 |
|
30 | 30 | import configparser
|
31 |
| -import errno |
32 | 31 | import json
|
33 | 32 | import logging
|
34 | 33 | import math
|
35 | 34 | import os
|
36 |
| -import shutil |
37 | 35 | import sys
|
38 | 36 | import time
|
39 | 37 | from io import BytesIO
|
40 | 38 | from pathlib import Path
|
41 |
| -from typing import Any, Dict, List, Optional, Tuple, Union |
| 39 | +from typing import Any, Dict, List, Tuple, Union |
42 | 40 |
|
43 | 41 | import torch
|
44 | 42 | import torch.nn as nn
|
45 |
| -from torch import Tensor |
46 | 43 |
|
47 | 44 | from compressai_vision.codecs.utils import FpnUtils
|
48 | 45 | from compressai_vision.model_wrappers import BaseWrapper
|
|
53 | 50 |
|
54 | 51 | from .encdec_utils import *
|
55 | 52 | from .encdec_utils.png_yuv import PngFilesToYuvFileConverter, YuvFileToPngFilesConverter
|
56 |
| -from .utils import ( |
57 |
| - MIN_MAX_DATASET, |
58 |
| - compute_frame_resolution, |
59 |
| - min_max_inv_normalization, |
60 |
| - min_max_normalization, |
61 |
| - tensor_to_tiled, |
62 |
| - tiled_to_tensor, |
63 |
| -) |
| 53 | +from .utils import MIN_MAX_DATASET, min_max_inv_normalization, min_max_normalization |
64 | 54 |
|
65 | 55 |
|
66 | 56 | def get_filesize(filepath: Union[Path, str]) -> int:
|
@@ -405,231 +395,6 @@ def get_decode_cmd(
|
405 | 395 | self.logger.debug(cmd)
|
406 | 396 | return cmd
|
407 | 397 |
|
408 |
def convert_input_to_yuv(self, input: Dict, file_prefix: str):
    """
    Converts the input image or video to YUV format using ffmpeg, or use existing YUV if available.

    Args:
        input (Dict): A dictionary containing information about the input. It should have the following keys:
            - file_names (List[str]): A list of file names for the input. If it contains more than one file, it is considered a video.
            - last_frame (int): The last frame number of the video (read only for video input).
            - frame_skip (int): The number of frames to skip in the video (read only for video input).
            - org_input_size (Dict[str, int]): A dictionary containing the width and height of the input.
        file_prefix (str): The prefix for the output file name.

    Returns:
        Tuple[str, int, int, int, str]: A tuple containing the following:
            - yuv_in_path (str): The path to the converted YUV input file.
            - nb_frames (int): The number of frames in the input (1 for a single image).
            - frame_width (int): The width of the frames, rounded up to an even value.
            - frame_height (int): The height of the frames, rounded up to an even value.
            - file_prefix (str): The updated file prefix.

    Raises:
        AssertionError: If the number of PNG images in the input folder does not match the expected
            number of frames, or if a pre-existing YUV file does not have the expected size.
    """
    file_names = input["file_names"]
    yuv_file = None
    # A single file is one still image. Without this default the image path
    # reached the return statement with `nb_frames` never assigned.
    nb_frames = 1

    if len(file_names) > 1:  # video
        frames_dir = Path(file_names[0]).parent
        # NOTE: using glob for now, should be more robust and look at skipped
        # NOTE: somewhat rigid pattern (lowercase png)
        filename_pattern = f"{str(frames_dir)}/*.png"
        nb_frames = input["last_frame"] - input["frame_skip"]
        images_in_folder = sum(1 for _ in frames_dir.glob("*.png"))
        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped when Python runs with -O.
        if images_in_folder != nb_frames:
            raise AssertionError(
                f"input folder contains {images_in_folder} images, {nb_frames} were expected"
            )

        input_info = [
            "-pattern_type",
            "glob",
            "-i",
            filename_pattern,
        ]

        # A ready-made YUV is looked up next to the sequence folder:
        # <grandparent of the frames>.yuv
        yuv_file = Path(f"{frames_dir.parent}.yuv")
        print(f"Checking if YUV is available: {yuv_file}")
        if not yuv_file.is_file():
            yuv_file = None
    else:
        input_info = ["-i", file_names[0]]

    chroma_format = self.enc_cfgs["chroma_format"]
    input_bitdepth = self.enc_cfgs["input_bitdepth"]

    # Round both dimensions up to even values; the ffmpeg pad filter below
    # applies the same rounding to the pixels.
    frame_width = math.ceil(input["org_input_size"]["width"] / 2) * 2
    frame_height = math.ceil(input["org_input_size"]["height"] / 2) * 2
    file_prefix = f"{file_prefix}_{frame_width}x{frame_height}_{self.frame_rate}fps_{input_bitdepth}bit_p{chroma_format}"
    yuv_in_path = f"{file_prefix}_input.yuv"

    pix_fmt_suffix = "10le" if input_bitdepth == 10 else ""
    chroma_format = "gray" if chroma_format == "400" else f"yuv{chroma_format}p"

    # Use existing YUV (if found):
    if yuv_file is not None:
        size = yuv_file.stat().st_size
        # Only 4:2:0 is listed here; any other chroma format raises KeyError.
        bytes_per_luma_sample = {"yuv420p": 1.5}[chroma_format]
        bytes_per_sample = (input_bitdepth + 7) >> 3
        expected_size = int(
            frame_width
            * frame_height
            * bytes_per_luma_sample
            * bytes_per_sample
            * nb_frames
        )
        if size != expected_size:
            raise AssertionError(
                f"YUV found for input but expected size of {expected_size} bytes differs from actual size of {size} bytes"
            )
        shutil.copy(yuv_file, yuv_in_path)
        print(f"Using pre-existing YUV file: {yuv_file}")
        return (yuv_in_path, nb_frames, frame_width, frame_height, file_prefix)

    # TODO (fracape)
    # we don't enable skipping frames (codec.skip_n_frames) nor use n_frames_to_be_encoded in video mode

    convert_cmd = [
        "ffmpeg",
        "-y",
        "-hide_banner",
        "-loglevel",
        f"{self.ffmpeg_loglevel}",
    ]
    convert_cmd += input_info
    convert_cmd += [
        "-vf",
        "pad=ceil(iw/2)*2:ceil(ih/2)*2",
        "-f",
        "rawvideo",
        "-pix_fmt",
        f"{chroma_format}{pix_fmt_suffix}",
        "-dst_range",
        "1",  # (fracape) convert to full range for now
    ]

    convert_cmd.append(yuv_in_path)
    self.logger.debug(convert_cmd)

    run_cmdline(convert_cmd)

    return (yuv_in_path, nb_frames, frame_width, frame_height, file_prefix)
514 |
| - |
515 |
def convert_yuv_to_pngs(
    self,
    output_file_prefix: str,
    dec_path: str,
    yuv_dec_path: Path,
    org_img_size: Dict = None,
    vcm_mode: bool = False,
):
    """
    Converts a YUV file to a series of PNG images using ffmpeg.

    Args:
        output_file_prefix (str): The prefix of the output file name.
        dec_path (str): The path to the directory where the PNG images will be saved.
        yuv_dec_path (Path): The path to the input YUV file (a str is accepted as well).
            The part of the name after the last "qp" is parsed for the video properties.
        org_img_size (Dict, optional): The original image size, with "width" and "height" keys.
            When given and different from the size parsed from the YUV name, the PNG output
            is cropped to this size. Defaults to None.
        vcm_mode (bool, optional): When False, "-src_range 1" is passed to ffmpeg; when True,
            "-vsync 1" is passed instead. Defaults to False.

    Returns:
        None

    Raises:
        AssertionError: If the video format is not YUV420.
        ValueError: If `self.datacatalog` has no PNG naming scheme defined here.
    """
    # str() so both str and Path arguments work; Path itself has no .split().
    video_info = get_raw_video_file_info(str(yuv_dec_path).split("qp")[-1])
    frame_width = video_info["width"]
    frame_height = video_info["height"]

    # Raised explicitly (rather than via `assert`) so the check survives -O.
    if "420" not in video_info["format"].value:
        raise AssertionError(f"Only support yuv420, but got {video_info['format']}")
    pix_fmt_suffix = "10le" if video_info["bitdepth"] == 10 else ""
    chroma_format = "yuv420p"

    convert_cmd = [
        "ffmpeg",
        "-y",
        "-hide_banner",
        "-loglevel",
        "error",
        "-f",
        "rawvideo",
        "-pix_fmt",
        f"{chroma_format}{pix_fmt_suffix}",
        "-s",
        f"{frame_width}x{frame_height}",
    ]
    if not vcm_mode:
        # (fracape) assume dec yuv is full range for now
        convert_cmd.extend(["-src_range", "1"])
    convert_cmd.extend(["-i", f"{yuv_dec_path}", "-pix_fmt", "rgb24"])
    if vcm_mode:
        convert_cmd.extend(["-vsync", "1"])

    # TODO (fracape) hacky, clean this
    # The output naming (and first frame number) depends on the dataset.
    if self.datacatalog == "MPEGOIV6":
        output_png = f"{dec_path}/{output_file_prefix}.png"
    elif self.datacatalog == "SFUHW":
        prefix = output_file_prefix.split("qp")[0]
        output_png = f"{dec_path}/{prefix}%03d.png"
        convert_cmd += ["-start_number", "0"]
    elif self.datacatalog in ["MPEGHIEVE"]:
        convert_cmd += ["-start_number", "0"]
        output_png = f"{dec_path}/%06d.png"
    elif self.datacatalog in ["MPEGTVDTRACKING"]:
        convert_cmd += ["-start_number", "1"]
        output_png = f"{dec_path}/%06d.png"
    else:
        # Previously fell through and failed on an unbound `output_png`.
        raise ValueError(
            f"No PNG output naming defined for datacatalog: {self.datacatalog}"
        )
    convert_cmd.append(output_png)

    run_cmdline(convert_cmd)

    if org_img_size is None:
        return

    discrepancy = (
        frame_height != org_img_size["height"]
        or frame_width != org_img_size["width"]
    )
    if not discrepancy:
        return

    self.logger.warning(
        f"Different original input size found. It must be {org_img_size['width']}x{org_img_size['height']}, but {frame_width}x{frame_height} are parsed from YUV"
    )
    self.logger.warning(
        f"Use {org_img_size['width']}x{org_img_size['height']}, instead of {frame_width}x{frame_height}"
    )

    # Crop into a temporary file, then move it over the original name.
    # NOTE(review): for the datasets whose `output_png` is a "%0Nd" pattern,
    # unlink()/rename() below operate on the literal pattern path - confirm
    # this crop step is only reached for single-image output.
    final_png = f"{dec_path}/{Path(output_png).stem}_tmp.png"

    convert_cmd = [
        "ffmpeg",
        "-y",
        "-hide_banner",
        "-loglevel",
        "error",
        "-i",
        output_png,
        "-vf",
        f"crop={org_img_size['width']}:{org_img_size['height']}",
        final_png,  # no name change
    ]
    run_cmdline(convert_cmd)

    Path(output_png).unlink()
    Path(final_png).rename(output_png)
632 |
| - |
633 | 398 | def encode(
|
634 | 399 | self,
|
635 | 400 | x: Dict,
|
@@ -849,7 +614,11 @@ def decode(
|
849 | 614 | self.logger.debug(f"dec_time:{dec_time}")
|
850 | 615 |
|
851 | 616 | self.convert_yuv_to_pngs(
|
852 |
| - output_file_prefix, dec_path, yuv_dec_path, org_img_size, vcm_mode |
| 617 | + output_file_prefix, |
| 618 | + dec_path, |
| 619 | + yuv_dec_path, |
| 620 | + org_img_size=org_img_size, |
| 621 | + vcm_mode=vcm_mode, |
853 | 622 | )
|
854 | 623 |
|
855 | 624 | # output the list of file paths for each frame
|
|
0 commit comments