use crate::api::InterConfig;
use crate::context::{
BlockOffset, PlaneBlockOffset, SuperBlockOffset, TileBlockOffset,
TileSuperBlockOffset, MAX_SB_SIZE_LOG2, MIB_SIZE_LOG2, MI_SIZE,
MI_SIZE_LOG2, SB_SIZE,
};
use crate::dist::*;
use crate::frame::*;
use crate::mc::MotionVector;
use crate::partition::*;
use crate::predict::PredictionMode;
use crate::tiling::*;
use crate::util::ILog;
use crate::util::{clamp, Pixel};
use crate::FrameInvariants;
use arrayvec::*;
use std::ops::{Index, IndexMut};
use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
/// Motion-estimation result recorded for one block position.
#[derive(Debug, Copy, Clone, Default)]
pub struct MEStats {
/// Motion vector chosen by the search.
pub mv: MotionVector,
/// SAD of the chosen vector. The search routines in this file store it
/// rescaled by `<< (MAX_SB_SIZE_LOG2 * 2)` and divided by the block area.
pub normalized_sad: u32,
}
/// Row-major grid of [`MEStats`] with `cols * rows` entries.
#[derive(Debug, Clone)]
pub struct FrameMEStats {
/// Flat storage; row `r` occupies `stats[r * cols..(r + 1) * cols]`.
stats: Box<[MEStats]>,
/// Number of entries in each row.
pub cols: usize,
/// Number of rows.
pub rows: usize,
}
/// Shared, lock-protected array of `REF_FRAMES` [`FrameMEStats`] grids.
pub type RefMEStats = Arc<RwLock<[FrameMEStats; REF_FRAMES]>>;
/// Read guard over the array stored in a [`RefMEStats`].
pub type ReadGuardMEStats<'a> =
RwLockReadGuard<'a, [FrameMEStats; REF_FRAMES]>;
/// Write guard over the array stored in a [`RefMEStats`].
pub type WriteGuardMEStats<'a> =
RwLockWriteGuard<'a, [FrameMEStats; REF_FRAMES]>;
impl FrameMEStats {
  /// Iterates over the grid one row (`cols` entries) at a time.
  #[inline]
  pub fn rows_iter(&self) -> std::slice::ChunksExact<'_, MEStats> {
    let row_len = self.cols;
    self.stats.chunks_exact(row_len)
  }

  /// Creates a `cols` x `rows` grid filled with `MEStats::default()`.
  pub fn new(cols: usize, rows: usize) -> Self {
    let len = cols * rows;
    let stats = vec![MEStats::default(); len].into_boxed_slice();
    Self { stats, cols, rows }
  }

  /// Creates a shared, lock-protected array of identically sized grids, one
  /// per array slot of [`RefMEStats`].
  pub fn new_arc_array(cols: usize, rows: usize) -> RefMEStats {
    // The array length is inferred from the `RefMEStats` return type.
    let grids = std::array::from_fn(|_| FrameMEStats::new(cols, rows));
    Arc::new(RwLock::new(grids))
  }
}
impl Index<usize> for FrameMEStats {
  type Output = [MEStats];

  /// Returns row `index` of the grid as a slice of `cols` entries.
  #[inline]
  fn index(&self, index: usize) -> &Self::Output {
    let row_start = index * self.cols;
    let row_end = (index + 1) * self.cols;
    &self.stats[row_start..row_end]
  }
}
impl IndexMut<usize> for FrameMEStats {
  /// Returns row `index` of the grid as a mutable slice of `cols` entries.
  #[inline]
  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
    let row_start = index * self.cols;
    let row_end = (index + 1) * self.cols;
    &mut self.stats[row_start..row_end]
  }
}
/// A candidate motion vector together with its evaluated cost.
#[derive(Debug, Copy, Clone)]
pub struct MotionSearchResult {
/// The candidate motion vector.
pub mv: MotionVector,
/// Cost and distortion measured for `mv`.
pub rd: MVCandidateRD,
}
impl MotionSearchResult {
  /// Builds a placeholder result: the default motion vector paired with the
  /// maximum-cost [`MVCandidateRD`], so any evaluated candidate beats it.
  #[inline(always)]
  pub fn empty() -> MotionSearchResult {
    let rd = MVCandidateRD::empty();
    let mv = MotionVector::default();
    Self { mv, rd }
  }

  /// Reports whether this is still the placeholder, i.e. the cost is
  /// `u64::MAX`.
  #[inline(always)]
  const fn is_empty(&self) -> bool {
    matches!(self.rd.cost, u64::MAX)
  }
}
/// Cost of a motion vector candidate.
#[derive(Debug, Copy, Clone)]
pub struct MVCandidateRD {
/// Combined cost; `compute_mv_rd` sets it to `256 * sad + rate * lambda`.
pub cost: u64,
/// Distortion of the candidate (SAD, or SATD when that metric is requested).
pub sad: u32,
}
impl MVCandidateRD {
#[inline(always)]
const fn empty() -> MVCandidateRD {
MVCandidateRD { sad: u32::MAX, cost: u64::MAX }
}
}
/// Selects which neighbouring predictors `get_subset_predictors` samples.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum MVSamplingMode {
/// Used by `estimate_sb_motion` for the initial pass. The centre of the
/// current block is not sampled in this mode.
INIT,
/// Used for later passes. `right` / `bottom` enable sampling the stats just
/// past the right / bottom edge of the block; the block centre is sampled too.
CORNER { right: bool, bottom: bool },
}
/// Runs hierarchical motion estimation over every superblock of a tile.
///
/// Block sizes are visited from `MIB_SIZE_LOG2` down to 2 (log2, counted in
/// `MI_SIZE` blocks). The largest size is searched with `ssdec == 2`, the next
/// with `ssdec == 1` and all remaining sizes with `ssdec == 0`. Whenever
/// `ssdec` differs from the previous pass, the vectors stored by that pass are
/// first refined at the new subsampling level.
pub fn estimate_tile_motion<T: Pixel>(
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
  inter_cfg: &InterConfig,
) {
  let init_size = MIB_SIZE_LOG2;
  let mut prev_ssdec: Option<u8> = None;

  for mv_size_in_b_log2 in (2..=init_size).rev() {
    let is_initial_pass = mv_size_in_b_log2 == init_size;
    let ssdec = match init_size - mv_size_in_b_log2 {
      0 => 2,
      1 => 1,
      _ => 0,
    };

    // `replace` stores the current level and hands back the previous one.
    let subsampling_changed =
      matches!(prev_ssdec.replace(ssdec), Some(prev) if prev != ssdec);

    // Scale lambda by the subsampled area and a per-level weighting factor.
    let weight = if ssdec == 0 { 0.5 } else { 0.125 };
    let lambda =
      (fi.me_lambda * 256.0 / (1 << (2 * ssdec)) as f64 * weight) as u32;

    for sby in 0..ts.sb_height {
      for sbx in 0..ts.sb_width {
        // Bitmask of reference buffer slots already searched for this
        // superblock, so reference types sharing a slot are searched once.
        let mut tested_frames_flags = 0;

        for &ref_frame in inter_cfg.allowed_ref_frames() {
          let frame_flag = 1 << fi.ref_frames[ref_frame.to_index()];
          let already_tested =
            (tested_frames_flags & frame_flag) == frame_flag;
          if !already_tested {
            tested_frames_flags |= frame_flag;

            let sb_offset = SuperBlockOffset { x: sbx, y: sby };
            let tile_bo = TileSuperBlockOffset(sb_offset).block_offset(0, 0);

            if subsampling_changed {
              // Re-search the vectors of the previous (one size larger) pass
              // at the new resolution.
              refine_subsampled_sb_motion(
                fi,
                ts,
                ref_frame,
                mv_size_in_b_log2 + 1,
                tile_bo,
                ssdec,
                lambda,
              );
            }

            estimate_sb_motion(
              fi,
              ts,
              ref_frame,
              mv_size_in_b_log2,
              tile_bo,
              is_initial_pass,
              ssdec,
              lambda,
            );
          }
        }
      }
    }
  }
}
/// Estimates motion for every `mv_size`-sized block of one superblock and
/// stores the results in the tile's motion-estimation stats.
///
/// `tile_bo` is the superblock origin, `init` selects the initial (more
/// extensive) search and `ssdec` is the log2 subsampling of the planes
/// searched.
fn estimate_sb_motion<T: Pixel>(
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, ref_frame: RefType,
  mv_size_in_b_log2: usize, tile_bo: TileBlockOffset, init: bool, ssdec: u8,
  lambda: u32,
) {
  let pix_offset = tile_bo.to_luma_plane_offset();
  // Superblock extent in pixels, cropped at the tile edge.
  let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
  let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);

  let mv_size = MI_SIZE << mv_size_in_b_log2;
  // Added before the shift so cropped sizes round up when subsampled.
  let round_up = (1usize << ssdec) - 1;

  for y in (0..sb_h).step_by(mv_size) {
    let in_bottom_half = y & mv_size == mv_size;
    for x in (0..sb_w).step_by(mv_size) {
      let in_right_half = x & mv_size == mv_size;
      let corner = if init {
        MVSamplingMode::INIT
      } else {
        MVSamplingMode::CORNER { right: in_right_half, bottom: in_bottom_half }
      };

      let sub_bo = tile_bo
        .with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);

      // Block size in subsampled pixels.
      let w = mv_size.min(sb_w - x + round_up) >> ssdec;
      let h = mv_size.min(sb_h - y + round_up) >> ssdec;

      let found = estimate_motion(
        fi,
        ts,
        w,
        h,
        sub_bo,
        ref_frame,
        None,
        corner,
        init,
        ssdec,
        Some(lambda),
      );

      if let Some(results) = found {
        // Normalise the SAD so blocks of different sizes are comparable.
        let scaled = (results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2);
        let sad = (scaled / (w * h) as u64) as u32;
        let stats = MEStats { mv: results.mv, normalized_sad: sad };
        save_me_stats(ts, mv_size_in_b_log2, sub_bo, ref_frame, stats);
      }
    }
  }
}
/// Refines the stored motion vectors of one superblock at subsampling level
/// `ssdec` and writes the refined results back to the tile's stats.
fn refine_subsampled_sb_motion<T: Pixel>(
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, ref_frame: RefType,
  mv_size_in_b_log2: usize, tile_bo: TileBlockOffset, ssdec: u8, lambda: u32,
) {
  let pix_offset = tile_bo.to_luma_plane_offset();
  // Superblock extent in pixels, cropped at the tile edge.
  let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
  let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);

  let mv_size = MI_SIZE << mv_size_in_b_log2;
  // Added before the shift so cropped sizes round up when subsampled.
  let round_up = (1usize << ssdec) - 1;

  for y in (0..sb_h).step_by(mv_size) {
    for x in (0..sb_w).step_by(mv_size) {
      let sub_bo = tile_bo
        .with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);

      // Block size in subsampled pixels.
      let w = mv_size.min(sb_w - x + round_up) >> ssdec;
      let h = mv_size.min(sb_h - y + round_up) >> ssdec;

      let refined = refine_subsampled_motion_estimate(
        fi, ts, w, h, sub_bo, ref_frame, ssdec, lambda,
      );

      if let Some(results) = refined {
        // Normalise the SAD so blocks of different sizes are comparable.
        let scaled = (results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2);
        let sad = (scaled / (w * h) as u64) as u32;
        let stats = MEStats { mv: results.mv, normalized_sad: sad };
        save_me_stats(ts, mv_size_in_b_log2, sub_bo, ref_frame, stats);
      }
    }
  }
}
/// Writes `stats` into every stats cell covered by the square block of
/// `1 << mv_size_in_b_log2` cells starting at `tile_bo`, clipped to the tile's
/// `mi_width` x `mi_height`.
fn save_me_stats<T: Pixel>(
  ts: &mut TileStateMut<'_, T>, mv_size_in_b_log2: usize,
  tile_bo: TileBlockOffset, ref_frame: RefType, stats: MEStats,
) {
  let size_in_b = 1 << mv_size_in_b_log2;
  let tile_me_stats = &mut ts.me_stats[ref_frame.to_index()];

  let (x_start, y_start) = (tile_bo.0.x, tile_bo.0.y);
  let x_end = (x_start + size_in_b).min(ts.mi_width);
  let y_end = (y_start + size_in_b).min(ts.mi_height);

  for mi_y in y_start..y_end {
    tile_me_stats[mi_y][x_start..x_end].fill(stats);
  }
}
/// Computes the allowed motion vector range for a block.
///
/// `w_in_b` / `h_in_b` give the frame size in blocks, `bo` the block position
/// and `blk_w` / `blk_h` the block size in pixels. The vector may reach past
/// the frame by a border of `128 + 8 * block size`, and is additionally kept
/// strictly inside `(MV_LOW, MV_UPP)`.
///
/// Returns `(mvx_min, mvx_max, mvy_min, mvy_max)`.
fn get_mv_range(
  w_in_b: usize, h_in_b: usize, bo: PlaneBlockOffset, blk_w: usize,
  blk_h: usize,
) -> (isize, isize, isize, isize) {
  use crate::context::{MV_LOW, MV_UPP};

  // Motion vector units per block unit.
  let mv_per_b = (8 * MI_SIZE) as isize;
  let lowest = MV_LOW as isize + 1;
  let highest = MV_UPP as isize - 1;

  // Same computation for both axes: (frame length, block position, block size).
  let axis_range = |len_in_b: usize, pos: usize, blk: usize| {
    let border = 128 + blk as isize * 8;
    let min = -(pos as isize) * mv_per_b - border;
    let max = ((len_in_b - pos) as isize - (blk / MI_SIZE) as isize)
      * mv_per_b
      + border;
    (min.max(lowest), max.min(highest))
  };

  let (mvx_min, mvx_max) = axis_range(w_in_b, bo.0.x, blk_w);
  let (mvy_min, mvy_max) = axis_range(h_in_b, bo.0.y, blk_h);
  (mvx_min, mvx_max, mvy_min, mvy_max)
}
/// Predictor candidates gathered by `get_subset_predictors`.
struct MotionEstimationSubsets {
/// Smallest SAD among the sampled stats, rescaled to the current block area.
min_sad: u32,
/// Median (or block-centre) predictor, when one is available.
median: Option<MotionVector>,
/// Predictors taken from the current tile's stats, plus the zero vector.
subset_b: ArrayVec<MotionVector, 5>,
/// Predictors taken from the stats stored with a reference frame.
subset_c: ArrayVec<MotionVector, 5>,
}
impl MotionEstimationSubsets {
fn all_mvs(&self) -> ArrayVec<MotionVector, 11> {
let mut all = ArrayVec::new();
if let Some(median) = self.median {
all.push(median);
}
all.extend(self.subset_b.iter().copied());
all.extend(self.subset_c.iter().copied());
all
}
}
/// Collects motion vector predictors for the block at `tile_bo`.
///
/// Candidates are read from the current tile's stats (`subset_b`) and, when
/// `frame_ref_opt` is provided, from the stats stored with a reference frame
/// (`subset_c`). Every candidate is quantized to full-pel, clamped to the
/// given range and finally shifted right by `ssdec`.
///
/// `pix_w` / `pix_h` are the block size in (subsampled) pixels. The returned
/// `min_sad` is the smallest normalized SAD seen, rescaled to `pix_w * pix_h`.
#[profiling::function]
fn get_subset_predictors(
tile_bo: TileBlockOffset, tile_me_stats: &TileMEStats<'_>,
frame_ref_opt: Option<ReadGuardMEStats<'_>>, ref_frame_id: usize,
pix_w: usize, pix_h: usize, mvx_min: isize, mvx_max: isize, mvy_min: isize,
mvy_max: isize, corner: MVSamplingMode, ssdec: u8,
) -> MotionEstimationSubsets {
let mut min_sad: u32 = u32::MAX;
let mut subset_b = ArrayVec::<MotionVector, 5>::new();
let mut subset_c = ArrayVec::<MotionVector, 5>::new();
// Block size in stats cells: undo the subsampling, then round up.
let w = ((pix_w << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
let h = ((pix_h << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
// Offset to the middle of the block, clipped to stay inside the stats grid.
let clipped_half_w = (w >> 1).min(tile_me_stats.cols() - 1 - tile_bo.0.x);
let clipped_half_h = (h >> 1).min(tile_me_stats.rows() - 1 - tile_bo.0.y);
// Tracks the minimum SAD and converts a stored vector into a full-pel
// candidate clamped to the allowed range.
let mut process_cand = |stats: MEStats| -> MotionVector {
min_sad = min_sad.min(stats.normalized_sad);
let mv = stats.mv.quantize_to_fullpel();
MotionVector {
col: clamp(mv.col as isize, mvx_min, mvx_max) as i16,
row: clamp(mv.row as isize, mvy_min, mvy_max) as i16,
}
};
// Left neighbour, sampled at the vertical middle of the block.
if tile_bo.0.x > 0 {
subset_b.push(process_cand(
tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x - 1],
));
}
// Top neighbour, sampled at the horizontal middle of the block.
if tile_bo.0.y > 0 {
subset_b.push(process_cand(
tile_me_stats[tile_bo.0.y - 1][tile_bo.0.x + clipped_half_w],
));
}
// Right neighbour, only when requested by `corner` and inside the grid.
if let MVSamplingMode::CORNER { right: true, bottom: _ } = corner {
if tile_bo.0.x + w < tile_me_stats.cols() {
subset_b.push(process_cand(
tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x + w],
));
}
}
// Bottom neighbour, only when requested by `corner` and inside the grid.
if let MVSamplingMode::CORNER { right: _, bottom: true } = corner {
if tile_bo.0.y + h < tile_me_stats.rows() {
subset_b.push(process_cand(
tile_me_stats[tile_bo.0.y + h][tile_bo.0.x + clipped_half_w],
));
}
}
// Outside INIT mode the centre of the current block is used as the "median"
// predictor. In INIT mode a component-wise median is taken when exactly
// three neighbours were collected.
// NOTE(review): in INIT mode only the left and top neighbours can be pushed
// above, so `subset_b.len()` looks like it can never be 3 here — confirm.
let median = if corner != MVSamplingMode::INIT {
Some(process_cand(
tile_me_stats[tile_bo.0.y + clipped_half_h]
[tile_bo.0.x + clipped_half_w],
))
} else if subset_b.len() != 3 {
None
} else {
let mut rows: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.row).collect();
let mut cols: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.col).collect();
rows.as_mut_slice().sort_unstable();
cols.as_mut_slice().sort_unstable();
Some(MotionVector { row: rows[1], col: cols[1] })
};
// The default (zero) vector is always a candidate; it bypasses
// `process_cand`, so it does not affect `min_sad`.
subset_b.push(MotionVector::default());
// Predictors from the stats stored with a reference frame, addressed in
// frame (not tile) coordinates.
if let Some(frame_me_stats) = frame_ref_opt {
let prev_frame = &frame_me_stats[ref_frame_id];
let frame_bo = PlaneBlockOffset(BlockOffset {
x: tile_me_stats.x() + tile_bo.0.x,
y: tile_me_stats.y() + tile_bo.0.y,
});
// Re-clip the half-block offsets against the frame-sized grid.
let clipped_half_w = (w >> 1).min(prev_frame.cols - 1 - frame_bo.0.x);
let clipped_half_h = (h >> 1).min(prev_frame.rows - 1 - frame_bo.0.y);
// Left.
if frame_bo.0.x > 0 {
subset_c.push(process_cand(
prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x - 1],
));
}
// Top.
if frame_bo.0.y > 0 {
subset_c.push(process_cand(
prev_frame[frame_bo.0.y - 1][frame_bo.0.x + clipped_half_w],
));
}
// Right.
if frame_bo.0.x + w < prev_frame.cols {
subset_c.push(process_cand(
prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + w],
));
}
// Bottom.
if frame_bo.0.y + h < prev_frame.rows {
subset_c.push(process_cand(
prev_frame[frame_bo.0.y + h][frame_bo.0.x + clipped_half_w],
));
}
// Centre of the block.
subset_c.push(process_cand(
prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + clipped_half_w],
));
}
// Undo the SAD normalisation so it is comparable with SADs of this block.
let min_sad = ((min_sad as u64 * (pix_w * pix_h) as u64)
>> (MAX_SB_SIZE_LOG2 * 2)) as u32;
// Scale the candidates down to the subsampled resolution.
let dec_mv = |mv: MotionVector| MotionVector {
col: mv.col >> ssdec,
row: mv.row >> ssdec,
};
let median = median.map(dec_mv);
for mv in subset_b.iter_mut() {
*mv = dec_mv(*mv);
}
for mv in subset_c.iter_mut() {
*mv = dec_mv(*mv);
}
MotionEstimationSubsets { min_sad, median, subset_b, subset_c }
}
/// Searches for the best motion vector of a `w` x `h` block at `tile_bo`.
///
/// `w` and `h` are given at the subsampled resolution selected by `ssdec`
/// (0 = full, 1 = half, 2 = quarter resolution planes). A full-pel search is
/// always run; when `pmv` is provided a sub-pel refinement follows. When
/// `lambda` is `None`, a default derived from `fi.me_lambda` is used.
///
/// Returns `None` when the reference frame slot is empty. The returned vector
/// is scaled back up by `ssdec`.
///
/// # Panics
///
/// Panics (`unimplemented!`) when `ssdec` is greater than 2.
pub fn estimate_motion<T: Pixel>(
  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w: usize, h: usize,
  tile_bo: TileBlockOffset, ref_frame: RefType,
  pmv: Option<[MotionVector; 2]>, corner: MVSamplingMode,
  extensive_search: bool, ssdec: u8, lambda: Option<u32>,
) -> Option<MotionSearchResult> {
  let ref_slot = fi.ref_frames[ref_frame.to_index()] as usize;
  let rec = fi.rec_buffer.frames[ref_slot].as_ref()?;

  let frame_bo = ts.to_frame_block_offset(tile_bo);

  // Range computed at full resolution, then brought down to `ssdec`.
  let (full_x_min, full_x_max, full_y_min, full_y_max) =
    get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
  let mvx_min = full_x_min >> ssdec;
  let mvx_max = full_x_max >> ssdec;
  let mvy_min = full_y_min >> ssdec;
  let mvy_max = full_y_max >> ssdec;

  let lambda = match lambda {
    Some(value) => value,
    None => (fi.me_lambda * 256.0 * 0.5) as u32,
  };

  let zero_pmv = [MotionVector { row: 0, col: 0 }; 2];

  let full_po = frame_bo.to_luma_plane_offset();
  let po = PlaneOffset { x: full_po.x >> ssdec, y: full_po.y >> ssdec };

  // Reference plane at the requested resolution.
  let p_ref = match ssdec {
    0 => &rec.frame.planes[0],
    1 => &rec.input_hres,
    2 => &rec.input_qres,
    _ => unimplemented!(),
  };

  // Source block at the requested resolution.
  let source = match ssdec {
    0 => ts.input_tile.planes[0]
      .subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
    1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
    2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
    _ => unimplemented!(),
  };
  let org_region = &source;

  let mut best: MotionSearchResult = full_pixel_me(
    fi,
    ts,
    org_region,
    p_ref,
    tile_bo,
    po,
    lambda,
    pmv.unwrap_or(zero_pmv),
    w,
    h,
    mvx_min,
    mvx_max,
    mvy_min,
    mvy_max,
    ref_frame,
    corner,
    extensive_search,
    ssdec,
  );

  if let Some(pmv) = pmv {
    let use_satd: bool = fi.config.speed_settings.motion.use_satd_subpel;

    // The full-pel search used SAD; re-evaluate the winner with SATD so the
    // sub-pel search compares like with like.
    if use_satd {
      best.rd = get_fullpel_mv_rd(
        fi,
        po,
        org_region,
        p_ref,
        fi.sequence.bit_depth,
        pmv,
        lambda,
        use_satd,
        mvx_min,
        mvx_max,
        mvy_min,
        mvy_max,
        w,
        h,
        best.mv,
      );
    }

    sub_pixel_me(
      fi, po, org_region, p_ref, lambda, pmv, mvx_min, mvx_max, mvy_min,
      mvy_max, w, h, use_satd, &mut best, ref_frame,
    );
  }

  // Scale the vector back to full resolution.
  best.mv = best.mv << ssdec;
  Some(best)
}
/// Refines the motion vector already stored for `tile_bo` with a small
/// exhaustive search at subsampling level `ssdec`.
///
/// The search window spans offsets -1..=2 pixels around the stored vector
/// (scaled to `ssdec`), limited to the allowed vector range. Returns `None`
/// when the reference frame slot is empty; the returned vector is scaled back
/// up by `ssdec`.
///
/// # Panics
///
/// Panics (`unimplemented!`) when `ssdec` is greater than 2.
fn refine_subsampled_motion_estimate<T: Pixel>(
  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w: usize, h: usize,
  tile_bo: TileBlockOffset, ref_frame: RefType, ssdec: u8, lambda: u32,
) -> Option<MotionSearchResult> {
  let ref_slot = fi.ref_frames[ref_frame.to_index()] as usize;
  let rec = fi.rec_buffer.frames[ref_slot].as_ref()?;

  let frame_bo = ts.to_frame_block_offset(tile_bo);

  // Range computed at full resolution, then brought down to `ssdec`.
  let (full_x_min, full_x_max, full_y_min, full_y_max) =
    get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
  let mvx_min = full_x_min >> ssdec;
  let mvx_max = full_x_max >> ssdec;
  let mvy_min = full_y_min >> ssdec;
  let mvy_max = full_y_max >> ssdec;

  let pmv = [MotionVector { row: 0, col: 0 }; 2];

  let full_po = frame_bo.to_luma_plane_offset();
  let po = PlaneOffset { x: full_po.x >> ssdec, y: full_po.y >> ssdec };

  // Reference plane at the requested resolution.
  let p_ref = match ssdec {
    0 => &rec.frame.planes[0],
    1 => &rec.input_hres,
    2 => &rec.input_qres,
    _ => unimplemented!(),
  };

  // Source block at the requested resolution.
  let source = match ssdec {
    0 => ts.input_tile.planes[0]
      .subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
    1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
    2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
    _ => unimplemented!(),
  };
  let org_region = &source;

  // Previously stored vector, scaled to the current resolution.
  let stored = ts.me_stats[ref_frame.to_index()][tile_bo.0.y][tile_bo.0.x];
  let mv = stored.mv >> ssdec;
  let (mv_x, mv_y) = (mv.col as isize / 8, mv.row as isize / 8);

  let x_lo = po.x + (mv_x - 1).max(mvx_min / 8);
  let x_hi = po.x + (mv_x + 2).min(mvx_max / 8);
  let y_lo = po.y + (mv_y - 1).max(mvy_min / 8);
  let y_hi = po.y + (mv_y + 2).min(mvy_max / 8);

  let mut results = full_search(
    fi, x_lo, x_hi, y_lo, y_hi, w, h, org_region, p_ref, po, 1, lambda, pmv,
  );

  // Scale the vector back to full resolution.
  results.mv = results.mv << ssdec;
  Some(results)
}
/// Full-pel motion search for one block.
///
/// Predictors are gathered with `get_subset_predictors`. Without
/// `extensive_search`, the best predictor is refined by a diamond search and
/// returned. With `extensive_search`, progressively more expensive stages run
/// (median, subset B, subset C, uneven multi-hexagon search, and optionally an
/// exhaustive search); the function returns as soon as the best SAD drops below
/// a threshold derived from the neighbours' minimum SAD.
#[profiling::function]
fn full_pixel_me<T: Pixel>(
fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
org_region: &PlaneRegion<T>, p_ref: &Plane<T>, tile_bo: TileBlockOffset,
po: PlaneOffset, lambda: u32, pmv: [MotionVector; 2], w: usize, h: usize,
mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
ref_frame: RefType, corner: MVSamplingMode, extensive_search: bool,
ssdec: u8,
) -> MotionSearchResult {
let ref_frame_id = ref_frame.to_index();
let tile_me_stats = &ts.me_stats[ref_frame_id].as_const();
// Stats stored with the frame in reference slot `fi.ref_frames[0]`, if any.
// NOTE(review): the slot index is always 0 here, independent of `ref_frame`
// — confirm this is intentional.
let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize]
.as_ref()
.map(|frame_ref| frame_ref.frame_me_stats.read().expect("poisoned lock"));
let subsets = get_subset_predictors(
tile_bo,
tile_me_stats,
frame_ref,
ref_frame_id,
w,
h,
mvx_min,
mvx_max,
mvy_min,
mvy_max,
corner,
ssdec,
);
// Picks the best of `predictors`, refines it with a diamond search and keeps
// it in `best` when it is cheaper than the current best.
let try_cands = |predictors: &[MotionVector],
best: &mut MotionSearchResult| {
let mut results = get_best_predictor(
fi,
po,
org_region,
p_ref,
predictors,
fi.sequence.bit_depth,
pmv,
lambda,
mvx_min,
mvx_max,
mvy_min,
mvy_max,
w,
h,
);
fullpel_diamond_search(
fi,
po,
org_region,
p_ref,
&mut results,
fi.sequence.bit_depth,
pmv,
lambda,
mvx_min,
mvx_max,
mvy_min,
mvy_max,
w,
h,
);
if results.rd.cost < best.rd.cost {
*best = results;
}
};
let mut best: MotionSearchResult = MotionSearchResult::empty();
if !extensive_search {
// Fast path: evaluate all predictors at once.
try_cands(&subsets.all_mvs(), &mut best);
best
} else {
// Early-out threshold: 1.2x the smallest neighbouring SAD plus one unit per
// pixel, scaled by the bit depth above 8.
let thresh = (subsets.min_sad as f32 * 1.2) as u32
+ (((w * h) as u32) << (fi.sequence.bit_depth - 8));
if let Some(median) = subsets.median {
try_cands(&[median], &mut best);
if best.rd.sad < thresh {
return best;
}
}
try_cands(&subsets.subset_b, &mut best);
if best.rd.sad < thresh {
return best;
}
try_cands(&subsets.subset_c, &mut best);
if best.rd.sad < thresh {
return best;
}
// Wider pattern search around the best candidate so far, with range 24.
uneven_multi_hex_search(
fi,
po,
org_region,
p_ref,
&mut best,
fi.sequence.bit_depth,
pmv,
lambda,
mvx_min,
mvx_max,
mvy_min,
mvy_max,
w,
h,
24,
);
if !fi.config.speed_settings.motion.me_allow_full_search
|| best.rd.sad < thresh
{
return best;
}
{
// Last resort: exhaustive search over a window scaled by
// `fi.me_range_scale`, limited to the allowed vector range.
let range_x = (192 * fi.me_range_scale as isize) >> ssdec;
let range_y = (64 * fi.me_range_scale as isize) >> ssdec;
let x_lo = po.x + (-range_x).max(mvx_min / 8);
let x_hi = po.x + (range_x).min(mvx_max / 8);
let y_lo = po.y + (-range_y).max(mvy_min / 8);
let y_hi = po.y + (range_y).min(mvy_max / 8);
let results = full_search(
fi,
x_lo,
x_hi,
y_lo,
y_hi,
w,
h,
org_region,
p_ref,
po,
// Sampling step: 4 at full resolution, smaller on subsampled planes.
4 >> ssdec,
lambda,
[MotionVector::default(); 2],
);
if results.rd.cost < best.rd.cost {
results
} else {
best
}
}
}
}
/// Refines `best` to sub-pel precision by delegating to
/// `subpel_diamond_search`, using the sequence bit depth from `fi`.
fn sub_pixel_me<T: Pixel>(
  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
  p_ref: &Plane<T>, lambda: u32, pmv: [MotionVector; 2], mvx_min: isize,
  mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize, h: usize,
  use_satd: bool, best: &mut MotionSearchResult, ref_frame: RefType,
) {
  let bit_depth = fi.sequence.bit_depth;
  subpel_diamond_search(
    fi, po, org_region, p_ref, bit_depth, pmv, lambda, mvx_min, mvx_max,
    mvy_min, mvy_max, w, h, use_satd, best, ref_frame,
  );
}
#[profiling::function]
fn get_best_predictor<T: Pixel>(
fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
p_ref: &Plane<T>, predictors: &[MotionVector], bit_depth: usize,
pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
mvy_min: isize, mvy_max: isize, w: usize, h: usize,
) -> MotionSearchResult {
let mut best: MotionSearchResult = MotionSearchResult::empty();
for &init_mv in predictors.iter() {
let rd = get_fullpel_mv_rd(
fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
mvx_max, mvy_min, mvy_max, w, h, init_mv,
);
if rd.cost < best.rd.cost {
best.mv = init_mv;
best.rd = rd;
}
}
best
}
/// Builds an array of `MotionVector`s from two parallel lists of field values.
/// Every value is shifted left by 3 (multiplied by 8), so entries are written
/// in whole pixels and stored in 1/8-pel motion vector units.
macro_rules! search_pattern {
($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => {
[ $(MotionVector { $field_a: $ll_a << 3, $field_b: $ll_b << 3 } ),*]
};
}
/// Builds an array of `MotionVector`s from two parallel lists of field values,
/// storing the values unscaled (raw motion vector units).
macro_rules! search_pattern_subpel {
($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => {
[ $(MotionVector { $field_a: $ll_a, $field_b: $ll_b } ),*]
};
}
/// Four axis-aligned offsets of magnitude 1 in raw motion vector units; the
/// sub-pel diamond search scales them by its current radius.
const DIAMOND_R1_PATTERN_SUBPEL: [MotionVector; 4] = search_pattern_subpel!(
col: [ 0, 1, 0, -1],
row: [ 1, 0, -1, 0]
);
/// Four axis-aligned offsets of one whole pixel (8 motion vector units); the
/// full-pel diamond search scales them by its current radius.
const DIAMOND_R1_PATTERN: [MotionVector; 4] = search_pattern!(
col: [ 0, 1, 0, -1],
row: [ 1, 0, -1, 0]
);
#[profiling::function]
fn fullpel_diamond_search<T: Pixel>(
fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
mvy_min: isize, mvy_max: isize, w: usize, h: usize,
) {
let (mut diamond_radius_log2, diamond_radius_end_log2) = (1u8, 0u8);
loop {
let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
for &offset in &DIAMOND_R1_PATTERN {
let cand_mv = current.mv + (offset << diamond_radius_log2);
let rd = get_fullpel_mv_rd(
fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
mvx_max, mvy_min, mvy_max, w, h, cand_mv,
);
if rd.cost < best_cand.rd.cost {
best_cand.mv = cand_mv;
best_cand.rd = rd;
}
}
if current.rd.cost <= best_cand.rd.cost {
if diamond_radius_log2 == diamond_radius_end_log2 {
break;
} else {
diamond_radius_log2 -= 1;
}
} else {
*current = best_cand;
}
}
assert!(!current.is_empty());
}
/// Six offsets (in whole pixels, scaled to motion vector units) forming the
/// hexagon used by `hexagon_search`. Consecutive entries are adjacent corners;
/// `hexagon_search` relies on this ordering when it revisits only the three
/// directions closest to the previous move.
const HEXAGON_PATTERN: [MotionVector; 6] = search_pattern!(
col: [ 0, 2, 2, 0, -2, -2],
row: [ -2, -1, 1, 2, 1, -1]
);
/// The eight one-pixel neighbours of a position (scaled to motion vector
/// units), used for the final refinement step of `hexagon_search`.
const SQUARE_REFINE_PATTERN: [MotionVector; 8] = search_pattern!(
col: [ -1, 0, 1, -1, 1, -1, 0, 1],
row: [ 1, 1, 1, 0, 0, -1, -1, -1]
);
#[profiling::function]
fn hexagon_search<T: Pixel>(
fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
mvy_min: isize, mvy_max: isize, w: usize, h: usize,
) {
let mut best_cand_idx: usize = 0;
let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
for i in 0..6 {
let cand_mv = current.mv + HEXAGON_PATTERN[i];
let rd = get_fullpel_mv_rd(
fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
mvx_max, mvy_min, mvy_max, w, h, cand_mv,
);
if rd.cost < best_cand.rd.cost {
best_cand_idx = i;
best_cand.mv = cand_mv;
best_cand.rd = rd;
}
}
while best_cand.rd.cost < current.rd.cost {
*current = best_cand;
best_cand = MotionSearchResult::empty();
let center_cand_idx = best_cand_idx;
for idx_offset_mod6 in 5..=7 {
let i = (center_cand_idx + idx_offset_mod6) % 6;
let cand_mv = current.mv + HEXAGON_PATTERN[i];
let rd = get_fullpel_mv_rd(
fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
mvx_max, mvy_min, mvy_max, w, h, cand_mv,
);
if rd.cost < best_cand.rd.cost {
best_cand_idx = i;
best_cand.mv = cand_mv;
best_cand.rd = rd;
}
}
}
let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
for &offset in &SQUARE_REFINE_PATTERN {
let cand_mv = current.mv + offset;
let rd = get_fullpel_mv_rd(
fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
mvx_max, mvy_min, mvy_max, w, h, cand_mv,
);
if rd.cost < best_cand.rd.cost {
best_cand.mv = cand_mv;
best_cand.rd = rd;
}
}
if best_cand.rd.cost < current.rd.cost {
*current = best_cand;
}
assert!(!current.is_empty());
}
/// Sixteen offsets (in whole pixels, scaled to motion vector units) forming
/// the uneven hexagon sampled by `uneven_multi_hex_search`.
///
/// The points are listed around the perimeter: five along `row == 4`, three
/// down the right side, five along `row == -4` and three up the left side.
/// The pattern is left/right symmetric, so the left side mirrors the right:
/// (-3, -2), (-4, 0), (-3, 2) as (col, row).
const UMH_PATTERN: [MotionVector; 16] = search_pattern!(
  col: [ -2, -1,  0,  1,  2,  3,  4,  3,  2,  1,  0, -1, -2, -3, -4, -3],
  row: [  4,  4,  4,  4,  4,  2,  0, -2, -4, -4, -4, -4, -4, -2,  0,  2]
);
/// Uneven multi-hexagon full-pel search around `current`.
///
/// Stages, each keeping the cheapest candidate found so far in `current`:
/// 1. a cross of odd offsets around the starting vector, up to `me_range`
///    along one axis and `me_range / 2` along the other;
/// 2. every position of the 5x5 pixel square around the best vector;
/// 3. `me_range / 4` scaled copies of `UMH_PATTERN` around the best vector;
/// 4. a final `hexagon_search`.
///
/// `me_range` is expressed in whole pixels.
///
/// # Panics
///
/// Panics if `current` is the empty placeholder on entry.
#[profiling::function]
fn uneven_multi_hex_search<T: Pixel>(
  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
  p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
  pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
  mvy_min: isize, mvy_max: isize, w: usize, h: usize, me_range: i16,
) {
  assert!(!current.is_empty());

  // Cost of a single full-pel candidate.
  let probe = |cand_mv: MotionVector| {
    get_fullpel_mv_rd(
      fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
      mvx_max, mvy_min, mvy_max, w, h, cand_mv,
    )
  };

  // Stage 1: cross search. Both arms are centred on the starting vector.
  // NOTE(review): `HORIZONTAL_LINE` steps along `row` and `VERTICAL_LINE`
  // along `col`, so the names look swapped relative to the axes they move
  // along. Behaviour is left unchanged here — confirm the intended axes.
  let center = current.mv;
  for i in (1..=me_range).step_by(2) {
    const HORIZONTAL_LINE: [MotionVector; 2] = search_pattern!(
      col: [ 0, 0],
      row: [-1, 1]
    );
    for &offset in &HORIZONTAL_LINE {
      let cand_mv = center + offset * i;
      let rd = probe(cand_mv);
      if rd.cost < current.rd.cost {
        current.mv = cand_mv;
        current.rd = rd;
      }
    }
  }
  for i in (1..=me_range >> 1).step_by(2) {
    const VERTICAL_LINE: [MotionVector; 2] = search_pattern!(
      col: [-1, 1],
      row: [ 0, 0]
    );
    for &offset in &VERTICAL_LINE {
      let cand_mv = center + offset * i;
      let rd = probe(cand_mv);
      if rd.cost < current.rd.cost {
        current.mv = cand_mv;
        current.rd = rd;
      }
    }
  }

  // Stage 2: 5x5 square around the best vector so far.
  let center = current.mv;
  for row in -2..=2 {
    for col in -2..=2 {
      if row == 0 && col == 0 {
        continue;
      }
      // Offsets are whole pixels, so scale them to 1/8-pel motion vector
      // units exactly as `search_pattern!` does. Without the scaling the
      // candidates would be sub-pel offsets of the centre, which the
      // full-pel cost function truncates back to (almost) the same position.
      let offset = MotionVector { row: row << 3, col: col << 3 };
      let cand_mv = center + offset;
      let rd = probe(cand_mv);
      if rd.cost < current.rd.cost {
        current.mv = cand_mv;
        current.rd = rd;
      }
    }
  }

  // Stage 3: scaled copies of the uneven hexagon. The pattern has a radius
  // of 4 pixels, hence `me_range / 4` iterations.
  let center = current.mv;
  let iterations = me_range >> 2;
  for i in 1..=iterations {
    for &offset in &UMH_PATTERN {
      let cand_mv = center + offset * i;
      let rd = probe(cand_mv);
      if rd.cost < current.rd.cost {
        current.mv = cand_mv;
        current.rd = rd;
      }
    }
  }

  // Stage 4: local refinement.
  hexagon_search(
    fi, po, org_region, p_ref, current, bit_depth, pmv, lambda, mvx_min,
    mvx_max, mvy_min, mvy_max, w, h,
  );
}
#[profiling::function]
fn subpel_diamond_search<T: Pixel>(
fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
_p_ref: &Plane<T>, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize,
h: usize, use_satd: bool, current: &mut MotionSearchResult,
ref_frame: RefType,
) {
use crate::util::Aligned;
let mc_w = w.next_power_of_two();
let mc_h = (h + 1) & !1;
let cfg = PlaneConfig::new(mc_w, mc_h, 0, 0, 0, 0, std::mem::size_of::<T>());
let mut buf: Aligned<[T; 128 * 128]> = unsafe { Aligned::uninitialized() };
let mut tmp_region = PlaneRegionMut::from_slice(
&mut buf.data,
&cfg,
Rect { x: 0, y: 0, width: cfg.width, height: cfg.height },
);
let (mut diamond_radius_log2, diamond_radius_end_log2) =
(2u8, u8::from(!fi.allow_high_precision_mv));
loop {
let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
for &offset in &DIAMOND_R1_PATTERN_SUBPEL {
let cand_mv = current.mv + (offset << diamond_radius_log2);
let rd = get_subpel_mv_rd(
fi,
po,
org_region,
bit_depth,
pmv,
lambda,
use_satd,
mvx_min,
mvx_max,
mvy_min,
mvy_max,
w,
h,
cand_mv,
&mut tmp_region,
ref_frame,
);
if rd.cost < best_cand.rd.cost {
best_cand.mv = cand_mv;
best_cand.rd = rd;
}
}
if current.rd.cost <= best_cand.rd.cost {
if diamond_radius_log2 == diamond_radius_end_log2 {
break;
} else {
diamond_radius_log2 -= 1;
}
} else {
*current = best_cand;
}
}
assert!(!current.is_empty());
}
/// Computes the cost of the full-pel candidate `cand_mv`.
///
/// Returns `MVCandidateRD::empty()` when the candidate lies outside the
/// inclusive range `[mvx_min, mvx_max]` x `[mvy_min, mvy_max]`. Otherwise the
/// reference block is read at `po` displaced by `cand_mv / 8` pixels.
#[inline]
fn get_fullpel_mv_rd<T: Pixel>(
  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
  p_ref: &Plane<T>, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
  use_satd: bool, mvx_min: isize, mvx_max: isize, mvy_min: isize,
  mvy_max: isize, w: usize, h: usize, cand_mv: MotionVector,
) -> MVCandidateRD {
  let col = cand_mv.col as isize;
  let row = cand_mv.row as isize;
  let col_ok = (mvx_min..=mvx_max).contains(&col);
  let row_ok = (mvy_min..=mvy_max).contains(&row);
  if !(col_ok && row_ok) {
    return MVCandidateRD::empty();
  }

  // Convert the vector from 1/8-pel units to a whole-pixel displacement.
  let ref_area = Area::StartingAt {
    x: po.x + (cand_mv.col / 8) as isize,
    y: po.y + (cand_mv.row / 8) as isize,
  };
  let plane_ref = p_ref.region(ref_area);

  compute_mv_rd(
    fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region,
    &plane_ref,
  )
}
/// Computes the cost of the sub-pel candidate `cand_mv`.
///
/// Returns `MVCandidateRD::empty()` when the candidate lies outside the
/// inclusive range `[mvx_min, mvx_max]` x `[mvy_min, mvy_max]`. Otherwise the
/// prediction for `cand_mv` is rendered into `tmp_region` (using the full
/// size of that region) and compared against `org_region` over `w` x `h`.
fn get_subpel_mv_rd<T: Pixel>(
  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32, use_satd: bool,
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize,
  h: usize, cand_mv: MotionVector, tmp_region: &mut PlaneRegionMut<T>,
  ref_frame: RefType,
) -> MVCandidateRD {
  let col = cand_mv.col as isize;
  let row = cand_mv.row as isize;
  let col_ok = (mvx_min..=mvx_max).contains(&col);
  let row_ok = (mvy_min..=mvy_max).contains(&row);
  if !(col_ok && row_ok) {
    return MVCandidateRD::empty();
  }

  // The prediction covers the whole scratch region, which may be larger than
  // the `w` x `h` area used for the distortion.
  let tmp_width = tmp_region.rect().width;
  let tmp_height = tmp_region.rect().height;
  let tile_rect =
    TileRect { x: 0, y: 0, width: tmp_width, height: tmp_height };

  PredictionMode::NEWMV.predict_inter_single(
    fi, tile_rect, 0, po, tmp_region, tmp_width, tmp_height, ref_frame,
    cand_mv,
  );

  let predicted = tmp_region.as_const();
  compute_mv_rd(
    fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region,
    &predicted,
  )
}
/// Combines distortion and motion vector rate into a single cost.
///
/// Distortion is SATD when `use_satd` is set, SAD otherwise. The rate is the
/// cheaper of coding `cand_mv` relative to `pmv[0]`, or relative to `pmv[1]`
/// with a penalty of 1. The resulting cost is `256 * distortion + rate *
/// lambda`.
#[inline(always)]
fn compute_mv_rd<T: Pixel>(
  fi: &FrameInvariants<T>, pmv: [MotionVector; 2], lambda: u32,
  use_satd: bool, bit_depth: usize, w: usize, h: usize, cand_mv: MotionVector,
  plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>,
) -> MVCandidateRD {
  let cpu = fi.cpu_feature_level;
  let sad = if use_satd {
    get_satd(plane_org, plane_ref, w, h, bit_depth, cpu)
  } else {
    get_sad(plane_org, plane_ref, w, h, bit_depth, cpu)
  };

  let high_precision = fi.allow_high_precision_mv;
  let rate_first = get_mv_rate(cand_mv, pmv[0], high_precision);
  let rate_second = get_mv_rate(cand_mv, pmv[1], high_precision);
  let rate = rate_first.min(rate_second + 1);

  let cost = 256 * sad as u64 + rate as u64 * lambda as u64;
  MVCandidateRD { cost, sad }
}
#[profiling::function]
fn full_search<T: Pixel>(
fi: &FrameInvariants<T>, x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize,
w: usize, h: usize, org_region: &PlaneRegion<T>, p_ref: &Plane<T>,
po: PlaneOffset, step: usize, lambda: u32, pmv: [MotionVector; 2],
) -> MotionSearchResult {
let search_region = p_ref.region(Area::Rect {
x: x_lo,
y: y_lo,
width: (x_hi - x_lo) as usize + w,
height: (y_hi - y_lo) as usize + h,
});
let mut best: MotionSearchResult = MotionSearchResult::empty();
for vert_window in search_region.vert_windows(h).step_by(step) {
for ref_window in vert_window.horz_windows(w).step_by(step) {
let &Rect { x, y, .. } = ref_window.rect();
let mv = MotionVector {
row: 8 * (y as i16 - po.y as i16),
col: 8 * (x as i16 - po.x as i16),
};
let rd = compute_mv_rd(
fi,
pmv,
lambda,
false,
fi.sequence.bit_depth,
w,
h,
mv,
org_region,
&ref_window,
);
if rd.cost < best.rd.cost {
best.rd = rd;
best.mv = mv;
}
}
}
best
}
/// Approximates the rate of coding motion vector `a` relative to `b`.
///
/// Each component contributes `2 * ilog(|difference|)`; without
/// `allow_high_precision_mv` the difference is first shifted right by one.
#[inline(always)]
fn get_mv_rate(
  a: MotionVector, b: MotionVector, allow_high_precision_mv: bool,
) -> u32 {
  let component_rate = |diff: i16| -> u32 {
    let d = if allow_high_precision_mv { diff } else { diff >> 1 };
    2 * ILog::ilog(d.abs()) as u32
  };

  let row_rate = component_rate(a.row - b.row);
  let col_rate = component_rate(a.col - b.col);
  row_rate + col_rate
}