yading@10: /* yading@10: * Blackfin Pixel Operations yading@10: * Copyright (C) 2007 Marc Hoffman yading@10: * yading@10: * This file is part of FFmpeg. yading@10: * yading@10: * FFmpeg is free software; you can redistribute it and/or yading@10: * modify it under the terms of the GNU Lesser General Public yading@10: * License as published by the Free Software Foundation; either yading@10: * version 2.1 of the License, or (at your option) any later version. yading@10: * yading@10: * FFmpeg is distributed in the hope that it will be useful, yading@10: * but WITHOUT ANY WARRANTY; without even the implied warranty of yading@10: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU yading@10: * Lesser General Public License for more details. yading@10: * yading@10: * You should have received a copy of the GNU Lesser General Public yading@10: * License along with FFmpeg; if not, write to the Free Software yading@10: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA yading@10: */ yading@10: #include "config_bfin.h" yading@10: /* NOTE(review): the "yading@10:" tokens look like per-line prefixes from a version-control annotate listing, and the text appears to have been collapsed onto a few physical lines; presumably both must be undone before this file can be assembled - confirm against the pristine source. */ yading@10: /* yading@10: motion compensation yading@10: primitives yading@10: yading@10: * Halfpel motion compensation with rounding (a+b+1)>>1. yading@10: * This is an array[4][4] of motion compensation functions for 4 yading@10: * horizontal blocksizes (8,16) and the 4 halfpel positions
yading@10: * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] yading@10: * @param block destination where the result is stored yading@10: * @param pixels source yading@10: * @param line_size number of bytes in a horizontal line of block yading@10: * @param h height yading@10: yading@10: */ yading@10: /* put_pixels8uc: writes an 8-byte-wide, h-row block to 'block'; each output word is produced by BYTEOP1P from the matching words of s0 and s1 (presumably the rounded byte average (a+b+1)>>1 named in the header above - confirm against the Blackfin reference). Both sources step line_size bytes per row (4 + (line_size-8) + 4) and the destination steps dest_size bytes per row (4 + (dest_size-4)). The hardware loop runs LC0 = h times and its last two instructions already fetch the first word of the following row. Arguments 4-6 are read from the stack before r7:6 are pushed. */ yading@10: DEFUN(put_pixels8uc,mL1, yading@10: (uint8_t *block, const uint8_t *s0, const uint8_t *s1, yading@10: int dest_size, int line_size, int h)): yading@10: i3=r0; // dest yading@10: i0=r1; // src0 yading@10: i1=r2; // src1 yading@10: r0=[sp+12]; // dest_size yading@10: r2=[sp+16]; // line_size yading@10: p0=[sp+20]; // h yading@10: [--sp] = (r7:6); yading@10: r0+=-4; yading@10: m3=r0; yading@10: r2+=-8; yading@10: m0=r2; yading@10: LSETUP(pp8$0,pp8$1) LC0=P0; yading@10: DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; yading@10: yading@10: pp8$0: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; yading@10: R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++M0]|| R2 =[I1++M0]; yading@10: R7 = BYTEOP1P(R1:0,R3:2)(R) || R0 = [I0++] || [I3++] = R6 ; yading@10: pp8$1: DISALGNEXCPT || R2 = [I1++] || [I3++M3] = R7; yading@10: yading@10: (r7:6) = [sp++]; yading@10: RTS; yading@10: DEFUN_END(put_pixels8uc) yading@10: /* put_pixels16uc: 16-byte-wide counterpart of put_pixels8uc - four BYTEOP1P results are stored per row; the sources step line_size bytes per row (12 + (line_size-16) + 4) and the destination steps dest_size bytes per row (12 + (dest_size-12)). Uses link/unlink, so the stacked arguments are addressed relative to fp. */ yading@10: DEFUN(put_pixels16uc,mL1, yading@10: (uint8_t *block, const uint8_t *s0, const uint8_t *s1, yading@10: int dest_size, int line_size, int h)): yading@10: link 0; yading@10: [--sp] = (r7:6); yading@10: i3=r0; // dest yading@10: i0=r1; // src0 yading@10: i1=r2; // src1 yading@10: r0=[fp+20]; // dest_size yading@10: r2=[fp+24]; // line_size yading@10: p0=[fp+28]; // h yading@10: yading@10: yading@10: r0+=-12; yading@10: m3=r0; // dest_size - 12 yading@10: r2+=-16; yading@10: m0=r2; yading@10: yading@10: LSETUP(pp16$0,pp16$1) LC0=P0; yading@10: DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; yading@10: yading@10: pp16$0: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; yading@10: R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++] || R2 =[I1++]; yading@10: R7 = BYTEOP1P(R1:0,R3:2)(R) || R1 = [I0++] 
|| R3 =[I1++]; yading@10: [I3++] = R6; yading@10: R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++M0] || R2 =[I1++M0]; yading@10: R7 = BYTEOP1P(R1:0,R3:2)(R) || R0 = [I0++] || [I3++] = R7 ; yading@10: [I3++] = R6; yading@10: pp16$1: DISALGNEXCPT || R2 = [I1++] || [I3++M3] = R7; yading@10: yading@10: (r7:6) = [sp++]; yading@10: unlink; yading@10: RTS; yading@10: DEFUN_END(put_pixels16uc) yading@10: yading@10: yading@10: yading@10: yading@10: yading@10: /* put_pixels8uc_nornd: same loop shape as put_pixels8uc, but BYTEOP1P carries the (T) option (presumably truncation instead of rounding - confirm against the Blackfin reference) and a single line_size is the row stride for both sources and the destination (m3 = line_size-4, m0 = line_size-8). */ yading@10: DEFUN(put_pixels8uc_nornd,mL1, yading@10: (uint8_t *block, const uint8_t *s0, const uint8_t *s1, yading@10: int line_size, int h)): yading@10: i3=r0; // dest yading@10: i0=r1; // src0 yading@10: i1=r2; // src1 yading@10: r2=[sp+12]; // line_size yading@10: p0=[sp+16]; // h yading@10: [--sp] = (r7:6); yading@10: r2+=-4; yading@10: m3=r2; yading@10: r2+=-4; yading@10: m0=r2; yading@10: LSETUP(pp8$2,pp8$3) LC0=P0; yading@10: DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; yading@10: yading@10: pp8$2: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; yading@10: R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++M0]|| R2 =[I1++M0]; yading@10: R7 = BYTEOP1P(R1:0,R3:2)(T,R) || R0 = [I0++] || [I3++] = R6 ; yading@10: pp8$3: DISALGNEXCPT || R2 = [I1++] || [I3++M3] = R7; yading@10: yading@10: (r7:6) = [sp++]; yading@10: RTS; yading@10: DEFUN_END(put_pixels8uc_nornd) yading@10: /* put_pixels16uc_nornd: 16-byte-wide counterpart of put_pixels8uc_nornd; m3 = line_size-12 is the final destination step of each row and m0 = line_size-16 is the row-wrap step applied to both sources. */ yading@10: DEFUN(put_pixels16uc_nornd,mL1, yading@10: (uint8_t *block, const uint8_t *s0, const uint8_t *s1, yading@10: int line_size, int h)): yading@10: i3=r0; // dest yading@10: i0=r1; // src0 yading@10: i1=r2; // src1 yading@10: r2=[sp+12]; // line_size yading@10: p0=[sp+16]; // h yading@10: yading@10: [--sp] = (r7:6); yading@10: r2+=-12; yading@10: m3=r2; // line_size - 12 yading@10: r2+=-4; yading@10: m0=r2; yading@10: yading@10: LSETUP(pp16$2,pp16$3) LC0=P0; yading@10: DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; yading@10: yading@10: pp16$2: yading@10: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; yading@10: R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++] || R2 =[I1++]; yading@10: 
R7 = BYTEOP1P(R1:0,R3:2)(T,R) || R1 = [I0++] || R3 =[I1++]; yading@10: [I3++] = R6; yading@10: yading@10: R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++M0] || R2 =[I1++M0]; yading@10: R7 = BYTEOP1P(R1:0,R3:2)(T,R) || R0 = [I0++] || [I3++] = R7 ; yading@10: [I3++] = R6; yading@10: pp16$3: DISALGNEXCPT || R2 = [I1++] || [I3++M3] = R7; yading@10: yading@10: (r7:6) = [sp++]; yading@10: yading@10: RTS; yading@10: DEFUN_END(put_pixels16uc_nornd) yading@10: /* z_put_pixels16_xy2: 16-byte-wide, h-row block built from s0 and from the row line_size bytes after it, in two passes over all h rows. Pass 1 (LS$16E..LE$16E) stores BYTEOP2P (RNDL) results to block. Pass 2 (LS$16O..LE$16O) restores the start pointers saved in B0/B1/B3, moves both source pointers forward by one byte (M1 = 1), computes BYTEOP2P (RNDH), adds each result with +|+ to the word pass 1 stored at that position and writes the sum back. NOTE(review): RNDL/RNDH presumably place the rounded results in the low/high byte of each half-word - confirm against the Blackfin reference. dest_size arrives in r2; line_size and h are read from the stack via fp. */ yading@10: DEFUN(z_put_pixels16_xy2,mL1, yading@10: (uint8_t *block, const uint8_t *s0, yading@10: int dest_size, int line_size, int h)): yading@10: link 0; yading@10: [--sp] = (r7:4); yading@10: i3=r0; // dest yading@10: i0=r1; // src0--> pixels yading@10: i1=r1; // src1--> pixels + line_size yading@10: r2+=-12; yading@10: m2=r2; // m2=dest_size-12 yading@10: r2=[fp+20]; yading@10: m3=r2; // line_size yading@10: p0=[fp+24]; // h yading@10: r2+=-16; yading@10: i1+=m3; /* src1 + line_size */ yading@10: m0=r2; /* line_size - 16 */ yading@10: yading@10: B0 = I0; yading@10: B1 = I1; yading@10: B3 = I3; yading@10: yading@10: DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; yading@10: yading@10: LSETUP(LS$16E,LE$16E) LC0=P0; yading@10: LS$16E: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; yading@10: R4 = BYTEOP2P (R3:2,R1:0) (RNDL) || R0 = [I0++] || R2 =[I1++]; yading@10: R5 = BYTEOP2P (R3:2,R1:0) (RNDL,R) || R1 = [I0++] || [I3++] = R4 ; yading@10: DISALGNEXCPT || R3 = [I1++] || [I3++] = R5; yading@10: R4 = BYTEOP2P (R3:2,R1:0) (RNDL) || R0 = [I0++M0]|| R2 = [I1++M0]; yading@10: R5 = BYTEOP2P (R3:2,R1:0) (RNDL,R) || R0 = [I0++] || [I3++] = R4 ; yading@10: LE$16E: DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; yading@10: yading@10: M1 = 1; yading@10: I3 = B3; yading@10: I1 = B1; yading@10: I0 = B0; yading@10: yading@10: I0 += M1; yading@10: I1 += M1; yading@10: yading@10: DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; yading@10: LSETUP(LS$16O,LE$16O) LC0=P0; yading@10: LS$16O: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; yading@10: R4 = BYTEOP2P 
(R3:2,R1:0) (RNDH) || R0 = [I0++] || R2 =[I1++]; yading@10: R5 = BYTEOP2P (R3:2,R1:0) (RNDH,R) || R1 = [I0++] || R6 =[I3++]; yading@10: R4 = R4 +|+ R6 || R7 = [I3--]; yading@10: R5 = R5 +|+ R7 || [I3++] = R4; yading@10: DISALGNEXCPT || R3 =[I1++] || [I3++] = R5; yading@10: R4 = BYTEOP2P (R3:2,R1:0) (RNDH) || R0 = [I0++M0]|| R2 = [I1++M0]; yading@10: R5 = BYTEOP2P (R3:2,R1:0) (RNDH,R) || R0 = [I0++] || R6 = [I3++]; yading@10: R4 = R4 +|+ R6 || R7 = [I3--]; yading@10: R5 = R5 +|+ R7 || [I3++] = R4; yading@10: LE$16O: DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; yading@10: yading@10: (r7:4) = [sp++]; yading@10: unlink; yading@10: rts; yading@10: DEFUN_END(z_put_pixels16_xy2) yading@10: /* put_pixels16_xy2_nornd: same two-pass scheme as z_put_pixels16_xy2, but with the BYTEOP2P (TL)/(TH) options (presumably the truncating forms - confirm against the Blackfin reference) and with line_size, passed in r2, used as the row stride for both source and destination (m2 = line_size-12, m0 = line_size-16). h is read from the stack via fp. */ yading@10: DEFUN(put_pixels16_xy2_nornd,mL1, yading@10: (uint8_t *block, const uint8_t *s0, yading@10: int line_size, int h)): yading@10: link 0; yading@10: [--sp] = (r7:4); yading@10: i3=r0; // dest yading@10: i0=r1; // src0--> pixels yading@10: i1=r1; // src1--> pixels + line_size yading@10: m3=r2; yading@10: r2+=-12; yading@10: m2=r2; yading@10: r2+=-4; yading@10: i1+=m3; /* src1 + line_size */ yading@10: m0=r2; /* line_size - 16 */ yading@10: p0=[fp+20]; // h yading@10: yading@10: B0=I0; yading@10: B1=I1; yading@10: B3=I3; yading@10: yading@10: DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; yading@10: yading@10: LSETUP(LS$16ET,LE$16ET) LC0=P0; yading@10: LS$16ET:DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; yading@10: R4 = BYTEOP2P (R3:2,R1:0) (TL) || R0 = [I0++] || R2 =[I1++]; yading@10: R5 = BYTEOP2P (R3:2,R1:0) (TL,R) || R1 = [I0++] || [I3++] = R4 ; yading@10: DISALGNEXCPT || R3 = [I1++] || [I3++] = R5; yading@10: R4 = BYTEOP2P (R3:2,R1:0) (TL) || R0 = [I0++M0]|| R2 = [I1++M0]; yading@10: R5 = BYTEOP2P (R3:2,R1:0) (TL,R) || R0 = [I0++] || [I3++] = R4 ; yading@10: LE$16ET:DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; yading@10: yading@10: M1 = 1; yading@10: I3=B3; yading@10: I1=B1; yading@10: I0=B0; yading@10: yading@10: I0 += M1; yading@10: I1 += M1; yading@10: yading@10: DISALGNEXCPT 
|| R0 = [I0++] || R2 =[I1++]; yading@10: LSETUP(LS$16OT,LE$16OT) LC0=P0; yading@10: LS$16OT:DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; yading@10: R4 = BYTEOP2P (R3:2,R1:0) (TH) || R0 = [I0++] || R2 =[I1++]; yading@10: R5 = BYTEOP2P (R3:2,R1:0) (TH,R) || R1 = [I0++] || R6 =[I3++]; yading@10: R4 = R4 +|+ R6 || R7 = [I3--]; yading@10: R5 = R5 +|+ R7 || [I3++] = R4; yading@10: DISALGNEXCPT || R3 =[I1++] || [I3++] = R5; yading@10: R4 = BYTEOP2P (R3:2,R1:0) (TH) || R0 = [I0++M0]|| R2 = [I1++M0]; yading@10: R5 = BYTEOP2P (R3:2,R1:0) (TH,R) || R0 = [I0++] || R6 = [I3++]; yading@10: R4 = R4 +|+ R6 || R7 = [I3--]; yading@10: R5 = R5 +|+ R7 || [I3++] = R4; yading@10: LE$16OT:DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; yading@10: yading@10: (r7:4) = [sp++]; yading@10: unlink; yading@10: rts; yading@10: DEFUN_END(put_pixels16_xy2_nornd) yading@10: /* z_put_pixels8_xy2: 8-byte-wide version of the two-pass xy2 scheme. Pass 1 (LS$8E..LE$8E) stores BYTEOP2P (RNDL) results for s0 and the row line_size bytes after it; pass 2 (LS$8O..LE$8O) restarts from the pointers saved in b0/b1/b3 with both sources moved forward one byte, computes BYTEOP2P (RNDH) and adds each result with +|+ to the word pass 1 stored before writing it back. The destination steps dest_size bytes per row (4 + (dest_size-4)) and the sources step line_size bytes per row (4 + (line_size-8) + 4). */ yading@10: DEFUN(z_put_pixels8_xy2,mL1, yading@10: (uint8_t *block, const uint8_t *s0, yading@10: int dest_size, int line_size, int h)): yading@10: link 0; yading@10: [--sp] = (r7:4); yading@10: i3=r0; // dest yading@10: i0=r1; // src0--> pixels yading@10: i1=r1; // src1--> pixels + line_size yading@10: r2+=-4; yading@10: m2=r2; // m2=dest_size-4 yading@10: r2=[fp+20]; yading@10: m3=r2; // line_size yading@10: p0=[fp+24]; // h yading@10: r2+=-8; yading@10: i1+=m3; /* src1 + line_size */ yading@10: m0=r2; /* line_size - 8 */ yading@10: yading@10: b0 = I0; yading@10: b1 = I1; yading@10: b3 = I3; yading@10: yading@10: LSETUP(LS$8E,LE$8E) LC0=P0; yading@10: DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; yading@10: LS$8E: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; yading@10: R4 = BYTEOP2P (R3:2,R1:0) (RNDL) || R0 = [I0++M0] || R2 =[I1++M0]; yading@10: R5 = BYTEOP2P (R3:2,R1:0) (RNDL,R) || R0 = [I0++] || [I3++] = R4 ; yading@10: LE$8E: DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; yading@10: yading@10: M1 = 1; yading@10: I3 = b3; yading@10: I1 = b1; yading@10: I0 = b0; yading@10: yading@10: I0 += M1; yading@10: I1 += M1; yading@10: 
yading@10: LSETUP(LS$8O,LE$8O) LC0=P0; yading@10: DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; yading@10: LS$8O: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; yading@10: R4 = BYTEOP2P (R3:2,R1:0) (RNDH) || R0 = [I0++M0] || R2 =[I1++M0]; yading@10: R5 = BYTEOP2P (R3:2,R1:0) (RNDH,R) || R0 = [I0++] || R6 =[I3++]; yading@10: R4 = R4 +|+ R6 || R7 = [I3--]; yading@10: R5 = R5 +|+ R7 || [I3++] = R4; yading@10: LE$8O: DISALGNEXCPT || R2 =[I1++] || [I3++M2] = R5; yading@10: yading@10: (r7:4) = [sp++]; yading@10: unlink; yading@10: rts; yading@10: DEFUN_END(z_put_pixels8_xy2) yading@10: /* put_pixels8_xy2_nornd: 8-byte-wide two-pass xy2 routine using the BYTEOP2P (TL)/(TH) options (presumably the truncating forms - confirm against the Blackfin reference). line_size, passed in r2, is the row stride for both source and destination (m2 = line_size-4, m0 = line_size-8); h is read from the stack via fp. Pass 2 (LS$8OT..LE$8OT) adds its results with +|+ to the words stored by pass 1 (LS$8ET..LE$8ET). */ yading@10: DEFUN(put_pixels8_xy2_nornd,mL1, yading@10: (uint8_t *block, const uint8_t *s0, int line_size, int h)): yading@10: link 0; yading@10: [--sp] = (r7:4); yading@10: i3=r0; // dest yading@10: i0=r1; // src0--> pixels yading@10: i1=r1; // src1--> pixels + line_size yading@10: m3=r2; yading@10: r2+=-4; yading@10: m2=r2; yading@10: r2+=-4; yading@10: i1+=m3; /* src1 + line_size */ yading@10: m0=r2; /* line_size - 8 */ yading@10: p0=[fp+20]; // h yading@10: yading@10: yading@10: b0 = I0; yading@10: b1 = I1; yading@10: b3 = I3; yading@10: yading@10: LSETUP(LS$8ET,LE$8ET) LC0=P0; yading@10: DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; yading@10: yading@10: LS$8ET: DISALGNEXCPT || R1 = [I0++] || R3 = [I1++]; yading@10: R4 = BYTEOP2P (R3:2,R1:0) (TL) || R0 = [I0++M0] || R2 = [I1++M0]; yading@10: R5 = BYTEOP2P (R3:2,R1:0) (TL,R) || R0 = [I0++] || [I3++] = R4 ; yading@10: LE$8ET: DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; yading@10: yading@10: M1 = 1; yading@10: I3 = b3; yading@10: I1 = b1; yading@10: I0 = b0; yading@10: yading@10: I0 += M1; yading@10: I1 += M1; yading@10: yading@10: LSETUP(LS$8OT,LE$8OT) LC0=P0; yading@10: DISALGNEXCPT || R0 = [I0++] || R2 = [I1++]; yading@10: yading@10: LS$8OT: DISALGNEXCPT || R1 = [I0++] || R3 = [I1++]; yading@10: R4 = BYTEOP2P (R3:2,R1:0) (TH) || R0 = [I0++M0] || R2 = [I1++M0]; yading@10: R5 = BYTEOP2P (R3:2,R1:0) (TH,R) || R0 = [I0++] || R6 = [I3++]; 
yading@10: R4 = R4 +|+ R6 || R7 = [I3--]; yading@10: R5 = R5 +|+ R7 || [I3++] = R4; yading@10: LE$8OT: DISALGNEXCPT || R2 =[I1++] || [I3++M2] = R5; yading@10: yading@10: (r7:4) = [sp++]; yading@10: unlink; yading@10: rts; yading@10: DEFUN_END(put_pixels8_xy2_nornd)