/* * H.265 video codec. * Copyright (c) 2013-2014 struktur AG, Dirk Farin * * This file is part of libde265. * * libde265 is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation, either version 3 of * the License, or (at your option) any later version. * * libde265 is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with libde265. If not, see . */ #include "fallback-motion.h" #include "util.h" #if defined(_MSC_VER) || defined(__MINGW32__) # include #elif defined(HAVE_ALLOCA_H) # include #endif #include void put_unweighted_pred_8_fallback(uint8_t *dst, ptrdiff_t dststride, const int16_t *src, ptrdiff_t srcstride, int width, int height) { int offset8bit = 32; int shift8bit = 6; assert((width&1)==0); for (int y=0;y>shift8bit); out[1] = Clip1_8bit((in[1] + offset8bit)>>shift8bit); out+=2; in+=2; } } } void put_weighted_pred_8_fallback(uint8_t *dst, ptrdiff_t dststride, const int16_t *src, ptrdiff_t srcstride, int width, int height, int w,int o,int log2WD) { assert(log2WD>=1); // TODO const int rnd = (1<<(log2WD-1)); for (int y=0;y>log2WD) + o); out++; in++; } } } void put_weighted_bipred_8_fallback(uint8_t *dst, ptrdiff_t dststride, const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride, int width, int height, int w1,int o1, int w2,int o2, int log2WD) { assert(log2WD>=1); // TODO const int rnd = ((o1+o2+1) << log2WD); for (int y=0;y>(log2WD+1)); out++; in1++; in2++; } } } void put_weighted_pred_avg_8_fallback(uint8_t *dst, ptrdiff_t dststride, const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride, int width, int height) { int offset8bit = 64; int shift8bit = 7; assert((width&1)==0); // I had a special case for 8-pixel parallel, unrolled code, // but I did not see any speedup. #if 0 for (int y=0;y>shift8bit); out++; in1++; in2++; } } #endif #if 0 if ((width&7)==0) { for (int y=0;y>shift8bit); out[1] = Clip1_8bit((in1[1] + in2[1] + offset8bit)>>shift8bit); out[2] = Clip1_8bit((in1[2] + in2[2] + offset8bit)>>shift8bit); out[3] = Clip1_8bit((in1[3] + in2[3] + offset8bit)>>shift8bit); out[4] = Clip1_8bit((in1[4] + in2[4] + offset8bit)>>shift8bit); out[5] = Clip1_8bit((in1[5] + in2[5] + offset8bit)>>shift8bit); out[6] = Clip1_8bit((in1[6] + in2[6] + offset8bit)>>shift8bit); out[7] = Clip1_8bit((in1[7] + in2[7] + offset8bit)>>shift8bit); out+=8; in1+=8; in2+=8; } } } else #endif { for (int y=0;y>shift8bit); out[1] = Clip1_8bit((in1[1] + in2[1] + offset8bit)>>shift8bit); out+=2; in1+=2; in2+=2; } } } } void put_unweighted_pred_16_fallback(uint16_t *dst, ptrdiff_t dststride, const int16_t *src, ptrdiff_t srcstride, int width, int height, int bit_depth) { int shift1 = 14-bit_depth; int offset1 = 0; if (shift1>0) { offset1 = 1<<(shift1-1); } assert((width&1)==0); for (int y=0;y>shift1, bit_depth); out[1] = Clip_BitDepth((in[1] + offset1)>>shift1, bit_depth); out+=2; in+=2; } } } #include void put_weighted_pred_16_fallback(uint16_t *dst, ptrdiff_t dststride, const int16_t *src, ptrdiff_t srcstride, int width, int height, int w,int o,int log2WD, int bit_depth) { assert(log2WD>=1); // TODO const int rnd = (1<<(log2WD-1)); for (int y=0;y>log2WD) + o, bit_depth); out++; in++; } } } void put_weighted_bipred_16_fallback(uint16_t *dst, ptrdiff_t dststride, const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride, int width, int height, int w1,int o1, int w2,int o2, int log2WD, int bit_depth) { assert(log2WD>=1); // TODO const int rnd = ((o1+o2+1) << log2WD); for (int y=0;y>(log2WD+1), bit_depth); out++; in1++; in2++; } } } void put_weighted_pred_avg_16_fallback(uint16_t *dst, ptrdiff_t dststride, const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride, int width, int height, int bit_depth) { int shift2 = 15-bit_depth; int offset2 = 1<<(shift2-1); assert((width&1)==0); for (int y=0;y>shift2, bit_depth); out[1] = Clip_BitDepth((in1[1] + in2[1] + offset2)>>shift2, bit_depth); out+=2; in1+=2; in2+=2; } } } void put_epel_8_fallback(int16_t *out, ptrdiff_t out_stride, const uint8_t *src, ptrdiff_t src_stride, int width, int height, int mx, int my, int16_t* mcbuffer) { int shift3 = 6; for (int y=0;y void put_epel_hv_fallback(int16_t *dst, ptrdiff_t dst_stride, const pixel_t *src, ptrdiff_t src_stride, int nPbWC, int nPbHC, int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth) { const int shift1 = bit_depth-8; const int shift2 = 6; //const int shift3 = 6; int extra_left = 1; int extra_top = 1; // int extra_right = 2; int extra_bottom= 2; int nPbH_extra = extra_top + nPbHC + extra_bottom; int16_t* tmp2buf = (int16_t*)alloca( nPbWC * nPbH_extra * sizeof(int16_t) ); /* int nPbW_extra = extra_left + nPbWC + extra_right; printf("x,y FracC: %d/%d\n",xFracC,yFracC); printf("---IN---\n"); for (int y=-extra_top;y>shift1; break; case 2: v = (-4*p[0]+54*p[1]+16*p[2]-2*p[3])>>shift1; break; case 3: v = (-6*p[0]+46*p[1]+28*p[2]-4*p[3])>>shift1; break; case 4: v = (-4*p[0]+36*p[1]+36*p[2]-4*p[3])>>shift1; break; case 5: v = (-4*p[0]+28*p[1]+46*p[2]-6*p[3])>>shift1; break; case 6: v = (-2*p[0]+16*p[1]+54*p[2]-4*p[3])>>shift1; break; default: case 7: v = (-2*p[0]+10*p[1]+58*p[2]-2*p[3])>>shift1; break; } //printf("%d %d %d %d -> %d\n",p[0],p[1],p[2],p[3],v); tmp2buf[y+extra_top + x*nPbH_extra] = v; p++; //printf("%05d ",tmp2buf[y+extra_top + x*nPbH_extra]); } //printf("\n"); } // V-filters int vshift = (xFracC==0 ? shift1 : shift2); for (int x=0;x>vshift; break; case 2: v = (-4*p[0]+54*p[1]+16*p[2]-2*p[3])>>vshift; break; case 3: v = (-6*p[0]+46*p[1]+28*p[2]-4*p[3])>>vshift; break; case 4: v = (-4*p[0]+36*p[1]+36*p[2]-4*p[3])>>vshift; break; case 5: v = (-4*p[0]+28*p[1]+46*p[2]-6*p[3])>>vshift; break; case 6: v = (-2*p[0]+16*p[1]+54*p[2]-4*p[3])>>vshift; break; default: case 7: v = (-2*p[0]+10*p[1]+58*p[2]-2*p[3])>>vshift; break; } dst[x + y*dst_stride] = v; p++; } } /* printf("---V---\n"); for (int y=0;y(int16_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, int nPbWC, int nPbHC, int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth); template void put_epel_hv_fallback(int16_t *dst, ptrdiff_t dst_stride, const uint16_t *src, ptrdiff_t src_stride, int nPbWC, int nPbHC, int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth); void put_qpel_0_0_fallback(int16_t *out, ptrdiff_t out_stride, const uint8_t *src, ptrdiff_t srcstride, int nPbW, int nPbH, int16_t* mcbuffer) { //const int shift1 = 0; // sps->BitDepth_Y-8; const int shift2 = 6; // straight copy for (int y=0;y void put_qpel_fallback(int16_t *out, ptrdiff_t out_stride, const pixel_t *src, ptrdiff_t srcstride, int nPbW, int nPbH, int16_t* mcbuffer, int xFracL, int yFracL, int bit_depth) { int extra_left = extra_before[xFracL]; //int extra_right = extra_after [xFracL]; int extra_top = extra_before[yFracL]; int extra_bottom = extra_after [yFracL]; //int nPbW_extra = extra_left + nPbW + extra_right; int nPbH_extra = extra_top + nPbH + extra_bottom; const int shift1 = bit_depth-8; const int shift2 = 6; // H-filters switch (xFracL) { case 0: for (int y=-extra_top;y>shift1; o += nPbH_extra; p++; } } break; case 2: for (int y=-extra_top;y>shift1; o += nPbH_extra; p++; } } break; case 3: for (int y=-extra_top;y>shift1; o += nPbH_extra; p++; } } break; } logtrace(LogMotion,"---H---\n"); for (int y=-extra_top;y>vshift; o+=out_stride; p++; } } break; case 2: for (int x=0;x>vshift; o+=out_stride; p++; } } break; case 3: for (int x=0;x>vshift; o+=out_stride; p++; } } break; } logtrace(LogMotion,"---V---\n"); for (int y=0;y