• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Tags
No Tags

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

hardware/intel/intel-driver


Commit MetaInfo

Revision: d11b8895cbe373870e1f246c52fdc63ffc10cf7f (tree)
Time: 2015-03-19 11:01:29
Author: Zhong Li <zhong.li@inte...>
Committer: Xiang, Haihao

Log Message

VP8 HWEnc: Add BSW VP8 HWEnc support

Add BSW vp8 encoding support, and let SKL and BDW use the same PAK pipeline.

Signed-off-by: Zhong Li <zhong.li@intel.com>
(cherry picked from commit c2be56ae6f3628ea246a1dd02e5cac18da84df56)

Change Summary

Incremental Difference

--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -43,6 +43,7 @@
4343 #include "gen6_vme.h"
4444 #include "intel_media.h"
4545 #include <va/va_enc_jpeg.h>
46+#include "vp8_probs.h"
4647
4748 #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
4849 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
@@ -131,6 +132,7 @@ static struct i965_kernel gen8_mfc_kernels[] = {
131132 #define INTER_16X8 0x01
132133 #define INTER_8X16 0x02
133134 #define SUBMB_SHAPE_MASK 0x00FF00
135+#define INTER_16X16 0x00
134136
135137 #define INTER_MV8 (4 << 20)
136138 #define INTER_MV32 (6 << 20)
@@ -146,7 +148,8 @@ gen8_mfc_pipe_mode_select(VADriverContextP ctx,
146148
147149 assert(standard_select == MFX_FORMAT_MPEG2 ||
148150 standard_select == MFX_FORMAT_AVC ||
149- standard_select == MFX_FORMAT_JPEG);
151+ standard_select == MFX_FORMAT_JPEG ||
152+ standard_select == MFX_FORMAT_VP8);
150153
151154 BEGIN_BCS_BATCH(batch, 5);
152155
@@ -157,6 +160,7 @@ gen8_mfc_pipe_mode_select(VADriverContextP ctx,
157160 (0 << 10) | /* Stream-Out Enable */
158161 ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
159162 ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
163+ (0 << 6) | /* frame statistics stream-out enable*/
160164 (0 << 5) | /* not in stitch mode */
161165 (1 << 4) | /* encoding mode */
162166 (standard_select << 0)); /* standard select: avc or mpeg2 or jpeg*/
@@ -221,9 +225,18 @@ gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
221225 OUT_BCS_BATCH(batch, 0);
222226 OUT_BCS_BATCH(batch, 0);
223227 OUT_BCS_BATCH(batch, 0);
228+
224229 /* the DW4-5 is the MFX upper bound */
225- OUT_BCS_BATCH(batch, 0);
226- OUT_BCS_BATCH(batch, 0);
230+ if (encoder_context->codec == CODEC_VP8) {
231+ OUT_BCS_RELOC(batch,
232+ mfc_context->mfc_indirect_pak_bse_object.bo,
233+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
234+ mfc_context->mfc_indirect_pak_bse_object.end_offset);
235+ OUT_BCS_BATCH(batch, 0);
236+ } else {
237+ OUT_BCS_BATCH(batch, 0);
238+ OUT_BCS_BATCH(batch, 0);
239+ }
227240
228241 if(encoder_context->codec != CODEC_JPEG) {
229242 vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
@@ -3201,6 +3214,877 @@ gen8_mfc_jpeg_encode_picture(VADriverContextP ctx,
32013214 return VA_STATUS_SUCCESS;
32023215 }
32033216
3217+static void vp8_enc_state_init(struct gen6_mfc_context *mfc_context,
3218+ VAEncPictureParameterBufferVP8 *pic_param,
3219+ VAQMatrixBufferVP8 *q_matrix)
3220+{
3221+
3222+ int is_key_frame = !pic_param->pic_flags.bits.frame_type;
3223+ unsigned char *coeff_probs_stream_in_buffer;
3224+
3225+ mfc_context->vp8_state.frame_header_lf_update_pos = 0;
3226+ mfc_context->vp8_state.frame_header_qindex_update_pos = 0;
3227+ mfc_context->vp8_state.frame_header_token_update_pos = 0;
3228+ mfc_context->vp8_state.frame_header_bin_mv_upate_pos = 0;
3229+
3230+ mfc_context->vp8_state.prob_skip_false = 255;
3231+ memset(mfc_context->vp8_state.mb_segment_tree_probs, 0, sizeof(mfc_context->vp8_state.mb_segment_tree_probs));
3232+ memcpy(mfc_context->vp8_state.mv_probs, vp8_default_mv_context, sizeof(mfc_context->vp8_state.mv_probs));
3233+
3234+ if (is_key_frame) {
3235+ memcpy(mfc_context->vp8_state.y_mode_probs, vp8_kf_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
3236+ memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_kf_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
3237+
3238+ mfc_context->vp8_state.prob_intra = 255;
3239+ mfc_context->vp8_state.prob_last = 128;
3240+ mfc_context->vp8_state.prob_gf = 128;
3241+ } else {
3242+ memcpy(mfc_context->vp8_state.y_mode_probs, vp8_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
3243+ memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
3244+
3245+ mfc_context->vp8_state.prob_intra = 63;
3246+ mfc_context->vp8_state.prob_last = 128;
3247+ mfc_context->vp8_state.prob_gf = 128;
3248+ }
3249+
3250+ mfc_context->vp8_state.prob_skip_false = vp8_base_skip_false_prob[q_matrix->quantization_index[0]];
3251+
3252+ dri_bo_map(mfc_context->vp8_state.coeff_probs_stream_in_bo, 1);
3253+ coeff_probs_stream_in_buffer = (unsigned char *)mfc_context->vp8_state.coeff_probs_stream_in_bo->virtual;
3254+ assert(coeff_probs_stream_in_buffer);
3255+ memcpy(coeff_probs_stream_in_buffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
3256+ dri_bo_unmap(mfc_context->vp8_state.coeff_probs_stream_in_bo);
3257+}
3258+
3259+static void vp8_enc_state_update(struct gen6_mfc_context *mfc_context,
3260+ VAQMatrixBufferVP8 *q_matrix)
3261+{
3262+
3263+ /*some other probabilities need to be updated*/
3264+}
3265+
3266+extern void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param,
3267+ VAEncPictureParameterBufferVP8 *pic_param,
3268+ VAQMatrixBufferVP8 *q_matrix,
3269+ struct gen6_mfc_context *mfc_context);
3270+
3271+static void vp8_enc_frame_header_binarize(struct encode_state *encode_state,
3272+ struct gen6_mfc_context *mfc_context)
3273+{
3274+ VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3275+ VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3276+ VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3277+ unsigned char *frame_header_buffer;
3278+
3279+ binarize_vp8_frame_header(seq_param, pic_param, q_matrix, mfc_context);
3280+
3281+ dri_bo_map(mfc_context->vp8_state.frame_header_bo, 1);
3282+ frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual;
3283+ assert(frame_header_buffer);
3284+ memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8);
3285+ dri_bo_unmap(mfc_context->vp8_state.frame_header_bo);
3286+}
3287+
3288+#define MAX_VP8_FRAME_HEADER_SIZE 0x2000
3289+#define VP8_TOKEN_STATISTICS_BUFFER_SIZE 0x2000
3290+
3291+static void gen8_mfc_vp8_init(VADriverContextP ctx,
3292+ struct encode_state *encode_state,
3293+ struct intel_encoder_context *encoder_context)
3294+{
3295+ struct i965_driver_data *i965 = i965_driver_data(ctx);
3296+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3297+ dri_bo *bo;
3298+ int i;
3299+ int width_in_mbs = 0;
3300+ int height_in_mbs = 0;
3301+ int slice_batchbuffer_size;
3302+
3303+ VAEncSequenceParameterBufferVP8 *pSequenceParameter = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3304+ VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3305+ VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3306+
3307+ width_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
3308+ height_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
3309+
3310+ slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
3311+ (SLICE_HEADER + SLICE_TAIL);
3312+
3313+ /*Encode common setup for MFC*/
3314+ dri_bo_unreference(mfc_context->post_deblocking_output.bo);
3315+ mfc_context->post_deblocking_output.bo = NULL;
3316+
3317+ dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
3318+ mfc_context->pre_deblocking_output.bo = NULL;
3319+
3320+ dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
3321+ mfc_context->uncompressed_picture_source.bo = NULL;
3322+
3323+ dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
3324+ mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
3325+
3326+ for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
3327+ if ( mfc_context->direct_mv_buffers[i].bo != NULL)
3328+ dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
3329+ mfc_context->direct_mv_buffers[i].bo = NULL;
3330+ }
3331+
3332+ for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
3333+ if (mfc_context->reference_surfaces[i].bo != NULL)
3334+ dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
3335+ mfc_context->reference_surfaces[i].bo = NULL;
3336+ }
3337+
3338+ dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
3339+ bo = dri_bo_alloc(i965->intel.bufmgr,
3340+ "Buffer",
3341+ width_in_mbs * 64 * 16,
3342+ 64);
3343+ assert(bo);
3344+ mfc_context->intra_row_store_scratch_buffer.bo = bo;
3345+
3346+ dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
3347+ bo = dri_bo_alloc(i965->intel.bufmgr,
3348+ "Buffer",
3349+ width_in_mbs * height_in_mbs * 16,
3350+ 64);
3351+ assert(bo);
3352+ mfc_context->macroblock_status_buffer.bo = bo;
3353+
3354+ dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
3355+ bo = dri_bo_alloc(i965->intel.bufmgr,
3356+ "Buffer",
3357+ 16 * width_in_mbs * 64, /* 16 * width_in_mbs * 64 */
3358+ 64);
3359+ assert(bo);
3360+ mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
3361+
3362+ dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
3363+ bo = dri_bo_alloc(i965->intel.bufmgr,
3364+ "Buffer",
3365+ 16 * width_in_mbs * 64, /* 16 * width_in_mbs * 64 */
3366+ 0x1000);
3367+ assert(bo);
3368+ mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
3369+
3370+ dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
3371+ mfc_context->mfc_batchbuffer_surface.bo = NULL;
3372+
3373+ dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
3374+ mfc_context->aux_batchbuffer_surface.bo = NULL;
3375+
3376+ if (mfc_context->aux_batchbuffer)
3377+ intel_batchbuffer_free(mfc_context->aux_batchbuffer);
3378+
3379+ mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
3380+ mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
3381+ dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
3382+ mfc_context->aux_batchbuffer_surface.pitch = 16;
3383+ mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
3384+ mfc_context->aux_batchbuffer_surface.size_block = 16;
3385+
3386+ i965_gpe_context_init(ctx, &mfc_context->gpe_context);
3387+
3388+ /* alloc vp8 encoding buffers*/
3389+ dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
3390+ bo = dri_bo_alloc(i965->intel.bufmgr,
3391+ "Buffer",
3392+ MAX_VP8_FRAME_HEADER_SIZE,
3393+ 0x1000);
3394+ assert(bo);
3395+ mfc_context->vp8_state.frame_header_bo = bo;
3396+
3397+ mfc_context->vp8_state.intermediate_buffer_max_size = width_in_mbs * height_in_mbs * 256 * 9;
3398+ for(i = 0; i < 8; i++) {
3399+ mfc_context->vp8_state.intermediate_partition_offset[i] = width_in_mbs * height_in_mbs * 256 * (i + 1);
3400+ }
3401+ dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
3402+ bo = dri_bo_alloc(i965->intel.bufmgr,
3403+ "Buffer",
3404+ mfc_context->vp8_state.intermediate_buffer_max_size,
3405+ 0x1000);
3406+ assert(bo);
3407+ mfc_context->vp8_state.intermediate_bo = bo;
3408+
3409+ dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
3410+ bo = dri_bo_alloc(i965->intel.bufmgr,
3411+ "Buffer",
3412+ width_in_mbs * height_in_mbs * 16,
3413+ 0x1000);
3414+ assert(bo);
3415+ mfc_context->vp8_state.stream_out_bo = bo;
3416+
3417+ dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
3418+ bo = dri_bo_alloc(i965->intel.bufmgr,
3419+ "Buffer",
3420+ sizeof(vp8_default_coef_probs),
3421+ 0x1000);
3422+ assert(bo);
3423+ mfc_context->vp8_state.coeff_probs_stream_in_bo = bo;
3424+
3425+ dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
3426+ bo = dri_bo_alloc(i965->intel.bufmgr,
3427+ "Buffer",
3428+ VP8_TOKEN_STATISTICS_BUFFER_SIZE,
3429+ 0x1000);
3430+ assert(bo);
3431+ mfc_context->vp8_state.token_statistics_bo = bo;
3432+
3433+ dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
3434+ bo = dri_bo_alloc(i965->intel.bufmgr,
3435+ "Buffer",
3436+ width_in_mbs * 16 * 64,
3437+ 0x1000);
3438+ assert(bo);
3439+ mfc_context->vp8_state.mpc_row_store_bo = bo;
3440+
3441+ vp8_enc_state_init(mfc_context, pic_param, q_matrix);
3442+ vp8_enc_frame_header_binarize(encode_state, mfc_context);
3443+}
3444+
3445+static VAStatus
3446+intel_mfc_vp8_prepare(VADriverContextP ctx,
3447+ struct encode_state *encode_state,
3448+ struct intel_encoder_context *encoder_context)
3449+{
3450+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3451+ struct object_surface *obj_surface;
3452+ struct object_buffer *obj_buffer;
3453+ struct i965_coded_buffer_segment *coded_buffer_segment;
3454+ VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3455+ VAStatus vaStatus = VA_STATUS_SUCCESS;
3456+ dri_bo *bo;
3457+ int i;
3458+
3459+ /* reconstructed surface */
3460+ obj_surface = encode_state->reconstructed_object;
3461+ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
3462+ if (pic_param->loop_filter_level[0] == 0) {
3463+ mfc_context->pre_deblocking_output.bo = obj_surface->bo;
3464+ dri_bo_reference(mfc_context->pre_deblocking_output.bo);
3465+ } else {
3466+ mfc_context->post_deblocking_output.bo = obj_surface->bo;
3467+ dri_bo_reference(mfc_context->post_deblocking_output.bo);
3468+ }
3469+
3470+ mfc_context->surface_state.width = obj_surface->orig_width;
3471+ mfc_context->surface_state.height = obj_surface->orig_height;
3472+ mfc_context->surface_state.w_pitch = obj_surface->width;
3473+ mfc_context->surface_state.h_pitch = obj_surface->height;
3474+
3475+ /* set vp8 reference frames */
3476+ for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
3477+ obj_surface = encode_state->reference_objects[i];
3478+
3479+ if (obj_surface && obj_surface->bo) {
3480+ mfc_context->reference_surfaces[i].bo = obj_surface->bo;
3481+ dri_bo_reference(mfc_context->reference_surfaces[i].bo);
3482+ } else {
3483+ mfc_context->reference_surfaces[i].bo = NULL;
3484+ }
3485+ }
3486+
3487+ /* input YUV surface */
3488+ obj_surface = encode_state->input_yuv_object;
3489+ mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
3490+ dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
3491+
3492+ /* coded buffer */
3493+ obj_buffer = encode_state->coded_buf_object;
3494+ bo = obj_buffer->buffer_store->bo;
3495+ mfc_context->mfc_indirect_pak_bse_object.bo = bo;
3496+ mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
3497+ mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
3498+ dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
3499+
3500+ dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
3501+ mfc_context->vp8_state.final_frame_bo = mfc_context->mfc_indirect_pak_bse_object.bo;
3502+ mfc_context->vp8_state.final_frame_byte_offset = I965_CODEDBUFFER_HEADER_SIZE;
3503+ dri_bo_reference(mfc_context->vp8_state.final_frame_bo);
3504+
3505+ /* set the internal flag to 0 to indicate the coded size is unknown */
3506+ dri_bo_map(bo, 1);
3507+ coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
3508+ coded_buffer_segment->mapped = 0;
3509+ coded_buffer_segment->codec = encoder_context->codec;
3510+ dri_bo_unmap(bo);
3511+
3512+ return vaStatus;
3513+}
3514+
3515+static void
3516+gen8_mfc_vp8_encoder_cfg(VADriverContextP ctx,
3517+ struct encode_state *encode_state,
3518+ struct intel_encoder_context *encoder_context)
3519+{
3520+ struct intel_batchbuffer *batch = encoder_context->base.batch;
3521+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3522+ VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3523+ VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3524+
3525+ BEGIN_BCS_BATCH(batch, 30);
3526+ OUT_BCS_BATCH(batch, MFX_VP8_ENCODER_CFG | (30 - 2)); /* SKL should be 31-2 ? */
3527+
3528+ OUT_BCS_BATCH(batch,
3529+ 0 << 9 | /* compressed bitstream output disable */
3530+ 1 << 7 | /* disable per-segment delta qindex and loop filter in RC */
3531+ 1 << 6 | /* RC initial pass */
3532+ 0 << 4 | /* upate segment feature date flag */
3533+ 1 << 3 | /* bitstream statistics output enable */
3534+ 1 << 2 | /* token statistics output enable */
3535+ 0 << 1 | /* final bitstream output disable */
3536+ 0 << 0); /*DW1*/
3537+
3538+ OUT_BCS_BATCH(batch, 0); /*DW2*/
3539+
3540+ OUT_BCS_BATCH(batch,
3541+ 0xfff << 16 | /* max intra mb bit count limit */
3542+ 0xfff << 0 /* max inter mb bit count limit */
3543+ ); /*DW3*/
3544+
3545+ OUT_BCS_BATCH(batch, 0); /*DW4*/
3546+ OUT_BCS_BATCH(batch, 0); /*DW5*/
3547+ OUT_BCS_BATCH(batch, 0); /*DW6*/
3548+ OUT_BCS_BATCH(batch, 0); /*DW7*/
3549+ OUT_BCS_BATCH(batch, 0); /*DW8*/
3550+ OUT_BCS_BATCH(batch, 0); /*DW9*/
3551+ OUT_BCS_BATCH(batch, 0); /*DW10*/
3552+ OUT_BCS_BATCH(batch, 0); /*DW11*/
3553+ OUT_BCS_BATCH(batch, 0); /*DW12*/
3554+ OUT_BCS_BATCH(batch, 0); /*DW13*/
3555+ OUT_BCS_BATCH(batch, 0); /*DW14*/
3556+ OUT_BCS_BATCH(batch, 0); /*DW15*/
3557+ OUT_BCS_BATCH(batch, 0); /*DW16*/
3558+ OUT_BCS_BATCH(batch, 0); /*DW17*/
3559+ OUT_BCS_BATCH(batch, 0); /*DW18*/
3560+ OUT_BCS_BATCH(batch, 0); /*DW19*/
3561+ OUT_BCS_BATCH(batch, 0); /*DW20*/
3562+ OUT_BCS_BATCH(batch, 0); /*DW21*/
3563+
3564+ OUT_BCS_BATCH(batch,
3565+ pic_param->pic_flags.bits.show_frame << 23 |
3566+ pic_param->pic_flags.bits.version << 20
3567+ ); /*DW22*/
3568+
3569+ OUT_BCS_BATCH(batch,
3570+ (seq_param->frame_height_scale << 14 | seq_param->frame_height) << 16 |
3571+ (seq_param->frame_width_scale << 14 | seq_param->frame_width) << 0
3572+ );
3573+
3574+ /*DW24*/
3575+ OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bit_count); /* frame header bit count */
3576+
3577+ /*DW25*/
3578+ OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_qindex_update_pos); /* frame header bin buffer qindex update pointer */
3579+
3580+ /*DW26*/
3581+ OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_lf_update_pos); /* frame header bin buffer loop filter update pointer*/
3582+
3583+ /*DW27*/
3584+ OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_token_update_pos); /* frame header bin buffer token update pointer */
3585+
3586+ /*DW28*/
3587+ OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bin_mv_upate_pos); /*frame header bin buffer mv update pointer */
3588+
3589+ /*DW29*/
3590+ OUT_BCS_BATCH(batch, 0);
3591+
3592+ ADVANCE_BCS_BATCH(batch);
3593+}
3594+
3595+static void
3596+gen8_mfc_vp8_pic_state(VADriverContextP ctx,
3597+ struct encode_state *encode_state,
3598+ struct intel_encoder_context *encoder_context)
3599+{
3600+ struct intel_batchbuffer *batch = encoder_context->base.batch;
3601+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3602+ VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3603+ VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3604+ VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3605+ int i, j, log2num;
3606+
3607+ log2num = pic_param->pic_flags.bits.num_token_partitions;
3608+
3609+ /*update mode and token probs*/
3610+ vp8_enc_state_update(mfc_context, q_matrix);
3611+
3612+ BEGIN_BCS_BATCH(batch, 38);
3613+ OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
3614+ OUT_BCS_BATCH(batch,
3615+ (ALIGN(seq_param->frame_height, 16) / 16 - 1) << 16 |
3616+ (ALIGN(seq_param->frame_width, 16) / 16 - 1) << 0);
3617+
3618+ OUT_BCS_BATCH(batch,
3619+ log2num << 24 |
3620+ pic_param->sharpness_level << 16 |
3621+ pic_param->pic_flags.bits.sign_bias_alternate << 13 |
3622+ pic_param->pic_flags.bits.sign_bias_golden << 12 |
3623+ pic_param->pic_flags.bits.loop_filter_adj_enable << 11 |
3624+ pic_param->pic_flags.bits.mb_no_coeff_skip << 10 |
3625+ pic_param->pic_flags.bits.update_mb_segmentation_map << 9 |
3626+ pic_param->pic_flags.bits.segmentation_enabled << 8 |
3627+ !pic_param->pic_flags.bits.frame_type << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
3628+ (pic_param->pic_flags.bits.version / 2) << 4 |
3629+ (pic_param->pic_flags.bits.version == 3) << 1 | /* full pixel mode for version 3 */
3630+ !!pic_param->pic_flags.bits.version << 0); /* version 0: 6 tap */
3631+
3632+ OUT_BCS_BATCH(batch,
3633+ pic_param->loop_filter_level[3] << 24 |
3634+ pic_param->loop_filter_level[2] << 16 |
3635+ pic_param->loop_filter_level[1] << 8 |
3636+ pic_param->loop_filter_level[0] << 0);
3637+
3638+ OUT_BCS_BATCH(batch,
3639+ q_matrix->quantization_index[3] << 24 |
3640+ q_matrix->quantization_index[2] << 16 |
3641+ q_matrix->quantization_index[1] << 8 |
3642+ q_matrix->quantization_index[0] << 0);
3643+
3644+ OUT_BCS_BATCH(batch,
3645+ ((unsigned short)(q_matrix->quantization_index_delta[4]) >> 15) << 28 |
3646+ abs(q_matrix->quantization_index_delta[4]) << 24 |
3647+ ((unsigned short)(q_matrix->quantization_index_delta[3]) >> 15) << 20 |
3648+ abs(q_matrix->quantization_index_delta[3]) << 16 |
3649+ ((unsigned short)(q_matrix->quantization_index_delta[2]) >> 15) << 12 |
3650+ abs(q_matrix->quantization_index_delta[2]) << 8 |
3651+ ((unsigned short)(q_matrix->quantization_index_delta[1]) >> 15) << 4 |
3652+ abs(q_matrix->quantization_index_delta[1]) << 0);
3653+
3654+ OUT_BCS_BATCH(batch,
3655+ ((unsigned short)(q_matrix->quantization_index_delta[0]) >> 15) << 4 |
3656+ abs(q_matrix->quantization_index_delta[0]) << 0);
3657+
3658+ OUT_BCS_BATCH(batch,
3659+ pic_param->clamp_qindex_high << 8 |
3660+ pic_param->clamp_qindex_low << 0);
3661+
3662+ for (i = 8; i < 19; i++) {
3663+ OUT_BCS_BATCH(batch, 0xffffffff);
3664+ }
3665+
3666+ OUT_BCS_BATCH(batch,
3667+ mfc_context->vp8_state.mb_segment_tree_probs[2] << 16 |
3668+ mfc_context->vp8_state.mb_segment_tree_probs[1] << 8 |
3669+ mfc_context->vp8_state.mb_segment_tree_probs[0] << 0);
3670+
3671+ OUT_BCS_BATCH(batch,
3672+ mfc_context->vp8_state.prob_skip_false << 24 |
3673+ mfc_context->vp8_state.prob_intra << 16 |
3674+ mfc_context->vp8_state.prob_last << 8 |
3675+ mfc_context->vp8_state.prob_gf << 0);
3676+
3677+ OUT_BCS_BATCH(batch,
3678+ mfc_context->vp8_state.y_mode_probs[3] << 24 |
3679+ mfc_context->vp8_state.y_mode_probs[2] << 16 |
3680+ mfc_context->vp8_state.y_mode_probs[1] << 8 |
3681+ mfc_context->vp8_state.y_mode_probs[0] << 0);
3682+
3683+ OUT_BCS_BATCH(batch,
3684+ mfc_context->vp8_state.uv_mode_probs[2] << 16 |
3685+ mfc_context->vp8_state.uv_mode_probs[1] << 8 |
3686+ mfc_context->vp8_state.uv_mode_probs[0] << 0);
3687+
3688+ /* MV update value, DW23-DW32 */
3689+ for (i = 0; i < 2; i++) {
3690+ for (j = 0; j < 20; j += 4) {
3691+ OUT_BCS_BATCH(batch,
3692+ (j + 3 == 19 ? 0 : mfc_context->vp8_state.mv_probs[i][j + 3]) << 24 |
3693+ mfc_context->vp8_state.mv_probs[i][j + 2] << 16 |
3694+ mfc_context->vp8_state.mv_probs[i][j + 1] << 8 |
3695+ mfc_context->vp8_state.mv_probs[i][j + 0] << 0);
3696+ }
3697+ }
3698+
3699+ OUT_BCS_BATCH(batch,
3700+ (pic_param->ref_lf_delta[3] & 0x7f) << 24 |
3701+ (pic_param->ref_lf_delta[2] & 0x7f) << 16 |
3702+ (pic_param->ref_lf_delta[1] & 0x7f) << 8 |
3703+ (pic_param->ref_lf_delta[0] & 0x7f) << 0);
3704+
3705+ OUT_BCS_BATCH(batch,
3706+ (pic_param->mode_lf_delta[3] & 0x7f) << 24 |
3707+ (pic_param->mode_lf_delta[2] & 0x7f) << 16 |
3708+ (pic_param->mode_lf_delta[1] & 0x7f) << 8 |
3709+ (pic_param->mode_lf_delta[0] & 0x7f) << 0);
3710+
3711+ OUT_BCS_BATCH(batch, 0);
3712+ OUT_BCS_BATCH(batch, 0);
3713+ OUT_BCS_BATCH(batch, 0);
3714+
3715+ ADVANCE_BCS_BATCH(batch);
3716+}
3717+
/*
 * Emit one three-dword buffer entry into `batch` (a variable that must be in
 * scope at the call site): a relocation to `bo` at `offset`, or a zero dword
 * when `bo` is NULL, followed by two zero dwords.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and stays correct as the body of an unbraced if/else or loop.
 * `bo` is evaluated twice, so pass an expression without side effects.
 */
#define OUT_VP8_BUFFER(bo, offset)                                      \
    do {                                                                \
        if (bo) {                                                       \
            OUT_BCS_RELOC(batch,                                        \
                          (bo),                                         \
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, \
                          (offset));                                    \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
        OUT_BCS_BATCH(batch, 0);                                        \
        OUT_BCS_BATCH(batch, 0);                                        \
    } while (0)
3728+
3729+static void
3730+gen8_mfc_vp8_bsp_buf_base_addr_state(VADriverContextP ctx,
3731+ struct encode_state *encode_state,
3732+ struct intel_encoder_context *encoder_context)
3733+{
3734+ struct intel_batchbuffer *batch = encoder_context->base.batch;
3735+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3736+
3737+ BEGIN_BCS_BATCH(batch, 32);
3738+ OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2));
3739+
3740+ OUT_VP8_BUFFER(mfc_context->vp8_state.frame_header_bo, 0);
3741+
3742+ OUT_VP8_BUFFER(mfc_context->vp8_state.intermediate_bo, 0);
3743+ OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[0]);
3744+ OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[1]);
3745+ OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[2]);
3746+ OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[3]);
3747+ OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[4]);
3748+ OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[5]);
3749+ OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[6]);
3750+ OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[7]);
3751+ OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_buffer_max_size);
3752+
3753+ OUT_VP8_BUFFER(mfc_context->vp8_state.final_frame_bo, I965_CODEDBUFFER_HEADER_SIZE);
3754+ OUT_BCS_BATCH(batch, 0);
3755+
3756+ OUT_VP8_BUFFER(mfc_context->vp8_state.stream_out_bo, 0);
3757+ OUT_VP8_BUFFER(mfc_context->vp8_state.coeff_probs_stream_in_bo, 0);
3758+ OUT_VP8_BUFFER(mfc_context->vp8_state.token_statistics_bo, 0);
3759+ OUT_VP8_BUFFER(mfc_context->vp8_state.mpc_row_store_bo, 0);
3760+
3761+ ADVANCE_BCS_BATCH(batch);
3762+}
3763+
3764+static void
3765+gen8_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx,
3766+ struct encode_state *encode_state,
3767+ struct intel_encoder_context *encoder_context)
3768+{
3769+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3770+
3771+ mfc_context->pipe_mode_select(ctx, MFX_FORMAT_VP8, encoder_context);
3772+ mfc_context->set_surface_state(ctx, encoder_context);
3773+ mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
3774+ gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
3775+ gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
3776+ gen8_mfc_vp8_bsp_buf_base_addr_state(ctx, encode_state, encoder_context);
3777+ gen8_mfc_vp8_pic_state(ctx, encode_state,encoder_context);
3778+ gen8_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context);
3779+}
3780+
3781+static const unsigned char
3782+vp8_intra_mb_mode_map[VME_MB_INTRA_MODE_COUNT] = {
3783+ PAK_V_PRED,
3784+ PAK_H_PRED,
3785+ PAK_DC_PRED,
3786+ PAK_TM_PRED
3787+};
3788+
3789+static const unsigned char
3790+vp8_intra_block_mode_map[VME_B_INTRA_MODE_COUNT] = {
3791+ PAK_B_VE_PRED,
3792+ PAK_B_HE_PRED,
3793+ PAK_B_DC_PRED,
3794+ PAK_B_LD_PRED,
3795+ PAK_B_RD_PRED,
3796+ PAK_B_VR_PRED,
3797+ PAK_B_HD_PRED,
3798+ PAK_B_VL_PRED,
3799+ PAK_B_HU_PRED
3800+};
3801+
3802+static int inline gen8_mfc_vp8_intra_mb_mode_map(unsigned int vme_pred_mode, int is_luma_4x4)
3803+{
3804+ unsigned int i, pak_pred_mode = 0;
3805+ unsigned int vme_sub_blocks_pred_mode[8], pak_sub_blocks_pred_mode[8]; /* 8 blocks's intra mode */
3806+
3807+ if (!is_luma_4x4) {
3808+ pak_pred_mode = vp8_intra_mb_mode_map[vme_pred_mode & 0x3];
3809+ } else {
3810+ for (i = 0; i < 8; i++) {
3811+ vme_sub_blocks_pred_mode[i] = ((vme_pred_mode >> (4 * i)) & 0xf);
3812+ assert(vme_sub_blocks_pred_mode[i] < VME_B_INTRA_MODE_COUNT);
3813+ pak_sub_blocks_pred_mode[i] = vp8_intra_block_mode_map[vme_sub_blocks_pred_mode[i]];
3814+ pak_pred_mode |= (pak_sub_blocks_pred_mode[i] << (4 * i));
3815+ }
3816+ }
3817+
3818+ return pak_pred_mode;
3819+}
3820+static void
3821+gen8_mfc_vp8_pak_object_intra(VADriverContextP ctx,
3822+ struct intel_encoder_context *encoder_context,
3823+ unsigned int *msg,
3824+ int x, int y,
3825+ struct intel_batchbuffer *batch)
3826+{
3827+ unsigned int vme_intra_mb_mode, vme_chroma_pred_mode;
3828+ unsigned int pak_intra_mb_mode, pak_chroma_pred_mode;
3829+ unsigned int vme_luma_pred_mode[2], pak_luma_pred_mode[2];
3830+
3831+ if (batch == NULL)
3832+ batch = encoder_context->base.batch;
3833+
3834+ vme_intra_mb_mode = ((msg[0] & 0x30) >> 4);
3835+ assert((vme_intra_mb_mode == 0) || (vme_intra_mb_mode == 2)); //vp8 only support intra_16x16 and intra_4x4
3836+ pak_intra_mb_mode = (vme_intra_mb_mode >> 1);
3837+
3838+ vme_luma_pred_mode[0] = msg[1];
3839+ vme_luma_pred_mode[1] = msg[2];
3840+ vme_chroma_pred_mode = msg[3] & 0x3;
3841+
3842+ pak_luma_pred_mode[0] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[0], pak_intra_mb_mode);
3843+ pak_luma_pred_mode[1] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[1], pak_intra_mb_mode);
3844+ pak_chroma_pred_mode = gen8_mfc_vp8_intra_mb_mode_map(vme_chroma_pred_mode, 0);
3845+
3846+ BEGIN_BCS_BATCH(batch, 7);
3847+
3848+ OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
3849+ OUT_BCS_BATCH(batch, 0);
3850+ OUT_BCS_BATCH(batch, 0);
3851+ OUT_BCS_BATCH(batch,
3852+ (0 << 20) | /* mv format: intra mb */
3853+ (0 << 18) | /* Segment ID */
3854+ (0 << 17) | /* disable coeff clamp */
3855+ (1 << 13) | /* intra mb flag */
3856+ (0 << 11) | /* refer picture select: last frame */
3857+ (pak_intra_mb_mode << 8) | /* mb type */
3858+ (pak_chroma_pred_mode << 4) | /* mb uv mode */
3859+ (0 << 2) | /* skip mb flag: disable */
3860+ 0);
3861+
3862+ OUT_BCS_BATCH(batch, (y << 16) | x);
3863+ OUT_BCS_BATCH(batch, pak_luma_pred_mode[0]);
3864+ OUT_BCS_BATCH(batch, pak_luma_pred_mode[1]);
3865+
3866+ ADVANCE_BCS_BATCH(batch);
3867+}
3868+
3869+static void
3870+gen8_mfc_vp8_pak_object_inter(VADriverContextP ctx,
3871+ struct intel_encoder_context *encoder_context,
3872+ unsigned int *msg,
3873+ int offset,
3874+ int x, int y,
3875+ struct intel_batchbuffer *batch)
3876+{
3877+ int i;
3878+
3879+ if (batch == NULL)
3880+ batch = encoder_context->base.batch;
3881+
3882+ /* only support inter_16x16 now */
3883+ assert((msg[AVC_INTER_MSG_OFFSET] & INTER_MODE_MASK) == INTER_16X16);
3884+ /* for inter_16x16, all 16 MVs should be same,
3885+ * and move mv to the vme mb start address to make sure offset is 64 bytes aligned */
3886+ msg[0] = (msg[AVC_INTER_MV_OFFSET/4] & 0xfffefffe);
3887+ for (i = 1; i < 16; i++) {
3888+ msg[i] = msg[0];
3889+ }
3890+
3891+ BEGIN_BCS_BATCH(batch, 7);
3892+
3893+ OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
3894+ OUT_BCS_BATCH(batch,
3895+ (0 << 29) | /* enable inline mv data: disable */
3896+ 64);
3897+ OUT_BCS_BATCH(batch,
3898+ offset);
3899+ OUT_BCS_BATCH(batch,
3900+ (4 << 20) | /* mv format: inter */
3901+ (0 << 18) | /* Segment ID */
3902+ (0 << 17) | /* coeff clamp: disable */
3903+ (0 << 13) | /* intra mb flag: inter mb */
3904+ (0 << 11) | /* refer picture select: last frame */
3905+ (0 << 8) | /* mb type: 16x16 */
3906+ (0 << 4) | /* mb uv mode: dc_pred */
3907+ (0 << 2) | /* skip mb flag: disable */
3908+ 0);
3909+
3910+ OUT_BCS_BATCH(batch, (y << 16) | x);
3911+
3912+ /*new mv*/
3913+ OUT_BCS_BATCH(batch, 0x8);
3914+ OUT_BCS_BATCH(batch, 0x8);
3915+
3916+ ADVANCE_BCS_BATCH(batch);
3917+}
3918+
3919+static void
3920+gen8_mfc_vp8_pak_pipeline(VADriverContextP ctx,
3921+ struct encode_state *encode_state,
3922+ struct intel_encoder_context *encoder_context,
3923+ struct intel_batchbuffer *slice_batch) /* batch that receives one PAK object command per macroblock */
3924+{ /* Walk the VME output in raster order and emit an intra or inter VP8 PAK object for every macroblock. */
3925+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
3926+ VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3927+ VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3928+ int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3929+ int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3930+ unsigned int *msg = NULL;
3931+ unsigned char *msg_ptr = NULL;
3932+ unsigned int i, offset, is_intra_frame;
3933+
3934+ is_intra_frame = !pic_param->pic_flags.bits.frame_type; /* frame_type == 0 is handled as an intra (key) frame */
3935+
3936+ dri_bo_map(vme_context->vme_output.bo , 1); /* second argument is 1: the inter path rewrites the mapped records in place */
3937+ msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
3938+
3939+ for( i = 0; i < width_in_mbs * height_in_mbs; i++) {
3940+ int h_pos = i % width_in_mbs;
3941+ int v_pos = i / width_in_mbs;
3942+ msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block); /* record of macroblock i */
3943+
3944+ if (is_intra_frame) {
3945+ gen8_mfc_vp8_pak_object_intra(ctx,
3946+ encoder_context,
3947+ msg,
3948+ h_pos, v_pos,
3949+ slice_batch);
3950+ } else {
3951+ int inter_rdo, intra_rdo;
3952+ inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
3953+ intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
3954+
3955+ if (intra_rdo < inter_rdo) { /* intra is chosen only when strictly cheaper; ties go to inter */
3956+ gen8_mfc_vp8_pak_object_intra(ctx,
3957+ encoder_context,
3958+ msg,
3959+ h_pos, v_pos,
3960+ slice_batch);
3961+ } else {
3962+ offset = i * vme_context->vme_output.size_block; /* byte offset of this record inside the VME output bo */
3963+ gen8_mfc_vp8_pak_object_inter(ctx,
3964+ encoder_context,
3965+ msg,
3966+ offset,
3967+ h_pos, v_pos,
3968+ slice_batch);
3969+ }
3970+ }
3971+ }
3972+
3973+ dri_bo_unmap(vme_context->vme_output.bo);
3974+}
3975+
3976+/*
3977+ * Fill the aux batch buffer with the VP8 PAK object commands of the frame, terminate it with MI_BATCH_BUFFER_END and return its bo; the caller must unreference the returned bo.
3978+ */
3979+static dri_bo *
3980+gen8_mfc_vp8_software_batchbuffer(VADriverContextP ctx,
3981+ struct encode_state *encode_state,
3982+ struct intel_encoder_context *encoder_context)
3983+{
3984+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3985+ struct intel_batchbuffer *batch;
3986+ dri_bo *batch_bo;
3987+
3988+ batch = mfc_context->aux_batchbuffer;
3989+ batch_bo = batch->buffer;
3990+
3991+ gen8_mfc_vp8_pak_pipeline(ctx, encode_state, encoder_context, batch); /* one PAK object per macroblock */
3992+
3993+ intel_batchbuffer_align(batch, 8);
3994+
3995+ BEGIN_BCS_BATCH(batch, 2);
3996+ OUT_BCS_BATCH(batch, 0);
3997+ OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
3998+ ADVANCE_BCS_BATCH(batch);
3999+
4000+ dri_bo_reference(batch_bo); /* take our own reference before the wrapper is freed, so the bo can be returned */
4001+ intel_batchbuffer_free(batch);
4002+ mfc_context->aux_batchbuffer = NULL; /* the aux batch buffer is consumed by this call */
4003+
4004+ return batch_bo;
4005+}
4006+
4007+static void
4008+gen8_mfc_vp8_pipeline_programing(VADriverContextP ctx,
4009+ struct encode_state *encode_state,
4010+ struct intel_encoder_context *encoder_context)
4011+{ /* Build the per-macroblock batch first, then program the picture-level state and chain to that batch. */
4012+ struct intel_batchbuffer *batch = encoder_context->base.batch;
4013+ dri_bo *slice_batch_bo;
4014+
4015+ slice_batch_bo = gen8_mfc_vp8_software_batchbuffer(ctx, encode_state, encoder_context); /* returned with a reference we must drop */
4016+
4017+ // begin programming
4018+ intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
4019+ intel_batchbuffer_emit_mi_flush(batch);
4020+
4021+ // picture level programming
4022+ gen8_mfc_vp8_pipeline_picture_programing(ctx, encode_state, encoder_context);
4023+
4024+ BEGIN_BCS_BATCH(batch, 4);
4025+ OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); /* NOTE(review): meaning of bits 8 and 0 not shown here - confirm */
4026+ OUT_BCS_RELOC(batch,
4027+ slice_batch_bo,
4028+ I915_GEM_DOMAIN_COMMAND, 0,
4029+ 0);
4030+ OUT_BCS_BATCH(batch, 0);
4031+ OUT_BCS_BATCH(batch, 0);
4032+ ADVANCE_BCS_BATCH(batch);
4033+
4034+ // end programming
4035+ intel_batchbuffer_end_atomic(batch);
4036+
4037+ dri_bo_unreference(slice_batch_bo); /* drops the reference taken in gen8_mfc_vp8_software_batchbuffer() */
4038+}
4039+
4040+static void gen8_mfc_calc_vp8_coded_buffer_size(VADriverContextP ctx,
4041+ struct encode_state *encode_state,
4042+ struct intel_encoder_context *encoder_context)
4043+{ /* Derive the coded frame size from the token statistics buffer and store it in the coded buffer segment header. */
4044+ struct i965_driver_data *i965 = i965_driver_data(ctx); /* NOTE(review): not referenced again in this function */
4045+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4046+ VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
4047+ unsigned char is_intra_frame = !pic_param->pic_flags.bits.frame_type;
4048+ unsigned int *vp8_encoding_status, i, first_partition_bytes, token_partition_bytes, vp8_coded_bytes;
4049+
4050+ int partition_num = 1 << pic_param->pic_flags.bits.num_token_partitions;
4051+
4052+ first_partition_bytes = token_partition_bytes = vp8_coded_bytes = 0;
4053+
4054+ dri_bo_map(mfc_context->vp8_state.token_statistics_bo, 0);
4055+
4056+ vp8_encoding_status = (unsigned int *)mfc_context->vp8_state.token_statistics_bo->virtual;
4057+ first_partition_bytes = (vp8_encoding_status[0] + 7) / 8; /* entry 0 is treated as a bit count and rounded up to bytes */
4058+
4059+ for (i = 1; i <= partition_num; i++) /* entries 1..partition_num: one per token partition */
4060+ token_partition_bytes += (vp8_encoding_status[i] + 7) / 8;
4061+
4062+ /* coded_bytes includes the P0~P8 partition bytes + the uncompressed data bytes + the partition size bytes in the bitstream + 3 extra bytes */
4063+ /* it seems the last partition size in the vp8 status buffer is smaller than in reality, so add 3 extra bytes */
4064+ vp8_coded_bytes = first_partition_bytes + token_partition_bytes + (3 + 7 * !!is_intra_frame) + (partition_num - 1) * 3 + 3;
4065+
4066+ dri_bo_unmap(mfc_context->vp8_state.token_statistics_bo);
4067+
4068+ dri_bo_map(mfc_context->vp8_state.final_frame_bo, 0); /* NOTE(review): mapped with flag 0 although base.size is written below - confirm the flag semantics */
4069+ struct i965_coded_buffer_segment *coded_buffer_segment = (struct i965_coded_buffer_segment *)(mfc_context->vp8_state.final_frame_bo->virtual);
4070+ coded_buffer_segment->base.size = vp8_coded_bytes;
4071+ dri_bo_unmap(mfc_context->vp8_state.final_frame_bo);
4072+}
4073+
4074+static VAStatus
4075+gen8_mfc_vp8_encode_picture(VADriverContextP ctx,
4076+ struct encode_state *encode_state,
4077+ struct intel_encoder_context *encoder_context)
4078+{ /* Run the VP8 PAK stage for one picture: init, prepare, program the BCS batch, submit, then compute the coded size. */
4079+ gen8_mfc_vp8_init(ctx, encode_state, encoder_context);
4080+ intel_mfc_vp8_prepare(ctx, encode_state, encoder_context); /* NOTE(review): any status returned here is ignored - confirm that is intended */
4081+ /* Programming the bcs pipeline */
4082+ gen8_mfc_vp8_pipeline_programing(ctx, encode_state, encoder_context);
4083+ gen8_mfc_run(ctx, encode_state, encoder_context);
4084+ gen8_mfc_calc_vp8_coded_buffer_size(ctx, encode_state, encoder_context); /* reads the statistics produced by the run above */
4085+
4086+ return VA_STATUS_SUCCESS; /* always reports success */
4087+}
32044088
32054089 static void
32064090 gen8_mfc_context_destroy(void *context)
@@ -3256,6 +4140,27 @@ gen8_mfc_context_destroy(void *context)
32564140
32574141 mfc_context->aux_batchbuffer = NULL;
32584142
4143+ dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
4144+ mfc_context->vp8_state.coeff_probs_stream_in_bo = NULL;
4145+
4146+ dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
4147+ mfc_context->vp8_state.final_frame_bo = NULL;
4148+
4149+ dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
4150+ mfc_context->vp8_state.frame_header_bo = NULL;
4151+
4152+ dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
4153+ mfc_context->vp8_state.intermediate_bo = NULL;
4154+
4155+ dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
4156+ mfc_context->vp8_state.mpc_row_store_bo = NULL;
4157+
4158+ dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
4159+ mfc_context->vp8_state.stream_out_bo = NULL;
4160+
4161+ dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
4162+ mfc_context->vp8_state.token_statistics_bo = NULL;
4163+
32594164 free(mfc_context);
32604165 }
32614166
@@ -3285,7 +4190,11 @@ static VAStatus gen8_mfc_pipeline(VADriverContextP ctx,
32854190 jpeg_init_default_qmatrix(ctx, encoder_context);
32864191 vaStatus = gen8_mfc_jpeg_encode_picture(ctx, encode_state, encoder_context);
32874192 break;
3288-
4193+
4194+ case VAProfileVP8Version0_3:
4195+ vaStatus = gen8_mfc_vp8_encode_picture(ctx, encode_state, encoder_context);
4196+ break;
4197+
32894198 default:
32904199 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
32914200 break;
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -120,6 +120,31 @@ static struct i965_kernel gen8_vme_mpeg2_kernels[] = {
120120 },
121121 };
122122
123+static const uint32_t gen8_vme_vp8_intra_frame[][4] = { /* VP8 intra-frame VME kernel, pulled in from the .g8b file */
124+#include "shaders/vme/vp8_intra_frame_gen8.g8b"
125+};
126+
127+static const uint32_t gen8_vme_vp8_inter_frame[][4] = { /* VP8 inter-frame VME kernel, pulled in from the .g8b file */
128+#include "shaders/vme/vp8_inter_frame_gen8.g8b"
129+};
130+
131+static struct i965_kernel gen8_vme_vp8_kernels[] = { /* kernel list selected for CODEC_VP8 */
132+ {
133+ "VME Intra Frame",
134+ VME_INTRA_SHADER, /* index */
135+ gen8_vme_vp8_intra_frame,
136+ sizeof(gen8_vme_vp8_intra_frame),
137+ NULL
138+ },
139+ {
140+ "VME inter Frame",
141+ VME_INTER_SHADER, /* index */
142+ gen8_vme_vp8_inter_frame,
143+ sizeof(gen8_vme_vp8_inter_frame),
144+ NULL
145+ },
146+};
147+
123148 /* only used for VME source surface state */
124149 static void
125150 gen8_vme_source_surface_state(VADriverContextP ctx,
@@ -170,16 +195,14 @@ static void
170195 gen8_vme_output_buffer_setup(VADriverContextP ctx,
171196 struct encode_state *encode_state,
172197 int index,
173- struct intel_encoder_context *encoder_context)
198+ struct intel_encoder_context *encoder_context,
199+ int is_intra,
200+ int width_in_mbs,
201+ int height_in_mbs)
174202
175203 {
176204 struct i965_driver_data *i965 = i965_driver_data(ctx);
177205 struct gen6_vme_context *vme_context = encoder_context->vme_context;
178- VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
179- VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
180- int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
181- int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
182- int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
183206
184207 vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
185208 vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
@@ -194,7 +217,7 @@ gen8_vme_output_buffer_setup(VADriverContextP ctx,
194217 * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
195218 */
196219
197- vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
220+ vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
198221 "VME output buffer",
199222 vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
200223 0x1000);
@@ -207,32 +230,57 @@ gen8_vme_output_buffer_setup(VADriverContextP ctx,
207230 }
208231
209232 static void
233+gen8_vme_avc_output_buffer_setup(VADriverContextP ctx,
234+ struct encode_state *encode_state,
235+ int index, /* forwarded unchanged to gen8_vme_output_buffer_setup() */
236+ struct intel_encoder_context *encoder_context)
237+{ /* H.264 wrapper: derive intra flag and MB dimensions from the AVC parameters, then call the shared output buffer setup. */
238+ VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
239+ VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
240+ int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I; /* decided from the first slice only */
241+ int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
242+ int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
243+
244+ gen8_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
245+
246+}
247+
248+static void
210249 gen8_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
211250 struct encode_state *encode_state,
212251 int index,
213- struct intel_encoder_context *encoder_context)
214-
252+ struct intel_encoder_context *encoder_context,
253+ int width_in_mbs,
254+ int height_in_mbs)
215255 {
216256 struct i965_driver_data *i965 = i965_driver_data(ctx);
217257 struct gen6_vme_context *vme_context = encoder_context->vme_context;
218- VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
219- int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
220- int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
221258
222259 vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
223260 vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
224261 vme_context->vme_batchbuffer.pitch = 16;
225- vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
262+ vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
226263 "VME batchbuffer",
227264 vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
228265 0x1000);
229- /*
230266 vme_context->vme_buffer_suface_setup(ctx,
231267 &vme_context->gpe_context,
232268 &vme_context->vme_batchbuffer,
233269 BINDING_TABLE_OFFSET(index),
234270 SURFACE_STATE_OFFSET(index));
235- */
271+}
272+
273+static void
274+gen8_vme_avc_output_vme_batchbuffer_setup(VADriverContextP ctx,
275+ struct encode_state *encode_state,
276+ int index, /* forwarded unchanged to gen8_vme_output_vme_batchbuffer_setup() */
277+ struct intel_encoder_context *encoder_context)
278+{ /* H.264 wrapper: read the MB dimensions from the AVC sequence parameters, then call the shared VME batchbuffer setup. */
279+ VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
280+ int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
281+ int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
282+
283+ gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
236284 }
237285
238286 static VAStatus
@@ -264,8 +312,8 @@ gen8_vme_surface_setup(VADriverContextP ctx,
264312 }
265313
266314 /* VME output */
267- gen8_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context);
268- gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
315+ gen8_vme_avc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
316+ gen8_vme_avc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
269317
270318 return VA_STATUS_SUCCESS;
271319 }
@@ -724,37 +772,12 @@ gen8_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
724772 int index,
725773 int is_intra,
726774 struct intel_encoder_context *encoder_context)
727-
728775 {
729- struct i965_driver_data *i965 = i965_driver_data(ctx);
730- struct gen6_vme_context *vme_context = encoder_context->vme_context;
731776 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
732777 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
733778 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
734779
735- vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
736- vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
737-
738- if (is_intra)
739- vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
740- else
741- vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
742- /*
743- * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
744- * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
745- * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
746- */
747-
748- vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
749- "VME output buffer",
750- vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
751- 0x1000);
752- assert(vme_context->vme_output.bo);
753- vme_context->vme_buffer_suface_setup(ctx,
754- &vme_context->gpe_context,
755- &vme_context->vme_output,
756- BINDING_TABLE_OFFSET(index),
757- SURFACE_STATE_OFFSET(index));
780+ gen8_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
758781 }
759782
760783 static void
@@ -762,26 +785,12 @@ gen8_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
762785 struct encode_state *encode_state,
763786 int index,
764787 struct intel_encoder_context *encoder_context)
765-
766788 {
767- struct i965_driver_data *i965 = i965_driver_data(ctx);
768- struct gen6_vme_context *vme_context = encoder_context->vme_context;
769789 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
770790 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
771791 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
772792
773- vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
774- vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
775- vme_context->vme_batchbuffer.pitch = 16;
776- vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
777- "VME batchbuffer",
778- vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
779- 0x1000);
780- vme_context->vme_buffer_suface_setup(ctx,
781- &vme_context->gpe_context,
782- &vme_context->vme_batchbuffer,
783- BINDING_TABLE_OFFSET(index),
784- SURFACE_STATE_OFFSET(index));
793+ gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
785794 }
786795
787796 static VAStatus
@@ -1130,6 +1139,139 @@ gen8_vme_mpeg2_pipeline(VADriverContextP ctx,
11301139 }
11311140
11321141 static void
1142+gen8_vme_vp8_output_buffer_setup(VADriverContextP ctx,
1143+ struct encode_state *encode_state,
1144+ int index, /* forwarded unchanged to gen8_vme_output_buffer_setup() */
1145+ int is_intra, /* forwarded unchanged to the shared setup */
1146+ struct intel_encoder_context *encoder_context)
1147+{ /* VP8 wrapper: compute the MB dimensions from the VP8 sequence parameters, then call the shared output buffer setup. */
1148+ VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1149+ int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16; /* frame width rounded up to whole 16-pixel macroblocks */
1150+ int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16; /* frame height rounded up to whole 16-pixel macroblocks */
1151+
1152+ gen8_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
1153+}
1154+
1155+static void
1156+gen8_vme_vp8_output_vme_batchbuffer_setup(VADriverContextP ctx,
1157+ struct encode_state *encode_state,
1158+ int index, /* forwarded unchanged to gen8_vme_output_vme_batchbuffer_setup() */
1159+ struct intel_encoder_context *encoder_context)
1160+{ /* VP8 wrapper: compute the MB dimensions from the VP8 sequence parameters, then call the shared VME batchbuffer setup. */
1161+ VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1162+ int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16; /* frame width rounded up to whole 16-pixel macroblocks */
1163+ int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16; /* frame height rounded up to whole 16-pixel macroblocks */
1164+
1165+ gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
1166+}
1167+
1168+static VAStatus
1169+gen8_vme_vp8_surface_setup(VADriverContextP ctx,
1170+ struct encode_state *encode_state,
1171+ int is_intra, /* non-zero: skip the reference surfaces */
1172+ struct intel_encoder_context *encoder_context)
1173+{ /* Set up the surfaces of the VP8 VME kernels: source at 0/4/6, references at 1/2, VME output at 3, VME batchbuffer at 5. */
1174+ struct object_surface *obj_surface;
1175+
1176+ /* Set up the surface states */
1177+ /* current picture for encoding */
1178+ obj_surface = encode_state->input_yuv_object;
1179+ gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
1180+ gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
1181+ gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
1182+
1183+ if (!is_intra) {
1184+ /* reference 0 */
1185+ obj_surface = encode_state->reference_objects[0];
1186+
1187+ if (obj_surface && obj_surface->bo != NULL) /* same NULL guard as reference 1 below, so a missing reference is skipped */
1188+ gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
1189+
1190+ /* reference 1 */
1191+ obj_surface = encode_state->reference_objects[1];
1192+
1193+ if (obj_surface && obj_surface->bo != NULL)
1194+ gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
1195+ }
1196+
1197+ /* VME output */
1198+ gen8_vme_vp8_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
1199+ gen8_vme_vp8_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
1200+
1201+ return VA_STATUS_SUCCESS; /* always reports success */
1202+}
1203+
1204+static void
1205+gen8_vme_vp8_pipeline_programing(VADriverContextP ctx,
1206+ struct encode_state *encode_state,
1207+ int is_intra, /* selects VME_INTRA_SHADER when non-zero, VME_INTER_SHADER otherwise */
1208+ struct intel_encoder_context *encoder_context)
1209+{ /* Fill the VME batchbuffer for every macroblock, then emit the GPE pipeline setup and chain to that batchbuffer. */
1210+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
1211+ struct intel_batchbuffer *batch = encoder_context->base.batch;
1212+ VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1213+ int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1214+ int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1215+ int kernel_shader = (is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER);
1216+
1217+ gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx, /* the MPEG-2 walker helper is reused for VP8 */
1218+ encode_state,
1219+ width_in_mbs, height_in_mbs,
1220+ kernel_shader,
1221+ encoder_context);
1222+
1223+ intel_batchbuffer_start_atomic(batch, 0x1000);
1224+ gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1225+ BEGIN_BATCH(batch, 4);
1226+ OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); /* NOTE(review): meaning of bits 8 and 0 not shown here - confirm */
1227+ OUT_RELOC(batch,
1228+ vme_context->vme_batchbuffer.bo,
1229+ I915_GEM_DOMAIN_COMMAND, 0,
1230+ 0);
1231+ OUT_BATCH(batch, 0);
1232+ OUT_BATCH(batch, 0);
1233+ ADVANCE_BATCH(batch);
1234+
1235+ intel_batchbuffer_end_atomic(batch);
1236+}
1237+
1238+static VAStatus gen8_vme_vp8_prepare(VADriverContextP ctx,
1239+ struct encode_state *encode_state,
1240+ struct intel_encoder_context *encoder_context)
1241+{ /* Prepare one VP8 VME pass: update the costs, set up surfaces, interface and constants, then program the media pipeline. */
1242+ VAStatus vaStatus = VA_STATUS_SUCCESS; /* never reassigned: the function always returns success */
1243+ VAEncPictureParameterBufferVP8 *pPicParameter = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
1244+ int is_intra = !pPicParameter->pic_flags.bits.frame_type; /* frame_type == 0 is handled as an intra frame */
1245+
1246+ /* update vp8 mbmv cost */
1247+ intel_vme_vp8_update_mbmv_cost(ctx, encode_state, encoder_context);
1248+
1249+ /* Set up all the memory objects */
1250+ gen8_vme_vp8_surface_setup(ctx, encode_state, is_intra, encoder_context); /* returned status is not checked */
1251+ gen8_vme_interface_setup(ctx, encode_state, encoder_context);
1252+ gen8_vme_constant_setup(ctx, encode_state, encoder_context);
1253+
1254+ /* Programming the media pipeline */
1255+ gen8_vme_vp8_pipeline_programing(ctx, encode_state, is_intra, encoder_context);
1256+
1257+ return vaStatus;
1258+}
1259+
1260+static VAStatus
1261+gen8_vme_vp8_pipeline(VADriverContextP ctx,
1262+ VAProfile profile, /* not used in this function */
1263+ struct encode_state *encode_state,
1264+ struct intel_encoder_context *encoder_context)
1265+{ /* VP8 entry point installed as encoder_context->vme_pipeline: init, prepare, run, stop. */
1266+ gen8_vme_media_init(ctx, encoder_context);
1267+ gen8_vme_vp8_prepare(ctx, encode_state, encoder_context); /* returned status is not checked */
1268+ gen8_vme_run(ctx, encode_state, encoder_context);
1269+ gen8_vme_stop(ctx, encode_state, encoder_context);
1270+
1271+ return VA_STATUS_SUCCESS; /* always reports success */
1272+}
1273+
1274+static void
11331275 gen8_vme_context_destroy(void *context)
11341276 {
11351277 struct gen6_vme_context *vme_context = context;
@@ -1180,6 +1322,12 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
11801322 encoder_context->vme_context_destroy = NULL;
11811323 break;
11821324
1325+ case CODEC_VP8:
1326+ vme_kernel_list = gen8_vme_vp8_kernels;
1327+ encoder_context->vme_pipeline = gen8_vme_vp8_pipeline;
1328+ i965_kernel_num = sizeof(gen8_vme_vp8_kernels) / sizeof(struct i965_kernel);
1329+ break;
1330+
11831331 default:
11841332 /* never get here */
11851333 assert(0);
--- a/src/gen9_mfc.c
+++ b/src/gen9_mfc.c
@@ -42,7 +42,6 @@
4242 #include "gen6_mfc.h"
4343 #include "gen6_vme.h"
4444 #include "intel_media.h"
45-#include "vp8_probs.h"
4645
4746 #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
4847 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
@@ -177,17 +176,8 @@ gen9_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
177176
178177 vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
179178
180- /* the DW4-5 is the MFX upper bound */
181- if (encoder_context->codec == CODEC_VP8) {
182- OUT_BCS_RELOC(batch,
183- mfc_context->mfc_indirect_pak_bse_object.bo,
184- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
185- mfc_context->mfc_indirect_pak_bse_object.end_offset);
186- OUT_BCS_BATCH(batch, 0);
187- } else {
188- OUT_BCS_BATCH(batch, 0);
189- OUT_BCS_BATCH(batch, 0);
190- }
179+ OUT_BCS_BATCH(batch, 0);
180+ OUT_BCS_BATCH(batch, 0);
191181
192182 /* the DW6-10 is for MFX Indirect MV Object Base Address */
193183 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
@@ -2366,873 +2356,6 @@ gen9_mfc_mpeg2_encode_picture(VADriverContextP ctx,
23662356 return VA_STATUS_SUCCESS;
23672357 }
23682358
2369-static void vp8_enc_state_init(struct gen6_mfc_context *mfc_context,
2370- VAEncPictureParameterBufferVP8 *pic_param,
2371- VAQMatrixBufferVP8 *q_matrix)
2372-{
2373-
2374- int is_key_frame = !pic_param->pic_flags.bits.frame_type;
2375- unsigned char *coeff_probs_stream_in_buffer;
2376-
2377- mfc_context->vp8_state.frame_header_lf_update_pos = 0;
2378- mfc_context->vp8_state.frame_header_qindex_update_pos = 0;
2379- mfc_context->vp8_state.frame_header_token_update_pos = 0;
2380- mfc_context->vp8_state.frame_header_bin_mv_upate_pos = 0;
2381-
2382- mfc_context->vp8_state.prob_skip_false = 255;
2383- memset(mfc_context->vp8_state.mb_segment_tree_probs, 0, sizeof(mfc_context->vp8_state.mb_segment_tree_probs));
2384- memcpy(mfc_context->vp8_state.mv_probs, vp8_default_mv_context, sizeof(mfc_context->vp8_state.mv_probs));
2385-
2386- if (is_key_frame) {
2387- memcpy(mfc_context->vp8_state.y_mode_probs, vp8_kf_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
2388- memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_kf_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
2389-
2390- mfc_context->vp8_state.prob_intra = 255;
2391- mfc_context->vp8_state.prob_last = 128;
2392- mfc_context->vp8_state.prob_gf = 128;
2393- } else {
2394- memcpy(mfc_context->vp8_state.y_mode_probs, vp8_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
2395- memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
2396-
2397- mfc_context->vp8_state.prob_intra = 63;
2398- mfc_context->vp8_state.prob_last = 128;
2399- mfc_context->vp8_state.prob_gf = 128;
2400- }
2401-
2402- mfc_context->vp8_state.prob_skip_false = vp8_base_skip_false_prob[q_matrix->quantization_index[0]];
2403-
2404- dri_bo_map(mfc_context->vp8_state.coeff_probs_stream_in_bo, 1);
2405- coeff_probs_stream_in_buffer = (unsigned char *)mfc_context->vp8_state.coeff_probs_stream_in_bo->virtual;
2406- assert(coeff_probs_stream_in_buffer);
2407- memcpy(coeff_probs_stream_in_buffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
2408- dri_bo_unmap(mfc_context->vp8_state.coeff_probs_stream_in_bo);
2409-}
2410-
2411-static void vp8_enc_state_update(struct gen6_mfc_context *mfc_context,
2412- VAQMatrixBufferVP8 *q_matrix)
2413-{
2414-
2415- /*some other probabilities need to be updated*/
2416-}
2417-
2418-extern void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param,
2419- VAEncPictureParameterBufferVP8 *pic_param,
2420- VAQMatrixBufferVP8 *q_matrix,
2421- struct gen6_mfc_context *mfc_context);
2422-
2423-static void vp8_enc_frame_header_binarize(struct encode_state *encode_state,
2424- struct gen6_mfc_context *mfc_context)
2425-{
2426- VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
2427- VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2428- VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
2429- unsigned char *frame_header_buffer;
2430-
2431- binarize_vp8_frame_header(seq_param, pic_param, q_matrix, mfc_context);
2432-
2433- dri_bo_map(mfc_context->vp8_state.frame_header_bo, 1);
2434- frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual;
2435- assert(frame_header_buffer);
2436- memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8);
2437- dri_bo_unmap(mfc_context->vp8_state.frame_header_bo);
2438-}
2439-
2440-#define MAX_VP8_FRAME_HEADER_SIZE 0x2000
2441-#define VP8_TOKEN_STATISTICS_BUFFER_SIZE 0x2000
2442-
2443-static void gen9_mfc_vp8_init(VADriverContextP ctx,
2444- struct encode_state *encode_state,
2445- struct intel_encoder_context *encoder_context)
2446-{
2447- struct i965_driver_data *i965 = i965_driver_data(ctx);
2448- struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2449- dri_bo *bo;
2450- int i;
2451- int width_in_mbs = 0;
2452- int height_in_mbs = 0;
2453- int slice_batchbuffer_size;
2454-
2455- VAEncSequenceParameterBufferVP8 *pSequenceParameter = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
2456- VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2457- VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
2458-
2459- width_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
2460- height_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
2461-
2462- slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
2463- (SLICE_HEADER + SLICE_TAIL);
2464-
2465- /*Encode common setup for MFC*/
2466- dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2467- mfc_context->post_deblocking_output.bo = NULL;
2468-
2469- dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2470- mfc_context->pre_deblocking_output.bo = NULL;
2471-
2472- dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2473- mfc_context->uncompressed_picture_source.bo = NULL;
2474-
2475- dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
2476- mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2477-
2478- for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2479- if ( mfc_context->direct_mv_buffers[i].bo != NULL)
2480- dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2481- mfc_context->direct_mv_buffers[i].bo = NULL;
2482- }
2483-
2484- for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2485- if (mfc_context->reference_surfaces[i].bo != NULL)
2486- dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2487- mfc_context->reference_surfaces[i].bo = NULL;
2488- }
2489-
2490- dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2491- bo = dri_bo_alloc(i965->intel.bufmgr,
2492- "Buffer",
2493- width_in_mbs * 64,
2494- 64);
2495- assert(bo);
2496- mfc_context->intra_row_store_scratch_buffer.bo = bo;
2497-
2498- dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2499- bo = dri_bo_alloc(i965->intel.bufmgr,
2500- "Buffer",
2501- width_in_mbs * height_in_mbs * 16,
2502- 64);
2503- assert(bo);
2504- mfc_context->macroblock_status_buffer.bo = bo;
2505-
2506- dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2507- bo = dri_bo_alloc(i965->intel.bufmgr,
2508- "Buffer",
2509- 4 * width_in_mbs * 64, /* 4 * width_in_mbs * 64 */
2510- 64);
2511- assert(bo);
2512- mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2513-
2514- dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2515- bo = dri_bo_alloc(i965->intel.bufmgr,
2516- "Buffer",
2517- 2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
2518- 0x1000);
2519- assert(bo);
2520- mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2521-
2522- dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2523- mfc_context->mfc_batchbuffer_surface.bo = NULL;
2524-
2525- dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2526- mfc_context->aux_batchbuffer_surface.bo = NULL;
2527-
2528- if (mfc_context->aux_batchbuffer)
2529- intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2530-
2531- mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
2532- mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
2533- dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
2534- mfc_context->aux_batchbuffer_surface.pitch = 16;
2535- mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
2536- mfc_context->aux_batchbuffer_surface.size_block = 16;
2537-
2538- i965_gpe_context_init(ctx, &mfc_context->gpe_context);
2539-
2540- /* alloc vp8 encoding buffers*/
2541- dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
2542- bo = dri_bo_alloc(i965->intel.bufmgr,
2543- "Buffer",
2544- MAX_VP8_FRAME_HEADER_SIZE,
2545- 0x1000);
2546- assert(bo);
2547- mfc_context->vp8_state.frame_header_bo = bo;
2548-
2549- mfc_context->vp8_state.intermediate_buffer_max_size = width_in_mbs * height_in_mbs * 256 * 9;
2550- for(i = 0; i < 8; i++) {
2551- mfc_context->vp8_state.intermediate_partition_offset[i] = width_in_mbs * height_in_mbs * 256 * (i + 1);
2552- }
2553- dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
2554- bo = dri_bo_alloc(i965->intel.bufmgr,
2555- "Buffer",
2556- mfc_context->vp8_state.intermediate_buffer_max_size,
2557- 0x1000);
2558- assert(bo);
2559- mfc_context->vp8_state.intermediate_bo = bo;
2560-
2561- dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
2562- bo = dri_bo_alloc(i965->intel.bufmgr,
2563- "Buffer",
2564- width_in_mbs * height_in_mbs * 16,
2565- 0x1000);
2566- assert(bo);
2567- mfc_context->vp8_state.stream_out_bo = bo;
2568-
2569- dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
2570- bo = dri_bo_alloc(i965->intel.bufmgr,
2571- "Buffer",
2572- sizeof(vp8_default_coef_probs),
2573- 0x1000);
2574- assert(bo);
2575- mfc_context->vp8_state.coeff_probs_stream_in_bo = bo;
2576-
2577- dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
2578- bo = dri_bo_alloc(i965->intel.bufmgr,
2579- "Buffer",
2580- VP8_TOKEN_STATISTICS_BUFFER_SIZE,
2581- 0x1000);
2582- assert(bo);
2583- mfc_context->vp8_state.token_statistics_bo = bo;
2584-
2585- dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
2586- bo = dri_bo_alloc(i965->intel.bufmgr,
2587- "Buffer",
2588- width_in_mbs * 16 * 64,
2589- 0x1000);
2590- assert(bo);
2591- mfc_context->vp8_state.mpc_row_store_bo = bo;
2592-
2593- vp8_enc_state_init(mfc_context, pic_param, q_matrix);
2594- vp8_enc_frame_header_binarize(encode_state, mfc_context);
2595-}
2596-
2597-static VAStatus
2598-intel_mfc_vp8_prepare(VADriverContextP ctx,
2599- struct encode_state *encode_state,
2600- struct intel_encoder_context *encoder_context)
2601-{
2602- struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2603- struct object_surface *obj_surface;
2604- struct object_buffer *obj_buffer;
2605- struct i965_coded_buffer_segment *coded_buffer_segment;
2606- VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2607- VAStatus vaStatus = VA_STATUS_SUCCESS;
2608- dri_bo *bo;
2609- int i;
2610-
2611- /* reconstructed surface */
2612- obj_surface = encode_state->reconstructed_object;
2613- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2614- if (pic_param->loop_filter_level[0] == 0) {
2615- mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2616- dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2617- } else {
2618- mfc_context->post_deblocking_output.bo = obj_surface->bo;
2619- dri_bo_reference(mfc_context->post_deblocking_output.bo);
2620- }
2621-
2622- mfc_context->surface_state.width = obj_surface->orig_width;
2623- mfc_context->surface_state.height = obj_surface->orig_height;
2624- mfc_context->surface_state.w_pitch = obj_surface->width;
2625- mfc_context->surface_state.h_pitch = obj_surface->height;
2626-
2627- /* set vp8 reference frames */
2628- for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2629- obj_surface = encode_state->reference_objects[i];
2630-
2631- if (obj_surface && obj_surface->bo) {
2632- mfc_context->reference_surfaces[i].bo = obj_surface->bo;
2633- dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2634- } else {
2635- mfc_context->reference_surfaces[i].bo = NULL;
2636- }
2637- }
2638-
2639- /* input YUV surface */
2640- obj_surface = encode_state->input_yuv_object;
2641- mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2642- dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2643-
2644- /* coded buffer */
2645- obj_buffer = encode_state->coded_buf_object;
2646- bo = obj_buffer->buffer_store->bo;
2647- mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2648- mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2649- mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2650- dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2651-
2652- dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
2653- mfc_context->vp8_state.final_frame_bo = mfc_context->mfc_indirect_pak_bse_object.bo;
2654- mfc_context->vp8_state.final_frame_byte_offset = I965_CODEDBUFFER_HEADER_SIZE;
2655- dri_bo_reference(mfc_context->vp8_state.final_frame_bo);
2656-
2657- /* set the internal flag to 0 to indicate the coded size is unknown */
2658- dri_bo_map(bo, 1);
2659- coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2660- coded_buffer_segment->mapped = 0;
2661- coded_buffer_segment->codec = encoder_context->codec;
2662- dri_bo_unmap(bo);
2663-
2664- return vaStatus;
2665-}
2666-
2667-static void
2668-gen9_mfc_vp8_encoder_cfg(VADriverContextP ctx,
2669- struct encode_state *encode_state,
2670- struct intel_encoder_context *encoder_context)
2671-{
2672- struct intel_batchbuffer *batch = encoder_context->base.batch;
2673- struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2674- VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
2675- VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2676-
2677- BEGIN_BCS_BATCH(batch, 30);
2678- OUT_BCS_BATCH(batch, MFX_VP8_ENCODER_CFG | (30 - 2)); /* SKL should be 31-2 ? */
2679-
2680- OUT_BCS_BATCH(batch,
2681- 0 << 9 | /* compressed bitstream output disable */
2682- 1 << 7 | /* disable per-segment delta qindex and loop filter in RC */
2683- 1 << 6 | /* RC initial pass */
2684- 0 << 4 | /* upate segment feature date flag */
2685- 1 << 3 | /* bitstream statistics output enable */
2686- 1 << 2 | /* token statistics output enable */
2687- 0 << 1 | /* final bitstream output disable */
2688- 0 << 0); /*DW1*/
2689-
2690- OUT_BCS_BATCH(batch, 0); /*DW2*/
2691-
2692- OUT_BCS_BATCH(batch,
2693- 0xfff << 16 | /* max intra mb bit count limit */
2694- 0xfff << 0 /* max inter mb bit count limit */
2695- ); /*DW3*/
2696-
2697- OUT_BCS_BATCH(batch, 0); /*DW4*/
2698- OUT_BCS_BATCH(batch, 0); /*DW5*/
2699- OUT_BCS_BATCH(batch, 0); /*DW6*/
2700- OUT_BCS_BATCH(batch, 0); /*DW7*/
2701- OUT_BCS_BATCH(batch, 0); /*DW8*/
2702- OUT_BCS_BATCH(batch, 0); /*DW9*/
2703- OUT_BCS_BATCH(batch, 0); /*DW10*/
2704- OUT_BCS_BATCH(batch, 0); /*DW11*/
2705- OUT_BCS_BATCH(batch, 0); /*DW12*/
2706- OUT_BCS_BATCH(batch, 0); /*DW13*/
2707- OUT_BCS_BATCH(batch, 0); /*DW14*/
2708- OUT_BCS_BATCH(batch, 0); /*DW15*/
2709- OUT_BCS_BATCH(batch, 0); /*DW16*/
2710- OUT_BCS_BATCH(batch, 0); /*DW17*/
2711- OUT_BCS_BATCH(batch, 0); /*DW18*/
2712- OUT_BCS_BATCH(batch, 0); /*DW19*/
2713- OUT_BCS_BATCH(batch, 0); /*DW20*/
2714- OUT_BCS_BATCH(batch, 0); /*DW21*/
2715-
2716- OUT_BCS_BATCH(batch,
2717- pic_param->pic_flags.bits.show_frame << 23 |
2718- pic_param->pic_flags.bits.version << 20
2719- ); /*DW22*/
2720-
2721- OUT_BCS_BATCH(batch,
2722- (seq_param->frame_height_scale << 14 | seq_param->frame_height) << 16 |
2723- (seq_param->frame_width_scale << 14 | seq_param->frame_width) << 0
2724- );
2725-
2726- /*DW24*/
2727- OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bit_count); /* frame header bit count */
2728-
2729- /*DW25*/
2730- OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_qindex_update_pos); /* frame header bin buffer qindex update pointer */
2731-
2732- /*DW26*/
2733- OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_lf_update_pos); /* frame header bin buffer loop filter update pointer*/
2734-
2735- /*DW27*/
2736- OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_token_update_pos); /* frame header bin buffer token update pointer */
2737-
2738- /*DW28*/
2739- OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bin_mv_upate_pos); /*frame header bin buffer mv update pointer */
2740-
2741- /*DW29*/
2742- OUT_BCS_BATCH(batch, 0);
2743-
2744- ADVANCE_BCS_BATCH(batch);
2745-}
2746-
2747-static void
2748-gen9_mfc_vp8_pic_state(VADriverContextP ctx,
2749- struct encode_state *encode_state,
2750- struct intel_encoder_context *encoder_context)
2751-{
2752- struct intel_batchbuffer *batch = encoder_context->base.batch;
2753- struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2754- VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
2755- VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2756- VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
2757- int i, j, log2num;
2758-
2759- assert(pic_param->pic_flags.bits.num_token_partitions > 0);
2760- assert(pic_param->pic_flags.bits.num_token_partitions < 9);
2761- log2num = (int)log2(pic_param->pic_flags.bits.num_token_partitions);
2762-
2763- /*update mode and token probs*/
2764- vp8_enc_state_update(mfc_context, q_matrix);
2765-
2766- BEGIN_BCS_BATCH(batch, 38);
2767- OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2768- OUT_BCS_BATCH(batch,
2769- (ALIGN(seq_param->frame_height, 16) / 16 - 1) << 16 |
2770- (ALIGN(seq_param->frame_width, 16) / 16 - 1) << 0);
2771-
2772- OUT_BCS_BATCH(batch,
2773- log2num << 24 |
2774- pic_param->sharpness_level << 16 |
2775- pic_param->pic_flags.bits.sign_bias_alternate << 13 |
2776- pic_param->pic_flags.bits.sign_bias_golden << 12 |
2777- pic_param->pic_flags.bits.loop_filter_adj_enable << 11 |
2778- pic_param->pic_flags.bits.mb_no_coeff_skip << 10 |
2779- pic_param->pic_flags.bits.update_mb_segmentation_map << 9 |
2780- pic_param->pic_flags.bits.segmentation_enabled << 8 |
2781- !pic_param->pic_flags.bits.frame_type << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2782- (pic_param->pic_flags.bits.version / 2) << 4 |
2783- (pic_param->pic_flags.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2784- !!pic_param->pic_flags.bits.version << 0); /* version 0: 6 tap */
2785-
2786- OUT_BCS_BATCH(batch,
2787- pic_param->loop_filter_level[3] << 24 |
2788- pic_param->loop_filter_level[2] << 16 |
2789- pic_param->loop_filter_level[1] << 8 |
2790- pic_param->loop_filter_level[0] << 0);
2791-
2792- OUT_BCS_BATCH(batch,
2793- q_matrix->quantization_index[3] << 24 |
2794- q_matrix->quantization_index[2] << 16 |
2795- q_matrix->quantization_index[1] << 8 |
2796- q_matrix->quantization_index[0] << 0);
2797-
2798- OUT_BCS_BATCH(batch,
2799- ((unsigned short)(q_matrix->quantization_index_delta[4]) >> 15) << 28 |
2800- abs(q_matrix->quantization_index_delta[4]) << 24 |
2801- ((unsigned short)(q_matrix->quantization_index_delta[3]) >> 15) << 20 |
2802- abs(q_matrix->quantization_index_delta[3]) << 16 |
2803- ((unsigned short)(q_matrix->quantization_index_delta[2]) >> 15) << 12 |
2804- abs(q_matrix->quantization_index_delta[2]) << 8 |
2805- ((unsigned short)(q_matrix->quantization_index_delta[1]) >> 15) << 4 |
2806- abs(q_matrix->quantization_index_delta[1]) << 0);
2807-
2808- OUT_BCS_BATCH(batch,
2809- ((unsigned short)(q_matrix->quantization_index_delta[0]) >> 15) << 4 |
2810- abs(q_matrix->quantization_index_delta[0]) << 0);
2811-
2812- OUT_BCS_BATCH(batch,
2813- pic_param->clamp_qindex_high << 8 |
2814- pic_param->clamp_qindex_low << 0);
2815-
2816- for (i = 8; i < 19; i++) {
2817- OUT_BCS_BATCH(batch, 0xffffffff);
2818- }
2819-
2820- OUT_BCS_BATCH(batch,
2821- mfc_context->vp8_state.mb_segment_tree_probs[2] << 16 |
2822- mfc_context->vp8_state.mb_segment_tree_probs[1] << 8 |
2823- mfc_context->vp8_state.mb_segment_tree_probs[0] << 0);
2824-
2825- OUT_BCS_BATCH(batch,
2826- mfc_context->vp8_state.prob_skip_false << 24 |
2827- mfc_context->vp8_state.prob_intra << 16 |
2828- mfc_context->vp8_state.prob_last << 8 |
2829- mfc_context->vp8_state.prob_gf << 0);
2830-
2831- OUT_BCS_BATCH(batch,
2832- mfc_context->vp8_state.y_mode_probs[3] << 24 |
2833- mfc_context->vp8_state.y_mode_probs[2] << 16 |
2834- mfc_context->vp8_state.y_mode_probs[1] << 8 |
2835- mfc_context->vp8_state.y_mode_probs[0] << 0);
2836-
2837- OUT_BCS_BATCH(batch,
2838- mfc_context->vp8_state.uv_mode_probs[2] << 16 |
2839- mfc_context->vp8_state.uv_mode_probs[1] << 8 |
2840- mfc_context->vp8_state.uv_mode_probs[0] << 0);
2841-
2842- /* MV update value, DW23-DW32 */
2843- for (i = 0; i < 2; i++) {
2844- for (j = 0; j < 20; j += 4) {
2845- OUT_BCS_BATCH(batch,
2846- (j + 3 == 19 ? 0 : mfc_context->vp8_state.mv_probs[i][j + 3]) << 24 |
2847- mfc_context->vp8_state.mv_probs[i][j + 2] << 16 |
2848- mfc_context->vp8_state.mv_probs[i][j + 1] << 8 |
2849- mfc_context->vp8_state.mv_probs[i][j + 0] << 0);
2850- }
2851- }
2852-
2853- OUT_BCS_BATCH(batch,
2854- (pic_param->ref_lf_delta[3] & 0x7f) << 24 |
2855- (pic_param->ref_lf_delta[2] & 0x7f) << 16 |
2856- (pic_param->ref_lf_delta[1] & 0x7f) << 8 |
2857- (pic_param->ref_lf_delta[0] & 0x7f) << 0);
2858-
2859- OUT_BCS_BATCH(batch,
2860- (pic_param->mode_lf_delta[3] & 0x7f) << 24 |
2861- (pic_param->mode_lf_delta[2] & 0x7f) << 16 |
2862- (pic_param->mode_lf_delta[1] & 0x7f) << 8 |
2863- (pic_param->mode_lf_delta[0] & 0x7f) << 0);
2864-
2865- OUT_BCS_BATCH(batch, 0);
2866- OUT_BCS_BATCH(batch, 0);
2867- OUT_BCS_BATCH(batch, 0);
2868-
2869- ADVANCE_BCS_BATCH(batch);
2870-}
2871-
2872-#define OUT_VP8_BUFFER(bo, offset) \
2873- if (bo) \
2874- OUT_BCS_RELOC(batch, \
2875- bo, \
2876- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, \
2877- offset); \
2878- else \
2879- OUT_BCS_BATCH(batch, 0); \
2880- OUT_BCS_BATCH(batch, 0); \
2881- OUT_BCS_BATCH(batch, 0);
2882-
2883-static void
2884-gen9_mfc_vp8_bsp_buf_base_addr_state(VADriverContextP ctx,
2885- struct encode_state *encode_state,
2886- struct intel_encoder_context *encoder_context)
2887-{
2888- struct intel_batchbuffer *batch = encoder_context->base.batch;
2889- struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2890-
2891- BEGIN_BCS_BATCH(batch, 32);
2892- OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2));
2893-
2894- OUT_VP8_BUFFER(mfc_context->vp8_state.frame_header_bo, 0);
2895-
2896- OUT_VP8_BUFFER(mfc_context->vp8_state.intermediate_bo, 0);
2897- OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[0]);
2898- OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[1]);
2899- OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[2]);
2900- OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[3]);
2901- OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[4]);
2902- OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[5]);
2903- OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[6]);
2904- OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[7]);
2905- OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_buffer_max_size);
2906-
2907- OUT_VP8_BUFFER(mfc_context->vp8_state.final_frame_bo, I965_CODEDBUFFER_HEADER_SIZE);
2908- OUT_BCS_BATCH(batch, 0);
2909-
2910- OUT_VP8_BUFFER(mfc_context->vp8_state.stream_out_bo, 0);
2911- OUT_VP8_BUFFER(mfc_context->vp8_state.coeff_probs_stream_in_bo, 0);
2912- OUT_VP8_BUFFER(mfc_context->vp8_state.token_statistics_bo, 0);
2913- OUT_VP8_BUFFER(mfc_context->vp8_state.mpc_row_store_bo, 0);
2914-
2915- ADVANCE_BCS_BATCH(batch);
2916-}
2917-
2918-static void
2919-gen9_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx,
2920- struct encode_state *encode_state,
2921- struct intel_encoder_context *encoder_context)
2922-{
2923- struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2924-
2925- mfc_context->pipe_mode_select(ctx, MFX_FORMAT_VP8, encoder_context);
2926- mfc_context->set_surface_state(ctx, encoder_context);
2927- mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2928- gen9_mfc_pipe_buf_addr_state(ctx, encoder_context);
2929- gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2930- gen9_mfc_vp8_bsp_buf_base_addr_state(ctx, encode_state, encoder_context);
2931- gen9_mfc_vp8_pic_state(ctx, encode_state,encoder_context);
2932- gen9_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context);
2933-}
2934-
2935-static const unsigned char
2936-vp8_intra_mb_mode_map[VME_MB_INTRA_MODE_COUNT] = {
2937- PAK_V_PRED,
2938- PAK_H_PRED,
2939- PAK_DC_PRED,
2940- PAK_TM_PRED
2941-};
2942-
2943-static const unsigned char
2944-vp8_intra_block_mode_map[VME_B_INTRA_MODE_COUNT] = {
2945- PAK_B_VE_PRED,
2946- PAK_B_HE_PRED,
2947- PAK_B_DC_PRED,
2948- PAK_B_LD_PRED,
2949- PAK_B_RD_PRED,
2950- PAK_B_VR_PRED,
2951- PAK_B_HD_PRED,
2952- PAK_B_VL_PRED,
2953- PAK_B_HU_PRED
2954-};
2955-
2956-static int inline gen9_mfc_vp8_intra_mb_mode_map(unsigned int vme_pred_mode, int is_luma_4x4)
2957-{
2958- unsigned int i, j, pak_pred_mode = 0;
2959- unsigned int vme_sub_blocks_pred_mode[8], pak_sub_blocks_pred_mode[8]; /* 8 blocks's intra mode */
2960-
2961- if (!is_luma_4x4) {
2962- pak_pred_mode = vp8_intra_mb_mode_map[vme_pred_mode & 0x3];
2963- } else {
2964- for (i = 0; i < 8; i++) {
2965- vme_sub_blocks_pred_mode[i] = ((vme_pred_mode >> (4 * i)) & 0xf);
2966- assert(vme_sub_blocks_pred_mode[i] < VME_B_INTRA_MODE_COUNT);
2967- pak_sub_blocks_pred_mode[i] = vp8_intra_block_mode_map[vme_sub_blocks_pred_mode[i]];
2968- pak_pred_mode |= (pak_sub_blocks_pred_mode[i] << (4 * i));
2969- }
2970- }
2971-
2972- return pak_pred_mode;
2973-}
2974-static void
2975-gen9_mfc_vp8_pak_object_intra(VADriverContextP ctx,
2976- struct intel_encoder_context *encoder_context,
2977- unsigned int *msg,
2978- int x, int y,
2979- struct intel_batchbuffer *batch)
2980-{
2981- unsigned int vme_intra_mb_mode, vme_chroma_pred_mode;
2982- unsigned int pak_intra_mb_mode, pak_chroma_pred_mode;
2983- unsigned int vme_luma_pred_mode[2], pak_luma_pred_mode[2];
2984-
2985- if (batch == NULL)
2986- batch = encoder_context->base.batch;
2987-
2988- vme_intra_mb_mode = ((msg[0] & 0x30) >> 4);
2989- assert((vme_intra_mb_mode == 0) || (vme_intra_mb_mode == 2)); //vp8 only support intra_16x16 and intra_4x4
2990- pak_intra_mb_mode = (vme_intra_mb_mode >> 1);
2991-
2992- vme_luma_pred_mode[0] = msg[1];
2993- vme_luma_pred_mode[1] = msg[2];
2994- vme_chroma_pred_mode = msg[3] & 0x3;
2995-
2996- pak_luma_pred_mode[0] = gen9_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[0], pak_intra_mb_mode);
2997- pak_luma_pred_mode[1] = gen9_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[1], pak_intra_mb_mode);
2998- pak_chroma_pred_mode = gen9_mfc_vp8_intra_mb_mode_map(vme_chroma_pred_mode, 0);
2999-
3000- BEGIN_BCS_BATCH(batch, 7);
3001-
3002- OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
3003- OUT_BCS_BATCH(batch, 0);
3004- OUT_BCS_BATCH(batch, 0);
3005- OUT_BCS_BATCH(batch,
3006- (0 << 20) | /* mv format: intra mb */
3007- (0 << 18) | /* Segment ID */
3008- (0 << 17) | /* disable coeff clamp */
3009- (1 << 13) | /* intra mb flag */
3010- (0 << 11) | /* refer picture select: last frame */
3011- (pak_intra_mb_mode << 8) | /* mb type */
3012- (pak_chroma_pred_mode << 4) | /* mb uv mode */
3013- (0 << 2) | /* skip mb flag: disable */
3014- 0);
3015-
3016- OUT_BCS_BATCH(batch, (y << 16) | x);
3017- OUT_BCS_BATCH(batch, pak_luma_pred_mode[0]);
3018- OUT_BCS_BATCH(batch, pak_luma_pred_mode[1]);
3019-
3020- ADVANCE_BCS_BATCH(batch);
3021-}
3022-
3023-static void
3024-gen9_mfc_vp8_pak_object_inter(VADriverContextP ctx,
3025- struct intel_encoder_context *encoder_context,
3026- unsigned int *msg,
3027- int offset,
3028- int x, int y,
3029- struct intel_batchbuffer *batch)
3030-{
3031- int i;
3032-
3033- if (batch == NULL)
3034- batch = encoder_context->base.batch;
3035-
3036- /* only support inter_16x16 now */
3037- assert((msg[AVC_INTER_MSG_OFFSET] & INTER_MODE_MASK) == INTER_16X16);
3038- /* for inter_16x16, all 16 MVs should be same,
3039- * and move mv to the vme mb start address to make sure offset is 64 bytes aligned */
3040- msg[0] = (msg[AVC_INTER_MV_OFFSET/4] & 0xfffefffe);
3041- for (i = 1; i < 16; i++) {
3042- msg[i] = msg[0];
3043- }
3044-
3045- BEGIN_BCS_BATCH(batch, 7);
3046-
3047- OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
3048- OUT_BCS_BATCH(batch,
3049- (0 << 29) | /* enable inline mv data: disable */
3050- 64);
3051- OUT_BCS_BATCH(batch,
3052- offset);
3053- OUT_BCS_BATCH(batch,
3054- (4 << 20) | /* mv format: inter */
3055- (0 << 18) | /* Segment ID */
3056- (0 << 17) | /* coeff clamp: disable */
3057- (0 << 13) | /* intra mb flag: inter mb */
3058- (0 << 11) | /* refer picture select: last frame */
3059- (0 << 8) | /* mb type: 16x16 */
3060- (0 << 4) | /* mb uv mode: dc_pred */
3061- (0 << 2) | /* skip mb flag: disable */
3062- 0);
3063-
3064- OUT_BCS_BATCH(batch, (y << 16) | x);
3065-
3066- /*new mv*/
3067- OUT_BCS_BATCH(batch, 0x8);
3068- OUT_BCS_BATCH(batch, 0x8);
3069-
3070- ADVANCE_BCS_BATCH(batch);
3071-}
3072-
3073-static void
3074-gen9_mfc_vp8_pak_pipeline(VADriverContextP ctx,
3075- struct encode_state *encode_state,
3076- struct intel_encoder_context *encoder_context,
3077- struct intel_batchbuffer *slice_batch)
3078-{
3079- struct gen6_vme_context *vme_context = encoder_context->vme_context;
3080- VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3081- VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3082- int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3083- int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3084- unsigned int *msg = NULL;
3085- unsigned char *msg_ptr = NULL;
3086- unsigned int i, offset, is_intra_frame;
3087-
3088- is_intra_frame = !pic_param->pic_flags.bits.frame_type;
3089-
3090- dri_bo_map(vme_context->vme_output.bo , 1);
3091- msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
3092-
3093- for( i = 0; i < width_in_mbs * height_in_mbs; i++) {
3094- int h_pos = i % width_in_mbs;
3095- int v_pos = i / width_in_mbs;
3096- msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
3097-
3098- if (is_intra_frame) {
3099- gen9_mfc_vp8_pak_object_intra(ctx,
3100- encoder_context,
3101- msg,
3102- h_pos, v_pos,
3103- slice_batch);
3104- } else {
3105- int inter_rdo, intra_rdo;
3106- inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
3107- intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
3108-
3109- if (intra_rdo < inter_rdo) {
3110- gen9_mfc_vp8_pak_object_intra(ctx,
3111- encoder_context,
3112- msg,
3113- h_pos, v_pos,
3114- slice_batch);
3115- } else {
3116- offset = i * vme_context->vme_output.size_block;
3117- gen9_mfc_vp8_pak_object_inter(ctx,
3118- encoder_context,
3119- msg,
3120- offset,
3121- h_pos, v_pos,
3122- slice_batch);
3123- }
3124- }
3125- }
3126-
3127- dri_bo_unmap(vme_context->vme_output.bo);
3128-}
3129-
3130-/*
3131- * A batch buffer for vp8 pak object commands
3132- */
3133-static dri_bo *
3134-gen9_mfc_vp8_software_batchbuffer(VADriverContextP ctx,
3135- struct encode_state *encode_state,
3136- struct intel_encoder_context *encoder_context)
3137-{
3138- struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3139- struct intel_batchbuffer *batch;
3140- dri_bo *batch_bo;
3141-
3142- batch = mfc_context->aux_batchbuffer;
3143- batch_bo = batch->buffer;
3144-
3145- gen9_mfc_vp8_pak_pipeline(ctx, encode_state, encoder_context, batch);
3146-
3147- intel_batchbuffer_align(batch, 8);
3148-
3149- BEGIN_BCS_BATCH(batch, 2);
3150- OUT_BCS_BATCH(batch, 0);
3151- OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
3152- ADVANCE_BCS_BATCH(batch);
3153-
3154- dri_bo_reference(batch_bo);
3155- intel_batchbuffer_free(batch);
3156- mfc_context->aux_batchbuffer = NULL;
3157-
3158- return batch_bo;
3159-}
3160-
3161-static void
3162-gen9_mfc_vp8_pipeline_programing(VADriverContextP ctx,
3163- struct encode_state *encode_state,
3164- struct intel_encoder_context *encoder_context)
3165-{
3166- struct intel_batchbuffer *batch = encoder_context->base.batch;
3167- dri_bo *slice_batch_bo;
3168-
3169- slice_batch_bo = gen9_mfc_vp8_software_batchbuffer(ctx, encode_state, encoder_context);
3170-
3171- // begin programing
3172- intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
3173- intel_batchbuffer_emit_mi_flush(batch);
3174-
3175- // picture level programing
3176- gen9_mfc_vp8_pipeline_picture_programing(ctx, encode_state, encoder_context);
3177-
3178- BEGIN_BCS_BATCH(batch, 4);
3179- OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
3180- OUT_BCS_RELOC(batch,
3181- slice_batch_bo,
3182- I915_GEM_DOMAIN_COMMAND, 0,
3183- 0);
3184- OUT_BCS_BATCH(batch, 0);
3185- OUT_BCS_BATCH(batch, 0);
3186- ADVANCE_BCS_BATCH(batch);
3187-
3188- // end programing
3189- intel_batchbuffer_end_atomic(batch);
3190-
3191- dri_bo_unreference(slice_batch_bo);
3192-}
3193-
3194-static void gen9_mfc_calc_vp8_coded_buffer_size(VADriverContextP ctx,
3195- struct encode_state *encode_state,
3196- struct intel_encoder_context *encoder_context)
3197-{
3198- struct i965_driver_data *i965 = i965_driver_data(ctx);
3199- struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3200- VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3201- unsigned char is_intra_frame = !pic_param->pic_flags.bits.frame_type;
3202- unsigned int *vp8_encoding_status, first_partition_bytes, token_partition_bytes, vp8_coded_bytes;
3203-
3204- dri_bo_map(mfc_context->vp8_state.token_statistics_bo, 0);
3205-
3206- vp8_encoding_status = (unsigned int *)mfc_context->vp8_state.token_statistics_bo->virtual;
3207- first_partition_bytes = (*vp8_encoding_status + 7) / 8;
3208- token_partition_bytes = (*(unsigned int *)(vp8_encoding_status + 9) + 7) / 8;
3209-
3210- /*coded_bytes includes P0~P8 partitions bytes + uncompresse date bytes + partion_size bytes in bitstream */
3211- vp8_coded_bytes = first_partition_bytes + token_partition_bytes + (3 + 7 * !!is_intra_frame) + (pic_param->pic_flags.bits.num_token_partitions - 1) * 3;
3212-
3213- dri_bo_unmap(mfc_context->vp8_state.token_statistics_bo);
3214-
3215- dri_bo_map(mfc_context->vp8_state.final_frame_bo, 0);
3216- struct i965_coded_buffer_segment *coded_buffer_segment = (struct i965_coded_buffer_segment *)(mfc_context->vp8_state.final_frame_bo->virtual);
3217- coded_buffer_segment->base.size = vp8_coded_bytes;
3218- dri_bo_unmap(mfc_context->vp8_state.final_frame_bo);
3219-}
3220-
3221-static VAStatus
3222-gen9_mfc_vp8_encode_picture(VADriverContextP ctx,
3223- struct encode_state *encode_state,
3224- struct intel_encoder_context *encoder_context)
3225-{
3226- gen9_mfc_vp8_init(ctx, encode_state, encoder_context);
3227- intel_mfc_vp8_prepare(ctx, encode_state, encoder_context);
3228- /*Programing bcs pipeline*/
3229- gen9_mfc_vp8_pipeline_programing(ctx, encode_state, encoder_context);
3230- gen9_mfc_run(ctx, encode_state, encoder_context);
3231- gen9_mfc_calc_vp8_coded_buffer_size(ctx, encode_state, encoder_context);
3232-
3233- return VA_STATUS_SUCCESS;
3234-}
3235-
32362359 static void
32372360 gen9_mfc_context_destroy(void *context)
32382361 {
@@ -3287,27 +2410,6 @@ gen9_mfc_context_destroy(void *context)
32872410
32882411 mfc_context->aux_batchbuffer = NULL;
32892412
3290- dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
3291- mfc_context->vp8_state.coeff_probs_stream_in_bo = NULL;
3292-
3293- dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
3294- mfc_context->vp8_state.final_frame_bo = NULL;
3295-
3296- dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
3297- mfc_context->vp8_state.frame_header_bo = NULL;
3298-
3299- dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
3300- mfc_context->vp8_state.intermediate_bo = NULL;
3301-
3302- dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
3303- mfc_context->vp8_state.mpc_row_store_bo = NULL;
3304-
3305- dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
3306- mfc_context->vp8_state.stream_out_bo = NULL;
3307-
3308- dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
3309- mfc_context->vp8_state.token_statistics_bo = NULL;
3310-
33112413 free(mfc_context);
33122414 }
33132415
@@ -3333,10 +2435,6 @@ static VAStatus gen9_mfc_pipeline(VADriverContextP ctx,
33332435 vaStatus = gen9_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
33342436 break;
33352437
3336- case VAProfileVP8Version0_3:
3337- vaStatus = gen9_mfc_vp8_encode_picture(ctx, encode_state, encoder_context);
3338- break;
3339-
33402438 default:
33412439 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
33422440 break;
@@ -3358,6 +2456,9 @@ Bool gen9_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e
33582456 }
33592457 #endif
33602458
2459+ if (encoder_context->codec == CODEC_VP8)
2460+ return gen8_mfc_context_init(ctx, encoder_context);
2461+
33612462 mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
33622463 mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
33632464
--- a/src/i965_device_info.c
+++ b/src/i965_device_info.c
@@ -294,6 +294,7 @@ static struct hw_codec_info chv_hw_codec_info = {
294294 .has_di_motion_adptive = 1,
295295 .has_di_motion_compensated = 1,
296296 .has_vp8_decoding = 1,
297+ .has_vp8_encoding = 1,
297298 .has_h264_mvc_encoding = 1,
298299
299300 .num_filters = 5,
--- a/src/i965_encoder_utils.c
+++ b/src/i965_encoder_utils.c
@@ -522,7 +522,7 @@ void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param,
522522 avc_bitstream bs;
523523 int i, j;
524524 int is_intra_frame = !pic_param->pic_flags.bits.frame_type;
525- int log2num = (int)log2(pic_param->pic_flags.bits.num_token_partitions);
525+ int log2num = pic_param->pic_flags.bits.num_token_partitions;
526526
527527 /* modify picture paramters */
528528 pic_param->pic_flags.bits.loop_filter_adj_enable = 1;
@@ -531,6 +531,10 @@ void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param,
531531 pic_param->pic_flags.bits.refresh_entropy_probs = 1;
532532 pic_param->pic_flags.bits.segmentation_enabled = 0;
533533
534+ pic_param->pic_flags.bits.loop_filter_type = pic_param->pic_flags.bits.version / 2;
535+ if (pic_param->pic_flags.bits.version > 1)
536+ pic_param->loop_filter_level[0] = 0;
537+
534538 avc_bitstream_start(&bs);
535539
536540 if (is_intra_frame) {
--- a/src/shaders/vme/Makefile.am
+++ b/src/shaders/vme/Makefile.am
@@ -20,8 +20,8 @@ INTEL_GEN75_INC = batchbuffer.inc vme75.inc vme75_mpeg2.inc
2020 INTEL_GEN75_ASM = $(INTEL_G75A:%.g75a=%.gen75.asm)
2121
2222
23-INTEL_G8B = intra_frame_gen8.g8b inter_frame_gen8.g8b inter_bframe_gen8.g8b mpeg2_inter_gen8.g8b
24-INTEL_G8A = intra_frame_gen8.g8a inter_frame_gen8.g8a inter_bframe_gen8.g8a mpeg2_inter_gen8.g8a
23+INTEL_G8B = intra_frame_gen8.g8b inter_frame_gen8.g8b inter_bframe_gen8.g8b mpeg2_inter_gen8.g8b vp8_intra_frame_gen8.g8b vp8_inter_frame_gen8.g8b
24+INTEL_G8A = intra_frame_gen8.g8a inter_frame_gen8.g8a inter_bframe_gen8.g8a mpeg2_inter_gen8.g8a vp8_intra_frame_gen8.g8a vp8_inter_frame_gen8.g8a
2525 INTEL_GEN8_INC = vme8.inc vme75_mpeg2.inc
2626 INTEL_GEN8_ASM = $(INTEL_G8A:%.g8a=%.gen8.asm)
2727
--- /dev/null
+++ b/src/shaders/vme/vp8_inter_frame_gen8.asm
@@ -0,0 +1,739 @@
1+/*
2+ * Copyright © 2014 Intel Corporation
3+ *
4+ * Permission is hereby granted, free of charge, to any person obtaining a
5+ * copy of this software and associated documentation files (the
6+ * "Software"), to deal in the Software without restriction, including
7+ * without limitation the rights to use, copy, modify, merge, publish,
8+ * distribute, sub license, and/or sell copies of the Software, and to
9+ * permit persons to whom the Software is furnished to do so, subject to
10+ * the following conditions:
11+ *
12+ * The above copyright notice and this permission notice (including the
13+ * next paragraph) shall be included in all copies or substantial portions
14+ * of the Software.
15+ *
16+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23+ *
24+ * Authors:
25+ * Zhao Yakui <yakui.zhao@intel.com>
26+ * Xiang Haihao <haihao.xiang@intel.com>
27+ * Li Zhong <zhong.li@intel.com>
28+ *
29+ */
30+
31+#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud
32+#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud
33+
34+/*
35+ * __START
36+ */
37+__INTER_START:
38+mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1};
39+mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1};
40+mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ;
41+mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ;
42+
43+shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
44+add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */
45+add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */
46+mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1};
47+mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
48+
49+shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
50+add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */
51+mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1};
52+mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
53+
54+shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
55+mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
56+
57+mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
58+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
59+mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 24:UD {align1};
60+mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
61+
62+/*
63+ * Media Read Message -- fetch Luma neighbor edge pixels
64+ */
65+/* ROW */
66+mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1};
67+send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
68+
69+/* COL */
70+mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1};
71+send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
72+
73+/*
74+ * Media Read Message -- fetch Chroma neighbor edge pixels
75+ */
76+/* ROW */
77+shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* x * 16 , y * 8 */
78+mul (1) read0_header.0<1>:D read0_header.0<0,1,0>:D 2:W {align1};
79+add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */
80+add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */
81+mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1};
82+send (8) msg_ind CHROMA_ROW<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};
83+
84+/* COL */
85+shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* x * 16, y * 8 */
86+mul (1) read1_header.0<1>:D read1_header.0<0,1,0>:D 2:W {align1};
87+add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */
88+mov (1) read1_header.8<1>:UD BLOCK_8X4 {align1};
89+mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1};
90+send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};
91+
92+mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1};
93+mov (8) mb_ref_win.0<1>:ud 0:ud {align1};
94+and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1};
95+(f0.0) jmpi (1) __mb_hwdep_end;
96+/* read back the data for MB A */
97+/* the layout of MB result is: rx.0(Available). rx.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag),
98+* rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backward reference ID)
99+*/
100+mov (8) mba_result.0<1>:ud 0x0:ud {align1};
101+mov (8) mbb_result.0<1>:ud 0x0:ud {align1};
102+mov (8) mbc_result.0<1>:ud 0x0:ud {align1};
103+mba_start:
104+mov (8) mb_msg0.0<1>:ud 0:ud {align1};
105+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1};
106+/* MB A doesn't exist. Zero MV. mba_flag is zero and ref ID = -1 */
107+(f0.0) mov (2) mba_result.20<1>:w -1:w {align1};
108+(f0.0) jmpi (1) mbb_start;
109+mov (1) mba_result.0<1>:d MB_AVAIL {align1};
110+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
111+add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1};
112+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
113+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
114+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1};
115+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
116+
117+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
118+send (16)
119+ mb_ind
120+ mb_wb.0<1>:ud
121+ NULL
122+ data_port(
123+ OBR_CACHE_TYPE,
124+ OBR_MESSAGE_TYPE,
125+ OBR_CONTROL_4,
126+ OBR_BIND_IDX,
127+ OBR_WRITE_COMMIT_CATEGORY,
128+ OBR_HEADER_PRESENT
129+ )
130+ mlen 1
131+ rlen 2
132+ {align1};
133+
134+/* TODO: RefID is required after multi-references are added */
135+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
136+(f0.0) mov (2) mba_result.20<1>:w -1:w {align1};
137+(f0.0) jmpi (1) mbb_start;
138+
139+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1};
140+/* Read MV for MB A */
141+/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
142+send (16)
143+ mb_ind
144+ mb_mv0.0<1>:ud
145+ NULL
146+ data_port(
147+ OBR_CACHE_TYPE,
148+ OBR_MESSAGE_TYPE,
149+ OBR_CONTROL_8,
150+ OBR_BIND_IDX,
151+ OBR_WRITE_COMMIT_CATEGORY,
152+ OBR_HEADER_PRESENT
153+ )
154+ mlen 1
155+ rlen 4
156+ {align1};
157+/* TODO: RefID is required after multi-references are added */
158+/* MV */
159+mov (2) mba_result.4<1>:ud mb_mv1.8<2,2,1>:ud {align1};
160+mov (1) mba_result.16<1>:w MB_PRED_FLAG {align1};
161+
162+mbb_start:
163+mov (8) mb_msg0.0<1>:ud 0:ud {align1};
164+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1};
165+/* MB B doesn't exist. Zero MV. MB B's available flag is zero and ref ID = -1 */
166+/* If MB B doesn't exist, neither MB C nor D exists */
167+(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1};
168+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1};
169+(f0.0) jmpi (1) mb_mvp_start;
170+mov (1) mbb_result.0<1>:d MB_AVAIL {align1};
171+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
172+add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1};
173+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
174+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
175+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1};
176+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
177+
178+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
179+send (16)
180+ mb_ind
181+ mb_wb.0<1>:ud
182+ NULL
183+ data_port(
184+ OBR_CACHE_TYPE,
185+ OBR_MESSAGE_TYPE,
186+ OBR_CONTROL_4,
187+ OBR_BIND_IDX,
188+ OBR_WRITE_COMMIT_CATEGORY,
189+ OBR_HEADER_PRESENT
190+ )
191+ mlen 1
192+ rlen 2
193+ {align1};
194+
195+/* TODO: RefID is required after multi-references are added */
196+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
197+(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1};
198+(f0.0) jmpi (1) mbc_start;
199+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1};
200+/* Read MV for MB B */
201+/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
202+send (16)
203+ mb_ind
204+ mb_mv0.0<1>:ud
205+ NULL
206+ data_port(
207+ OBR_CACHE_TYPE,
208+ OBR_MESSAGE_TYPE,
209+ OBR_CONTROL_8,
210+ OBR_BIND_IDX,
211+ OBR_WRITE_COMMIT_CATEGORY,
212+ OBR_HEADER_PRESENT
213+ )
214+ mlen 1
215+ rlen 4
216+ {align1};
217+/* TODO: RefID is required after multi-references are added */
218+mov (2) mbb_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1};
219+mov (1) mbb_result.16<1>:w MB_PRED_FLAG {align1};
220+
221+mbc_start:
222+mov (8) mb_msg0.0<1>:ud 0:ud {align1};
223+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1};
224+/* MB C doesn't exist. Zero MV. MB C's available flag is zero */
225+/* Based on the h264 spec, MB D is used in place of MB C when MB C doesn't exist */
226+(f0.0) jmpi (1) mbd_start;
227+mov (1) mbc_result.0<1>:d MB_AVAIL {align1};
228+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
229+add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1};
230+add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
231+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
232+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
233+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1};
234+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
235+
236+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
237+send (16)
238+ mb_ind
239+ mb_wb.0<1>:ud
240+ NULL
241+ data_port(
242+ OBR_CACHE_TYPE,
243+ OBR_MESSAGE_TYPE,
244+ OBR_CONTROL_4,
245+ OBR_BIND_IDX,
246+ OBR_WRITE_COMMIT_CATEGORY,
247+ OBR_HEADER_PRESENT
248+ )
249+ mlen 1
250+ rlen 2
251+ {align1};
252+
253+/* TODO: RefID is required after multi-references are added */
254+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
255+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1};
256+(f0.0) jmpi (1) mb_mvp_start;
257+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1};
258+/* Read MV for MB C */
259+/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
260+send (16)
261+ mb_ind
262+ mb_mv0.0<1>:ud
263+ NULL
264+ data_port(
265+ OBR_CACHE_TYPE,
266+ OBR_MESSAGE_TYPE,
267+ OBR_CONTROL_8,
268+ OBR_BIND_IDX,
269+ OBR_WRITE_COMMIT_CATEGORY,
270+ OBR_HEADER_PRESENT
271+ )
272+ mlen 1
273+ rlen 4
274+ {align1};
275+/* TODO: RefID is required after multi-references are added */
276+/* Forward MV */
277+mov (2) mbc_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1};
278+mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1};
279+
280+jmpi (1) mb_mvp_start;
281+mbd_start:
282+mov (8) mb_msg0.0<1>:ud 0:ud {align1};
283+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1};
284+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1};
285+(f0.0) jmpi (1) mb_mvp_start;
286+mov (1) mbc_result.0<1>:d MB_AVAIL {align1};
287+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1};
288+add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1};
289+mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
290+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1};
291+mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1};
292+mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
293+
294+/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
295+send (16)
296+ mb_ind
297+ mb_wb.0<1>:ud
298+ NULL
299+ data_port(
300+ OBR_CACHE_TYPE,
301+ OBR_MESSAGE_TYPE,
302+ OBR_CONTROL_4,
303+ OBR_BIND_IDX,
304+ OBR_WRITE_COMMIT_CATEGORY,
305+ OBR_HEADER_PRESENT
306+ )
307+ mlen 1
308+ rlen 2
309+ {align1};
310+
311+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
312+(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1};
313+(f0.0) jmpi (1) mb_mvp_start;
314+
315+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1};
316+/* Read MV for MB D */
317+/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
318+send (16)
319+ mb_ind
320+ mb_mv0.0<1>:ub
321+ NULL
322+ data_port(
323+ OBR_CACHE_TYPE,
324+ OBR_MESSAGE_TYPE,
325+ OBR_CONTROL_8,
326+ OBR_BIND_IDX,
327+ OBR_WRITE_COMMIT_CATEGORY,
328+ OBR_HEADER_PRESENT
329+ )
330+ mlen 1
331+ rlen 4
332+ {align1};
333+
334+/* TODO: RefID is required after multi-references are added */
335+
336+/* Forward MV */
337+mov (2) mbc_result.4<1>:ud mb_mv3.24<2,2,1>:ud {align1};
338+mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1};
339+
340+mb_mvp_start:
341+/*TODO: Add the skip prediction */
342+/* Check whether both MB B and C are unavailable */
343+add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1};
344+cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1};
345+(-f0.0) jmpi (1) mb_median_start;
346+cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1};
347+(f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1};
348+(f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1};
349+(f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1};
350+(f0.0) mov (1) mbc_result.20<1>:uw mba_result.20<0,1,0>:uw {align1};
351+(f0.0) mov (1) mb_mvp_ref.0<1>:ud mba_result.4<0,1,0>:ud {align1};
352+(-f0.0) mov (1) mb_mvp_ref.0<1>:ud 0:ud {align1};
353+jmpi (1) __mb_hwdep_end;
354+
355+mb_median_start:
356+/* check whether only one neighbour MB has the same ref ID with the current MB */
357+mov (8) tmp_reg0.0<1>:ud 0:ud {align1};
358+cmp.z.f0.0 (1) null:d mba_result.20<0,1,0>:w 0:w {align1};
359+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
360+(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1};
361+cmp.z.f0.0 (1) null:d mbb_result.20<0,1,0>:w 0:w {align1};
362+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
363+(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1};
364+cmp.z.f0.0 (1) null:d mbc_result.20<0,1,0>:w 0:w {align1};
365+(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
366+(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1};
367+cmp.e.f0.0 (1) null:d tmp_reg0.0<0,1,0>:w 1:w {align1};
368+(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1};
369+(f0.0) jmpi (1) __mb_hwdep_end;
370+
371+mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1};
372+mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1};
373+mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1};
374+SAVE_RET {align1};
375+ jmpi (1) word_imedian;
376+mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1};
377+mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1};
378+mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1};
379+mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1};
380+SAVE_RET {align1};
381+jmpi (1) word_imedian;
382+mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1};
383+
384+__mb_hwdep_end:
385+asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1};
386+add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1};
387+and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1};
388+/* m2, get the MV/Mb cost passed from constant buffer when
389+spawning thread by MEDIA_OBJECT */
390+mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1};
391+
392+mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1};
393+
394+/* m3 FWD/BWD cost center*/
395+mov (8) vme_msg_3<1>:UD 0x0:UD {align1};
396+
397+/* m4 skip center*/
398+mov (8) vme_msg_4<1>:UD 0x0:UD {align1};
399+
400+/* m5 */
401+mov (1) INEP_ROW.0<1>:UD 0x0:UD {align1};
402+and (1) INEP_ROW.4<1>:UD INEP_ROW.4<0,1,0>:UD 0xFF000000:UD {align1};
403+mov (8) vme_msg_5<1>:UD INEP_ROW.0<8,8,1>:UD {align1};
404+
405+
406+/* Use the Luma mode */
407+mov (1) tmp_reg0.0<1>:UW LUMA_INTRA_MODE:UW {align1};
408+mov (1) vme_msg_5.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
409+
410+/* m6 */
411+mov (8) vme_msg_6<1>:UD 0x0:UD {align1};
412+mov (16) vme_msg_6.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1};
413+mov (1) vme_msg_6.16<1>:UD INTRA_PREDICTORE_MODE {align1};
414+
415+/* the penalty for Intra mode */
416+mov (1) vme_msg_6.28<1>:UD 0x010101:UD {align1};
417+mov (1) vme_msg_6.20<1>:UW CHROMA_ROW.6<0,1,0>:UW {align1};
418+
419+
420+/* m7 */
421+
422+mov (4) vme_msg_7.16<1>:UD CHROMA_ROW.8<4,4,1>:UD {align1};
423+mov (8) vme_msg_7.0<1>:UW CHROMA_COL.2<16,8,2>:UW {align1};
424+
425+/*
426+ * SIC VME message
427+ */
428+
429+/* m1 */
430+mov (1) intra_flag<1>:UW 0x0:UW {align1};
431+mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE {align1}; /* VP8 doesn't support intra_8x8 mode */
432+
433+/* assign MB intra struct from the thread payload */
434+mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1};
435+
436+/* Disable DC HAAR component when calculating HAAR SATD block */
437+mov (1) tmp_reg0.0<1>:UW DC_HARR_DISABLE:UW {align1};
438+mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
439+mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1};
440+
441+/* m0 */
442+mov (1) vme_m0.12<1>:UD INTRA_SAD_HAAR:UD {align1}; /* 16x16 Source, Intra_harr */
443+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
444+
445+/* after verification it will be passed by using payload */
446+send (8)
447+ vme_msg_ind
448+ vme_wb<1>:UD
449+ null
450+ cre(
451+ BIND_IDX_VME,
452+ VME_SIC_MESSAGE_TYPE
453+ )
454+ mlen sic_vme_msg_length
455+ rlen vme_wb_length
456+ {align1};
457+/*
458+ * Oword Block Write message
459+ */
460+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
461+
462+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
463+mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1};
464+mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1};
465+mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1};
466+
467+/* Distortion, Intra (17-16), */
468+mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1};
469+
470+mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1};
471+/* VME clock counts */
472+mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1};
473+
474+mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1};
475+
476+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
477+send (16)
478+ msg_ind
479+ obw_wb
480+ null
481+ data_port(
482+ OBW_CACHE_TYPE,
483+ OBW_MESSAGE_TYPE,
484+ OBW_CONTROL_2,
485+ OBW_BIND_IDX,
486+ OBW_WRITE_COMMIT_CATEGORY,
487+ OBW_HEADER_PRESENT
488+ )
489+ mlen 2
490+ rlen obw_wb_length
491+ {align1};
492+
493+/* IME search: build message header m0 (search control and reference window origin) */
494+mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + VP8_INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */
495+mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */
496+
497+mov (1) vme_m0.0<1>:UD vme_m0.8<0,1,0>:UD {align1}; /* start from the (x, y) * 16 source position stored at vme_m0.8 */
498+
499+add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W -16:W {align1}; /* Reference = (x-16,y-12)-(x+32,y+28) */
500+add (1) vme_m0.2<1>:W vme_m0.2<0,1,0>:W -12:W {align1};
501+
502+mov (1) vme_m0.0<1>:W -16:W {align1}; /* NOTE(review): this and the next mov overwrite the results of the two adds above, so those adds look dead; presumably the origin is meant to be relative (-16, -12) to the source MB -- confirm */
503+mov (1) vme_m0.2<1>:W -12:W {align1};
504+
505+mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; /* duplicate the origin into vme_m0.4 before the edge adjustments below */
506+
507+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; /* f0.0 set when the AE availability bit is clear */
508+(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 12:w {align1}; /* shift X origin by +12 (only vme_m0.0, not the copy at vme_m0.4) */
509+and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; /* f0.0 set when the B availability bit is clear */
510+(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 8:w {align1}; /* shift Y origin by +8 (only vme_m0.2) */
511+
512+add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; /* offset both origins by the window shift derived from mb_mvp_ref at __mb_hwdep_end */
513+add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1};
514+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
515+
516+mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
517+/* the Max MV number is passed by constant buffer */
518+mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1};
519+mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1};
520+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
521+
522+/* Setup the Cost center */
523+/* currently four 8x8 share the same cost center */
524+mov (4) vme_m3.0<2>:ud mb_mvp_ref.0<0,1,0>:ud {align1};
525+mov (4) vme_m3.4<2>:ud mb_mvp_ref.0<0,1,0>:ud {align1};
526+
527+mov (8) vme_msg_3<1>:UD vme_m3.0<8,8,1>:UD {align1};
528+mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1};
529+
530+/* M4/M5 search path */
531+mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1};
532+mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1};
533+mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1};
534+mov (1) vme_msg_4.12<1>:UD 0x100F0F0F:UD {align1};
535+mov (1) vme_msg_4.16<1>:UD 0x01010101:UD {align1};
536+mov (1) vme_msg_4.20<1>:UD 0x10010101:UD {align1};
537+mov (1) vme_msg_4.24<1>:UD 0x0F0F0F0F:UD {align1};
538+mov (1) vme_msg_4.28<1>:UD 0x100F0F0F:UD {align1};
539+
540+mov (1) vme_msg_5.0<1>:UD 0x01010101:UD {align1};
541+mov (1) vme_msg_5.4<1>:UD 0x10010101:UD {align1};
542+mov (1) vme_msg_5.8<1>:UD 0x0F0F0F0F:UD {align1};
543+mov (1) vme_msg_5.12<1>:UD 0x000F0F0F:UD {align1};
544+
545+mov (4) vme_msg_5.16<1>:UD 0x0:UD {align1};
546+
547+send (8)
548+ vme_msg_ind
549+ vme_wb<1>:UD
550+ null
551+ vme(
552+ BIND_IDX_VME,
553+ 0,
554+ 0,
555+ VME_IME_MESSAGE_TYPE
556+ )
557+ mlen ime_vme_msg_length
558+ rlen vme_wb_length {align1};
559+
560+/* Set Macroblock-shape/mode for FBR */
561+
562+mov (1) vme_m2.20<1>:UD 0x0:UD {align1};
563+mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1};
564+mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1};
565+
566+and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1};
567+mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
568+
569+/* Send FBR message into CRE */
570+
571+mov (8) vme_msg_4.0<1>:UD vme_wb1.0<8,8,1>:UD {align1};
572+mov (8) vme_msg_5.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
573+mov (8) vme_msg_6.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
574+mov (8) vme_msg_7.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};
575+
576+mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/4 pixel, harr, BME disable */
577+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
578+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
579+
580+mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1};
581+mov (8) vme_msg_3.0<1>:UD vme_m3.0<8,8,1>:UD {align1};
582+
583+/* after verification it will be passed by using payload */
584+send (8)
585+ vme_msg_ind
586+ vme_wb<1>:UD
587+ null
588+ cre(
589+ BIND_IDX_VME,
590+ VME_FBR_MESSAGE_TYPE
591+ )
592+ mlen fbr_vme_msg_length
593+ rlen vme_wb_length
594+ {align1};
595+
596+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1};
597+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
598+/* write FME info */
599+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
600+
601+mov (1) msg_reg1.4<1>:UD vme_wb.24<0,1,0>:UD {align1};
602+/* Inter distortion of FME */
603+mov (1) msg_reg1.8<1>:UD vme_wb.8<0,1,0>:UD {align1};
604+
605+mov (1) msg_reg1.12<1>:UD vme_m2.20<0,1,0>:UD {align1};
606+
607+/* bind index 3, write oword (16bytes), msg type: 8(OWord Block Write) */
608+send (16)
609+ msg_ind
610+ obw_wb
611+ null
612+ data_port(
613+ OBW_CACHE_TYPE,
614+ OBW_MESSAGE_TYPE,
615+ OBW_CONTROL_0,
616+ OBW_BIND_IDX,
617+ OBW_WRITE_COMMIT_CATEGORY,
618+ OBW_HEADER_PRESENT
619+ )
620+ mlen 2
621+ rlen obw_wb_length
622+ {align1};
623+
624+/* Write FME/BME MV */
625+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1};
626+mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1};
627+
628+
629+mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1};
630+mov (8) msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
631+mov (8) msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
632+mov (8) msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};
633+/* bind index 3, write 8 oword (128 bytes), msg type: 8(OWord Block Write) */
634+send (16)
635+ msg_ind
636+ obw_wb
637+ null
638+ data_port(
639+ OBW_CACHE_TYPE,
640+ OBW_MESSAGE_TYPE,
641+ OBW_CONTROL_8,
642+ OBW_BIND_IDX,
643+ OBW_WRITE_COMMIT_CATEGORY,
644+ OBW_HEADER_PRESENT
645+ )
646+ mlen 5
647+ rlen obw_wb_length
648+ {align1};
649+
650+/* Write FME/BME RefID */
651+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x08:UD {align1};
652+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
653+
654+mov (8) msg_reg1.0<1>:UD vme_wb6.0<8,8,1>:UD {align1};
655+
656+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
657+send (16)
658+ msg_ind
659+ obw_wb
660+ null
661+ data_port(
662+ OBW_CACHE_TYPE,
663+ OBW_MESSAGE_TYPE,
664+ OBW_CONTROL_2,
665+ OBW_BIND_IDX,
666+ OBW_WRITE_COMMIT_CATEGORY,
667+ OBW_HEADER_PRESENT
668+ )
669+ mlen 2
670+ rlen obw_wb_length
671+ {align1};
672+
673+/* Issue message fence so that the previous write message is committed */
674+send (16)
675+ mb_ind
676+ mb_wb.0<1>:ud
677+ NULL
678+ data_port(
679+ OBR_CACHE_TYPE,
680+ OBR_MESSAGE_FENCE,
681+ OBR_MF_COMMIT,
682+ OBR_BIND_IDX,
683+ OBR_WRITE_COMMIT_CATEGORY,
684+ OBR_HEADER_PRESENT
685+ )
686+ mlen 1
687+ rlen 1
688+ {align1};
689+
690+__EXIT:
691+/*
692+ * kill thread
693+ */
694+mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1};
695+send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
696+
697+
698+ nop ;
699+ nop ;
700+/* word_imin: RET_ARG = minimum of the three signed words at INPUT_ARG0.0, INPUT_ARG0.4 and INPUT_ARG0.8. Clobbers f0.0 and TEMP_VAR0.0. Returns through RETURN_REG, so the caller must issue SAVE_RET right before jumping here. */
701+word_imin:
702+ cmp.le.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1};
703+ (f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* TEMP_VAR0.0 = min(arg0, arg1) */
704+ (-f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w {align1};
705+ cmp.le.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};
706+ (f0.0) mov (1) RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w {align1}; /* RET_ARG = min(TEMP_VAR0.0, arg2) */
707+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1};
708+ RETURN {align1};
709+
710+/* word_imax: RET_ARG = maximum of the three signed words at INPUT_ARG0.0, INPUT_ARG0.4 and INPUT_ARG0.8. Clobbers f0.0 and TEMP_VAR0.0. Returns through RETURN_REG, so the caller must issue SAVE_RET right before jumping here. */
711+word_imax:
712+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1};
713+ (f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* TEMP_VAR0.0 = max(arg0, arg1) */
714+ (-f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w {align1};
715+ cmp.ge.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};
716+ (f0.0) mov (1) RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w {align1}; /* RET_ARG = max(TEMP_VAR0.0, arg2) */
717+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1};
718+ RETURN {align1};
719+
720+word_imedian: /* RET_ARG = median of the signed words a = INPUT_ARG0.0, b = INPUT_ARG0.4, c = INPUT_ARG0.8; clobbers f0.0; caller must issue SAVE_RET right before jumping here */
721+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1};
722+ (f0.0) jmpi (1) cmp_a_ge_b;
723+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* from here on a < b */
724+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* c <= a < b: median is a */
725+ (f0.0) jmpi (1) cmp_end;
726+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* a is the smallest; pick min(b, c) */
727+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* a < c <= b: median is c */
728+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a < b < c: median is b */
729+ jmpi (1) cmp_end;
730+cmp_a_ge_b: /* from here on a >= b */
731+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};
732+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a >= b >= c: median is b */
733+ (f0.0) jmpi (1) cmp_end;
734+ cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* b is the smallest; pick min(a, c) */
735+ (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* b < c <= a: median is c */
736+ (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* b <= a < c: median is a */
737+cmp_end:
738+ RETURN {align1};
739+
--- /dev/null
+++ b/src/shaders/vme/vp8_inter_frame_gen8.g8a
@@ -0,0 +1,2 @@
1+#include "vme8.inc"
2+#include "vp8_inter_frame_gen8.asm"
--- /dev/null
+++ b/src/shaders/vme/vp8_inter_frame_gen8.g8b
@@ -0,0 +1,299 @@
1+ { 0x00800001, 0x24000608, 0x00000000, 0x00000000 },
2+ { 0x00800001, 0x24400608, 0x00000000, 0x00000000 },
3+ { 0x00800001, 0x24800608, 0x00000000, 0x00000000 },
4+ { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 },
5+ { 0x00200009, 0x24002228, 0x164500a0, 0x00040004 },
6+ { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 },
7+ { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff },
8+ { 0x00000001, 0x24080e08, 0x08000000, 0x0000001f },
9+ { 0x00000001, 0x24142288, 0x00000014, 0x00000000 },
10+ { 0x00200009, 0x24202228, 0x164500a0, 0x00040004 },
11+ { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc },
12+ { 0x00000001, 0x24280e08, 0x08000000, 0x000f0003 },
13+ { 0x00000001, 0x24342288, 0x00000014, 0x00000000 },
14+ { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 },
15+ { 0x00000001, 0x24542288, 0x00000014, 0x00000000 },
16+ { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 },
17+ { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 },
18+ { 0x00000041, 0x24880208, 0x06000488, 0x00000018 },
19+ { 0x00000001, 0x24942288, 0x00000014, 0x00000000 },
20+ { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 },
21+ { 0x04600031, 0x23800a88, 0x0e000800, 0x02190004 },
22+ { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 },
23+ { 0x04600031, 0x23a00a88, 0x0e000800, 0x02290004 },
24+ { 0x00200009, 0x24002228, 0x164500a0, 0x00030003 },
25+ { 0x00000041, 0x24000a28, 0x1e000400, 0x00020002 },
26+ { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 },
27+ { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff },
28+ { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 },
29+ { 0x04600031, 0x26000a88, 0x0e000800, 0x02190006 },
30+ { 0x00200009, 0x24202228, 0x164500a0, 0x00030003 },
31+ { 0x00000041, 0x24200a28, 0x1e000420, 0x00020002 },
32+ { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc },
33+ { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 },
34+ { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 },
35+ { 0x04600031, 0x26200a88, 0x0e000800, 0x02190006 },
36+ { 0x00600001, 0x2ac00608, 0x00000000, 0x00000000 },
37+ { 0x00600001, 0x2a800608, 0x00000000, 0x00000000 },
38+ { 0x01000005, 0x20001240, 0x160000a6, 0x00040004 },
39+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000750 },
40+ { 0x00600001, 0x2ae00608, 0x00000000, 0x00000000 },
41+ { 0x00600001, 0x2b000608, 0x00000000, 0x00000000 },
42+ { 0x00600001, 0x2b200608, 0x00000000, 0x00000000 },
43+ { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 },
44+ { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 },
45+ { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff },
46+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 },
47+ { 0x00000001, 0x2ae00e28, 0x08000000, 0x00000001 },
48+ { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 },
49+ { 0x00000040, 0x24001a68, 0x1e000400, 0xffffffff },
50+ { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 },
51+ { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 },
52+ { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 },
53+ { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 },
54+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 },
55+ { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 },
56+ { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff },
57+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 },
58+ { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 },
59+ { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 },
60+ { 0x00200001, 0x2ae40208, 0x00450bc8, 0x00000000 },
61+ { 0x00000001, 0x2af01e68, 0x18000000, 0x00010001 },
62+ { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 },
63+ { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 },
64+ { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff },
65+ { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff },
66+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000360 },
67+ { 0x00000001, 0x2b000e28, 0x08000000, 0x00000001 },
68+ { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 },
69+ { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff },
70+ { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 },
71+ { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 },
72+ { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 },
73+ { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 },
74+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 },
75+ { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 },
76+ { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff },
77+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 },
78+ { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 },
79+ { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 },
80+ { 0x00200001, 0x2b040208, 0x00450bf0, 0x00000000 },
81+ { 0x00000001, 0x2b101e68, 0x18000000, 0x00010001 },
82+ { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 },
83+ { 0x01000005, 0x20002240, 0x160000a5, 0x00080008 },
84+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000110 },
85+ { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 },
86+ { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 },
87+ { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff },
88+ { 0x00000040, 0x24001a68, 0x1e000400, 0x00010001 },
89+ { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 },
90+ { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 },
91+ { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 },
92+ { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 },
93+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 },
94+ { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 },
95+ { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff },
96+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000180 },
97+ { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 },
98+ { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 },
99+ { 0x00200001, 0x2b240208, 0x00450bf0, 0x00000000 },
100+ { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 },
101+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000130 },
102+ { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 },
103+ { 0x01000005, 0x20002240, 0x160000a5, 0x00040004 },
104+ { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff },
105+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 },
106+ { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 },
107+ { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 },
108+ { 0x00200040, 0x24001a68, 0x1e450400, 0xffffffff },
109+ { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 },
110+ { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 },
111+ { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 },
112+ { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 },
113+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 },
114+ { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 },
115+ { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff },
116+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 },
117+ { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 },
118+ { 0x0a800031, 0x2ba00a88, 0x0e000b40, 0x02480403 },
119+ { 0x00200001, 0x2b240208, 0x00450c18, 0x00000000 },
120+ { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 },
121+ { 0x00000040, 0x24000a28, 0x0a000b00, 0x00000b20 },
122+ { 0x01000010, 0x20000a20, 0x0e000400, 0x00000000 },
123+ { 0x00110020, 0x34000000, 0x0e001400, 0x00000080 },
124+ { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000000 },
125+ { 0x00010001, 0x2b040208, 0x00000ae4, 0x00000000 },
126+ { 0x00010001, 0x2b240208, 0x00000ae4, 0x00000000 },
127+ { 0x00010001, 0x2b141248, 0x00000af4, 0x00000000 },
128+ { 0x00010001, 0x2b341248, 0x00000af4, 0x00000000 },
129+ { 0x00010001, 0x2ac00208, 0x00000ae4, 0x00000000 },
130+ { 0x00110001, 0x2ac00608, 0x00000000, 0x00000000 },
131+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000190 },
132+ { 0x00600001, 0x24000608, 0x00000000, 0x00000000 },
133+ { 0x01000010, 0x20001a20, 0x1e000af4, 0x00000000 },
134+ { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 },
135+ { 0x00010001, 0x24040208, 0x00000ae4, 0x00000000 },
136+ { 0x01000010, 0x20001a20, 0x1e000b14, 0x00000000 },
137+ { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 },
138+ { 0x00010001, 0x24040208, 0x00000b04, 0x00000000 },
139+ { 0x01000010, 0x20001a20, 0x1e000b34, 0x00000000 },
140+ { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 },
141+ { 0x00010001, 0x24040208, 0x00000b24, 0x00000000 },
142+ { 0x01000010, 0x20001a20, 0x1e000400, 0x00010001 },
143+ { 0x00010001, 0x2ac00208, 0x00000404, 0x00000000 },
144+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 },
145+ { 0x00000001, 0x2fa01a68, 0x00000ae4, 0x00000000 },
146+ { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 },
147+ { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 },
148+ { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
149+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000860 },
150+ { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 },
151+ { 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 },
152+ { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 },
153+ { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 },
154+ { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
155+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000800 },
156+ { 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 },
157+ { 0x0020000c, 0x2a801a68, 0x1e450ac0, 0x00020002 },
158+ { 0x00200040, 0x2a881a68, 0x1e450a80, 0x00030003 },
159+ { 0x00200005, 0x2a901248, 0x16450a88, 0xfffcfffc },
160+ { 0x00600001, 0x25600208, 0x008d0020, 0x00000000 },
161+ { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 },
162+ { 0x00600001, 0x28600608, 0x00000000, 0x00000000 },
163+ { 0x00600001, 0x28800608, 0x00000000, 0x00000000 },
164+ { 0x00000001, 0x23800608, 0x00000000, 0x00000000 },
165+ { 0x00000005, 0x23840208, 0x06000384, 0xff000000 },
166+ { 0x00600001, 0x28a00208, 0x008d0380, 0x00000000 },
167+ { 0x00000001, 0x24001648, 0x10000000, 0x00010001 },
168+ { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 },
169+ { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 },
170+ { 0x00800001, 0x28c02288, 0x00cf03a3, 0x00000000 },
171+ { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 },
172+ { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 },
173+ { 0x00000001, 0x28d41248, 0x00000606, 0x00000000 },
174+ { 0x00400001, 0x28f00208, 0x00690608, 0x00000000 },
175+ { 0x00600001, 0x28e01248, 0x00ae0622, 0x00000000 },
176+ { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 },
177+ { 0x00000001, 0x247c0e88, 0x08000000, 0x00000002 },
178+ { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 },
179+ { 0x00000001, 0x24001648, 0x10000000, 0x00200020 },
180+ { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 },
181+ { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
182+ { 0x00000001, 0x244c0608, 0x00000000, 0x00800000 },
183+ { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
184+ { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 },
185+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
186+ { 0x00000001, 0x28200208, 0x00000180, 0x00000000 },
187+ { 0x00000001, 0x28240208, 0x00000190, 0x00000000 },
188+ { 0x00000001, 0x28280208, 0x00000194, 0x00000000 },
189+ { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 },
190+ { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 },
191+ { 0x00000001, 0x28340208, 0x00000188, 0x00000000 },
192+ { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 },
193+ { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 },
194+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 },
195+ { 0x00000001, 0x244c0608, 0x00000000, 0x7e200000 },
196+ { 0x00000001, 0x24561648, 0x10000000, 0x28302830 },
197+ { 0x00000001, 0x24400208, 0x00000448, 0x00000000 },
198+ { 0x00000040, 0x24401a68, 0x1e000440, 0xfff0fff0 },
199+ { 0x00000040, 0x24421a68, 0x1e000442, 0xfff4fff4 },
200+ { 0x00000001, 0x24401e68, 0x18000000, 0xfff0fff0 },
201+ { 0x00000001, 0x24421e68, 0x18000000, 0xfff4fff4 },
202+ { 0x00000001, 0x24440208, 0x00000440, 0x00000000 },
203+ { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 },
204+ { 0x00010040, 0x24401a68, 0x1e000440, 0x000c000c },
205+ { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 },
206+ { 0x00010040, 0x24421a68, 0x1e000442, 0x00080008 },
207+ { 0x00200040, 0x24401a68, 0x1a450440, 0x00450a90 },
208+ { 0x00200040, 0x24441a68, 0x1a450444, 0x00450a90 },
209+ { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
210+ { 0x00000001, 0x24600608, 0x00000000, 0x00000002 },
211+ { 0x00000001, 0x24642288, 0x0000009c, 0x00000000 },
212+ { 0x00000001, 0x24680608, 0x00000000, 0x30003030 },
213+ { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
214+ { 0x00400001, 0x45800208, 0x00000ac0, 0x00000000 },
215+ { 0x00400001, 0x45840208, 0x00000ac0, 0x00000000 },
216+ { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 },
217+ { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 },
218+ { 0x00000001, 0x28800608, 0x00000000, 0x01010101 },
219+ { 0x00000001, 0x28840608, 0x00000000, 0x10010101 },
220+ { 0x00000001, 0x28880608, 0x00000000, 0x0f0f0f0f },
221+ { 0x00000001, 0x288c0608, 0x00000000, 0x100f0f0f },
222+ { 0x00000001, 0x28900608, 0x00000000, 0x01010101 },
223+ { 0x00000001, 0x28940608, 0x00000000, 0x10010101 },
224+ { 0x00000001, 0x28980608, 0x00000000, 0x0f0f0f0f },
225+ { 0x00000001, 0x289c0608, 0x00000000, 0x100f0f0f },
226+ { 0x00000001, 0x28a00608, 0x00000000, 0x01010101 },
227+ { 0x00000001, 0x28a40608, 0x00000000, 0x10010101 },
228+ { 0x00000001, 0x28a80608, 0x00000000, 0x0f0f0f0f },
229+ { 0x00000001, 0x28ac0608, 0x00000000, 0x000f0f0f },
230+ { 0x00400001, 0x28b00608, 0x00000000, 0x00000000 },
231+ { 0x08600031, 0x21800a08, 0x0e000800, 0x0c784000 },
232+ { 0x00000001, 0x25740608, 0x00000000, 0x00000000 },
233+ { 0x00000001, 0x25752288, 0x00000199, 0x00000000 },
234+ { 0x00000001, 0x25762288, 0x0000019a, 0x00000000 },
235+ { 0x00000005, 0x24001248, 0x16000180, 0x00030003 },
236+ { 0x00000001, 0x25742288, 0x00000400, 0x00000000 },
237+ { 0x00600001, 0x28800208, 0x008d01a0, 0x00000000 },
238+ { 0x00600001, 0x28a00208, 0x008d01c0, 0x00000000 },
239+ { 0x00600001, 0x28c00208, 0x008d01e0, 0x00000000 },
240+ { 0x00600001, 0x28e00208, 0x008d0200, 0x00000000 },
241+ { 0x00000001, 0x244c0608, 0x00000000, 0x00243000 },
242+ { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
243+ { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
244+ { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 },
245+ { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 },
246+ { 0x0d600031, 0x21800a08, 0x0e000800, 0x10786000 },
247+ { 0x00000040, 0x24880208, 0x06000488, 0x00000002 },
248+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
249+ { 0x00000001, 0x28200208, 0x00000180, 0x00000000 },
250+ { 0x00000001, 0x28240208, 0x00000198, 0x00000000 },
251+ { 0x00000001, 0x28280208, 0x00000188, 0x00000000 },
252+ { 0x00000001, 0x282c0208, 0x00000574, 0x00000000 },
253+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0003 },
254+ { 0x00000040, 0x24880208, 0x06000488, 0x00000001 },
255+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
256+ { 0x00600001, 0x28200208, 0x008d01a0, 0x00000000 },
257+ { 0x00600001, 0x28400208, 0x008d01c0, 0x00000000 },
258+ { 0x00600001, 0x28600208, 0x008d01e0, 0x00000000 },
259+ { 0x00600001, 0x28800208, 0x008d0200, 0x00000000 },
260+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x0a0a0403 },
261+ { 0x00000040, 0x24880208, 0x06000488, 0x00000008 },
262+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
263+ { 0x00600001, 0x28200208, 0x008d0240, 0x00000000 },
264+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 },
265+ { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x0219e003 },
266+ { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 },
267+ { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 },
268+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
269+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
270+ { 0x06000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 },
271+ { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 },
272+ { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 },
273+ { 0x06000010, 0x20001a60, 0x1a000f60, 0x00000fa8 },
274+ { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 },
275+ { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 },
276+ { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 },
277+ { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 },
278+ { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 },
279+ { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 },
280+ { 0x04000010, 0x20001a60, 0x1a000f60, 0x00000fa8 },
281+ { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 },
282+ { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 },
283+ { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 },
284+ { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 },
285+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 },
286+ { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 },
287+ { 0x00010001, 0x2fe41a68, 0x00000fa0, 0x00000000 },
288+ { 0x00010020, 0x34000000, 0x0e001400, 0x000000a0 },
289+ { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 },
290+ { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 },
291+ { 0x00110001, 0x2fe41a68, 0x00000fa4, 0x00000000 },
292+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000060 },
293+ { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 },
294+ { 0x00010001, 0x2fe41a68, 0x00000fa4, 0x00000000 },
295+ { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 },
296+ { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 },
297+ { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 },
298+ { 0x00110001, 0x2fe41a68, 0x00000fa0, 0x00000000 },
299+ { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 },
--- /dev/null
+++ b/src/shaders/vme/vp8_intra_frame_gen8.asm
@@ -0,0 +1,200 @@
1+/*
2+ * Copyright © 2014 Intel Corporation
3+ *
4+ * Permission is hereby granted, free of charge, to any person obtaining a
5+ * copy of this software and associated documentation files (the
6+ * "Software"), to deal in the Software without restriction, including
7+ * without limitation the rights to use, copy, modify, merge, publish,
8+ * distribute, sub license, and/or sell copies of the Software, and to
9+ * permit persons to whom the Software is furnished to do so, subject to
10+ * the following conditions:
11+ *
12+ * The above copyright notice and this permission notice (including the
13+ * next paragraph) shall be included in all copies or substantial portions
14+ * of the Software.
15+ *
16+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23+ *
24+ * Authors:
25+ * Zhao Yakui <yakui.zhao@intel.com>
26+ * Xiang Haihao <haihao.xiang@intel.com>
27+ * Li Zhong <zhong.li@intel.com>
28+ *
29+ */
30+
31+/*
32+ * __START
33+ */
34+__INTRA_START:
35+mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; /* zero the temporary registers */
36+mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1};
37+mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ;
38+mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ;
39+
40+shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
41+add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset: x * 16 - 8 */
42+add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset: y * 16 - 1 */
43+mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1};
44+mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
45+
46+shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
47+add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset: x * 16 - 4 */
48+mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1};
49+mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
50+
51+shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
52+mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
53+
54+mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; /* obw_m0.8 = w_in_mb * y */
55+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; /* obw_m0.8 += x */
56+mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; /* obw_m0.8 = (w_in_mb * y + x) * 2 */
57+mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
58+
59+/*
60+ * Media Read Message -- fetch Luma neighbor edge pixels
61+ */
62+/* ROW */
63+mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1};
64+send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
65+
66+/* COL */
67+mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1};
68+send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
69+
70+/*
71+ * Media Read Message -- fetch Chroma neighbor edge pixels
72+ */
73+/* ROW */
74+shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* (x, y) * 8 */
75+mul (1) read0_header.0<1>:D read0_header.0<0,1,0>:D 2:W {align1}; /* X only: x * 8 * 2 = x * 16 */
76+add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */
77+add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */
78+mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1};
79+send (8) msg_ind CHROMA_ROW<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};
80+
81+/* COL */
82+shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* (x, y) * 8 */
83+mul (1) read1_header.0<1>:D read1_header.0<0,1,0>:D 2:W {align1}; /* X only: x * 8 * 2 = x * 16 */
84+add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */
85+mov (1) read1_header.8<1>:UD BLOCK_8X4 {align1};
86+mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1};
87+send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};
88+
89+/* m2: the MV/MB cost passed in through the constant buffer
90+when the EU thread is created by MEDIA_OBJECT */
91+mov (8) vme_msg_2<1>:UD r1.0<8,8,1>:UD {align1};
92+
93+/* m3. This is changed for FWD/BWD cost center */
94+mov (8) vme_msg_3<1>:UD 0x0:UD {align1};
95+
96+/* m4.*/
97+mov (8) vme_msg_4<1>:ud 0x0:ud {align1};
98+
99+/* m5 */
100+mov (1) INEP_ROW.0<1>:UD 0x0:UD {align1}; /* clear dword 0 of the fetched luma row */
101+and (1) INEP_ROW.4<1>:UD INEP_ROW.4<0,1,0>:UD 0xFF000000:UD {align1}; /* keep only the top byte of dword 1 */
102+mov (8) vme_msg_5<1>:UD INEP_ROW.0<8,8,1>:UD {align1};
103+
104+mov (1) tmp_reg0.0<1>:UB INTRA_PLANAR_MODE_MASK {align1}; /* VP8 doesn't support planar intra mode */
105+mov (1) tmp_reg0.1<1>:UB LUMA_CHROMA_MODE {align1}; /* Intra type: Luma + Chroma */
106+
107+/* Intra mode mask && Intra compute type */
108+mov (1) vme_msg_5.4<1>:UW tmp_reg0.0<0,1,0>:UW {align1};
109+
110+/* m6 */
111+mov (8) vme_msg_6<1>:UD 0x0:UD {align1};
112+mov (16) vme_msg_6.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1}; /* gather every 4th byte of INEP_COL0, starting at byte 3 */
113+mov (1) vme_msg_6.16<1>:UD INTRA_PREDICTORE_MODE {align1};
114+
115+/* the penalty for Intra mode */
116+mov (1) vme_msg_6.28<1>:UD 0x010101:UD {align1};
117+mov (1) vme_msg_6.20<1>:UW CHROMA_ROW.6<0,1,0>:UW {align1};
118+
119+
120+/* m7 */
121+
122+mov (4) vme_msg_7.16<1>:UD CHROMA_ROW.8<4,4,1>:UD {align1};
123+mov (8) vme_msg_7.0<1>:UW CHROMA_COL.2<16,8,2>:UW {align1};
124+
125+/*
126+ * VME message
127+ */
128+
129+/* m1 */
130+mov (1) intra_flag<1>:UW 0x0:UW {align1};
131+mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE {align1}; /* VP8 doesn't support intra_8x8 mode */
132+
133+/* assign the MB intra struct from the thread payload */
134+mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1};
135+
136+/* Disable the DC HAAR component when calculating the HAAR SATD block */
137+mov (1) tmp_reg0.0<1>:UW DC_HARR_DISABLE:UW {align1};
138+mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
139+
140+mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1};
141+
142+/* m0 */
143+add (1) vme_m0.12<1>:UD vme_m0.12<0,1,0>:ud INTRA_SAD_HAAR:UD {align1};/* 16x16 source, intra HAAR */
144+mov (1) vme_m0.15<1>:UB SUB_PART_8x4_DISABLE + SUB_PART_4x8_DISABLE {align1}; /* VP8 doesn't support 8x4 and 4x8 partitions */
145+mov (8) vme_msg_0<1>:UD vme_m0.0<8,8,1>:UD {align1};
146+
147+/* after verification it will be passed by using payload */
148+send (8)
149+ vme_msg_ind
150+ vme_wb<1>:UD
151+ null
152+ cre(
153+ BIND_IDX_VME,
154+ VME_SIC_MESSAGE_TYPE
155+ )
156+ mlen sic_vme_msg_length
157+ rlen vme_wb_length
158+ {align1};
159+/*
160+ * Oword Block Write message: msg_reg0 carries the obw_m0 header, msg_reg1 the dwords picked from vme_wb
161+ */
162+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
163+
164+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
165+mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1};
166+mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1};
167+mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1};
168+
169+/* Distortion, Intra (17-16), */
170+mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1};
171+
172+mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1};
173+/* VME clock counts */
174+mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1};
175+
176+mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1}; /* copy of the obw_m0.8 value computed from the MB x/y at kernel start */
177+
178+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
179+send (16)
180+ msg_ind
181+ obw_wb
182+ null
183+ data_port(
184+ OBW_CACHE_TYPE,
185+ OBW_MESSAGE_TYPE,
186+ OBW_CONTROL_2,
187+ OBW_BIND_IDX,
188+ OBW_WRITE_COMMIT_CATEGORY,
189+ OBW_HEADER_PRESENT
190+ )
191+ mlen 2
192+ rlen obw_wb_length
193+ {align1};
194+
195+__EXIT:
196+/*
197+ * kill thread: copy r0 into the message register, then send it to the thread spawner with EOT set
198+ */
199+mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1};
200+send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
--- /dev/null
+++ b/src/shaders/vme/vp8_intra_frame_gen8.g8a
@@ -0,0 +1,2 @@
1+#include "vme8.inc"
2+#include "vp8_intra_frame_gen8.asm"
--- /dev/null
+++ b/src/shaders/vme/vp8_intra_frame_gen8.g8b
@@ -0,0 +1,73 @@
1+ { 0x00800001, 0x24000608, 0x00000000, 0x00000000 },
2+ { 0x00800001, 0x24400608, 0x00000000, 0x00000000 },
3+ { 0x00800001, 0x24800608, 0x00000000, 0x00000000 },
4+ { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 },
5+ { 0x00200009, 0x24002228, 0x164500a0, 0x00040004 },
6+ { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 },
7+ { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff },
8+ { 0x00000001, 0x24080e08, 0x08000000, 0x0000001f },
9+ { 0x00000001, 0x24142288, 0x00000014, 0x00000000 },
10+ { 0x00200009, 0x24202228, 0x164500a0, 0x00040004 },
11+ { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc },
12+ { 0x00000001, 0x24280e08, 0x08000000, 0x000f0003 },
13+ { 0x00000001, 0x24342288, 0x00000014, 0x00000000 },
14+ { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 },
15+ { 0x00000001, 0x24542288, 0x00000014, 0x00000000 },
16+ { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 },
17+ { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 },
18+ { 0x00000041, 0x24880208, 0x06000488, 0x00000002 },
19+ { 0x00000001, 0x24942288, 0x00000014, 0x00000000 },
20+ { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 },
21+ { 0x04600031, 0x23800a88, 0x0e000800, 0x02190004 },
22+ { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 },
23+ { 0x04600031, 0x23a00a88, 0x0e000800, 0x02290004 },
24+ { 0x00200009, 0x24002228, 0x164500a0, 0x00030003 },
25+ { 0x00000041, 0x24000a28, 0x1e000400, 0x00020002 },
26+ { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 },
27+ { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff },
28+ { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 },
29+ { 0x04600031, 0x26000a88, 0x0e000800, 0x02190006 },
30+ { 0x00200009, 0x24202228, 0x164500a0, 0x00030003 },
31+ { 0x00000041, 0x24200a28, 0x1e000420, 0x00020002 },
32+ { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc },
33+ { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 },
34+ { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 },
35+ { 0x04600031, 0x26200a88, 0x0e000800, 0x02190006 },
36+ { 0x00600001, 0x28400208, 0x008d0020, 0x00000000 },
37+ { 0x00600001, 0x28600608, 0x00000000, 0x00000000 },
38+ { 0x00600001, 0x28800608, 0x00000000, 0x00000000 },
39+ { 0x00000001, 0x23800608, 0x00000000, 0x00000000 },
40+ { 0x00000005, 0x23840208, 0x06000384, 0xff000000 },
41+ { 0x00600001, 0x28a00208, 0x008d0380, 0x00000000 },
42+ { 0x00000001, 0x24000688, 0x00000000, 0x10001000 },
43+ { 0x00000001, 0x24010e88, 0x08000000, 0x00000000 },
44+ { 0x00000001, 0x28a41248, 0x00000400, 0x00000000 },
45+ { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 },
46+ { 0x00800001, 0x28c02288, 0x00cf03a3, 0x00000000 },
47+ { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 },
48+ { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 },
49+ { 0x00000001, 0x28d41248, 0x00000606, 0x00000000 },
50+ { 0x00400001, 0x28f00208, 0x00690608, 0x00000000 },
51+ { 0x00600001, 0x28e01248, 0x00ae0622, 0x00000000 },
52+ { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 },
53+ { 0x00000001, 0x247c0e88, 0x08000000, 0x00000002 },
54+ { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 },
55+ { 0x00000001, 0x24001648, 0x10000000, 0x00200020 },
56+ { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 },
57+ { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
58+ { 0x00000040, 0x244c0208, 0x0600044c, 0x00800000 },
59+ { 0x00000001, 0x244f0e88, 0x08000000, 0x00000030 },
60+ { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
61+ { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 },
62+ { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
63+ { 0x00000001, 0x28200208, 0x00000180, 0x00000000 },
64+ { 0x00000001, 0x28240208, 0x00000190, 0x00000000 },
65+ { 0x00000001, 0x28280208, 0x00000194, 0x00000000 },
66+ { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 },
67+ { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 },
68+ { 0x00000001, 0x28340208, 0x00000188, 0x00000000 },
69+ { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 },
70+ { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 },
71+ { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 },
72+ { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 },
73+ { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 },
--- a/src/vp8_probs.h
+++ b/src/vp8_probs.h
@@ -42,27 +42,27 @@
4242 #ifndef VP8_PROBS_H
4343 #define VP8_PROBS_H
4444
45-const unsigned char vp8_ymode_prob[4] =
45+static const unsigned char vp8_ymode_prob[4] =
4646 {
4747 112, 86, 140, 37
4848 };
4949
50-const unsigned char vp8_kf_ymode_prob[4] =
50+static const unsigned char vp8_kf_ymode_prob[4] =
5151 {
5252 145, 156, 163, 128
5353 };
5454
55-const unsigned char vp8_uv_mode_prob[3] =
55+static const unsigned char vp8_uv_mode_prob[3] =
5656 {
5757 162, 101, 204
5858 };
5959
60-static const unsigned char vp8_kf_uv_mode_prob[3] =
60+static const unsigned char vp8_kf_uv_mode_prob[3] =
6161 {
6262 142, 114, 183
6363 };
6464
65-const unsigned char vp8_base_skip_false_prob[128] =
65+static const unsigned char vp8_base_skip_false_prob[128] =
6666 {
6767 255, 255, 255, 255, 255, 255, 255, 255,
6868 255, 255, 255, 255, 255, 255, 255, 255,
@@ -82,7 +82,7 @@ const unsigned char vp8_base_skip_false_prob[128] =
8282 30, 28, 26, 24, 22, 20, 18, 16,
8383 };
8484
85-const unsigned char vp8_mv_update_probs[2][19] =
85+static const unsigned char vp8_mv_update_probs[2][19] =
8686 {
8787 {
8888 237,
@@ -98,7 +98,7 @@ const unsigned char vp8_mv_update_probs[2][19] =
9898 }
9999 };
100100
101-const unsigned char vp8_default_mv_context[2][19] =
101+static const unsigned char vp8_default_mv_context[2][19] =
102102 {
103103 {
104104 162, /* is short */
@@ -116,7 +116,7 @@ const unsigned char vp8_default_mv_context[2][19] =
116116 }
117117 };
118118
119-const unsigned char vp8_default_coef_probs[4][8][3][11] =
119+static const unsigned char vp8_default_coef_probs[4][8][3][11] =
120120 {
121121 { /* Block Type ( 0 ) */
122122 { /* Coeff Band ( 0 )*/