/*
 * This is a simple program to test the multi-core microblaze SoC.
 */

#include <stdio.h>


#include "sysace_stdio.h"

#include "DecTest.h"


/* global variables */
/* Decoder handle shared across this file: allocated and initialised in
 * waveFront(), then read by thr_decode_one_row() and main_thread_one_row()
 * to reach the decoder's common state. */
AVCHandle *mHandle=NULL;


/* Not referenced anywhere in this file.
 * NOTE(review): presumably per-plane (luma / Cb / Cr) intra-prediction
 * "top" buffers consumed by the decoder sources - confirm where they are
 * allocated and used. */
uint8 **intra_pred_top;
uint8 **intra_pred_top_cb;
uint8 **intra_pred_top_cr;



/*
 * Allocation callback installed into AVCHandle::CBAVC_Malloc by AVCDecInit().
 * Forwards the request to smp_malloc() and hands the resulting address back
 * as a 32-bit integer. userData and attrs are accepted only to satisfy the
 * callback signature and are ignored.
 */
static int32_t Malloc(void *userData, int32_t size, int32_t attrs) {
	(void)userData;
	(void)attrs;

	return (int32_t)smp_malloc(size);
}

/*
 * Release callback installed into AVCHandle::CBAVC_Free by AVCDecInit().
 * ptr is an address carried as a 32-bit integer (the form Malloc() returns);
 * it is converted back to a pointer and passed to free().
 * userData is ignored.
 */
static void Free(void *userData, int32_t ptr) {
	(void)userData;

	free((void *)ptr);
}



/* Counter printed as "all count" at the end of waveFront(). Nothing in this
 * file modifies it, so any updates come from other translation units. */
int all_count = 0;




#ifdef dumpFile
/*
 * Append one decoded frame to out_fd: the Y plane (pitch*height bytes)
 * followed by the Cb and Cr planes ((pitch/2)*(height/2) bytes each).
 * Every plane is passed to microblaze_invalidate_dcache_range() before it is
 * written. Must only be called with a frame that PVAVCDecGetOutput()
 * filled in successfully.
 */
static void dumpOutputFrame(AVCFrameIO *frame, FILE *out_fd)
{
	// 2014/3/4
	microblaze_invalidate_dcache_range(frame->YCbCr[0],frame->pitch*frame->height);
	microblaze_invalidate_dcache_range(frame->YCbCr[1],(frame->pitch/2)*(frame->height/2));
	microblaze_invalidate_dcache_range(frame->YCbCr[2],(frame->pitch/2)*(frame->height/2));

	sysace_fwrite(frame->YCbCr[0],frame->pitch*frame->height,1,out_fd);
	sysace_fwrite(frame->YCbCr[1],(frame->pitch/2)*(frame->height/2),1,out_fd);
	sysace_fwrite(frame->YCbCr[2],(frame->pitch/2)*(frame->height/2),1,out_fd);
}
#endif

/*
 * Top-level decode driver.
 *
 * Loads INPUT_FILE_NAME into a heap buffer, initialises the global decoder
 * handle, locks every row mutex, clears the slice-end flag at BR_FLAG_ADDR
 * and then feeds the stream to the decoder one NAL unit at a time. When the
 * build defines dumpFile, every frame the decoder outputs is written to
 * OUTPUT_FILE_NAME. Finally the elapsed timer value and all_count are
 * printed.
 *
 * Returns 0 in all cases.
 */
int waveFront()
{
	/* Size of the buffer the whole input stream is read into (8 MiB). */
	enum { INPUT_BUFFER_BYTES = 8388608 };

	FILE *in_fd,*out_fd;

	char *fileName = INPUT_FILE_NAME;
	char *outfileName = OUTPUT_FILE_NAME;

	int mSPSSeen,mPPSSeen;
	int H264_Size,H264_Offset;
	uint8_t *H264_File;

	/* NOTE(review): the results of sysace_fopen() and smp_malloc() below are
	 * used without being checked - confirm how these helpers report failure
	 * before adding error paths (an early return here would also skip the
	 * row-mutex locking the worker cores depend on). */
	in_fd = sysace_fopen (fileName ,"r" );

	mutex_lock(APP_MUTEX_NUM);
	printf("input_filename %s \n",fileName);
	mutex_unlock(APP_MUTEX_NUM);

	H264_File = (unsigned char *) smp_malloc (INPUT_BUFFER_BYTES);

	/* The return value is used as the stream length in bytes.
	 * NOTE(review): confirm sysace_fread() returns a byte count for
	 * (size=INPUT_BUFFER_BYTES, count=1) rather than an item count. */
	H264_Size = sysace_fread(H264_File,INPUT_BUFFER_BYTES,1,in_fd);

	sysace_fclose(in_fd);

	out_fd= sysace_fopen(outfileName,"w");

	mHandle=smp_malloc(sizeof(AVCHandle));

	AVCDecInit( mHandle , &mSPSSeen , &mPPSSeen , &H264_Offset );

	const uint8_t *NALUnit;
	int NALSize,Read_res=0,Dec_res=0;

	/* NOTE(review): printed without APP_MUTEX_NUM, unlike the other messages
	 * in this function - confirm no other core can print at this point. */
	printf("Start decode \n");

	/* lock each row */
	int i;
	for( i = 0 ; i < ROW_MUTEX_LENGTH ; i++){
		mutex_lock(i+ROW_MUTEX_BASE);
	}

	/* Clear the flag that the row workers set to 1 once the last picture
	 * row has been decoded. */
	int* slice_end_flag;
	slice_end_flag = (void*)BR_FLAG_ADDR;
	*slice_end_flag = 0;

	int start_time = tmr_get_value();

	while(H264_File!=NULL)
	{
		Read_res=ReadNALUnit( H264_File , H264_Offset , H264_Size , &NALUnit , &NALSize );

		if(Read_res==-1)
			break; /* fail: no start code at the current offset */

		Dec_res=DecNALUnit (mHandle,NALUnit,NALSize,&mSPSSeen,&mPPSSeen);
		if(Dec_res== -1)
		{
			break; /* fail: decoder error */
		}else if(Dec_res ==0 )
		{
			/* NAL consumed without producing a picture: advance past the
			 * 4-byte start code and the payload, or finish when this was
			 * the last NAL in the buffer. */
			if (NALSize + 4 == H264_Size)
			{
				free(H264_File);
				H264_File = NULL;
			} else
			{
				H264_Offset = H264_Offset+NALSize+4;
				H264_Size = H264_Size-NALSize-4;
			}

		}else if (Dec_res ==1)
		{
			/* A picture is ready. The offset is deliberately NOT advanced,
			 * so the same NAL is submitted again on the next iteration. */
			int index;
			int Release;
			AVCFrameIO Output;
			Output.YCbCr[0] = Output.YCbCr[1] = Output.YCbCr[2] = NULL;

			AVCDec_Status status = PVAVCDecGetOutput(mHandle, &index, &Release, &Output);

			/* Check the status BEFORE touching Output: on failure its plane
			 * pointers are still NULL and pitch/height were never set here. */
			if (status != AVCDEC_SUCCESS)
			{
				mutex_lock(APP_MUTEX_NUM);
				printf("PVAVCDecGetOutput returned error %d\n", status);
				mutex_unlock(APP_MUTEX_NUM);
				break;
			}
#ifdef dumpFile
			dumpOutputFrame(&Output, out_fd);
#endif
		}
	}

	/* Drain the frames still held by the decoder until it stops returning
	 * AVCDEC_SUCCESS. */
	for(;;){
		int index;
		int Release;
		AVCFrameIO Output;

		Output.YCbCr[0] = Output.YCbCr[1] = Output.YCbCr[2] = NULL;
		if (PVAVCDecGetOutput(mHandle, &index, &Release, &Output) != AVCDEC_SUCCESS) {
			break;
		}

#ifdef dumpFile
		dumpOutputFrame(&Output, out_fd);
#endif
	}

	printf("time : %.0f\n",(tmr_get_value()-start_time)*CLOCKS_PER_MICROSEC);

	/* Still non-NULL when the decode loop ended through one of its breaks. */
	if(H264_File!=NULL)
		free (H264_File);
	sysace_fclose (out_fd);
	/* NOTE(review): system("pause") is a desktop-console idiom - confirm it
	 * is meaningful on this target. */
	system("pause");
	free(mHandle);

	mutex_lock(APP_MUTEX_NUM);
	printf("Decode Over \n");
	printf("all count : %d\n",all_count);
	mutex_unlock(APP_MUTEX_NUM);

	return 0;
}

/*
 * Put a decoder handle and the caller's bookkeeping into their start state.
 *
 * mHandle      handle to reset; zero-filled, then given the Malloc/Free
 *              callbacks defined in this file.
 * mSPSSeen     set to 0.
 * mPPSSeen     set to 0.
 * H264_Offset  set to 0.
 */
void AVCDecInit(AVCHandle * mHandle,int *mSPSSeen,int *mPPSSeen,int * H264_Offset){

	/* Wipe every field first, then fill in the ones that matter. */
	memset(mHandle, 0, sizeof *mHandle);

	mHandle->AVCObject = NULL;
	mHandle->userData = NULL;

	/* Memory callbacks used through the handle. */
	mHandle->CBAVC_Malloc = Malloc;
	mHandle->CBAVC_Free = Free;

	*mSPSSeen = 0;
	*mPPSSeen = 0;
	*H264_Offset = 0;
}

/*
 * Locate the NAL unit that begins at H264_File + H264_Offset.
 *
 * The data at that position must start with the 4-byte kStartCode and
 * H264_Size (the number of bytes available from that position) must be at
 * least 4.
 *
 * On success *NALUnit points just past the start code and *NALSize is the
 * number of bytes up to the next kStartCode, or up to the end of the data
 * when no further start code is found. Returns 1 on success, -1 otherwise
 * (in which case the outputs are left untouched).
 */
int ReadNALUnit(const uint8_t * H264_File , int H264_Offset ,int H264_Size ,const uint8_t ** NALUnit ,int *NALSize ){

	const uint8_t *cursor = H264_File + H264_Offset;
	const int remaining = H264_Size;
	int next;

	/* Too short to hold a start code, or not positioned on one. */
	if (remaining < 4 || memcmp(kStartCode, cursor, 4) != 0) {
		return -1;
	}

	/* Scan forward for the following start code. */
	for (next = 4; next + 3 < remaining; ++next) {
		if (memcmp(kStartCode, cursor + next, 4) == 0) {
			break;
		}
	}

	*NALUnit = cursor + 4;
	/* Scan ran off the end: the NAL extends to the end of the data. */
	*NALSize = (next + 3 >= remaining) ? (remaining - 4) : (next - 4);

	return 1;
}

/*
 * Classify one NAL unit and pass it to the matching decoder entry point.
 *
 * mHandle   decoder handle.
 * NALUnit   NAL payload (start code already stripped), NALSize bytes long.
 * mSPSSeen  in/out flag, set to 1 as soon as an SPS NAL is encountered.
 * mPPSSeen  in/out flag, set to 1 as soon as a PPS NAL is encountered.
 *
 * NAL units other than SPS/PPS are ignored until both flags are set.
 *
 * Returns
 *    1  PVAVCDecodeSlice() reported AVCDEC_PICTURE_OUTPUT_READY; the caller
 *       should fetch the output and must not discard this NAL yet.
 *    0  the NAL was consumed or skipped and produced no output. This
 *       includes the case where the NAL type could not be determined.
 *   -1  a decoder call failed or the NAL type is not handled.
 */
int DecNALUnit (AVCHandle *mHandle,const uint8_t *NALUnit,const int NALSize,int *mSPSSeen,int *mPPSSeen)
{
	int nalType;
	int nalRefIdc;
	AVCDec_Status res =PVAVCDecGetNALType((uint8_t *)(NALUnit), NALSize,&nalType, &nalRefIdc);

	if (res != AVCDEC_SUCCESS) {
		mutex_lock(APP_MUTEX_NUM);
		printf("cannot determine nal type\n");
		mutex_unlock(APP_MUTEX_NUM);
		return 0;	/* reported, then skipped like a NAL without data */
	}

	if (!(nalType == AVC_NALTYPE_SPS || nalType == AVC_NALTYPE_PPS || (*mSPSSeen && *mPPSSeen))) {
		/* We haven't seen SPS or PPS yet. */
		return 0;
	}

	switch (nalType) {
		case AVC_NALTYPE_SPS:
		{
			*mSPSSeen = 1;
			res = PVAVCDecSeqParamSet(
					mHandle, (uint8_t *)(NALUnit),
					NALSize);

			if (res != AVCDEC_SUCCESS) {
				mutex_lock(APP_MUTEX_NUM);
				printf("PVAVCDecSeqParamSet returned error %d\n", res);
				mutex_unlock(APP_MUTEX_NUM);
				return -1;
			}
			break;
		}

		case AVC_NALTYPE_PPS:
		{
			*mPPSSeen = 1;
			res = PVAVCDecPicParamSet(
					mHandle, (uint8_t *)(NALUnit),
					NALSize);

			if (res != AVCDEC_SUCCESS) {
				mutex_lock(APP_MUTEX_NUM);
				printf("PVAVCDecPicParamSet returned error %d\n", res);
				mutex_unlock(APP_MUTEX_NUM);
				return -1;
			}
			break;
		}

		case AVC_NALTYPE_SLICE:
		case AVC_NALTYPE_IDR:
		{
			res = PVAVCDecodeSlice(
					mHandle, (uint8_t *)(NALUnit),
					NALSize);

			if (res == AVCDEC_PICTURE_OUTPUT_READY) {
				/* Do _not_ release input buffer yet. */
				return 1;
			}

			if (res != AVCDEC_PICTURE_READY && res != AVCDEC_SUCCESS) {
				mutex_lock(APP_MUTEX_NUM);
				printf("PVAVCDecodeSlice returned error %d\n", res);
				mutex_unlock(APP_MUTEX_NUM);
				return -1;
			}
			break;
		}

		/* These types are accepted but nothing is decoded for them. (No
		 * decoder call is made for SEI, so there is no status to check.) */
		case AVC_NALTYPE_SEI:
		case AVC_NALTYPE_AUD:
		case AVC_NALTYPE_FILL:
		case AVC_NALTYPE_EOSEQ:
		{
			break;
		}

		default:
		{
			/* Unknown / unsupported NAL type. */
			return -1;
		}
	}

	return 0;	/* ok, but no data */
}

/*
 * Row-decoding loop for one worker core.
 *
 * arg  core id, 1..3. Core k handles the rows whose mutexes are
 *      ROW_MUTEX_BASE+(k-1)+3*i for i = 0 .. ROW_MUTEX_LENGTH/3-1, and uses
 *      ROW_MUTEX_BASE+ROW_MUTEX_LENGTH+(k-1) as the mutex it waits on
 *      before starting over.
 *
 * For each of its rows the core locks the row mutex, decodes every
 * macroblock of the row (waiting for the neighbours reported by
 * checkNeighborMB() first), marks each finished macroblock in the flag
 * array at PBR_MB_FLAG_ADDR, and unlocks the row mutex. After the last
 * picture row it sets the flag at BR_FLAG_ADDR to 1.
 *
 * Does not return for a valid core id. Returns -1 immediately when arg is
 * not 1, 2 or 3 (previously the loop ran on an uninitialised mutex table).
 */
int thr_decode_one_row(int arg) {

	int row_mutex_num[(ROW_MUTEX_LENGTH/3)+1];
	int mutex_count = 0;
	int i;

	if (arg < 1 || arg > 3) {
		mutex_lock(APP_MUTEX_NUM);
		printf("wrong core id !! \n");
		mutex_unlock(APP_MUTEX_NUM);
		return -1;
	}

	/* Every third row, starting at row (arg-1), belongs to this core. */
	for(i=0;i<(ROW_MUTEX_LENGTH/3);i++)
		row_mutex_num[i] = ROW_MUTEX_BASE+(arg-1)+3*i;
	/* Last slot: the mutex this core waits on before restarting. */
	row_mutex_num[(ROW_MUTEX_LENGTH/3)] = ROW_MUTEX_BASE+ROW_MUTEX_LENGTH+(arg-1);

	while(true){

		/* lock row */
		mutex_lock(row_mutex_num[mutex_count]);

		/* read new info. to decode */
		AVCHandle *tmp_mHandle = mHandle;
		microblaze_invalidate_dcache_range(tmp_mHandle,sizeof(AVCHandle));

		AVCDecObject *decvid = (AVCDecObject*) tmp_mHandle->AVCObject;
		AVCCommonObj *video = decvid->common;
		uint CurrMbAddr;
		AVCMacroblock *currMB ;

		/* first MB of this row: row index * MBs per row */
		CurrMbAddr = (row_mutex_num[mutex_count]-ROW_MUTEX_BASE) * video->PicWidthInMbs;

		int mbNum = (int)video->PicWidthInMbs;

		/* Scratch buffers handed to each MB of the row through its
		 * intra_pred_left* pointers. */
		uint8 tmp_intra_pred_left[17];
		uint8 tmp_intra_pred_left_cb[9];
		uint8 tmp_intra_pred_left_cr[9];

		/* NOTE(review): video->PicWidthInMbs is read above, before this
		 * invalidate - confirm the ordering is intentional. */
		microblaze_invalidate_dcache_range(video,sizeof(AVCCommonObj));

		do{

			currMB = &(video->mblock[CurrMbAddr]);
			/* new , 2014/3/3 */
			microblaze_invalidate_dcache_range(currMB,sizeof(AVCMacroblock));

			/* syn. part: wait until the neighbouring MBs this one depends
			 * on have been flagged as decoded */
			int mbAddrB, mbAddrC;
			uint mbAvailB, mbAvailC;

			mbAddrB = currMB->mbAddrB;
			mbAddrC = currMB->mbAddrC;

			mbAvailB = currMB->mbAvailB;
			mbAvailC = currMB->mbAvailC;

			int checkNeighbor = checkNeighborMB(mbAddrB,mbAddrC,mbAvailB,mbAvailC);

			/* Poll, sleeping after every re-check (including the one that
			 * finally succeeds). */
			while( !checkNeighbor  ){
				checkNeighbor = checkNeighborMB(mbAddrB,mbAddrC,mbAvailB,mbAvailC);
				smp_sleep(100);	// ori. value is 100
			}

			currMB->intra_pred_left = tmp_intra_pred_left;
			currMB->intra_pred_left_cb = tmp_intra_pred_left_cb;
			currMB->intra_pred_left_cr = tmp_intra_pred_left_cr;

			/* prediction: skipped and inter MBs use inter prediction,
			 * I4/I16 MBs use intra prediction */
			if(currMB->mbMode == AVC_SKIP ){
				InterMBPrediction(video,currMB);
				currMB->mb_skip_run--;
			}
			else if (currMB->mbMode == AVC_I4 || currMB->mbMode == AVC_I16)
			{
				IntraMBPrediction(video,currMB);
			}
			else
			{
				InterMBPrediction(video,currMB);
			}

#ifdef MB_BASED_DEBLOCK

			MBInLoopDeblock(video,currMB); /* MB-based deblocking */

#endif

			mbNum--;

			/* syn. part: publish "this MB is decoded" for waiting cores */
			uint8 *pbr;
			pbr = (void*)PBR_MB_FLAG_ADDR;
			pbr[CurrMbAddr] = 1;

			CurrMbAddr++;

		}
		while ( mbNum > 0 ); /* even if no more data, but last few MBs are skipped */

		/* row finished, back to waiting */
		int* slice_end_flag;
		slice_end_flag = (void*)BR_FLAG_ADDR;

		mutex_unlock(row_mutex_num[mutex_count]);	//finish

		/* The last picture row was just decoded: raise the slice-end flag. */
		if( (video->PicHeightInMbs-1) == (row_mutex_num[mutex_count]-ROW_MUTEX_BASE)){
			*slice_end_flag = 1;
		}

		mutex_count++;
		if(mutex_count == (ROW_MUTEX_LENGTH/3)){	// end , wait for reset

			/* Block on this core's extra mutex, then start again from the
			 * core's first row. */
			mutex_lock(row_mutex_num[mutex_count]);
			mutex_unlock(row_mutex_num[mutex_count]);
			mutex_count = 0;

		}

	}

	return 0;
}

int main_thread_one_row(int arg) {


	int row_mutex_num[(ROW_MUTEX_LENGTH/3)+1];
	int mutex_count = 0;
	int i;


	for(i=0;i<(ROW_MUTEX_LENGTH/3);i++)
		row_mutex_num[i] = ROW_MUTEX_BASE+2+3*i;


	/* lock for end */

	while(true){

		/* lock row */
		mutex_lock(row_mutex_num[mutex_count]);

		/* read new info. to decode */
//		microblaze_invalidate_dcache();

		AVCHandle *tmp_mHandle = mHandle;

		AVCDecObject *decvid = (AVCDecObject*) tmp_mHandle->AVCObject;
		AVCCommonObj *video = decvid->common;
		uint CurrMbAddr;
		AVCMacroblock *currMB ;


		// initialize first MB location
		CurrMbAddr = (row_mutex_num[mutex_count]-ROW_MUTEX_BASE) * video->PicWidthInMbs;
		//end


		int mbNum = (int)video->PicWidthInMbs;



		uint8 tmp_intra_pred_left[17];
		uint8 tmp_intra_pred_left_cb[9];
		uint8 tmp_intra_pred_left_cr[9];

		microblaze_invalidate_dcache_range(video,sizeof(AVCCommonObj));


		do{


			currMB = &(video->mblock[CurrMbAddr]);
			/* new , 2014/3/3 */
			microblaze_invalidate_dcache_range(currMB,sizeof(AVCMacroblock));


			/* syn. part */

			int mbAddrB, mbAddrC;
			uint mbAvailB, mbAvailC;


			mbAddrB = currMB->mbAddrB;
			mbAddrC = currMB->mbAddrC;

			mbAvailB = currMB->mbAvailB;
			mbAvailC = currMB->mbAvailC;

			volatile int checkNeighbor = checkNeighborMB(mbAddrB,mbAddrC,mbAvailB,mbAvailC);

			while( !checkNeighbor  ){
				checkNeighbor = checkNeighborMB(mbAddrB,mbAddrC,mbAvailB,mbAvailC);
				smp_sleep(100);
			}


			/* syn. end */

			currMB->intra_pred_left = tmp_intra_pred_left;
			currMB->intra_pred_left_cb = tmp_intra_pred_left_cb;
			currMB->intra_pred_left_cr = tmp_intra_pred_left_cr;


			/* read_macroblock and decode_one_macroblock() */


			if(currMB->mbMode == AVC_SKIP ){
				InterMBPrediction(video,currMB);
				currMB->mb_skip_run--;
			}
			else if (currMB->mbMode == AVC_I4 || currMB->mbMode == AVC_I16)
			{
				IntraMBPrediction(video,currMB);
			}
			else
			{
				InterMBPrediction(video,currMB);
			}





#ifdef MB_BASED_DEBLOCK

			MBInLoopDeblock(video,currMB); /* MB-based deblocking */
#endif

			mbNum--;

			/* syn. part */
			uint8 *pbr;
			pbr = (void*)PBR_MB_FLAG_ADDR;
			pbr[CurrMbAddr] = 1;

			/* end */

			//Current MB be decoded , enable flag
			CurrMbAddr++;

		}
		while ( mbNum > 0 ); /* even if no more data, but last few MBs are skipped */

		// finish , back to waiting

		int* slice_end_flag;
		slice_end_flag = (void*)BR_FLAG_ADDR;

		mutex_unlock(row_mutex_num[mutex_count]);	//finish

		if( (video->PicHeightInMbs-1) == (row_mutex_num[mutex_count]-ROW_MUTEX_BASE)){
			*slice_end_flag = 1;
		}


		mutex_count++;
		if(mutex_count == (ROW_MUTEX_LENGTH/3)){	// end , wait for reset
			mutex_count = 0;
			break;
		}
	}

	return 0;
}


/*
 * Report whether the macroblock a caller is waiting on has been flagged in
 * the byte array at PBR_MB_FLAG_ADDR.
 *
 *   mbAvailB == 0               -> nothing to wait for, returns true
 *   mbAvailB != 0, mbAvailC != 0 -> returns true iff flag[mbAddrC] is non-zero
 *   mbAvailB != 0, mbAvailC == 0 -> returns true iff flag[mbAddrB] is non-zero
 *
 * The flag is read through a volatile pointer on every call.
 */
int checkNeighborMB(int mbAddrB,int mbAddrC,uint mbAvailB,uint mbAvailC){

	volatile uint8 *doneFlags = (void*)PBR_MB_FLAG_ADDR;

	if (!mbAvailB) {
		return true;
	}

	/* Wait on C when it exists, otherwise on B. */
	const int watched = mbAvailC ? mbAddrC : mbAddrB;

	return doneFlags[watched] ? true : false;
}


