	.version 2.2
	.target sm_20
	// compiled with ../../../External/3rdParty/NVIDIA/CUDA/win/bin/../open64/lib//be.exe
	// nvopencc 3.2 built on 2010-11-04

	.visible .func (.param .s32 __cudaretf__Z15IntegerMultiplyii) _Z15IntegerMultiplyii (.param .s32 __cudaparmf1__Z15IntegerMultiplyii, .param .s32 __cudaparmf2__Z15IntegerMultiplyii)

	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelXv) _Z17Standard2DKernelXv ()

	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelYv) _Z17Standard2DKernelYv ()

	.visible .func (.param .align 16 .b8 __cudaretf__Z13Half4ToFloat47ushort4[16]) _Z13Half4ToFloat47ushort4 (.param .align 8 .b8 __cudaparmf1__Z13Half4ToFloat47ushort4[8])

	.visible .func (.param .align 8 .b8 __cudaretf__Z13Float4ToHalf46float4[8]) _Z13Float4ToHalf46float4 (.param .align 16 .b8 __cudaparmf1__Z13Float4ToHalf46float4[16])

	.visible .func (.param .u32 __cudaretf__Z4Mix3RjS_S_) _Z4Mix3RjS_S_ (.param .u64 __cudaparmf1__Z4Mix3RjS_S_, .param .u64 __cudaparmf2__Z4Mix3RjS_S_, .param .u64 __cudaparmf3__Z4Mix3RjS_S_)

	.visible .func (.param .s32 __cudaretf__Z4Randj) _Z4Randj (.param .u32 __cudaparmf1__Z4Randj)

	.visible .func (.param .s32 __cudaretf__Z6Rand2Djjj) _Z6Rand2Djjj (.param .u32 __cudaparmf1__Z6Rand2Djjj, .param .u32 __cudaparmf2__Z6Rand2Djjj, .param .u32 __cudaparmf3__Z6Rand2Djjj)

	.visible .func (.param .s32 __cudaretf__Z6Rand2Dj) _Z6Rand2Dj (.param .u32 __cudaparmf1__Z6Rand2Dj)

	.visible .func (.param .align 8 .b8 __cudaretf__Z6Read2DI7ushort4ET_PKS1_iii[8]) _Z6Read2DI7ushort4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI7ushort4ET_PKS1_iii)

	.visible .func (.param .align 16 .b8 __cudaretf__Z6Read2DI6float4ET_PKS1_iii[16]) _Z6Read2DI6float4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI6float4ET_PKS1_iii)

	.visible .func _Z7Write2DI7ushort4EvT_PS1_iii (.param .align 8 .b8 __cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii[8], .param .u64 __cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii)

	.visible .func _Z7Write2DI6float4EvT_PS1_iii (.param .align 16 .b8 __cudaparmf1__Z7Write2DI6float4EvT_PS1_iii[16], .param .u64 __cudaparmf2__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI6float4EvT_PS1_iii)

	.visible .func (.param .align 16 .b8 __cudaretf__Z18UnpremultiplyPixel8PixelRGB[16]) _Z18UnpremultiplyPixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z18UnpremultiplyPixel8PixelRGB[16])

	.visible .func (.param .f32 __cudaretf__Z13ToLinearColorf) _Z13ToLinearColorf (.param .f32 __cudaparmf1__Z13ToLinearColorf)

	.visible .func (.param .f32 __cudaretf__Z15FromLinearColorf) _Z15FromLinearColorf (.param .f32 __cudaparmf1__Z15FromLinearColorf)

	.visible .func (.param .align 16 .b8 __cudaretf__Z25PremultiplyLinearizePixel8PixelRGB[16]) _Z25PremultiplyLinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16]) _Z29UnpremultiplyUnlinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z20PremultiplyLinearize6float4[16]) _Z20PremultiplyLinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z20PremultiplyLinearize6float4[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z24UnpremultiplyUnlinearize6float4[16]) _Z24UnpremultiplyUnlinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z24UnpremultiplyUnlinearize6float4[16])

	.visible .func _Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters (.param .u64 __cudaparmf1__Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters, .param .u64 __cudaparmf2__Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters, .param .u64 __cudaparmf3__Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters, .param .s32 __cudaparmf4__Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters, .param .s32 __cudaparmf5__Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters, .param .u64 __cudaparmf6__Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters)

	.visible .func (.param .align 16 .b8 __cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_[16]) _Z18SwapComponentOrderI6float4ET_RKS1_ (.param .u64 __cudaparmf1__Z18SwapComponentOrderI6float4ET_RKS1_)

	.visible .func _Z13BlackAndWhiteR8PixelRGBRiS1_ii (.param .u64 __cudaparmf1__Z13BlackAndWhiteR8PixelRGBRiS1_ii, .param .u64 __cudaparmf2__Z13BlackAndWhiteR8PixelRGBRiS1_ii, .param .u64 __cudaparmf3__Z13BlackAndWhiteR8PixelRGBRiS1_ii, .param .s32 __cudaparmf4__Z13BlackAndWhiteR8PixelRGBRiS1_ii, .param .s32 __cudaparmf5__Z13BlackAndWhiteR8PixelRGBRiS1_ii)

	.visible .func _Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters (.param .u64 __cudaparmf1__Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters, .param .u64 __cudaparmf2__Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters, .param .u64 __cudaparmf3__Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters, .param .s32 __cudaparmf4__Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters, .param .s32 __cudaparmf5__Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters, .param .u64 __cudaparmf6__Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters)

	.visible .func _Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters (.param .u64 __cudaparmf1__Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters, .param .u64 __cudaparmf2__Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters, .param .u64 __cudaparmf3__Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters, .param .s32 __cudaparmf4__Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters, .param .s32 __cudaparmf5__Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters, .param .u64 __cudaparmf6__Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters)

	.visible .func (.param .align 16 .b8 __cudaretf__Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_[16]) _Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_ (.param .u64 __cudaparmf1__Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_, .param .u64 __cudaparmf2__Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_)

	.visible .func _Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters (.param .u64 __cudaparmf1__Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters, .param .u64 __cudaparmf2__Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters, .param .u64 __cudaparmf3__Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters, .param .s32 __cudaparmf4__Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters, .param .s32 __cudaparmf5__Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters, .param .u64 __cudaparmf6__Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters)

	.visible .func (.param .f32 __cudaretf__Z12MaxComponentI8PixelRGBEfRKT_) _Z12MaxComponentI8PixelRGBEfRKT_ (.param .u64 __cudaparmf1__Z12MaxComponentI8PixelRGBEfRKT_)

	.visible .func (.param .align 16 .b8 __cudaretf__Z13AbsComponentsI8PixelRGBET_RKS1_[16]) _Z13AbsComponentsI8PixelRGBET_RKS1_ (.param .u64 __cudaparmf1__Z13AbsComponentsI8PixelRGBET_RKS1_)

	.visible .func (.param .align 16 .b8 __cudaretf__Z15ClampComponentsI8PixelRGBET_RKS1_ff[16]) _Z15ClampComponentsI8PixelRGBET_RKS1_ff (.param .u64 __cudaparmf1__Z15ClampComponentsI8PixelRGBET_RKS1_ff, .param .f32 __cudaparmf2__Z15ClampComponentsI8PixelRGBET_RKS1_ff, .param .f32 __cudaparmf3__Z15ClampComponentsI8PixelRGBET_RKS1_ff)

	.visible .func (.param .f32 __cudaretf__Z5ClampIfET_S0_S0_S0_) _Z5ClampIfET_S0_S0_S0_ (.param .f32 __cudaparmf1__Z5ClampIfET_S0_S0_S0_, .param .f32 __cudaparmf2__Z5ClampIfET_S0_S0_S0_, .param .f32 __cudaparmf3__Z5ClampIfET_S0_S0_S0_)

	.visible .func _Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters (.param .u64 __cudaparmf1__Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters, .param .u64 __cudaparmf2__Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters, .param .u64 __cudaparmf3__Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters, .param .s32 __cudaparmf4__Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters, .param .s32 __cudaparmf5__Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters, .param .u64 __cudaparmf6__Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters)

	.visible .func _Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters (.param .u64 __cudaparmf1__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters, .param .u64 __cudaparmf2__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters, .param .u64 __cudaparmf3__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters, .param .s32 __cudaparmf4__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters, .param .s32 __cudaparmf5__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters, .param .u64 __cudaparmf6__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters)

	.visible .func (.param .s32 __cudaretf__Z3MaxIiET_S0_S0_S0_S0_) _Z3MaxIiET_S0_S0_S0_S0_ (.param .s32 __cudaparmf1__Z3MaxIiET_S0_S0_S0_S0_, .param .s32 __cudaparmf2__Z3MaxIiET_S0_S0_S0_S0_, .param .s32 __cudaparmf3__Z3MaxIiET_S0_S0_S0_S0_, .param .s32 __cudaparmf4__Z3MaxIiET_S0_S0_S0_S0_)

	.visible .func _Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters (.param .u64 __cudaparmf1__Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters, .param .u64 __cudaparmf2__Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters, .param .u64 __cudaparmf3__Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters, .param .s32 __cudaparmf4__Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters, .param .s32 __cudaparmf5__Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters, .param .u64 __cudaparmf6__Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters)

	.visible .func _Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters (.param .u64 __cudaparmf1__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf2__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf3__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .s32 __cudaparmf4__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .s32 __cudaparmf5__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf6__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters)

	.visible .func (.param .f32 __cudaretf__Z4LERPIfET_S0_S0_S0_) _Z4LERPIfET_S0_S0_S0_ (.param .f32 __cudaparmf1__Z4LERPIfET_S0_S0_S0_, .param .f32 __cudaparmf2__Z4LERPIfET_S0_S0_S0_, .param .f32 __cudaparmf3__Z4LERPIfET_S0_S0_S0_)

	.visible .func _Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters (.param .u64 __cudaparmf1__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf2__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf3__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .s32 __cudaparmf4__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .s32 __cudaparmf5__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf6__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters)

	.visible .func _Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters (.param .u64 __cudaparmf1__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf2__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf3__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .s32 __cudaparmf4__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .s32 __cudaparmf5__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf6__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters)

	.visible .func _Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters (.param .u64 __cudaparmf1__Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters, .param .u64 __cudaparmf2__Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters, .param .u64 __cudaparmf3__Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters, .param .s32 __cudaparmf4__Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters, .param .s32 __cudaparmf5__Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters, .param .u64 __cudaparmf6__Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters)

	.visible .func (.param .align 16 .b8 __cudaretf__Z5PowerI8PixelRGBET_RKS1_f[16]) _Z5PowerI8PixelRGBET_RKS1_f (.param .u64 __cudaparmf1__Z5PowerI8PixelRGBET_RKS1_f, .param .f32 __cudaparmf2__Z5PowerI8PixelRGBET_RKS1_f)

	.visible .func _Z14HorizontalFlipR8PixelRGBRiS1_ii (.param .u64 __cudaparmf1__Z14HorizontalFlipR8PixelRGBRiS1_ii, .param .u64 __cudaparmf2__Z14HorizontalFlipR8PixelRGBRiS1_ii, .param .u64 __cudaparmf3__Z14HorizontalFlipR8PixelRGBRiS1_ii, .param .s32 __cudaparmf4__Z14HorizontalFlipR8PixelRGBRiS1_ii, .param .s32 __cudaparmf5__Z14HorizontalFlipR8PixelRGBRiS1_ii)

	.visible .func _Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters (.param .u64 __cudaparmf1__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters, .param .u64 __cudaparmf2__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters, .param .u64 __cudaparmf3__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters, .param .s32 __cudaparmf4__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters, .param .s32 __cudaparmf5__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters, .param .u64 __cudaparmf6__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters)

	.visible .func _Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters (.param .u64 __cudaparmf1__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters, .param .u64 __cudaparmf2__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters, .param .u64 __cudaparmf3__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters, .param .s32 __cudaparmf4__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters, .param .s32 __cudaparmf5__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters, .param .u64 __cudaparmf6__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters)

	.visible .func _Z4TintR8PixelRGBRiS1_iiRK14TintParameters (.param .u64 __cudaparmf1__Z4TintR8PixelRGBRiS1_iiRK14TintParameters, .param .u64 __cudaparmf2__Z4TintR8PixelRGBRiS1_iiRK14TintParameters, .param .u64 __cudaparmf3__Z4TintR8PixelRGBRiS1_iiRK14TintParameters, .param .s32 __cudaparmf4__Z4TintR8PixelRGBRiS1_iiRK14TintParameters, .param .s32 __cudaparmf5__Z4TintR8PixelRGBRiS1_iiRK14TintParameters, .param .u64 __cudaparmf6__Z4TintR8PixelRGBRiS1_iiRK14TintParameters)

	.visible .func _Z12VerticalFlipR8PixelRGBRiS1_ii (.param .u64 __cudaparmf1__Z12VerticalFlipR8PixelRGBRiS1_ii, .param .u64 __cudaparmf2__Z12VerticalFlipR8PixelRGBRiS1_ii, .param .u64 __cudaparmf3__Z12VerticalFlipR8PixelRGBRiS1_ii, .param .s32 __cudaparmf4__Z12VerticalFlipR8PixelRGBRiS1_ii, .param .s32 __cudaparmf5__Z12VerticalFlipR8PixelRGBRiS1_ii)

	.visible .func _Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter (.param .u64 __cudaparmf1__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter, .param .u64 __cudaparmf2__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter, .param .u64 __cudaparmf3__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter, .param .s32 __cudaparmf4__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter, .param .s32 __cudaparmf5__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter, .param .u64 __cudaparmf6__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter)

	//-----------------------------------------------------------
	// Compiling C:/Users/dvaeng/AppData/Local/Temp/tmpxft_00003058_00000000-11_PointwiseFilterHost.cpp3.i (C:/Users/dvaeng/AppData/Local/Temp/ccBI#.a15404)
	//-----------------------------------------------------------

	//-----------------------------------------------------------
	// Options:
	//-----------------------------------------------------------
	//  Target:ptx, ISA:sm_20, Endian:little, Pointer Size:64
	//  -O3	(Optimization level)
	//  -g0	(Debug level)
	//  -m2	(Report advisories)
	//-----------------------------------------------------------

	.file	1	"C:/Users/dvaeng/AppData/Local/Temp/tmpxft_00003058_00000000-10_PointwiseFilterHost.cudafe2.gpu"
	.file	2	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/PointwiseFilterHost.h"
	.file	3	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/PixelFormat.h"
	.file	4	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/PixelRGB.h"
	.file	5	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/PixelYUV.h"
	.file	6	"C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\include\crtdefs.h"
	.file	7	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\crt/device_runtime.h"
	.file	8	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\host_defines.h"
	.file	9	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\builtin_types.h"
	.file	10	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\device_types.h"
	.file	11	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\driver_types.h"
	.file	12	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\surface_types.h"
	.file	13	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\texture_types.h"
	.file	14	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\vector_types.h"
	.file	15	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\builtin_types.h"
	.file	16	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\host_defines.h"
	.file	17	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\device_launch_parameters.h"
	.file	18	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\crt\storage_class.h"
	.file	19	"C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\include\time.h"
	.file	20	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/Utils.h"
	.file	21	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/AlphaAdjust.h"
	.file	22	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/VectorUtils.h"
	.file	23	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/BlackAndWhite.h"
	.file	24	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/BrightnessAndContrast.h"
	.file	25	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/ColorBalanceRGB.h"
	.file	26	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/ColorPass.h"
	.file	27	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/ColorReplace.h"
	.file	28	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/EdgeFeather.h"
	.file	29	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/Numeric.h"
	.file	30	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/Extract.h"
	.file	31	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/FastColorCorrector.h"
	.file	32	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/GammaCorrection.h"
	.file	33	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/HorizontalFlip.h"
	.file	34	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/Noise.h"
	.file	35	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/ProcAmp.h"
	.file	36	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/Tint.h"
	.file	37	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/VerticalFlip.h"
	.file	38	"c:/Mulder64/shared/adobe/MediaCore/Display/Src/CUDA/Effects/PointwiseFilterHost.cu"
	.file	39	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\common_functions.h"
	.file	40	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_functions.h"
	.file	41	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_constants.h"
	.file	42	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\device_functions.h"
	.file	43	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_11_atomic_functions.h"
	.file	44	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_12_atomic_functions.h"
	.file	45	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_13_double_functions.h"
	.file	46	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_20_atomic_functions.h"
	.file	47	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_20_intrinsics.h"
	.file	48	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\surface_functions.h"
	.file	49	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\texture_fetch_functions.h"
	.file	50	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_functions_dbl_ptx3.h"
	.file	51	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/ColorSpaceConvert.h"


	.visible .func (.param .s32 __cudaretf__Z15IntegerMultiplyii) _Z15IntegerMultiplyii (.param .s32 __cudaparmf1__Z15IntegerMultiplyii, .param .s32 __cudaparmf2__Z15IntegerMultiplyii)
	{
	// _Z15IntegerMultiplyii  (mangled name decodes to IntegerMultiply(int, int))
	//   in : __cudaparmf1 (.s32), __cudaparmf2 (.s32)
	//   out: __cudaretf   (.s32) = low 32 bits of parm1 * parm2
	.reg .u32 %lhs, %rhs, %prod;
	.loc	20	60	0
$LDWbegin__Z15IntegerMultiplyii:
	ld.param.u32 	%lhs, [__cudaparmf1__Z15IntegerMultiplyii];
	ld.param.u32 	%rhs, [__cudaparmf2__Z15IntegerMultiplyii];
	.loc	20	64	0
	// mul.lo discards the upper half of the 64-bit product, so overflow wraps.
	mul.lo.s32 	%prod, %lhs, %rhs;
	st.param.s32 	[__cudaretf__Z15IntegerMultiplyii], %prod;
	ret;
$LDWend__Z15IntegerMultiplyii:
	} // _Z15IntegerMultiplyii

	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelXv) _Z17Standard2DKernelXv ()
	{
	// _Z17Standard2DKernelXv  (mangled name decodes to Standard2DKernelX())
	//   out: __cudaretf (.s32) = %ctaid.x * %ntid.x + %tid.x, truncated to 32 bits
	.reg .u32 %block, %width, %lane, %index;
	.loc	20	73	0
$LDWbegin__Z17Standard2DKernelXv:
	.loc	20	74	0
	mov.u32 	%block, %ctaid.x;
	mov.u32 	%width, %ntid.x;
	mov.u32 	%lane, %tid.x;
	// Single multiply-add; the low 32 bits match a separate mul.lo followed by add.
	mad.lo.s32 	%index, %block, %width, %lane;
	st.param.s32 	[__cudaretf__Z17Standard2DKernelXv], %index;
	ret;
$LDWend__Z17Standard2DKernelXv:
	} // _Z17Standard2DKernelXv

	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelYv) _Z17Standard2DKernelYv ()
	{
	// _Z17Standard2DKernelYv  (mangled name decodes to Standard2DKernelY())
	//   out: __cudaretf (.s32) = %ctaid.y * %ntid.y + %tid.y, truncated to 32 bits
	.reg .u32 %block, %height, %lane, %index;
	.loc	20	77	0
$LDWbegin__Z17Standard2DKernelYv:
	.loc	20	78	0
	mov.u32 	%block, %ctaid.y;
	mov.u32 	%height, %ntid.y;
	mov.u32 	%lane, %tid.y;
	// Single multiply-add; the low 32 bits match a separate mul.lo followed by add.
	mad.lo.s32 	%index, %block, %height, %lane;
	st.param.s32 	[__cudaretf__Z17Standard2DKernelYv], %index;
	ret;
$LDWend__Z17Standard2DKernelYv:
	} // _Z17Standard2DKernelYv

	.visible .func (.param .align 16 .b8 __cudaretf__Z13Half4ToFloat47ushort4[16]) _Z13Half4ToFloat47ushort4 (.param .align 8 .b8 __cudaparmf1__Z13Half4ToFloat47ushort4[8])
	{
	// _Z13Half4ToFloat47ushort4  (mangled name decodes to Half4ToFloat4(ushort4))
	//   in : __cudaparmf1, 8 bytes  = four 16-bit words at offsets 0, 2, 4, 6
	//   out: __cudaretf,   16 bytes = four .f32 at offsets 0, 4, 8, 12
	// Each 16-bit word is interpreted as an f16 bit pattern and widened to f32;
	// input word i produces output element i.
	.reg .b16 %half<4>;
	.reg .f32 %wide<4>;
	.loc	20	86	0
$LDWbegin__Z13Half4ToFloat47ushort4:
	// The parameter is declared .align 8, so all four words can be fetched at once.
	ld.param.v4.u16 	{%half0, %half1, %half2, %half3}, [__cudaparmf1__Z13Half4ToFloat47ushort4+0];
	.loc	20	87	0
	cvt.ftz.f32.f16 	%wide0, %half0;
	cvt.ftz.f32.f16 	%wide1, %half1;
	cvt.ftz.f32.f16 	%wide2, %half2;
	cvt.ftz.f32.f16 	%wide3, %half3;
	// The return slot is declared .align 16, so one 128-bit store covers it.
	st.param.v4.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+0], {%wide0, %wide1, %wide2, %wide3};
	ret;
$LDWend__Z13Half4ToFloat47ushort4:
	} // _Z13Half4ToFloat47ushort4

	.visible .func (.param .align 8 .b8 __cudaretf__Z13Float4ToHalf46float4[8]) _Z13Float4ToHalf46float4 (.param .align 16 .b8 __cudaparmf1__Z13Float4ToHalf46float4[16])
	{
	// _Z13Float4ToHalf46float4  (mangled name decodes to Float4ToHalf4(float4))
	//   in : __cudaparmf1, 16 bytes = four .f32 at offsets 0, 4, 8, 12
	//   out: __cudaretf,   8 bytes  = four 16-bit words at offsets 0, 2, 4, 6
	// Each float is narrowed to an f16 bit pattern with round-to-nearest-even
	// (.rn) and flush-to-zero (.ftz); input element i produces output word i.
	.reg .f32 %wide<4>;
	.reg .b16 %half<4>;
	.loc	20	95	0
$LDWbegin__Z13Float4ToHalf46float4:
	// The parameter is declared .align 16, so one 128-bit load fetches all four floats.
	ld.param.v4.f32 	{%wide0, %wide1, %wide2, %wide3}, [__cudaparmf1__Z13Float4ToHalf46float4+0];
	.loc	20	96	0
	cvt.rn.ftz.f16.f32 	%half0, %wide0;
	cvt.rn.ftz.f16.f32 	%half1, %wide1;
	cvt.rn.ftz.f16.f32 	%half2, %wide2;
	cvt.rn.ftz.f16.f32 	%half3, %wide3;
	// The return slot is declared .align 8, so one 64-bit store covers it.
	st.param.v4.u16 	[__cudaretf__Z13Float4ToHalf46float4+0], {%half0, %half1, %half2, %half3};
	ret;
$LDWend__Z13Float4ToHalf46float4:
	} // _Z13Float4ToHalf46float4

	// _Z4Mix3RjS_S_  (mangled name decodes to Mix3(unsigned&, unsigned&, unsigned&))
	//
	// Receives three 64-bit addresses, called a, b and c in the comments below
	// (parm1, parm2, parm3).  Each addresses a 32-bit word.  The function runs
	// nine rounds of "subtract, subtract, xor with a shifted word", updating
	// the three words in place through generic-address ld/st, and returns the
	// last value written to *c.
	//
	// Every partial result is stored back and every operand is re-loaded from
	// memory rather than kept in a register.
	// NOTE(review): presumably this is because the three references may alias;
	// do not fold or reorder these loads/stores without confirming with callers.
	// NOTE(review): the shift schedule 13,8,13,12,16,5,3,10,15 looks like
	// Bob Jenkins' 96-bit mix - confirm against the source in Utils.h.
	.visible .func (.param .u32 __cudaretf__Z4Mix3RjS_S_) _Z4Mix3RjS_S_ (.param .u64 __cudaparmf1__Z4Mix3RjS_S_, .param .u64 __cudaparmf2__Z4Mix3RjS_S_, .param .u64 __cudaparmf3__Z4Mix3RjS_S_)
	{
	.reg .u32 %r<75>;
	.reg .u64 %rd<8>;
	.loc	20	138	0
$LDWbegin__Z4Mix3RjS_S_:
	// %rd2 = address a, %rd4 = address b, %rd6 = address c
	ld.param.u64 	%rd1, [__cudaparmf1__Z4Mix3RjS_S_];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf2__Z4Mix3RjS_S_];
	mov.s64 	%rd4, %rd3;
	ld.param.u64 	%rd5, [__cudaparmf3__Z4Mix3RjS_S_];
	mov.s64 	%rd6, %rd5;
	// Round 1:  *a -= *b;  *a -= *c;  *a ^= (*c >> 13);
	.loc	20	139	0
	ld.u32 	%r1, [%rd2+0];
	ld.u32 	%r2, [%rd4+0];
	sub.u32 	%r3, %r1, %r2;
	st.u32 	[%rd2+0], %r3;
	ld.u32 	%r4, [%rd6+0];
	sub.u32 	%r5, %r3, %r4;
	st.u32 	[%rd2+0], %r5;
	ld.u32 	%r6, [%rd6+0];
	shr.u32 	%r7, %r6, 13;
	xor.b32 	%r8, %r5, %r7;
	st.u32 	[%rd2+0], %r8;
	// Round 2:  *b -= *c;  *b -= *a;  *b ^= (*a << 8);
	.loc	20	140	0
	ld.u32 	%r9, [%rd4+0];
	ld.u32 	%r10, [%rd6+0];
	sub.u32 	%r11, %r9, %r10;
	st.u32 	[%rd4+0], %r11;
	ld.u32 	%r12, [%rd2+0];
	sub.u32 	%r13, %r11, %r12;
	st.u32 	[%rd4+0], %r13;
	ld.u32 	%r14, [%rd2+0];
	shl.b32 	%r15, %r14, 8;
	xor.b32 	%r16, %r13, %r15;
	st.u32 	[%rd4+0], %r16;
	// Round 3:  *c -= *a;  *c -= *b;  *c ^= (*b >> 13);
	.loc	20	141	0
	ld.u32 	%r17, [%rd6+0];
	ld.u32 	%r18, [%rd2+0];
	sub.u32 	%r19, %r17, %r18;
	st.u32 	[%rd6+0], %r19;
	ld.u32 	%r20, [%rd4+0];
	sub.u32 	%r21, %r19, %r20;
	st.u32 	[%rd6+0], %r21;
	ld.u32 	%r22, [%rd4+0];
	shr.u32 	%r23, %r22, 13;
	xor.b32 	%r24, %r21, %r23;
	st.u32 	[%rd6+0], %r24;
	// Round 4:  *a -= *b;  *a -= *c;  *a ^= (*c >> 12);
	.loc	20	142	0
	ld.u32 	%r25, [%rd2+0];
	ld.u32 	%r26, [%rd4+0];
	sub.u32 	%r27, %r25, %r26;
	st.u32 	[%rd2+0], %r27;
	ld.u32 	%r28, [%rd6+0];
	sub.u32 	%r29, %r27, %r28;
	st.u32 	[%rd2+0], %r29;
	ld.u32 	%r30, [%rd6+0];
	shr.u32 	%r31, %r30, 12;
	xor.b32 	%r32, %r29, %r31;
	st.u32 	[%rd2+0], %r32;
	// Round 5:  *b -= *c;  *b -= *a;  *b ^= (*a << 16);
	.loc	20	143	0
	ld.u32 	%r33, [%rd4+0];
	ld.u32 	%r34, [%rd6+0];
	sub.u32 	%r35, %r33, %r34;
	st.u32 	[%rd4+0], %r35;
	ld.u32 	%r36, [%rd2+0];
	sub.u32 	%r37, %r35, %r36;
	st.u32 	[%rd4+0], %r37;
	ld.u32 	%r38, [%rd2+0];
	shl.b32 	%r39, %r38, 16;
	xor.b32 	%r40, %r37, %r39;
	st.u32 	[%rd4+0], %r40;
	// Round 6:  *c -= *a;  *c -= *b;  *c ^= (*b >> 5);
	.loc	20	144	0
	ld.u32 	%r41, [%rd6+0];
	ld.u32 	%r42, [%rd2+0];
	sub.u32 	%r43, %r41, %r42;
	st.u32 	[%rd6+0], %r43;
	ld.u32 	%r44, [%rd4+0];
	sub.u32 	%r45, %r43, %r44;
	st.u32 	[%rd6+0], %r45;
	ld.u32 	%r46, [%rd4+0];
	shr.u32 	%r47, %r46, 5;
	xor.b32 	%r48, %r45, %r47;
	st.u32 	[%rd6+0], %r48;
	// Round 7:  *a -= *b;  *a -= *c;  *a ^= (*c >> 3);
	.loc	20	145	0
	ld.u32 	%r49, [%rd2+0];
	ld.u32 	%r50, [%rd4+0];
	sub.u32 	%r51, %r49, %r50;
	st.u32 	[%rd2+0], %r51;
	ld.u32 	%r52, [%rd6+0];
	sub.u32 	%r53, %r51, %r52;
	st.u32 	[%rd2+0], %r53;
	ld.u32 	%r54, [%rd6+0];
	shr.u32 	%r55, %r54, 3;
	xor.b32 	%r56, %r53, %r55;
	st.u32 	[%rd2+0], %r56;
	// Round 8:  *b -= *c;  *b -= *a;  *b ^= (*a << 10);
	.loc	20	146	0
	ld.u32 	%r57, [%rd4+0];
	ld.u32 	%r58, [%rd6+0];
	sub.u32 	%r59, %r57, %r58;
	st.u32 	[%rd4+0], %r59;
	ld.u32 	%r60, [%rd2+0];
	sub.u32 	%r61, %r59, %r60;
	st.u32 	[%rd4+0], %r61;
	ld.u32 	%r62, [%rd2+0];
	shl.b32 	%r63, %r62, 10;
	xor.b32 	%r64, %r61, %r63;
	st.u32 	[%rd4+0], %r64;
	// Round 9:  *c -= *a;  *c -= *b;  *c ^= (*b >> 15);
	.loc	20	147	0
	ld.u32 	%r65, [%rd6+0];
	ld.u32 	%r66, [%rd2+0];
	sub.u32 	%r67, %r65, %r66;
	st.u32 	[%rd6+0], %r67;
	ld.u32 	%r68, [%rd4+0];
	sub.u32 	%r69, %r67, %r68;
	st.u32 	[%rd6+0], %r69;
	ld.u32 	%r70, [%rd4+0];
	shr.u32 	%r71, %r70, 15;
	xor.b32 	%r72, %r69, %r71;
	st.u32 	[%rd6+0], %r72;
	// Return the value just stored to *c (taken from the register, not re-loaded).
	.loc	20	148	0
	mov.s32 	%r73, %r72;
	st.param.u32 	[__cudaretf__Z4Mix3RjS_S_], %r73;
	ret;
$LDWend__Z4Mix3RjS_S_:
	} // _Z4Mix3RjS_S_

	.visible .func (.param .s32 __cudaretf__Z4Randj) _Z4Randj (.param .u32 __cudaparmf1__Z4Randj)
	{
	// _Z4Randj  (mangled name decodes to Rand(unsigned))
	//   in : __cudaparmf1 (.u32) seed
	//   out: __cudaretf   (.s32) = (draw1 << 7) ^ draw2, a value in [0, 32767]
	//
	// draw1 = bits 16..23 of  seed * 1103515245 + 12345
	// draw2 = bits 16..23 of  seed * -1029531031 - 740551042
	// Modulo 2^32, -1029531031 equals 1103515245^2 and -740551042 equals
	// 12345 * (1103515245 + 1), so draw2 comes from applying the draw1 step
	// twice to the seed, folded into a single multiply/subtract.
	.reg .u32 %seed, %draw1, %draw2, %mixed;
	.loc	20	152	0
$LDWbegin__Z4Randj:
	ld.param.u32 	%seed, [__cudaparmf1__Z4Randj];
	.loc	20	163	0
	// First draw, moved up 7 bit positions.
	mul.lo.u32 	%draw1, %seed, 1103515245;
	add.u32 	%draw1, %draw1, 12345;
	shr.u32 	%draw1, %draw1, 16;
	and.b32 	%draw1, %draw1, 255;
	shl.b32 	%draw1, %draw1, 7;
	// Second draw, left in the low 8 bits.
	mul.lo.u32 	%draw2, %seed, -1029531031;
	sub.u32 	%draw2, %draw2, 740551042;
	shr.u32 	%draw2, %draw2, 16;
	and.b32 	%draw2, %draw2, 255;
	// The two fields overlap in bit 7, hence xor rather than or.
	xor.b32 	%mixed, %draw1, %draw2;
	st.param.s32 	[__cudaretf__Z4Randj], %mixed;
	ret;
$LDWend__Z4Randj:
	} // _Z4Randj

	.visible .func (.param .s32 __cudaretf__Z6Rand2Djjj) _Z6Rand2Djjj (.param .u32 __cudaparmf1__Z6Rand2Djjj, .param .u32 __cudaparmf2__Z6Rand2Djjj, .param .u32 __cudaparmf3__Z6Rand2Djjj)
	{
	// _Z6Rand2Djjj  (mangled name decodes to Rand2D(unsigned, unsigned, unsigned))
	//   in : parm1 -> %a, parm2 -> %b, parm3 -> %c   (all .u32)
	//   out: __cudaretf (.s32) in [0, 32767]
	//
	// Phase 1: nine subtract/subtract/xor-shift rounds over (%a, %b, %c), the
	//          same arithmetic _Z4Mix3RjS_S_ performs, here entirely in registers.
	// Phase 2: the final %c is reduced to 15 bits with the same arithmetic as
	//          _Z4Randj.
	.reg .u32 %a, %b, %c, %t;
	.reg .u32 %draw1, %draw2, %mixed;
	.loc	20	169	0
$LDWbegin__Z6Rand2Djjj:
	ld.param.u32 	%a, [__cudaparmf1__Z6Rand2Djjj];
	ld.param.u32 	%b, [__cudaparmf2__Z6Rand2Djjj];
	ld.param.u32 	%c, [__cudaparmf3__Z6Rand2Djjj];
	.loc	20	139	0
	// a = (a - b - c) ^ (c >> 13)
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%t, %c, 13;
	xor.b32 	%a, %a, %t;
	.loc	20	140	0
	// b = (b - c - a) ^ (a << 8)
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%t, %a, 8;
	xor.b32 	%b, %b, %t;
	.loc	20	141	0
	// c = (c - a - b) ^ (b >> 13)
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%t, %b, 13;
	xor.b32 	%c, %c, %t;
	.loc	20	142	0
	// a = (a - b - c) ^ (c >> 12)
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%t, %c, 12;
	xor.b32 	%a, %a, %t;
	.loc	20	143	0
	// b = (b - c - a) ^ (a << 16)
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%t, %a, 16;
	xor.b32 	%b, %b, %t;
	.loc	20	144	0
	// c = (c - a - b) ^ (b >> 5)
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%t, %b, 5;
	xor.b32 	%c, %c, %t;
	.loc	20	145	0
	// a = (a - b - c) ^ (c >> 3)
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%t, %c, 3;
	xor.b32 	%a, %a, %t;
	.loc	20	146	0
	// b = (b - c - a) ^ (a << 10)
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%t, %a, 10;
	xor.b32 	%b, %b, %t;
	.loc	20	147	0
	// c = (c - a - b) ^ (b >> 15)
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%t, %b, 15;
	xor.b32 	%c, %c, %t;
	.loc	20	170	0
	// Reduce %c: ((bits 16..23 of c*1103515245+12345) << 7) ^ (bits 16..23 of c*-1029531031-740551042)
	mul.lo.u32 	%draw1, %c, 1103515245;
	add.u32 	%draw1, %draw1, 12345;
	shr.u32 	%draw1, %draw1, 16;
	and.b32 	%draw1, %draw1, 255;
	shl.b32 	%draw1, %draw1, 7;
	mul.lo.u32 	%draw2, %c, -1029531031;
	sub.u32 	%draw2, %draw2, 740551042;
	shr.u32 	%draw2, %draw2, 16;
	and.b32 	%draw2, %draw2, 255;
	xor.b32 	%mixed, %draw1, %draw2;
	st.param.s32 	[__cudaretf__Z6Rand2Djjj], %mixed;
	ret;
$LDWend__Z6Rand2Djjj:
	} // _Z6Rand2Djjj

	// Rand2D(seed): single-argument variant. Runs the same nine-step
	// subtract / shift / xor mix and the same output stage as the three-argument
	// Rand2D, with
	//   a = the .u32 parameter,
	//   b = %ctaid.x * %ntid.x + %tid.x,
	//   c = %ctaid.y * %ntid.y + %tid.y.
	// The first four mix steps are interleaved by the compiler, so they do not
	// appear as four separate groups of instructions.
	//   out: .s32 = ((((m * 1103515245 + 12345) >> 16) & 255) << 7)
	//             ^  (((m * -1029531031 - 740551042) >> 16) & 255)
	//        where m is the value of c after the mix.
	.visible .func (.param .s32 __cudaretf__Z6Rand2Dj) _Z6Rand2Dj (.param .u32 __cudaparmf1__Z6Rand2Dj)
	{
	.reg .u32 %r<60>;
	.loc	20	175	0
$LDWbegin__Z6Rand2Dj:
	// %r2 = a (the parameter)
	ld.param.u32 	%r1, [__cudaparmf1__Z6Rand2Dj];
	mov.s32 	%r2, %r1;
	.loc	20	143	0
	// %r10 = c = ctaid.y * ntid.y + tid.y ; %r12 = b = ctaid.x * ntid.x + tid.x
	cvt.s32.u32 	%r3, %ctaid.y;
	cvt.s32.u32 	%r4, %ntid.y;
	mul.lo.s32 	%r5, %r3, %r4;
	cvt.s32.u32 	%r6, %ctaid.x;
	cvt.s32.u32 	%r7, %ntid.x;
	mul.lo.s32 	%r8, %r6, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r5, %r9;
	mov.u32 	%r11, %tid.x;
	add.u32 	%r12, %r8, %r11;
	// step 1: a = (a - b - c) ^ (c >> 13)      -> %r17
	// (%r15 = b - c is computed early for step 2)
	shr.u32 	%r13, %r10, 13;
	sub.u32 	%r14, %r2, %r12;
	sub.u32 	%r15, %r12, %r10;
	sub.u32 	%r16, %r14, %r10;
	xor.b32 	%r17, %r13, %r16;
	// step 2: b = (b - c - a) ^ (a << 8)       -> %r21
	// (%r20 = c - a is computed early for step 3)
	shl.b32 	%r18, %r17, 8;
	sub.u32 	%r19, %r15, %r17;
	sub.u32 	%r20, %r10, %r17;
	xor.b32 	%r21, %r18, %r19;
	// step 3: c = (c - a - b) ^ (b >> 13)      -> %r25
	// (%r24 = a - b is computed early for step 4)
	shr.u32 	%r22, %r21, 13;
	sub.u32 	%r23, %r20, %r21;
	sub.u32 	%r24, %r17, %r21;
	xor.b32 	%r25, %r22, %r23;
	// step 4: a = (a - b - c) ^ (c >> 12)      -> %r28
	shr.u32 	%r26, %r25, 12;
	sub.u32 	%r27, %r24, %r25;
	xor.b32 	%r28, %r26, %r27;
	// step 5: b = (b - c - a) ^ (a << 16)      -> %r32
	sub.u32 	%r29, %r21, %r25;
	sub.u32 	%r30, %r29, %r28;
	shl.b32 	%r31, %r28, 16;
	xor.b32 	%r32, %r30, %r31;
	// step 6: c = (c - a - b) ^ (b >> 5)       -> %r36
	.loc	20	144	0
	sub.u32 	%r33, %r25, %r28;
	sub.u32 	%r34, %r33, %r32;
	shr.u32 	%r35, %r32, 5;
	xor.b32 	%r36, %r34, %r35;
	// step 7: a = (a - b - c) ^ (c >> 3)       -> %r40
	.loc	20	145	0
	sub.u32 	%r37, %r28, %r32;
	sub.u32 	%r38, %r37, %r36;
	shr.u32 	%r39, %r36, 3;
	xor.b32 	%r40, %r38, %r39;
	// step 8: b = (b - c - a) ^ (a << 10)      -> %r44
	.loc	20	146	0
	sub.u32 	%r41, %r32, %r36;
	sub.u32 	%r42, %r41, %r40;
	shl.b32 	%r43, %r40, 10;
	xor.b32 	%r44, %r42, %r43;
	// step 9: c = (c - a - b) ^ (b >> 15)      -> %r48 (the mixed word m)
	.loc	20	147	0
	sub.u32 	%r45, %r36, %r40;
	sub.u32 	%r46, %r45, %r44;
	shr.u32 	%r47, %r44, 15;
	xor.b32 	%r48, %r46, %r47;
	// Output stage: bits 16..23 of two products of m; the first byte is shifted
	// left by 7 and xor-ed with the second.
	.loc	20	176	0
	mul.lo.u32 	%r49, %r48, 1103515245;
	add.u32 	%r50, %r49, 12345;
	shr.u32 	%r51, %r50, 16;
	and.b32 	%r52, %r51, 255;
	shl.b32 	%r53, %r52, 7;
	mul.lo.u32 	%r54, %r48, -1029531031;
	sub.u32 	%r55, %r54, 740551042;
	shr.u32 	%r56, %r55, 16;
	and.b32 	%r57, %r56, 255;
	xor.b32 	%r58, %r53, %r57;
	st.param.s32 	[__cudaretf__Z6Rand2Dj], %r58;
	ret;
$LDWend__Z6Rand2Dj:
	} // _Z6Rand2Dj

	// Read2D<ushort4>(base, arg2, arg3, arg4): returns the 8-byte element
	// (four u16 lanes) stored at base[arg2 * arg4 + arg3].
	//   param 1 : .u64 address of the element array (accessed with generic ld)
	//   param 2-4 : .s32; the element index is arg2 * arg4 + arg3 in 32-bit
	//               arithmetic, then sign-extended and scaled by 8 bytes
	//   return  : the four u16 lanes at byte offsets 0, 2, 4, 6
	// NOTE(review): presumably arg2 = row, arg3 = column, arg4 = row pitch in
	// elements -- confirm against the caller.
	.visible .func (.param .align 8 .b8 __cudaretf__Z6Read2DI7ushort4ET_PKS1_iii[8]) _Z6Read2DI7ushort4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI7ushort4ET_PKS1_iii)
	{
	.reg .u32 %arg2, %arg3, %arg4, %index;
	.reg .u32 %comp<4>;
	.reg .u64 %base, %byteoff, %addr;
	.loc	20	114	0
$LDWbegin__Z6Read2DI7ushort4ET_PKS1_iii:
	ld.param.u64 	%base, [__cudaparmf1__Z6Read2DI7ushort4ET_PKS1_iii];
	ld.param.u32 	%arg2, [__cudaparmf2__Z6Read2DI7ushort4ET_PKS1_iii];
	ld.param.u32 	%arg3, [__cudaparmf3__Z6Read2DI7ushort4ET_PKS1_iii];
	ld.param.u32 	%arg4, [__cudaparmf4__Z6Read2DI7ushort4ET_PKS1_iii];
	.loc	20	115	0
	// index = arg2 * arg4 + arg3 (wraps in 32 bits)
	mad.lo.s32 	%index, %arg2, %arg4, %arg3;
	// addr = base + (s64)index * 8
	mul.wide.s32 	%byteoff, %index, 8;
	add.u64 	%addr, %base, %byteoff;
	ld.v4.u16 	{%comp0,%comp1,%comp2,%comp3}, [%addr];
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+0], %comp0;
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+2], %comp1;
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+4], %comp2;
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+6], %comp3;
	ret;
$LDWend__Z6Read2DI7ushort4ET_PKS1_iii:
	} // _Z6Read2DI7ushort4ET_PKS1_iii

	// Read2D<float4>(base, arg2, arg3, arg4): returns the 16-byte element
	// (four f32 lanes) stored at base[arg2 * arg4 + arg3].
	//   param 1 : .u64 address of the element array (accessed with generic ld)
	//   param 2-4 : .s32; the element index is arg2 * arg4 + arg3 in 32-bit
	//               arithmetic, then sign-extended and scaled by 16 bytes
	//   return  : the four f32 lanes at byte offsets 0, 4, 8, 12
	// NOTE(review): presumably arg2 = row, arg3 = column, arg4 = row pitch in
	// elements -- confirm against the caller.
	.visible .func (.param .align 16 .b8 __cudaretf__Z6Read2DI6float4ET_PKS1_iii[16]) _Z6Read2DI6float4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI6float4ET_PKS1_iii)
	{
	.reg .u32 %arg2, %arg3, %arg4, %index;
	.reg .u64 %base, %byteoff, %addr;
	.reg .f32 %comp<4>;
	.loc	20	114	0
$LDWbegin__Z6Read2DI6float4ET_PKS1_iii:
	ld.param.u64 	%base, [__cudaparmf1__Z6Read2DI6float4ET_PKS1_iii];
	ld.param.u32 	%arg2, [__cudaparmf2__Z6Read2DI6float4ET_PKS1_iii];
	ld.param.u32 	%arg3, [__cudaparmf3__Z6Read2DI6float4ET_PKS1_iii];
	ld.param.u32 	%arg4, [__cudaparmf4__Z6Read2DI6float4ET_PKS1_iii];
	.loc	20	115	0
	// index = arg2 * arg4 + arg3 (wraps in 32 bits)
	mad.lo.s32 	%index, %arg2, %arg4, %arg3;
	// addr = base + (s64)index * 16
	mul.wide.s32 	%byteoff, %index, 16;
	add.u64 	%addr, %base, %byteoff;
	ld.v4.f32 	{%comp0,%comp1,%comp2,%comp3}, [%addr];
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+0], %comp0;
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+4], %comp1;
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+8], %comp2;
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+12], %comp3;
	ret;
$LDWend__Z6Read2DI6float4ET_PKS1_iii:
	} // _Z6Read2DI6float4ET_PKS1_iii

	// Write2D<ushort4>(value, base, arg3, arg4, arg5): stores the 8-byte value
	// (four u16 lanes) to base[arg3 * arg5 + arg4].
	//   param 1 : the value, four u16 lanes at byte offsets 0, 2, 4, 6
	//   param 2 : .u64 address of the element array (accessed with generic st)
	//   param 3-5 : .s32; the element index is arg3 * arg5 + arg4 in 32-bit
	//               arithmetic, then sign-extended and scaled by 8 bytes
	// NOTE(review): presumably arg3 = row, arg4 = column, arg5 = row pitch in
	// elements -- confirm against the caller.
	.visible .func _Z7Write2DI7ushort4EvT_PS1_iii (.param .align 8 .b8 __cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii[8], .param .u64 __cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii)
	{
	.reg .u32 %comp<4>;
	.reg .u32 %arg3, %arg4, %arg5, %index;
	.reg .u64 %base, %byteoff, %addr;
	.loc	20	125	0
$LDWbegin__Z7Write2DI7ushort4EvT_PS1_iii:
	ld.param.u16 	%comp0, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+0];
	ld.param.u16 	%comp1, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+2];
	ld.param.u16 	%comp2, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+4];
	ld.param.u16 	%comp3, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+6];
	ld.param.u64 	%base, [__cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii];
	ld.param.u32 	%arg3, [__cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii];
	ld.param.u32 	%arg4, [__cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii];
	ld.param.u32 	%arg5, [__cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii];
	.loc	20	126	0
	// index = arg3 * arg5 + arg4 (wraps in 32 bits)
	mad.lo.s32 	%index, %arg3, %arg5, %arg4;
	// addr = base + (s64)index * 8
	mul.wide.s32 	%byteoff, %index, 8;
	add.u64 	%addr, %base, %byteoff;
	st.v4.u16 	[%addr], {%comp0,%comp1,%comp2,%comp3};
	.loc	20	127	0
	ret;
$LDWend__Z7Write2DI7ushort4EvT_PS1_iii:
	} // _Z7Write2DI7ushort4EvT_PS1_iii

	// Write2D<float4>(value, base, arg3, arg4, arg5): stores the 16-byte value
	// (four f32 lanes) to base[arg3 * arg5 + arg4].
	//   param 1 : the value, four f32 lanes at byte offsets 0, 4, 8, 12
	//   param 2 : .u64 address of the element array (accessed with generic st)
	//   param 3-5 : .s32; the element index is arg3 * arg5 + arg4 in 32-bit
	//               arithmetic, then sign-extended and scaled by 16 bytes
	// NOTE(review): presumably arg3 = row, arg4 = column, arg5 = row pitch in
	// elements -- confirm against the caller.
	.visible .func _Z7Write2DI6float4EvT_PS1_iii (.param .align 16 .b8 __cudaparmf1__Z7Write2DI6float4EvT_PS1_iii[16], .param .u64 __cudaparmf2__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI6float4EvT_PS1_iii)
	{
	.reg .u32 %arg3, %arg4, %arg5, %index;
	.reg .u64 %base, %byteoff, %addr;
	.reg .f32 %comp<4>;
	.loc	20	125	0
$LDWbegin__Z7Write2DI6float4EvT_PS1_iii:
	ld.param.f32 	%comp0, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+0];
	ld.param.f32 	%comp1, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+4];
	ld.param.f32 	%comp2, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+8];
	ld.param.f32 	%comp3, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+12];
	ld.param.u64 	%base, [__cudaparmf2__Z7Write2DI6float4EvT_PS1_iii];
	ld.param.u32 	%arg3, [__cudaparmf3__Z7Write2DI6float4EvT_PS1_iii];
	ld.param.u32 	%arg4, [__cudaparmf4__Z7Write2DI6float4EvT_PS1_iii];
	ld.param.u32 	%arg5, [__cudaparmf5__Z7Write2DI6float4EvT_PS1_iii];
	.loc	20	126	0
	// index = arg3 * arg5 + arg4 (wraps in 32 bits)
	mad.lo.s32 	%index, %arg3, %arg5, %arg4;
	// addr = base + (s64)index * 16
	mul.wide.s32 	%byteoff, %index, 16;
	add.u64 	%addr, %base, %byteoff;
	st.v4.f32 	[%addr], {%comp0,%comp1,%comp2,%comp3};
	.loc	20	127	0
	ret;
$LDWend__Z7Write2DI6float4EvT_PS1_iii:
	} // _Z7Write2DI6float4EvT_PS1_iii

	// UnpremultiplyPixel(pixel): divides the three components at byte offsets
	// 0, 4 and 8 by the component at offset 12 after that component has been
	// saturated to [0, 1].
	//   alpha = sat(pixel[+12])
	//   if (alpha - 8e-6 <= 0)  -> all four returned lanes are 0
	//   else                    -> lanes 0..2 = rcp.approx(alpha) * pixel[+0..+8],
	//                              lane 3   = alpha
	// NOTE(review): the offset-12 lane is presumably the alpha channel -- confirm.
	.visible .func (.param .align 16 .b8 __cudaretf__Z18UnpremultiplyPixel8PixelRGB[16]) _Z18UnpremultiplyPixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z18UnpremultiplyPixel8PixelRGB[16])
	{
	.reg .f32 %in<4>;
	.reg .f32 %alpha, %biased, %inv;
	.reg .f32 %out<4>;
	.reg .pred %tiny;
	.loc	4	206	0
$LDWbegin__Z18UnpremultiplyPixel8PixelRGB:
	ld.param.f32 	%in0, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+0];
	ld.param.f32 	%in1, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+4];
	ld.param.f32 	%in2, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+8];
	ld.param.f32 	%in3, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+12];
	.loc	4	208	0
	cvt.ftz.sat.f32.f32 	%alpha, %in3;
	add.ftz.f32 	%biased, %alpha, 0fb70637bd;    	// alpha + (-8e-006)
	setp.le.ftz.f32 	%tiny, %biased, 0f00000000;
	.loc	4	219	0
	// Start from the all-zero result used when alpha is too small to divide by.
	mov.f32 	%out0, 0f00000000;
	mov.f32 	%out1, 0f00000000;
	mov.f32 	%out2, 0f00000000;
	mov.f32 	%out3, 0f00000000;
	@%tiny bra 	$Lt_13_store;
	.loc	4	213	0
	rcp.approx.ftz.f32 	%inv, %alpha;
	mul.ftz.f32 	%out2, %inv, %in2;
	.loc	4	214	0
	mul.ftz.f32 	%out1, %inv, %in1;
	.loc	4	215	0
	mul.ftz.f32 	%out0, %inv, %in0;
	mov.f32 	%out3, %alpha;
$Lt_13_store:
	.loc	4	224	0
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+0], %out0;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+4], %out1;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+8], %out2;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+12], %out3;
	ret;
$LDWend__Z18UnpremultiplyPixel8PixelRGB:
	} // _Z18UnpremultiplyPixel8PixelRGB

	// ToLinearColor(v): power curve with exponent 2.2 that keeps the sign of
	// negative inputs.
	//   v <  0 : returns -ex2(2.2 * lg2(-v))
	//   else   : returns  ex2(2.2 * lg2(v))
	// lg2 / ex2 are the .approx.ftz hardware approximations.
	.visible .func (.param .f32 __cudaretf__Z13ToLinearColorf) _Z13ToLinearColorf (.param .f32 __cudaparmf1__Z13ToLinearColorf)
	{
	.reg .f32 %value, %mag, %log2mag, %scaled, %result;
	.reg .pred %is_neg;
	.loc	4	231	0
$LDWbegin__Z13ToLinearColorf:
	ld.param.f32 	%value, [__cudaparmf1__Z13ToLinearColorf];
	setp.lt.ftz.f32 	%is_neg, %value, 0f00000000;
	.loc	4	234	0
	// Work on -v when v is negative, on v itself otherwise.
	mov.f32 	%mag, %value;
	@%is_neg neg.ftz.f32 	%mag, %value;
	lg2.approx.ftz.f32 	%log2mag, %mag;
	mul.ftz.f32 	%scaled, %log2mag, 0f400ccccd;     	// 2.2
	ex2.approx.ftz.f32 	%result, %scaled;
	// Put the sign back for negative inputs.
	@%is_neg neg.ftz.f32 	%result, %result;
	st.param.f32 	[__cudaretf__Z13ToLinearColorf], %result;
	ret;
$LDWend__Z13ToLinearColorf:
	} // _Z13ToLinearColorf

	// FromLinearColor(v): power curve with exponent 0.454545 (0f3ee8ba2e) that
	// keeps the sign of negative inputs.
	//   v <  0 : returns -ex2(0.454545 * lg2(-v))
	//   else   : returns  ex2(0.454545 * lg2(v))
	// lg2 / ex2 are the .approx.ftz hardware approximations.
	.visible .func (.param .f32 __cudaretf__Z15FromLinearColorf) _Z15FromLinearColorf (.param .f32 __cudaparmf1__Z15FromLinearColorf)
	{
	.reg .f32 %value, %mag, %log2mag, %scaled, %result;
	.reg .pred %is_neg;
	.loc	4	239	0
$LDWbegin__Z15FromLinearColorf:
	ld.param.f32 	%value, [__cudaparmf1__Z15FromLinearColorf];
	setp.lt.ftz.f32 	%is_neg, %value, 0f00000000;
	.loc	4	242	0
	// Work on -v when v is negative, on v itself otherwise.
	mov.f32 	%mag, %value;
	@%is_neg neg.ftz.f32 	%mag, %value;
	lg2.approx.ftz.f32 	%log2mag, %mag;
	mul.ftz.f32 	%scaled, %log2mag, 0f3ee8ba2e;     	// 0.454545
	ex2.approx.ftz.f32 	%result, %scaled;
	// Put the sign back for negative inputs.
	@%is_neg neg.ftz.f32 	%result, %result;
	st.param.f32 	[__cudaretf__Z15FromLinearColorf], %result;
	ret;
$LDWend__Z15FromLinearColorf:
	} // _Z15FromLinearColorf

	// PremultiplyLinearizePixel(pixel): applies a sign-preserving 2.2 power
	// curve to the components at byte offsets 0, 4 and 8, then multiplies each
	// by the offset-12 component saturated to [0, 1].
	//   a       = sat(pixel[+12])
	//   curve(v) = v < 0 ? -ex2(2.2 * lg2(-v)) : ex2(2.2 * lg2(v))
	//   return  = { curve(pixel[+0]) * a, curve(pixel[+4]) * a,
	//               curve(pixel[+8]) * a, a }
	// The three curve evaluations are the same instruction pattern repeated once
	// per component (source lines 234/236 of file 4).
	// NOTE(review): the offset-12 lane is presumably alpha -- confirm.
	.visible .func (.param .align 16 .b8 __cudaretf__Z25PremultiplyLinearizePixel8PixelRGB[16]) _Z25PremultiplyLinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB[16])
	{
	.reg .f32 %f<47>;
	.reg .pred %p<5>;
	.loc	4	252	0
$LDWbegin__Z25PremultiplyLinearizePixel8PixelRGB:
	// %f2, %f4, %f6, %f8 = input lanes at +0, +4, +8, +12
	ld.param.f32 	%f1, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+12];
	mov.f32 	%f8, %f7;
	.loc	4	254	0
	// %f9 = a = lane +12 clamped to [0, 1]
	cvt.ftz.sat.f32.f32 	%f9, %f8;
	.loc	4	255	0
	// --- curve(lane +0) -> %f16 ---
	mov.f32 	%f10, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p1, %f2, %f10;
	@!%p1 bra 	$Lt_16_4098;
	.loc	4	234	0
	// negative input: -ex2(2.2 * lg2(-v))
	neg.ftz.f32 	%f11, %f2;
	lg2.approx.ftz.f32 	%f12, %f11;
	mov.f32 	%f13, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f15, %f14;
	neg.ftz.f32 	%f16, %f15;
	bra.uni 	$LDWendi___log2f_193_5;
$Lt_16_4098:
	.loc	4	236	0
	// non-negative input: ex2(2.2 * lg2(v))
	lg2.approx.ftz.f32 	%f17, %f2;
	mov.f32 	%f18, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f16, %f19;
$LDWendi___log2f_193_5:
	.loc	4	256	0
	// --- curve(lane +4) -> %f26 ---
	mov.f32 	%f20, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p2, %f4, %f20;
	@!%p2 bra 	$Lt_16_4610;
	.loc	4	234	0
	neg.ftz.f32 	%f21, %f4;
	lg2.approx.ftz.f32 	%f22, %f21;
	mov.f32 	%f23, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f25, %f24;
	neg.ftz.f32 	%f26, %f25;
	bra.uni 	$LDWendi___log2f_193_3;
$Lt_16_4610:
	.loc	4	236	0
	lg2.approx.ftz.f32 	%f27, %f4;
	mov.f32 	%f28, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f26, %f29;
$LDWendi___log2f_193_3:
	.loc	4	257	0
	// --- curve(lane +8) -> %f36 ---
	mov.f32 	%f30, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f6, %f30;
	@!%p3 bra 	$Lt_16_5122;
	.loc	4	234	0
	neg.ftz.f32 	%f31, %f6;
	lg2.approx.ftz.f32 	%f32, %f31;
	mov.f32 	%f33, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f35, %f34;
	neg.ftz.f32 	%f36, %f35;
	bra.uni 	$LDWendi___log2f_193_1;
$Lt_16_5122:
	.loc	4	236	0
	lg2.approx.ftz.f32 	%f37, %f6;
	mov.f32 	%f38, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f39, %f37, %f38;
	ex2.approx.ftz.f32 	%f36, %f39;
$LDWendi___log2f_193_1:
	.loc	4	259	0
	// Scale each curved lane by a and return { +0, +4, +8, a }.
	mul.ftz.f32 	%f40, %f36, %f9;
	mul.ftz.f32 	%f41, %f26, %f9;
	mul.ftz.f32 	%f42, %f16, %f9;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+0], %f42;
	mov.f32 	%f43, %f41;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+4], %f43;
	mov.f32 	%f44, %f40;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+8], %f44;
	mov.f32 	%f45, %f9;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+12], %f45;
	ret;
$LDWend__Z25PremultiplyLinearizePixel8PixelRGB:
	} // _Z25PremultiplyLinearizePixel8PixelRGB

	// UnpremultiplyUnlinearizePixel(pixel): divides the components at byte
	// offsets 0, 4 and 8 by the saturated offset-12 component, then applies a
	// sign-preserving 0.454545 power curve to each.
	//   a = sat(pixel[+12])
	//   if (a - 8e-6 <= 0): the three divided lanes and the returned lane +12
	//                       are all 0
	//   else:               lane_i = rcp.approx(a) * pixel[+i], lane +12 = a
	//   curve(v) = v < 0 ? -ex2(0.454545 * lg2(-v)) : ex2(0.454545 * lg2(v))
	//   return   = { curve(lane0), curve(lane1), curve(lane2), lane +12 }
	// NOTE(review): the offset-12 lane is presumably alpha -- confirm.
	.visible .func (.param .align 16 .b8 __cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16]) _Z29UnpremultiplyUnlinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16])
	{
	.reg .f32 %f<53>;
	.reg .pred %p<6>;
	.loc	4	263	0
$LDWbegin__Z29UnpremultiplyUnlinearizePixel8PixelRGB:
	// %f2, %f4, %f6, %f8 = input lanes at +0, +4, +8, +12
	ld.param.f32 	%f1, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+12];
	mov.f32 	%f8, %f7;
	.loc	4	208	0
	// %f9 = a = lane +12 clamped to [0, 1]; %f10 is the lane +12 value returned.
	cvt.ftz.sat.f32.f32 	%f9, %f8;
	mov.f32 	%f10, %f9;
	// Branch to the all-zero path when a - 8e-6 <= 0.
	mov.f32 	%f11, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f12, %f9, %f11;
	mov.f32 	%f13, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f12, %f13;
	@%p1 bra 	$Lt_17_5122;
	.loc	4	213	0
	// Divide by a via an approximate reciprocal: %f17/%f16/%f15 = lanes +0/+4/+8.
	rcp.approx.ftz.f32 	%f14, %f9;
	mul.ftz.f32 	%f15, %f14, %f6;
	.loc	4	214	0
	mul.ftz.f32 	%f16, %f14, %f4;
	.loc	4	215	0
	mul.ftz.f32 	%f17, %f14, %f2;
	bra.uni 	$Lt_17_4866;
$Lt_17_5122:
	.loc	4	219	0
	// a too small: zero all four lanes.
	mov.f32 	%f15, 0f00000000;    	// 0
	mov.f32 	%f16, 0f00000000;    	// 0
	mov.f32 	%f17, 0f00000000;    	// 0
	mov.f32 	%f10, 0f00000000;    	// 0
$Lt_17_4866:
	.loc	4	266	0
	// --- curve(lane +0) -> %f24 ---
	mov.f32 	%f18, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p2, %f17, %f18;
	@!%p2 bra 	$Lt_17_5378;
	.loc	4	242	0
	// negative input: -ex2(0.454545 * lg2(-v))
	neg.ftz.f32 	%f19, %f17;
	lg2.approx.ftz.f32 	%f20, %f19;
	mov.f32 	%f21, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f22, %f20, %f21;
	ex2.approx.ftz.f32 	%f23, %f22;
	neg.ftz.f32 	%f24, %f23;
	bra.uni 	$LDWendi___log2f_194_5;
$Lt_17_5378:
	.loc	4	244	0
	// non-negative input: ex2(0.454545 * lg2(v))
	lg2.approx.ftz.f32 	%f25, %f17;
	mov.f32 	%f26, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f27, %f25, %f26;
	ex2.approx.ftz.f32 	%f24, %f27;
$LDWendi___log2f_194_5:
	.loc	4	267	0
	// --- curve(lane +4) -> %f34 ---
	mov.f32 	%f28, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f16, %f28;
	@!%p3 bra 	$Lt_17_5890;
	.loc	4	242	0
	neg.ftz.f32 	%f29, %f16;
	lg2.approx.ftz.f32 	%f30, %f29;
	mov.f32 	%f31, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f32, %f30, %f31;
	ex2.approx.ftz.f32 	%f33, %f32;
	neg.ftz.f32 	%f34, %f33;
	bra.uni 	$LDWendi___log2f_194_3;
$Lt_17_5890:
	.loc	4	244	0
	lg2.approx.ftz.f32 	%f35, %f16;
	mov.f32 	%f36, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f37, %f35, %f36;
	ex2.approx.ftz.f32 	%f34, %f37;
$LDWendi___log2f_194_3:
	.loc	4	268	0
	// --- curve(lane +8) -> %f44 ---
	mov.f32 	%f38, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f15, %f38;
	@!%p4 bra 	$Lt_17_6402;
	.loc	4	242	0
	neg.ftz.f32 	%f39, %f15;
	lg2.approx.ftz.f32 	%f40, %f39;
	mov.f32 	%f41, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f42, %f40, %f41;
	ex2.approx.ftz.f32 	%f43, %f42;
	neg.ftz.f32 	%f44, %f43;
	bra.uni 	$LDWendi___log2f_194_1;
$Lt_17_6402:
	.loc	4	244	0
	lg2.approx.ftz.f32 	%f45, %f15;
	mov.f32 	%f46, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f47, %f45, %f46;
	ex2.approx.ftz.f32 	%f44, %f47;
$LDWendi___log2f_194_1:
	.loc	4	269	0
	// Return { curve(lane0), curve(lane1), curve(lane2), lane +12 }.
	mov.f32 	%f48, %f24;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+0], %f48;
	mov.f32 	%f49, %f34;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+4], %f49;
	mov.f32 	%f50, %f44;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+8], %f50;
	mov.f32 	%f51, %f10;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+12], %f51;
	ret;
$LDWend__Z29UnpremultiplyUnlinearizePixel8PixelRGB:
	} // _Z29UnpremultiplyUnlinearizePixel8PixelRGB

	// PremultiplyLinearize(float4): applies a sign-preserving 2.2 power curve
	// to the lanes at byte offsets 0, 4 and 8, then multiplies each by the
	// offset-12 lane saturated to [0, 1].
	//   a        = sat(v[+12])
	//   curve(x) = x < 0 ? -ex2(2.2 * lg2(-x)) : ex2(2.2 * lg2(x))
	//   return   = { curve(v[+0]) * a, curve(v[+4]) * a, curve(v[+8]) * a, a }
	// The instruction sequence mirrors source lines 254-259 of file 4, inlined
	// here.
	// NOTE(review): the offset-12 lane is presumably alpha -- confirm.
	.visible .func (.param .align 16 .b8 __cudaretf__Z20PremultiplyLinearize6float4[16]) _Z20PremultiplyLinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z20PremultiplyLinearize6float4[16])
	{
	.reg .f32 %f<47>;
	.reg .pred %p<5>;
	.loc	4	277	0
$LDWbegin__Z20PremultiplyLinearize6float4:
	// %f2, %f4, %f6, %f8 = input lanes at +0, +4, +8, +12
	ld.param.f32 	%f1, [__cudaparmf1__Z20PremultiplyLinearize6float4+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z20PremultiplyLinearize6float4+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z20PremultiplyLinearize6float4+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z20PremultiplyLinearize6float4+12];
	mov.f32 	%f8, %f7;
	.loc	4	254	0
	// %f9 = a = lane +12 clamped to [0, 1]
	cvt.ftz.sat.f32.f32 	%f9, %f8;
	.loc	4	255	0
	// --- curve(lane +0) -> %f16 ---
	mov.f32 	%f10, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p1, %f2, %f10;
	@!%p1 bra 	$Lt_18_4098;
	.loc	4	234	0
	// negative input: -ex2(2.2 * lg2(-x))
	neg.ftz.f32 	%f11, %f2;
	lg2.approx.ftz.f32 	%f12, %f11;
	mov.f32 	%f13, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f15, %f14;
	neg.ftz.f32 	%f16, %f15;
	bra.uni 	$LDWendi___log2f_195_5;
$Lt_18_4098:
	.loc	4	236	0
	// non-negative input: ex2(2.2 * lg2(x))
	lg2.approx.ftz.f32 	%f17, %f2;
	mov.f32 	%f18, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f16, %f19;
$LDWendi___log2f_195_5:
	.loc	4	256	0
	// --- curve(lane +4) -> %f26 ---
	mov.f32 	%f20, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p2, %f4, %f20;
	@!%p2 bra 	$Lt_18_4610;
	.loc	4	234	0
	neg.ftz.f32 	%f21, %f4;
	lg2.approx.ftz.f32 	%f22, %f21;
	mov.f32 	%f23, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f25, %f24;
	neg.ftz.f32 	%f26, %f25;
	bra.uni 	$LDWendi___log2f_195_3;
$Lt_18_4610:
	.loc	4	236	0
	lg2.approx.ftz.f32 	%f27, %f4;
	mov.f32 	%f28, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f26, %f29;
$LDWendi___log2f_195_3:
	.loc	4	257	0
	// --- curve(lane +8) -> %f36 ---
	mov.f32 	%f30, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f6, %f30;
	@!%p3 bra 	$Lt_18_5122;
	.loc	4	234	0
	neg.ftz.f32 	%f31, %f6;
	lg2.approx.ftz.f32 	%f32, %f31;
	mov.f32 	%f33, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f35, %f34;
	neg.ftz.f32 	%f36, %f35;
	bra.uni 	$LDWendi___log2f_195_1;
$Lt_18_5122:
	.loc	4	236	0
	lg2.approx.ftz.f32 	%f37, %f6;
	mov.f32 	%f38, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f39, %f37, %f38;
	ex2.approx.ftz.f32 	%f36, %f39;
$LDWendi___log2f_195_1:
	.loc	4	259	0
	// Scale each curved lane by a and return { +0, +4, +8, a }.
	mul.ftz.f32 	%f40, %f36, %f9;
	mul.ftz.f32 	%f41, %f26, %f9;
	.loc	4	278	0
	mul.ftz.f32 	%f42, %f16, %f9;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+0], %f42;
	mov.f32 	%f43, %f41;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+4], %f43;
	mov.f32 	%f44, %f40;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+8], %f44;
	mov.f32 	%f45, %f9;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+12], %f45;
	ret;
$LDWend__Z20PremultiplyLinearize6float4:
	} // _Z20PremultiplyLinearize6float4

	// UnpremultiplyUnlinearize(float4): divides the lanes at byte offsets 0, 4
	// and 8 by the saturated offset-12 lane, then applies a sign-preserving
	// 0.454545 power curve to each.
	//   a = sat(v[+12])
	//   if (a - 8e-6 <= 0): the three divided lanes and the returned lane +12
	//                       are all 0
	//   else:               lane_i = rcp.approx(a) * v[+i], lane +12 = a
	//   curve(x) = x < 0 ? -ex2(0.454545 * lg2(-x)) : ex2(0.454545 * lg2(x))
	//   return   = { curve(lane0), curve(lane1), curve(lane2), lane +12 }
	// NOTE(review): the offset-12 lane is presumably alpha -- confirm.
	.visible .func (.param .align 16 .b8 __cudaretf__Z24UnpremultiplyUnlinearize6float4[16]) _Z24UnpremultiplyUnlinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z24UnpremultiplyUnlinearize6float4[16])
	{
	.reg .f32 %f<53>;
	.reg .pred %p<6>;
	.loc	4	284	0
$LDWbegin__Z24UnpremultiplyUnlinearize6float4:
	// %f2, %f4, %f6, %f8 = input lanes at +0, +4, +8, +12
	ld.param.f32 	%f1, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+12];
	mov.f32 	%f8, %f7;
	.loc	4	208	0
	// %f9 = a = lane +12 clamped to [0, 1]; %f10 is the lane +12 value returned.
	cvt.ftz.sat.f32.f32 	%f9, %f8;
	mov.f32 	%f10, %f9;
	// Branch to the all-zero path when a - 8e-6 <= 0.
	mov.f32 	%f11, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f12, %f9, %f11;
	mov.f32 	%f13, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f12, %f13;
	@%p1 bra 	$Lt_19_5122;
	.loc	4	213	0
	// Divide by a via an approximate reciprocal: %f17/%f16/%f15 = lanes +0/+4/+8.
	rcp.approx.ftz.f32 	%f14, %f9;
	mul.ftz.f32 	%f15, %f14, %f6;
	.loc	4	214	0
	mul.ftz.f32 	%f16, %f14, %f4;
	.loc	4	215	0
	mul.ftz.f32 	%f17, %f14, %f2;
	bra.uni 	$Lt_19_4866;
$Lt_19_5122:
	.loc	4	219	0
	// a too small: zero all four lanes.
	mov.f32 	%f15, 0f00000000;    	// 0
	mov.f32 	%f16, 0f00000000;    	// 0
	mov.f32 	%f17, 0f00000000;    	// 0
	mov.f32 	%f10, 0f00000000;    	// 0
$Lt_19_4866:
	.loc	4	266	0
	// --- curve(lane +0) -> %f24 ---
	mov.f32 	%f18, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p2, %f17, %f18;
	@!%p2 bra 	$Lt_19_5378;
	.loc	4	242	0
	// negative input: -ex2(0.454545 * lg2(-x))
	neg.ftz.f32 	%f19, %f17;
	lg2.approx.ftz.f32 	%f20, %f19;
	mov.f32 	%f21, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f22, %f20, %f21;
	ex2.approx.ftz.f32 	%f23, %f22;
	neg.ftz.f32 	%f24, %f23;
	bra.uni 	$LDWendi___log2f_196_5;
$Lt_19_5378:
	.loc	4	244	0
	// non-negative input: ex2(0.454545 * lg2(x))
	lg2.approx.ftz.f32 	%f25, %f17;
	mov.f32 	%f26, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f27, %f25, %f26;
	ex2.approx.ftz.f32 	%f24, %f27;
$LDWendi___log2f_196_5:
	.loc	4	267	0
	// --- curve(lane +4) -> %f34 ---
	mov.f32 	%f28, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f16, %f28;
	@!%p3 bra 	$Lt_19_5890;
	.loc	4	242	0
	neg.ftz.f32 	%f29, %f16;
	lg2.approx.ftz.f32 	%f30, %f29;
	mov.f32 	%f31, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f32, %f30, %f31;
	ex2.approx.ftz.f32 	%f33, %f32;
	neg.ftz.f32 	%f34, %f33;
	bra.uni 	$LDWendi___log2f_196_3;
$Lt_19_5890:
	.loc	4	244	0
	lg2.approx.ftz.f32 	%f35, %f16;
	mov.f32 	%f36, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f37, %f35, %f36;
	ex2.approx.ftz.f32 	%f34, %f37;
$LDWendi___log2f_196_3:
	.loc	4	268	0
	// --- curve(lane +8) -> %f44 ---
	mov.f32 	%f38, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f15, %f38;
	@!%p4 bra 	$Lt_19_6402;
	.loc	4	242	0
	neg.ftz.f32 	%f39, %f15;
	lg2.approx.ftz.f32 	%f40, %f39;
	mov.f32 	%f41, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f42, %f40, %f41;
	ex2.approx.ftz.f32 	%f43, %f42;
	neg.ftz.f32 	%f44, %f43;
	bra.uni 	$LDWendi___log2f_196_1;
$Lt_19_6402:
	.loc	4	244	0
	lg2.approx.ftz.f32 	%f45, %f15;
	mov.f32 	%f46, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f47, %f45, %f46;
	ex2.approx.ftz.f32 	%f44, %f47;
$LDWendi___log2f_196_1:
	.loc	4	285	0
	// Return { curve(lane0), curve(lane1), curve(lane2), lane +12 }.
	mov.f32 	%f48, %f24;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+0], %f48;
	mov.f32 	%f49, %f34;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+4], %f49;
	mov.f32 	%f50, %f44;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+8], %f50;
	mov.f32 	%f51, %f10;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+12], %f51;
	ret;
$LDWend__Z24UnpremultiplyUnlinearize6float4:
	} // _Z24UnpremultiplyUnlinearize6float4

	// AlphaAdjust(pixel, _, _, _, _, params): rewrites the pixel in place through
	// the pointer in param 1, driven by four f32 fields read through the pointer
	// in param 6. Params 2-5 are not read by this body.
	//
	// Fields of params (byte offsets), as used below:
	//   +0  : a scale value v
	//   +4  : flag F4  (tested as == 1.0)
	//   +8  : flag F8  (tested as == 1.0)
	//   +12 : flag F12 (tested as == 1.0)
	//
	// Behaviour (A = pixel[+12]):
	//   A = sat(A)                                  (always, stored first)
	//   if F12:
	//       A = F4 ? v : A * v
	//       if F8: A = 1 - A
	//       pixel[+0] = pixel[+4] = pixel[+8] = A
	//   else if F4:
	//       A = v;  if F8: A = 1 - v
	//   else:
	//       if F8: A = 1 - A
	//       A = v * A
	// NOTE(review): field meanings (opacity, "ignore source alpha", "invert",
	// "mask only") are presumed from the control flow -- confirm against the
	// AlphaAdjustParameters declaration.
	.visible .func _Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters (.param .u64 __cudaparmf1__Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters, .param .u64 __cudaparmf2__Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters, .param .u64 __cudaparmf3__Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters, .param .s32 __cudaparmf4__Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters, .param .s32 __cudaparmf5__Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters, .param .u64 __cudaparmf6__Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters)
	{
	.reg .u64 %rd<6>;
	.reg .f32 %f<23>;
	.reg .pred %p<7>;
	.loc	21	41	0
$LDWbegin__Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters:
	// %rd2 = pixel pointer, %rd4 = params pointer
	ld.param.u64 	%rd1, [__cudaparmf1__Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf6__Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters];
	mov.s64 	%rd4, %rd3;
	.loc	21	42	0
	// A = sat(pixel[+12]); %f2 carries the running value of A from here on.
	ld.f32 	%f1, [%rd2+12];
	cvt.ftz.sat.f32.f32 	%f2, %f1;
	st.f32 	[%rd2+12], %f2;
	// %p1 = F4 (params[+4] == 1), %p2 = F12 (params[+12] == 1)
	ld.f32 	%f3, [%rd4+4];
	mov.f32 	%f4, 0f3f800000;     	// 1
	setp.eq.ftz.f32 	%p1, %f3, %f4;
	ld.f32 	%f5, [%rd4+12];
	mov.f32 	%f6, 0f3f800000;     	// 1
	setp.eq.ftz.f32 	%p2, %f5, %f6;
	@!%p2 bra 	$Lt_20_5122;
	// ---- F12 set: A is also copied into lanes +0, +4, +8 at the end ----
	ld.f32 	%f7, [%rd4+0];
	@!%p1 bra 	$Lt_20_5634;
	.loc	21	47	0
	// F4 set: A = v
	mov.f32 	%f2, %f7;
	st.f32 	[%rd2+12], %f7;
	bra.uni 	$Lt_20_5378;
$Lt_20_5634:
	.loc	21	51	0
	// F4 clear: A = A * v
	mul.ftz.f32 	%f2, %f2, %f7;
	st.f32 	[%rd2+12], %f2;
$Lt_20_5378:
	ld.f32 	%f8, [%rd4+8];
	mov.f32 	%f9, 0f3f800000;     	// 1
	setp.eq.ftz.f32 	%p3, %f8, %f9;
	@!%p3 bra 	$Lt_20_5890;
	.loc	21	55	0
	// F8 set: A = 1 - A
	mov.f32 	%f10, 0f3f800000;    	// 1
	sub.ftz.f32 	%f2, %f10, %f2;
	st.f32 	[%rd2+12], %f2;
$Lt_20_5890:
	.loc	21	57	0
	// Broadcast A into lanes +0, +4 and +8.
	mov.f32 	%f11, %f2;
	st.v2.f32 	[%rd2+0], {%f11,%f11};
	st.f32 	[%rd2+8], %f11;
	bra.uni 	$Lt_20_6402;
$Lt_20_5122:
	// ---- F12 clear ----
	@!%p1 bra 	$Lt_20_6658;
	.loc	21	61	0
	// F4 set: A = v, optionally replaced by 1 - v when F8 is set
	ld.f32 	%f12, [%rd4+0];
	st.f32 	[%rd2+12], %f12;
	ld.f32 	%f13, [%rd4+8];
	mov.f32 	%f14, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p4, %f13, %f14;
	@!%p4 bra 	$Lt_20_6402;
	.loc	21	64	0
	mov.f32 	%f15, 0f3f800000;    	// 1
	sub.ftz.f32 	%f16, %f15, %f12;
	st.f32 	[%rd2+12], %f16;
	bra.uni 	$Lt_20_6402;
$Lt_20_6658:
	// F4 clear: optionally A = 1 - A (F8), then A = v * A
	ld.f32 	%f17, [%rd4+8];
	mov.f32 	%f18, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p5, %f17, %f18;
	@!%p5 bra 	$Lt_20_7426;
	.loc	21	71	0
	mov.f32 	%f19, 0f3f800000;    	// 1
	sub.ftz.f32 	%f2, %f19, %f2;
	st.f32 	[%rd2+12], %f2;
$Lt_20_7426:
	.loc	21	73	0
	ld.f32 	%f20, [%rd4+0];
	mul.ftz.f32 	%f21, %f20, %f2;
	st.f32 	[%rd2+12], %f21;
$Lt_20_6402:
$Lt_20_4866:
	.loc	21	75	0
	ret;
$LDWend__Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters:
	} // _Z11AlphaAdjustR8PixelRGBRiS1_iiRK21AlphaAdjustParameters

	// SwapComponentOrder<float4>(src): returns the four f32 lanes read through
	// the pointer in param 1 in reverse order:
	//   return[+0] = src[+12], return[+4] = src[+8],
	//   return[+8] = src[+4],  return[+12] = src[+0]
	// The lanes are read with four scalar loads, so only 4-byte alignment of the
	// source is required by this code.
	.visible .func (.param .align 16 .b8 __cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_[16]) _Z18SwapComponentOrderI6float4ET_RKS1_ (.param .u64 __cudaparmf1__Z18SwapComponentOrderI6float4ET_RKS1_)
	{
	.reg .u64 %src;
	.reg .f32 %lane_at0, %lane_at4, %lane_at8, %lane_at12;
	.loc	22	264	0
$LDWbegin__Z18SwapComponentOrderI6float4ET_RKS1_:
	ld.param.u64 	%src, [__cudaparmf1__Z18SwapComponentOrderI6float4ET_RKS1_];
	.loc	22	270	0
	// Gather all four source lanes first ...
	ld.f32 	%lane_at12, [%src+12];
	ld.f32 	%lane_at8, [%src+8];
	ld.f32 	%lane_at4, [%src+4];
	ld.f32 	%lane_at0, [%src+0];
	// ... then write them back mirrored.
	st.param.f32 	[__cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_+0], %lane_at12;
	st.param.f32 	[__cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_+4], %lane_at8;
	st.param.f32 	[__cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_+8], %lane_at4;
	st.param.f32 	[__cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_+12], %lane_at0;
	ret;
$LDWend__Z18SwapComponentOrderI6float4ET_RKS1_:
	} // _Z18SwapComponentOrderI6float4ET_RKS1_
	// Two 36-byte constant tables, each read elsewhere in this file with
	// ld.const.f32, i.e. as nine f32 values. Decoded as little-endian IEEE-754
	// single precision, the bytes are approximately:
	//
	//   kRGB32f_To_601YPbPr        k601YPbPr_To_RGB32f
	//   +0   0.299                 +0   1.0
	//   +4   0.587                 +4   0.0
	//   +8   0.114                 +8   1.402
	//   +12 -0.168736              +12  1.0
	//   +16 -0.331264              +16 -0.344136
	//   +20  0.5                   +20 -0.714136
	//   +24  0.5                   +24  1.0
	//   +28 -0.418688              +28  1.772
	//   +32 -0.081312              +32  0.0
	//
	// NOTE(review): these look like row-major 3x3 matrices using the ITU-R
	// BT.601 luma/chroma coefficients -- confirm against the CUDA source.
	.const .align 4 .b8 kRGB32f_To_601YPbPr[36] = {135,22,153,62,162,69,22,63,213,120,233,61,33,201,44,190,111,155,169,190,0,0,0,63,0,0,0,63,70,94,214,190,232,134,166,189};
	.const .align 4 .b8 k601YPbPr_To_RGB32f[36] = {0,0,128,63,0,0,0,0,188,116,179,63,0,0,128,63,152,50,176,190,158,209,54,191,0,0,128,63,229,208,226,63,0,0,0,0};

	// BlackAndWhite(pixel, _, _, _, _): replaces the first three lanes of the
	// 16-byte pixel behind the pointer in param 1 with a single weighted sum of
	// those lanes; lane +12 is written back unchanged. Params 2-5 are not read.
	//
	//   y = fma(W[+8], pixel[+0], fma(W[+0], pixel[+8], W[+4] * pixel[+4]))
	//   pixel[+0] = K[+24] * y
	//   pixel[+4] = K[+12] * y
	//   pixel[+8] = K[+0]  * y
	// with W = kRGB32f_To_601YPbPr and K = k601YPbPr_To_RGB32f (const tables).
	// The pixel is read and written with 16-byte vector accesses.
	// NOTE(review): W[+0] being applied to lane +8 suggests the pixel is stored
	// in B, G, R, A order -- confirm against the PixelRGB declaration.
	.visible .func _Z13BlackAndWhiteR8PixelRGBRiS1_ii (.param .u64 __cudaparmf1__Z13BlackAndWhiteR8PixelRGBRiS1_ii, .param .u64 __cudaparmf2__Z13BlackAndWhiteR8PixelRGBRiS1_ii, .param .u64 __cudaparmf3__Z13BlackAndWhiteR8PixelRGBRiS1_ii, .param .s32 __cudaparmf4__Z13BlackAndWhiteR8PixelRGBRiS1_ii, .param .s32 __cudaparmf5__Z13BlackAndWhiteR8PixelRGBRiS1_ii)
	{
	.reg .u64 %pixel;
	.reg .f32 %in<4>;
	.reg .f32 %w_at0, %w_at4, %w_at8, %luma;
	.reg .f32 %k_at0, %k_at12, %k_at24;
	.reg .f32 %out<3>;
	.loc	23	40	0
$LDWbegin__Z13BlackAndWhiteR8PixelRGBRiS1_ii:
	ld.param.u64 	%pixel, [__cudaparmf1__Z13BlackAndWhiteR8PixelRGBRiS1_ii];
	ld.v4.f32 	{%in0,%in1,%in2,%in3}, [%pixel];
	.loc	23	44	0
	// Weighted sum of the three colour lanes (same accumulation order as before:
	// lane +4 first, then lane +8, then lane +0).
	ld.const.f32 	%w_at0, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%w_at4, [kRGB32f_To_601YPbPr+4];
	ld.const.f32 	%w_at8, [kRGB32f_To_601YPbPr+8];
	mul.ftz.f32 	%luma, %w_at4, %in1;
	fma.rn.ftz.f32 	%luma, %w_at0, %in2, %luma;
	fma.rn.ftz.f32 	%luma, %w_at8, %in0, %luma;
	// Expand the sum back to three lanes through the first column of K.
	ld.const.f32 	%k_at24, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%k_at12, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%k_at0, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%out0, %k_at24, %luma;
	mul.ftz.f32 	%out1, %k_at12, %luma;
	mul.ftz.f32 	%out2, %k_at0, %luma;
	st.v4.f32 	[%pixel], {%out0,%out1,%out2,%in3};
	.loc	23	45	0
	ret;
$LDWend__Z13BlackAndWhiteR8PixelRGBRiS1_ii:
	} // _Z13BlackAndWhiteR8PixelRGBRiS1_ii

	// BrightnessAndContrast(pixel, _, _, _, _, params): rewrites lanes +0, +4
	// and +8 of the pixel behind the pointer in param 1, using f32 fields read
	// through the pointer in param 6. Lane +12 is not touched; params 2-5 are
	// not read by this body.
	//
	// Fields of params (byte offsets), as used below:
	//   +0  : mode selector (compared against 0)
	//   +4  : gain g
	//   +8  : pivot / threshold t
	//   +16 : offset o (also the "low" output of the threshold path)
	//   +20 : "high" output of the threshold path
	//
	//   if params[+0] != 0 (or is NaN):  lane = g * (lane - t) + o
	//   else:                            lane = (lane > t) ? params[+20]
	//                                                      : params[+16]
	// applied to lanes +8, +4, +0.
	// NOTE(review): field names are inferred from usage -- confirm against the
	// BrightnessAndContrastParameters declaration.
	.visible .func _Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters (.param .u64 __cudaparmf1__Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters, .param .u64 __cudaparmf2__Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters, .param .u64 __cudaparmf3__Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters, .param .s32 __cudaparmf4__Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters, .param .s32 __cudaparmf5__Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters, .param .u64 __cudaparmf6__Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters)
	{
	.reg .u64 %rd<6>;
	.reg .f32 %f<29>;
	.reg .pred %p<6>;
	.loc	24	41	0
$LDWbegin__Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters:
	// %rd2 = pixel pointer, %rd4 = params pointer
	ld.param.u64 	%rd1, [__cudaparmf1__Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf6__Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters];
	mov.s64 	%rd4, %rd3;
	// %f1 = pixel[+8], %f2 = t, %f3 = mode selector
	ld.f32 	%f1, [%rd2+8];
	ld.f32 	%f2, [%rd4+8];
	ld.f32 	%f3, [%rd4+0];
	mov.f32 	%f4, 0f00000000;     	// 0
	// neu = "not equal or unordered": a NaN selector also takes the linear path.
	setp.neu.ftz.f32 	%p1, %f3, %f4;
	@!%p1 bra 	$Lt_23_3586;
	.loc	24	44	0
	// ---- linear path: lane = g * (lane - t) + o ----
	// Lane +8 is stored before the other lanes are loaded; the fields are
	// re-read from params for each lane.
	ld.f32 	%f5, [%rd4+16];
	ld.f32 	%f6, [%rd4+4];
	sub.ftz.f32 	%f7, %f1, %f2;
	fma.rn.ftz.f32 	%f8, %f6, %f7, %f5;
	st.f32 	[%rd2+8], %f8;
	.loc	24	45	0
	// lane +4 (%f12) -> %f15
	ld.f32 	%f9, [%rd4+16];
	ld.f32 	%f10, [%rd4+4];
	ld.v2.f32 	{%f11,%f12}, [%rd2+0];
	ld.f32 	%f13, [%rd4+8];
	sub.ftz.f32 	%f14, %f12, %f13;
	fma.rn.ftz.f32 	%f15, %f10, %f14, %f9;
	.loc	24	46	0
	// lane +0 (%f11) -> %f20; both lanes are written with one 8-byte store
	ld.f32 	%f16, [%rd4+16];
	ld.f32 	%f17, [%rd4+4];
	ld.f32 	%f18, [%rd4+8];
	sub.ftz.f32 	%f19, %f11, %f18;
	fma.rn.ftz.f32 	%f20, %f17, %f19, %f16;
	st.v2.f32 	[%rd2+0], {%f20,%f15};
	bra.uni 	$Lt_23_3330;
$Lt_23_3586:
	// ---- threshold path: lane = (lane > t) ? params[+20] : params[+16] ----
	// lane +8
	setp.gt.ftz.f32 	%p2, %f1, %f2;
	@!%p2 bra 	$Lt_23_4098;
	.loc	24	50	0
	ld.f32 	%f21, [%rd4+20];
	bra.uni 	$Lt_23_3842;
$Lt_23_4098:
	ld.f32 	%f21, [%rd4+16];
$Lt_23_3842:
	st.f32 	[%rd2+8], %f21;
	// lane +4
	ld.f32 	%f22, [%rd2+4];
	ld.f32 	%f23, [%rd4+8];
	setp.gt.ftz.f32 	%p3, %f22, %f23;
	@!%p3 bra 	$Lt_23_4610;
	.loc	24	51	0
	ld.f32 	%f24, [%rd4+20];
	bra.uni 	$Lt_23_4354;
$Lt_23_4610:
	ld.f32 	%f24, [%rd4+16];
$Lt_23_4354:
	st.f32 	[%rd2+4], %f24;
	// lane +0
	ld.f32 	%f25, [%rd2+0];
	ld.f32 	%f26, [%rd4+8];
	setp.gt.ftz.f32 	%p4, %f25, %f26;
	@!%p4 bra 	$Lt_23_5122;
	.loc	24	52	0
	ld.f32 	%f27, [%rd4+20];
	bra.uni 	$Lt_23_4866;
$Lt_23_5122:
	ld.f32 	%f27, [%rd4+16];
$Lt_23_4866:
	st.f32 	[%rd2+0], %f27;
$Lt_23_3330:
	.loc	24	54	0
	ret;
$LDWend__Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters:
	} // _Z21BrightnessAndContrastR8PixelRGBRiS1_iiRK31BrightnessAndContrastParameters

	// ColorBalanceRGB(pixel, _, _, _, _, params): multiplies lanes +0, +4 and +8
	// of the pixel behind the pointer in param 1 by the f32 values at the same
	// byte offsets behind the pointer in param 6, and writes the pixel back with
	// one 16-byte store. Lane +12 is re-stored unchanged; params 2-5 are not
	// read by this body.
	.visible .func _Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters (.param .u64 __cudaparmf1__Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters, .param .u64 __cudaparmf2__Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters, .param .u64 __cudaparmf3__Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters, .param .s32 __cudaparmf4__Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters, .param .s32 __cudaparmf5__Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters, .param .u64 __cudaparmf6__Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters)
	{
	.reg .u64 %pixel, %gains;
	.reg .f32 %px<4>;
	.reg .f32 %gain<3>;
	.reg .f32 %scaled<3>;
	.loc	25	41	0
$LDWbegin__Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters:
	ld.param.u64 	%pixel, [__cudaparmf1__Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters];
	ld.param.u64 	%gains, [__cudaparmf6__Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters];
	.loc	25	47	0
	// lane +0
	ld.f32 	%px0, [%pixel+0];
	ld.f32 	%gain0, [%gains+0];
	mul.ftz.f32 	%scaled0, %px0, %gain0;
	.loc	22	280	0
	// lane +4
	ld.f32 	%px1, [%pixel+4];
	ld.f32 	%gain1, [%gains+4];
	mul.ftz.f32 	%scaled1, %px1, %gain1;
	.loc	22	281	0
	// lane +8
	ld.f32 	%px2, [%pixel+8];
	ld.f32 	%gain2, [%gains+8];
	mul.ftz.f32 	%scaled2, %px2, %gain2;
	.loc	22	282	0
	// lane +12 passes through untouched
	ld.f32 	%px3, [%pixel+12];
	.loc	25	47	0
	st.v4.f32 	[%pixel], {%scaled0,%scaled1,%scaled2,%px3};
	.loc	25	48	0
	ret;
$LDWend__Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters:
	} // _Z15ColorBalanceRGBR8PixelRGBRiS1_iiRK25ColorBalanceRGBParameters

	// PixelRGB MultiplyComponents<PixelRGB>(const PixelRGB&, const PixelRGB&)
	// (signature demangled from the symbol name).
	//
	// Returns, in the 16-byte return parameter, the element-wise product of the four
	// floats at parmf1 and the four floats at parmf2. Neither input is written.
	.visible .func (.param .align 16 .b8 __cudaretf__Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_[16]) _Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_ (.param .u64 __cudaparmf1__Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_, .param .u64 __cudaparmf2__Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_)
	{
	.reg .u64 	%lhs, %rhs;     	// pointers to the two operands
	.reg .f32 	%a<4>;          	// floats of the first operand
	.reg .f32 	%b<4>;          	// floats of the second operand
	.reg .f32 	%prod<4>;       	// a[i] * b[i]
	.loc	22	277	0
$LDWbegin__Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_:
	ld.param.u64 	%lhs, [__cudaparmf1__Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_];
	ld.param.u64 	%rhs, [__cudaparmf2__Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_];
	.loc	22	283	0
	// Gather both operands first; the stores below only touch the return parameter.
	ld.f32 	%a0, [%lhs+0];
	ld.f32 	%b0, [%rhs+0];
	ld.f32 	%a1, [%lhs+4];
	ld.f32 	%b1, [%rhs+4];
	ld.f32 	%a2, [%lhs+8];
	ld.f32 	%b2, [%rhs+8];
	ld.f32 	%a3, [%lhs+12];
	ld.f32 	%b3, [%rhs+12];
	mul.ftz.f32 	%prod0, %a0, %b0;
	mul.ftz.f32 	%prod1, %a1, %b1;
	mul.ftz.f32 	%prod2, %a2, %b2;
	mul.ftz.f32 	%prod3, %a3, %b3;
	st.param.f32 	[__cudaretf__Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_+0], %prod0;
	st.param.f32 	[__cudaretf__Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_+4], %prod1;
	st.param.f32 	[__cudaretf__Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_+8], %prod2;
	st.param.f32 	[__cudaretf__Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_+12], %prod3;
	ret;
$LDWend__Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_:
	} // _Z18MultiplyComponentsI8PixelRGBET_RKS1_S3_

	// ColorPass(PixelRGB&, int&, int&, int, int, const ColorPassParameters&)
	// (signature demangled from the symbol name; no return value).
	//
	// Reads the four pixel floats (parmf1) and the parameter block (parmf6);
	// parmf2..parmf5 are never read.
	//
	//   dist    = max over the four components of |clamp(pixel[i], 0, 1) - key[i]|
	//             key for pixel+0/+4/+8 is params+8/+12/+16; for pixel+12 the key is
	//             the unclamped pixel+12 value itself.
	//   matched = (params+4 >= dist)
	//   flag    = (params+0 == 0)
	//
	// When (flag XOR matched) is true, the first three pixel floats are replaced by a
	// single weighted sum of the pixel (weights kRGB32f_To_601YPbPr+0/+4/+8), scaled by
	// k601YPbPr_To_RGB32f+24/+12/+0; pixel+12 is written back unchanged. Otherwise the
	// pixel is left untouched.
	// NOTE(review): the weighted sum is presumably BT.601 luma, i.e. the pixel is
	// desaturated — the constant tables are not visible here; confirm.
	.visible .func _Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters (.param .u64 __cudaparmf1__Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters, .param .u64 __cudaparmf2__Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters, .param .u64 __cudaparmf3__Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters, .param .s32 __cudaparmf4__Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters, .param .s32 __cudaparmf5__Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters, .param .u64 __cudaparmf6__Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters)
	{
	.reg .u64 %rd<6>;
	.reg .f32 %f<51>;
	.reg .pred %p<5>;
	.loc	26	40	0
$LDWbegin__Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters:
	// %rd2 = pixel pointer, %rd4 = parameter-block pointer.
	ld.param.u64 	%rd1, [__cudaparmf1__Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf6__Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters];
	mov.s64 	%rd4, %rd3;
	.loc	26	48	0
	// %f4/%f3/%f2/%f1 = pixel floats at +0/+4/+8/+12.
	ld.f32 	%f1, [%rd2+12];
	ld.f32 	%f2, [%rd2+8];
	ld.f32 	%f3, [%rd2+4];
	ld.f32 	%f4, [%rd2+0];
	// %p1 = (params+0 == 0): selects which side of the match gets rewritten.
	ld.f32 	%f5, [%rd4+0];
	mov.f32 	%f6, 0f00000000;     	// 0
	setp.eq.ftz.f32 	%p1, %f5, %f6;
	// %f7 = params+4, the threshold compared against the distance below.
	ld.f32 	%f7, [%rd4+4];
	// %f14 = |clamp(pixel+0, 0, 1) - params+8|
	mov.f32 	%f8, 0f00000000;     	// 0
	max.ftz.f32 	%f9, %f4, %f8;
	mov.f32 	%f10, 0f3f800000;    	// 1
	min.ftz.f32 	%f11, %f9, %f10;
	ld.f32 	%f12, [%rd4+8];
	sub.ftz.f32 	%f13, %f11, %f12;
	abs.ftz.f32 	%f14, %f13;
	// %f21 = |clamp(pixel+4, 0, 1) - params+12|
	mov.f32 	%f15, 0f00000000;    	// 0
	max.ftz.f32 	%f16, %f3, %f15;
	mov.f32 	%f17, 0f3f800000;    	// 1
	min.ftz.f32 	%f18, %f16, %f17;
	ld.f32 	%f19, [%rd4+12];
	sub.ftz.f32 	%f20, %f18, %f19;
	abs.ftz.f32 	%f21, %f20;
	// %f28 = |clamp(pixel+8, 0, 1) - params+16|
	mov.f32 	%f22, 0f00000000;    	// 0
	max.ftz.f32 	%f23, %f2, %f22;
	mov.f32 	%f24, 0f3f800000;    	// 1
	min.ftz.f32 	%f25, %f23, %f24;
	ld.f32 	%f26, [%rd4+16];
	sub.ftz.f32 	%f27, %f25, %f26;
	abs.ftz.f32 	%f28, %f27;
	// %f34 = |clamp(pixel+12, 0, 1) - pixel+12|: the fourth component is compared
	// against its own unclamped value, so it only contributes when outside [0, 1].
	mov.f32 	%f29, 0f00000000;    	// 0
	max.ftz.f32 	%f30, %f1, %f29;
	mov.f32 	%f31, 0f3f800000;    	// 1
	min.ftz.f32 	%f32, %f30, %f31;
	sub.ftz.f32 	%f33, %f32, %f1;
	abs.ftz.f32 	%f34, %f33;
	// %f37 = largest of the four per-component distances.
	max.ftz.f32 	%f35, %f28, %f34;
	max.ftz.f32 	%f36, %f21, %f35;
	max.ftz.f32 	%f37, %f14, %f36;
	// %p2 = threshold >= distance; rewrite the pixel only when %p1 XOR %p2.
	setp.ge.ftz.f32 	%p2, %f7, %f37;
	xor.pred 	%p3, %p1, %p2;
	@!%p3 bra 	$Lt_26_5122;
	.loc	23	44	0
	// %f43 = k[+8]*pixel+0 + k[+0]*pixel+8 + k[+4]*pixel+4, k = kRGB32f_To_601YPbPr.
	ld.const.f32 	%f38, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f39, %f38, %f3;
	ld.const.f32 	%f40, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f41, %f40, %f2, %f39;
	ld.const.f32 	%f42, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f43, %f42, %f4, %f41;
	// Scale the sum by k601YPbPr_To_RGB32f+24/+12/+0 for pixel+0/+4/+8 respectively.
	ld.const.f32 	%f44, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f45, %f44, %f43;
	ld.const.f32 	%f46, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f47, %f46, %f43;
	ld.const.f32 	%f48, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f49, %f48, %f43;
	// pixel+12 (%f1) is stored back unchanged.
	st.v4.f32 	[%rd2+0], {%f45,%f47,%f49,%f1};
$Lt_26_5122:
	.loc	26	53	0
	ret;
$LDWend__Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters:
	} // _Z9ColorPassR8PixelRGBRiS1_iiRK19ColorPassParameters

	// PixelRGB ClampComponents<PixelRGB>(const PixelRGB&, float, float)
	// (signature demangled from the symbol name).
	//
	// For each of the four floats v at parmf1, writes min(parmf3, max(v, parmf2)) to
	// the matching slot of the 16-byte return parameter. The input is not modified.
	.visible .func (.param .align 16 .b8 __cudaretf__Z15ClampComponentsI8PixelRGBET_RKS1_ff[16]) _Z15ClampComponentsI8PixelRGBET_RKS1_ff (.param .u64 __cudaparmf1__Z15ClampComponentsI8PixelRGBET_RKS1_ff, .param .f32 __cudaparmf2__Z15ClampComponentsI8PixelRGBET_RKS1_ff, .param .f32 __cudaparmf3__Z15ClampComponentsI8PixelRGBET_RKS1_ff)
	{
	.reg .u64 	%src;                   	// pointer to the input components
	.reg .f32 	%lo_bound, %hi_bound;   	// parmf2, parmf3
	.reg .f32 	%v<4>;                  	// input floats
	.reg .f32 	%raised<4>;             	// max(v[i], lo_bound)
	.reg .f32 	%res<4>;                	// min(hi_bound, raised[i])
	.loc	22	342	0
$LDWbegin__Z15ClampComponentsI8PixelRGBET_RKS1_ff:
	ld.param.u64 	%src, [__cudaparmf1__Z15ClampComponentsI8PixelRGBET_RKS1_ff];
	ld.param.f32 	%lo_bound, [__cudaparmf2__Z15ClampComponentsI8PixelRGBET_RKS1_ff];
	ld.param.f32 	%hi_bound, [__cudaparmf3__Z15ClampComponentsI8PixelRGBET_RKS1_ff];
	.loc	22	348	0
	ld.f32 	%v0, [%src+0];
	ld.f32 	%v1, [%src+4];
	ld.f32 	%v2, [%src+8];
	ld.f32 	%v3, [%src+12];
	// Lower bound first, then upper bound - same operand order as the original.
	max.ftz.f32 	%raised0, %v0, %lo_bound;
	min.ftz.f32 	%res0, %hi_bound, %raised0;
	max.ftz.f32 	%raised1, %v1, %lo_bound;
	min.ftz.f32 	%res1, %hi_bound, %raised1;
	max.ftz.f32 	%raised2, %v2, %lo_bound;
	min.ftz.f32 	%res2, %hi_bound, %raised2;
	max.ftz.f32 	%raised3, %v3, %lo_bound;
	min.ftz.f32 	%res3, %hi_bound, %raised3;
	st.param.f32 	[__cudaretf__Z15ClampComponentsI8PixelRGBET_RKS1_ff+0], %res0;
	st.param.f32 	[__cudaretf__Z15ClampComponentsI8PixelRGBET_RKS1_ff+4], %res1;
	st.param.f32 	[__cudaretf__Z15ClampComponentsI8PixelRGBET_RKS1_ff+8], %res2;
	st.param.f32 	[__cudaretf__Z15ClampComponentsI8PixelRGBET_RKS1_ff+12], %res3;
	ret;
$LDWend__Z15ClampComponentsI8PixelRGBET_RKS1_ff:
	} // _Z15ClampComponentsI8PixelRGBET_RKS1_ff

	// PixelRGB AbsComponents<PixelRGB>(const PixelRGB&)
	// (signature demangled from the symbol name).
	//
	// Writes the absolute value of each of the four floats at parmf1 into the matching
	// slot of the 16-byte return parameter. The input is not modified.
	.visible .func (.param .align 16 .b8 __cudaretf__Z13AbsComponentsI8PixelRGBET_RKS1_[16]) _Z13AbsComponentsI8PixelRGBET_RKS1_ (.param .u64 __cudaparmf1__Z13AbsComponentsI8PixelRGBET_RKS1_)
	{
	.reg .u64 	%src;       	// pointer to the input components
	.reg .f32 	%v<4>;      	// input floats
	.reg .f32 	%mag<4>;    	// |v[i]|
	.loc	22	355	0
$LDWbegin__Z13AbsComponentsI8PixelRGBET_RKS1_:
	ld.param.u64 	%src, [__cudaparmf1__Z13AbsComponentsI8PixelRGBET_RKS1_];
	.loc	22	361	0
	ld.f32 	%v0, [%src+0];
	ld.f32 	%v1, [%src+4];
	ld.f32 	%v2, [%src+8];
	ld.f32 	%v3, [%src+12];
	abs.ftz.f32 	%mag0, %v0;
	abs.ftz.f32 	%mag1, %v1;
	abs.ftz.f32 	%mag2, %v2;
	abs.ftz.f32 	%mag3, %v3;
	st.param.f32 	[__cudaretf__Z13AbsComponentsI8PixelRGBET_RKS1_+0], %mag0;
	st.param.f32 	[__cudaretf__Z13AbsComponentsI8PixelRGBET_RKS1_+4], %mag1;
	st.param.f32 	[__cudaretf__Z13AbsComponentsI8PixelRGBET_RKS1_+8], %mag2;
	st.param.f32 	[__cudaretf__Z13AbsComponentsI8PixelRGBET_RKS1_+12], %mag3;
	ret;
$LDWend__Z13AbsComponentsI8PixelRGBET_RKS1_:
	} // _Z13AbsComponentsI8PixelRGBET_RKS1_

	// float MaxComponent<PixelRGB>(const PixelRGB&)
	// (signature demangled from the symbol name).
	//
	// Returns the largest of the four floats at parmf1, folded from the last
	// component towards the first: max(c0, max(c1, max(c2, c3))).
	.visible .func (.param .f32 __cudaretf__Z12MaxComponentI8PixelRGBEfRKT_) _Z12MaxComponentI8PixelRGBEfRKT_ (.param .u64 __cudaparmf1__Z12MaxComponentI8PixelRGBEfRKT_)
	{
	.reg .u64 	%src;                   	// pointer to the input components
	.reg .f32 	%c<4>;                  	// input floats
	.reg .f32 	%max23, %max123, %max0123;
	.loc	22	333	0
$LDWbegin__Z12MaxComponentI8PixelRGBEfRKT_:
	ld.param.u64 	%src, [__cudaparmf1__Z12MaxComponentI8PixelRGBEfRKT_];
	.loc	22	334	0
	ld.f32 	%c0, [%src+0];
	ld.f32 	%c1, [%src+4];
	ld.f32 	%c2, [%src+8];
	ld.f32 	%c3, [%src+12];
	max.ftz.f32 	%max23, %c2, %c3;
	max.ftz.f32 	%max123, %c1, %max23;
	max.ftz.f32 	%max0123, %c0, %max123;
	st.param.f32 	[__cudaretf__Z12MaxComponentI8PixelRGBEfRKT_], %max0123;
	ret;
$LDWend__Z12MaxComponentI8PixelRGBEfRKT_:
	} // _Z12MaxComponentI8PixelRGBEfRKT_

	// ColorReplace(PixelRGB&, int&, int&, int, int, const ColorReplaceParameters&)
	// (signature demangled from the symbol name; no return value).
	//
	// Reads the four pixel floats (parmf1) and the parameter block (parmf6);
	// parmf2..parmf5 are never read.
	//
	//   dist = max over the four components of |clamp(pixel[i], 0, 1) - key[i]|
	//          key for pixel+0/+4/+8 is params+8/+12/+16; for pixel+12 the key is
	//          the unclamped pixel+12 value itself.
	//
	// If params+4 >= dist, the first three pixel floats are overwritten with
	// params+20/+24/+28. When params+0 == 0 those three replacement values are first
	// multiplied by a weighted sum of the original pixel (weights
	// kRGB32f_To_601YPbPr+0/+4/+8). pixel+12 is written back unchanged.
	// If params+4 < dist the pixel is left untouched.
	// NOTE(review): the weighted sum is presumably BT.601 luma — the constant table
	// is not visible here; confirm.
	.visible .func _Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters (.param .u64 __cudaparmf1__Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters, .param .u64 __cudaparmf2__Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters, .param .u64 __cudaparmf3__Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters, .param .s32 __cudaparmf4__Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters, .param .s32 __cudaparmf5__Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters, .param .u64 __cudaparmf6__Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters)
	{
	.reg .u64 %rd<6>;
	.reg .f32 %f<48>;
	.reg .pred %p<4>;
	.loc	27	41	0
$LDWbegin__Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters:
	// %rd2 = pixel pointer, %rd4 = parameter-block pointer.
	ld.param.u64 	%rd1, [__cudaparmf1__Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf6__Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters];
	mov.s64 	%rd4, %rd3;
	.loc	27	48	0
	// %f4/%f3/%f2/%f1 = pixel floats at +0/+4/+8/+12.
	ld.f32 	%f1, [%rd2+12];
	ld.f32 	%f2, [%rd2+8];
	ld.f32 	%f3, [%rd2+4];
	ld.f32 	%f4, [%rd2+0];
	// %f5 = params+4, the threshold compared against the distance below.
	ld.f32 	%f5, [%rd4+4];
	// %f12 = |clamp(pixel+0, 0, 1) - params+8|
	mov.f32 	%f6, 0f00000000;     	// 0
	max.ftz.f32 	%f7, %f4, %f6;
	mov.f32 	%f8, 0f3f800000;     	// 1
	min.ftz.f32 	%f9, %f7, %f8;
	ld.f32 	%f10, [%rd4+8];
	sub.ftz.f32 	%f11, %f9, %f10;
	abs.ftz.f32 	%f12, %f11;
	// %f19 = |clamp(pixel+4, 0, 1) - params+12|
	mov.f32 	%f13, 0f00000000;    	// 0
	max.ftz.f32 	%f14, %f3, %f13;
	mov.f32 	%f15, 0f3f800000;    	// 1
	min.ftz.f32 	%f16, %f14, %f15;
	ld.f32 	%f17, [%rd4+12];
	sub.ftz.f32 	%f18, %f16, %f17;
	abs.ftz.f32 	%f19, %f18;
	// %f26 = |clamp(pixel+8, 0, 1) - params+16|
	mov.f32 	%f20, 0f00000000;    	// 0
	max.ftz.f32 	%f21, %f2, %f20;
	mov.f32 	%f22, 0f3f800000;    	// 1
	min.ftz.f32 	%f23, %f21, %f22;
	ld.f32 	%f24, [%rd4+16];
	sub.ftz.f32 	%f25, %f23, %f24;
	abs.ftz.f32 	%f26, %f25;
	// %f32 = |clamp(pixel+12, 0, 1) - pixel+12|: compared against its own unclamped
	// value, so it only contributes when pixel+12 is outside [0, 1].
	mov.f32 	%f27, 0f00000000;    	// 0
	max.ftz.f32 	%f28, %f1, %f27;
	mov.f32 	%f29, 0f3f800000;    	// 1
	min.ftz.f32 	%f30, %f28, %f29;
	sub.ftz.f32 	%f31, %f30, %f1;
	abs.ftz.f32 	%f32, %f31;
	// %f35 = largest of the four per-component distances.
	max.ftz.f32 	%f33, %f26, %f32;
	max.ftz.f32 	%f34, %f19, %f33;
	max.ftz.f32 	%f35, %f12, %f34;
	// Leave the pixel alone unless threshold >= distance.
	setp.ge.ftz.f32 	%p1, %f5, %f35;
	@!%p1 bra 	$Lt_30_5890;
	.loc	27	51	0
	// Replacement values for pixel+0/+4/+8.
	ld.f32 	%f36, [%rd4+20];
	ld.f32 	%f37, [%rd4+24];
	ld.f32 	%f38, [%rd4+28];
	// Only when params+0 == 0 are the replacement values scaled by the weighted sum.
	ld.f32 	%f39, [%rd4+0];
	mov.f32 	%f40, 0f00000000;    	// 0
	setp.eq.ftz.f32 	%p2, %f39, %f40;
	@!%p2 bra 	$Lt_30_6402;
	.loc	27	60	0
	// %f46 = k[+8]*pixel+0 + k[+0]*pixel+8 + k[+4]*pixel+4, k = kRGB32f_To_601YPbPr.
	ld.const.f32 	%f41, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f42, %f41, %f3;
	ld.const.f32 	%f43, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f44, %f43, %f2, %f42;
	ld.const.f32 	%f45, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f46, %f45, %f4, %f44;
	mul.ftz.f32 	%f36, %f46, %f36;
	.loc	27	61	0
	mul.ftz.f32 	%f37, %f46, %f37;
	.loc	27	62	0
	mul.ftz.f32 	%f38, %f46, %f38;
$Lt_30_6402:
	// pixel+12 (%f1) is stored back unchanged.
	st.v4.f32 	[%rd2+0], {%f36,%f37,%f38,%f1};
$Lt_30_5890:
	.loc	27	67	0
	ret;
$LDWend__Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters:
	} // _Z12ColorReplaceR8PixelRGBRiS1_iiRK22ColorReplaceParameters

	// EdgeFeather(PixelRGB&, int&, int&, int, int, const EdgeFeatherParameters&)
	// (signature demangled from the symbol name; no return value).
	//
	// With a = *parmf2, b = *parmf3, w = parmf4, h = parmf5 and F = float at params+0:
	//
	//   m = max( trunc(F - a), trunc(F - b), trunc(F - (w - a)), trunc(F - (h - b)) )
	//
	// (trunc = float-to-int conversion rounding toward zero). If m > 0 the float at
	// pixel+12 is multiplied by (F - m) / F using the approximate divide; otherwise the
	// pixel is left untouched. Only pixel+12 is ever written.
	// NOTE(review): a/b are presumably the pixel's x/y coordinates and w/h the image
	// width/height, with F the feather width — inferred from names only; confirm.
	.visible .func _Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters (.param .u64 __cudaparmf1__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters, .param .u64 __cudaparmf2__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters, .param .u64 __cudaparmf3__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters, .param .s32 __cudaparmf4__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters, .param .s32 __cudaparmf5__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters, .param .u64 __cudaparmf6__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters)
	{
	.reg .u32 %r<18>;
	.reg .u64 %rd<10>;
	.reg .f32 %f<16>;
	.reg .pred %p<3>;
	.loc	28	41	0
$LDWbegin__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters:
	// %rd2 = pixel, %rd4 = parmf2 (int*), %rd6 = parmf3 (int*),
	// %r2 = parmf4, %r4 = parmf5, %rd8 = parameter block.
	ld.param.u64 	%rd1, [__cudaparmf1__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf2__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters];
	mov.s64 	%rd4, %rd3;
	ld.param.u64 	%rd5, [__cudaparmf3__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters];
	mov.s64 	%rd6, %rd5;
	ld.param.u32 	%r1, [__cudaparmf4__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters];
	mov.s32 	%r2, %r1;
	ld.param.u32 	%r3, [__cudaparmf5__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters];
	mov.s32 	%r4, %r3;
	ld.param.u64 	%rd7, [__cudaparmf6__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters];
	mov.s64 	%rd8, %rd7;
	.loc	28	42	0
	// %r6 = trunc(F - *parmf2), with F = params+0 in %f1.
	ld.f32 	%f1, [%rd8+0];
	ld.s32 	%r5, [%rd4+0];
	cvt.rn.f32.s32 	%f2, %r5;
	sub.ftz.f32 	%f3, %f1, %f2;
	cvt.rzi.ftz.s32.f32 	%r6, %f3;
	.loc	28	43	0
	// %r8 = trunc(F - *parmf3)
	ld.s32 	%r7, [%rd6+0];
	cvt.rn.f32.s32 	%f4, %r7;
	sub.ftz.f32 	%f5, %f1, %f4;
	cvt.rzi.ftz.s32.f32 	%r8, %f5;
	.loc	28	44	0
	// %r10 = trunc(F - (parmf4 - *parmf2))
	sub.s32 	%r9, %r2, %r5;
	cvt.rn.f32.s32 	%f6, %r9;
	sub.ftz.f32 	%f7, %f1, %f6;
	cvt.rzi.ftz.s32.f32 	%r10, %f7;
	.loc	28	45	0
	// %r12 = trunc(F - (parmf5 - *parmf3))
	sub.s32 	%r11, %r4, %r7;
	cvt.rn.f32.s32 	%f8, %r11;
	sub.ftz.f32 	%f9, %f1, %f8;
	cvt.rzi.ftz.s32.f32 	%r12, %f9;
	.loc	28	47	0
	// %r15 = largest of the four values; nothing to do when it is <= 0.
	max.s32 	%r13, %r6, %r8;
	max.s32 	%r14, %r13, %r10;
	max.s32 	%r15, %r14, %r12;
	mov.u32 	%r16, 0;
	setp.le.s32 	%p1, %r15, %r16;
	@%p1 bra 	$Lt_31_1026;
	.loc	28	51	0
	// pixel+12 *= (F - m) / F, using the approximate divide.
	cvt.rn.f32.s32 	%f10, %r15;
	sub.ftz.f32 	%f11, %f1, %f10;
	ld.f32 	%f12, [%rd2+12];
	div.approx.ftz.f32 	%f13, %f11, %f1;
	mul.ftz.f32 	%f14, %f12, %f13;
	st.f32 	[%rd2+12], %f14;
$Lt_31_1026:
	.loc	28	53	0
	ret;
$LDWend__Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters:
	} // _Z11EdgeFeatherR8PixelRGBRiS1_iiRK21EdgeFeatherParameters

	// int Max<int>(int, int, int, int)
	// (signature demangled from the symbol name).
	//
	// Returns the signed maximum of the four arguments, computed pairwise:
	// max(max(parmf1, parmf2), max(parmf3, parmf4)).
	.visible .func (.param .s32 __cudaretf__Z3MaxIiET_S0_S0_S0_S0_) _Z3MaxIiET_S0_S0_S0_S0_ (.param .s32 __cudaparmf1__Z3MaxIiET_S0_S0_S0_S0_, .param .s32 __cudaparmf2__Z3MaxIiET_S0_S0_S0_S0_, .param .s32 __cudaparmf3__Z3MaxIiET_S0_S0_S0_S0_, .param .s32 __cudaparmf4__Z3MaxIiET_S0_S0_S0_S0_)
	{
	.reg .u32 	%arg<4>;                        	// the four inputs
	.reg .u32 	%max01, %max23, %max_all;
	.loc	29	66	0
$LDWbegin__Z3MaxIiET_S0_S0_S0_S0_:
	ld.param.u32 	%arg0, [__cudaparmf1__Z3MaxIiET_S0_S0_S0_S0_];
	ld.param.u32 	%arg1, [__cudaparmf2__Z3MaxIiET_S0_S0_S0_S0_];
	ld.param.u32 	%arg2, [__cudaparmf3__Z3MaxIiET_S0_S0_S0_S0_];
	ld.param.u32 	%arg3, [__cudaparmf4__Z3MaxIiET_S0_S0_S0_S0_];
	.loc	29	67	0
	max.s32 	%max01, %arg0, %arg1;
	max.s32 	%max23, %arg2, %arg3;
	max.s32 	%max_all, %max01, %max23;
	st.param.s32 	[__cudaretf__Z3MaxIiET_S0_S0_S0_S0_], %max_all;
	ret;
$LDWend__Z3MaxIiET_S0_S0_S0_S0_:
	} // _Z3MaxIiET_S0_S0_S0_S0_

	// Extract(PixelRGB&, int&, int&, int, int, const ExtractParameters&)
	// (signature demangled from the symbol name; no return value).
	//
	// Reads the pixel (parmf1) and the parameter block (parmf6); parmf2..parmf5 are
	// never read. With A = params+4, B = params+8, S = params+12 and
	// soft = S * (B - A), and Y = weighted sum of the pixel (weights
	// kRGB32f_To_601YPbPr+0/+4/+8), the function computes a value v:
	//
	//   lowOuter  = clamp(A - soft, 0, 1)    lowInner  = clamp(A + soft, 0, 1)
	//   highInner = clamp(B - soft, 0, 1)    highOuter = clamp(B + soft, 0, 1)
	//
	//   Y <  lowOuter  or Y >= highOuter      -> v = 0
	//   lowInner <= Y < highInner             -> v = 1
	//   otherwise, Y < lowInner               -> v = (Y - lowOuter)  / (2 * soft)
	//   otherwise                             -> v = (highOuter - Y) / (2 * soft)
	//
	// If params+0 != 0 (or is NaN) v is replaced by 1 - v. The first three pixel floats
	// are then overwritten with v scaled by k601YPbPr_To_RGB32f+24/+12/+0; pixel+12 is
	// written back unchanged.
	// NOTE(review): Y is presumably BT.601 luma and A/B a black/white input range —
	// inferred from names only; confirm.
	.visible .func _Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters (.param .u64 __cudaparmf1__Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters, .param .u64 __cudaparmf2__Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters, .param .u64 __cudaparmf3__Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters, .param .s32 __cudaparmf4__Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters, .param .s32 __cudaparmf5__Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters, .param .u64 __cudaparmf6__Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters)
	{
	.reg .u32 %r<14>;
	.reg .u64 %rd<6>;
	.reg .f32 %f<51>;
	.reg .pred %p<6>;
	.loc	30	41	0
$LDWbegin__Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters:
	// %rd2 = pixel pointer, %rd4 = parameter-block pointer.
	ld.param.u64 	%rd1, [__cudaparmf1__Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf6__Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters];
	mov.s64 	%rd4, %rd3;
	// %f1..%f4 = pixel floats at +0/+4/+8/+12 (16-byte vector load).
	ld.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd2+0];
	.loc	30	44	0
	// %f9 = soft = S * (B - A)
	ld.f32 	%f5, [%rd4+8];
	ld.f32 	%f6, [%rd4+4];
	ld.f32 	%f7, [%rd4+12];
	sub.ftz.f32 	%f8, %f5, %f6;
	mul.ftz.f32 	%f9, %f7, %f8;
	.loc	30	46	0
	// %f10 = A - soft
	sub.ftz.f32 	%f10, %f6, %f9;
	.loc	30	47	0
	// %f11 = A + soft
	add.ftz.f32 	%f11, %f6, %f9;
	.loc	30	50	0
	// %f12 = B + soft. The compiler interleaves three things below:
	//   %f22 = highOuter = clamp(B + soft, 0, 1)
	//   %f24 = lowOuter  = clamp(A - soft, 0, 1)
	//   %f26 = Y = k[+8]*pixel+0 + k[+0]*pixel+8 + k[+4]*pixel+4
	add.ftz.f32 	%f12, %f5, %f9;
	ld.const.f32 	%f13, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f14, %f13, %f2;
	mov.f32 	%f15, 0f00000000;    	// 0
	max.ftz.f32 	%f16, %f12, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	max.ftz.f32 	%f18, %f10, %f17;
	ld.const.f32 	%f19, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f20, %f19, %f3, %f14;
	mov.f32 	%f21, 0f3f800000;    	// 1
	min.ftz.f32 	%f22, %f16, %f21;
	mov.f32 	%f23, 0f3f800000;    	// 1
	min.ftz.f32 	%f24, %f18, %f23;
	ld.const.f32 	%f25, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f26, %f25, %f1, %f20;
	// set.* writes all-ones/zero; neg turns that into 1/0 so the flags can be OR-ed.
	// Outside the outer range (lowOuter > Y or highOuter <= Y) -> v = 0.
	set.gt.ftz.u32.f32 	%r1, %f24, %f26;
	neg.s32 	%r2, %r1;
	set.le.ftz.u32.f32 	%r3, %f22, %f26;
	neg.s32 	%r4, %r3;
	or.b32 	%r5, %r2, %r4;
	mov.u32 	%r6, 0;
	setp.eq.s32 	%p1, %r5, %r6;
	@%p1 bra 	$Lt_33_8194;
	mov.f32 	%f27, 0f00000000;    	// 0
	bra.uni 	$Lt_33_7938;
$Lt_33_8194:
	// %f31 = lowInner = clamp(A + soft, 0, 1); %f36 = highInner = clamp(B - soft, 0, 1).
	// Inside the inner range (lowInner <= Y and Y < highInner) -> v = 1.
	mov.f32 	%f28, 0f00000000;    	// 0
	max.ftz.f32 	%f29, %f11, %f28;
	mov.f32 	%f30, 0f3f800000;    	// 1
	min.ftz.f32 	%f31, %f29, %f30;
	set.le.ftz.u32.f32 	%r7, %f31, %f26;
	neg.s32 	%r8, %r7;
	sub.ftz.f32 	%f32, %f5, %f9;
	mov.f32 	%f33, 0f00000000;    	// 0
	max.ftz.f32 	%f34, %f32, %f33;
	mov.f32 	%f35, 0f3f800000;    	// 1
	min.ftz.f32 	%f36, %f34, %f35;
	set.lt.ftz.u32.f32 	%r9, %f26, %f36;
	neg.s32 	%r10, %r9;
	and.b32 	%r11, %r8, %r10;
	mov.u32 	%r12, 0;
	setp.eq.s32 	%p2, %r11, %r12;
	@%p2 bra 	$Lt_33_8706;
	mov.f32 	%f27, 0f3f800000;    	// 1
	bra.uni 	$Lt_33_8450;
$Lt_33_8706:
	// Transition bands: %f37 = 2 * soft is the divisor for both ramps.
	add.ftz.f32 	%f37, %f9, %f9;
	setp.gt.ftz.f32 	%p3, %f31, %f26;
	@!%p3 bra 	$Lt_33_9218;
	.loc	30	62	0
	// Lower band (Y < lowInner): v = (Y - lowOuter) / (2 * soft)
	sub.ftz.f32 	%f38, %f26, %f24;
	div.approx.ftz.f32 	%f27, %f38, %f37;
	bra.uni 	$Lt_33_8962;
$Lt_33_9218:
	.loc	30	66	0
	// Upper band: v = (highOuter - Y) / (2 * soft)
	sub.ftz.f32 	%f39, %f22, %f26;
	div.approx.ftz.f32 	%f27, %f39, %f37;
$Lt_33_8962:
$Lt_33_8450:
$Lt_33_7938:
	.loc	30	69	0
	// v = (params+0 != 0) ? 1 - v : v   (neu is also true when params+0 is NaN)
	mov.f32 	%f40, 0f3f800000;    	// 1
	sub.ftz.f32 	%f41, %f40, %f27;
	ld.f32 	%f42, [%rd4+0];
	mov.f32 	%f43, 0f00000000;    	// 0
	setp.neu.ftz.f32 	%p4, %f42, %f43;
	selp.f32 	%f27, %f41, %f27, %p4;
	.loc	30	77	0
	// Scale v by k601YPbPr_To_RGB32f+24/+12/+0 for pixel+0/+4/+8; pixel+12 unchanged.
	ld.const.f32 	%f44, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f45, %f44, %f27;
	ld.const.f32 	%f46, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f47, %f46, %f27;
	ld.const.f32 	%f48, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f49, %f48, %f27;
	st.v4.f32 	[%rd2+0], {%f45,%f47,%f49,%f4};
	.loc	30	78	0
	ret;
$LDWend__Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters:
	} // _Z7ExtractR8PixelRGBRiS1_iiRK17ExtractParameters

	// float Clamp<float>(float, float, float)
	// (signature demangled from the symbol name).
	//
	// Returns min(parmf3, max(parmf1, parmf2)): parmf1 raised to at least parmf2 and
	// then limited to at most parmf3.
	.visible .func (.param .f32 __cudaretf__Z5ClampIfET_S0_S0_S0_) _Z5ClampIfET_S0_S0_S0_ (.param .f32 __cudaparmf1__Z5ClampIfET_S0_S0_S0_, .param .f32 __cudaparmf2__Z5ClampIfET_S0_S0_S0_, .param .f32 __cudaparmf3__Z5ClampIfET_S0_S0_S0_)
	{
	.reg .f32 	%val, %lo_bound, %hi_bound;     	// parmf1, parmf2, parmf3
	.reg .f32 	%raised, %res;
	.loc	29	72	0
$LDWbegin__Z5ClampIfET_S0_S0_S0_:
	ld.param.f32 	%val, [__cudaparmf1__Z5ClampIfET_S0_S0_S0_];
	ld.param.f32 	%lo_bound, [__cudaparmf2__Z5ClampIfET_S0_S0_S0_];
	ld.param.f32 	%hi_bound, [__cudaparmf3__Z5ClampIfET_S0_S0_S0_];
	.loc	29	73	0
	max.ftz.f32 	%raised, %val, %lo_bound;
	min.ftz.f32 	%res, %hi_bound, %raised;
	st.param.f32 	[__cudaretf__Z5ClampIfET_S0_S0_S0_], %res;
	ret;
$LDWend__Z5ClampIfET_S0_S0_S0_:
	} // _Z5ClampIfET_S0_S0_S0_

	// FastColorCorrector_Composite(PixelRGB&, int&, int&, int, int,
	//                              const FastColorCorrectorParameters&)
	// (signature demangled from the symbol name; no return value).
	//
	// Gate (G = float at params+40). The pixel is processed only when
	//     G >= 0 and (float)*parmf2 <  (float)parmf4 * G,   or
	//     G <  0 and (float)*parmf3 < -((float)parmf5 * G);
	// otherwise the function returns without touching the pixel.
	// NOTE(review): this looks like a split-view control (fraction of width when
	// G >= 0, fraction of height when G < 0) — inferred, not shown here; confirm.
	//
	// Processing, with p0/p1/p2/p3 = pixel floats at +0/+4/+8/+12, k =
	// kRGB32f_To_601YPbPr and K = k601YPbPr_To_RGB32f:
	//   1. Y  = k[+0]*p2 + k[+4]*p1 + k[+8]*p0.
	//   2. inRange  = params+0 - params+4,  outRange = params+8 - params+12.
	//      If either range != 1, a small pseudo-random offset derived from the
	//      thread's global x/y index is added to Y (scale 0.00254902).
	//   3. t = Y - params+4; e = params+16.
	//        e != 1 and t <  0 : Y' = params+12
	//        e != 1 and t >= 0 : Y' = outRange * (t / inRange)^e + params+12
	//        e == 1            : Y' = outRange * (t / inRange)   + params+12
	//      (power via lg2/ex2 approximations, divide via rcp approximation).
	//   4. c1 = k[+12]*p2 + k[+16]*p1 + k[+20]*p0,  c2 = k[+24]*p2 + k[+28]*p1 + k[+32]*p0,
	//      rotated by the angle at params+24, each blended toward params+28 / params+32
	//      by the factor at params+36, then multiplied by params+20.
	//   5. (Y', c1', c2') is multiplied by the 3x3 table K (rows at +24, +12, +0 feed
	//      pixel+0, +4, +8) and stored; pixel+12 is written back unchanged.
	// NOTE(review): Y/c1/c2 are presumably BT.601 Y/Pb/Pr and params+20/+24 saturation
	// and hue angle — the tables and struct layout are not visible here; confirm.
	.visible .func _Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters (.param .u64 __cudaparmf1__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf2__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf3__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .s32 __cudaparmf4__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .s32 __cudaparmf5__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf6__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters)
	{
	.reg .u32 %r<71>;
	.reg .u64 %rd<10>;
	.reg .f32 %f<95>;
	.reg .pred %p<9>;
	.loc	31	42	0
$LDWbegin__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters:
	// %rd2 = pixel, %rd4 = parmf2 (int*), %rd6 = parmf3 (int*),
	// %r2 = parmf4, %r4 = parmf5, %rd8 = parameter block.
	ld.param.u64 	%rd1, [__cudaparmf1__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf2__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s64 	%rd4, %rd3;
	ld.param.u64 	%rd5, [__cudaparmf3__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s64 	%rd6, %rd5;
	ld.param.u32 	%r1, [__cudaparmf4__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s32 	%r2, %r1;
	ld.param.u32 	%r3, [__cudaparmf5__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s32 	%r4, %r3;
	ld.param.u64 	%rd7, [__cudaparmf6__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s64 	%rd8, %rd7;
	// Gate, first alternative: G >= 0 and (float)*parmf2 < (float)parmf4 * G.
	ld.f32 	%f1, [%rd8+40];
	mov.f32 	%f2, 0f00000000;     	// 0
	setp.ge.ftz.f32 	%p1, %f1, %f2;
	@!%p1 bra 	$L_35_4866;
	ld.s32 	%r5, [%rd4+0];
	cvt.rn.f32.s32 	%f3, %r5;
	cvt.rn.f32.s32 	%f4, %r2;
	mul.ftz.f32 	%f5, %f4, %f1;
	setp.lt.ftz.f32 	%p2, %f3, %f5;
	@%p2 bra 	$L_35_4610;
$L_35_4866:
	// Gate, second alternative: G < 0 and (float)*parmf3 < -((float)parmf5 * G).
	// Falling through both alternatives returns without touching the pixel.
	mov.f32 	%f6, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f1, %f6;
	@!%p3 bra 	$Lt_35_7170;
	ld.s32 	%r6, [%rd6+0];
	cvt.rn.f32.s32 	%f7, %r6;
	cvt.rn.f32.s32 	%f8, %r4;
	mul.ftz.f32 	%f9, %f8, %f1;
	neg.ftz.f32 	%f10, %f9;
	setp.lt.ftz.f32 	%p4, %f7, %f10;
	@!%p4 bra 	$Lt_35_7170;
$L_35_4610:
	// %f11..%f14 = pixel floats at +0/+4/+8/+12 (16-byte vector load).
	ld.v4.f32 	{%f11,%f12,%f13,%f14}, [%rd2+0];
	.loc	31	47	0
	// %f20 = Y = k[+8]*p0 + k[+0]*p2 + k[+4]*p1
	ld.const.f32 	%f15, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f16, %f15, %f12;
	ld.const.f32 	%f17, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f18, %f17, %f13, %f16;
	ld.const.f32 	%f19, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f20, %f19, %f11, %f18;
	.loc	31	49	0
	// %f23 = inRange = params+0 - params+4
	ld.f32 	%f21, [%rd8+4];
	ld.f32 	%f22, [%rd8+0];
	sub.ftz.f32 	%f23, %f22, %f21;
	.loc	31	50	0
	// %f26 = outRange = params+8 - params+12
	ld.f32 	%f24, [%rd8+12];
	ld.f32 	%f25, [%rd8+8];
	sub.ftz.f32 	%f26, %f25, %f24;
	// Skip the pseudo-random offset when both ranges are exactly 1.
	mov.f32 	%f27, 0f3f800000;    	// 1
	set.neu.ftz.u32.f32 	%r7, %f23, %f27;
	neg.s32 	%r8, %r7;
	mov.f32 	%f28, 0f3f800000;    	// 1
	set.neu.ftz.u32.f32 	%r9, %f26, %f28;
	neg.s32 	%r10, %r9;
	or.b32 	%r11, %r8, %r10;
	mov.u32 	%r12, 0;
	setp.eq.s32 	%p5, %r11, %r12;
	@%p5 bra 	$Lt_35_5122;
	.loc	20	143	0
	// %r20 = ctaid.y*ntid.y + tid.y, %r22 = ctaid.x*ntid.x + tid.x (global thread
	// indices). They seed an inlined shift/subtract/xor integer mixer.
	// NOTE(review): presumably the inlined Rand2D/Mix3 helpers declared at the top
	// of the file — confirm against source file 20.
	cvt.s32.u32 	%r13, %ctaid.y;
	cvt.s32.u32 	%r14, %ntid.y;
	mul.lo.s32 	%r15, %r13, %r14;
	cvt.s32.u32 	%r16, %ctaid.x;
	cvt.s32.u32 	%r17, %ntid.x;
	mul.lo.s32 	%r18, %r16, %r17;
	mov.u32 	%r19, %tid.y;
	add.u32 	%r20, %r15, %r19;
	mov.u32 	%r21, %tid.x;
	add.u32 	%r22, %r18, %r21;
	shr.u32 	%r23, %r20, 13;
	mov.s32 	%r24, 1;
	sub.s32 	%r25, %r24, %r22;
	sub.u32 	%r26, %r22, %r20;
	sub.u32 	%r27, %r25, %r20;
	xor.b32 	%r28, %r23, %r27;
	shl.b32 	%r29, %r28, 8;
	sub.u32 	%r30, %r26, %r28;
	sub.u32 	%r31, %r20, %r28;
	xor.b32 	%r32, %r29, %r30;
	shr.u32 	%r33, %r32, 13;
	sub.u32 	%r34, %r31, %r32;
	sub.u32 	%r35, %r28, %r32;
	xor.b32 	%r36, %r33, %r34;
	shr.u32 	%r37, %r36, 12;
	sub.u32 	%r38, %r35, %r36;
	xor.b32 	%r39, %r37, %r38;
	sub.u32 	%r40, %r32, %r36;
	sub.u32 	%r41, %r40, %r39;
	shl.b32 	%r42, %r39, 16;
	xor.b32 	%r43, %r41, %r42;
	.loc	20	144	0
	sub.u32 	%r44, %r36, %r39;
	sub.u32 	%r45, %r44, %r43;
	shr.u32 	%r46, %r43, 5;
	xor.b32 	%r47, %r45, %r46;
	.loc	20	145	0
	sub.u32 	%r48, %r39, %r43;
	sub.u32 	%r49, %r48, %r47;
	shr.u32 	%r50, %r47, 3;
	xor.b32 	%r51, %r49, %r50;
	.loc	20	146	0
	sub.u32 	%r52, %r43, %r47;
	sub.u32 	%r53, %r52, %r51;
	shl.b32 	%r54, %r51, 10;
	xor.b32 	%r55, %r53, %r54;
	.loc	20	147	0
	sub.u32 	%r56, %r47, %r51;
	sub.u32 	%r57, %r56, %r55;
	shr.u32 	%r58, %r55, 15;
	xor.b32 	%r59, %r57, %r58;
	.loc	31	57	0
	// %r59 is the mixed seed. Two 8-bit values are drawn from it:
	//   first  = ((seed*1103515245 + 12345) >> 16) & 255
	//   second = ((seed*-1029531031 - 740551042) >> 16) & 255
	// The second multiplier/addend pair equals two successive steps of the first
	// recurrence modulo 2^32, folded into a single multiply-add.
	// %r69 = (first << 7) ^ second; offset = %r69 / 16383.5 - 1; Y += 0.00254902 * offset.
	mov.f32 	%f29, 0f3b270d73;    	// 0.00254902
	mul.lo.u32 	%r60, %r59, 1103515245;
	add.u32 	%r61, %r60, 12345;
	shr.u32 	%r62, %r61, 16;
	and.b32 	%r63, %r62, 255;
	shl.b32 	%r64, %r63, 7;
	mul.lo.u32 	%r65, %r59, -1029531031;
	sub.u32 	%r66, %r65, 740551042;
	shr.u32 	%r67, %r66, 16;
	and.b32 	%r68, %r67, 255;
	xor.b32 	%r69, %r64, %r68;
	cvt.rn.f32.s32 	%f30, %r69;
	mov.f32 	%f31, 0f467ffe00;    	// 16383.5
	div.approx.ftz.f32 	%f32, %f30, %f31;
	mov.f32 	%f33, 0fbf800000;    	// -1
	add.ftz.f32 	%f34, %f32, %f33;
	fma.rn.ftz.f32 	%f20, %f29, %f34, %f20;
$Lt_35_5122:
	.loc	31	60	0
	// %f35 = t = Y - params+4; %f36 = e = params+16 selects the curve below.
	sub.ftz.f32 	%f35, %f20, %f21;
	ld.f32 	%f36, [%rd8+16];
	mov.f32 	%f37, 0f3f800000;    	// 1
	setp.neu.ftz.f32 	%p6, %f36, %f37;
	@!%p6 bra 	$Lt_35_5890;
	mov.f32 	%f38, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p7, %f35, %f38;
	@!%p7 bra 	$Lt_35_6402;
	.loc	31	66	0
	// e != 1 and t < 0: Y' = params+12 (avoids lg2 of a negative value).
	mov.f32 	%f20, %f24;
	bra.uni 	$Lt_35_5634;
$Lt_35_6402:
	.loc	31	70	0
	// e != 1 and t >= 0: Y' = outRange * (t / inRange)^e + params+12,
	// with x^e evaluated as ex2(e * lg2(x)).
	rcp.approx.ftz.f32 	%f39, %f23;
	mul.ftz.f32 	%f40, %f35, %f39;
	lg2.approx.ftz.f32 	%f41, %f40;
	mul.ftz.f32 	%f42, %f36, %f41;
	ex2.approx.ftz.f32 	%f43, %f42;
	fma.rn.ftz.f32 	%f20, %f26, %f43, %f24;
	bra.uni 	$Lt_35_5634;
$Lt_35_5890:
	.loc	31	77	0
	// e == 1: Y' = outRange * (t / inRange) + params+12
	rcp.approx.ftz.f32 	%f44, %f23;
	mul.ftz.f32 	%f45, %f35, %f44;
	fma.rn.ftz.f32 	%f20, %f26, %f45, %f24;
$Lt_35_5634:
	.loc	31	81	0
	// %f46 = angle (params+24)
	ld.f32 	%f46, [%rd8+24];
	.loc	31	89	0
	// %f47 = params+20 (final multiplier for both c1' and c2'),
	// %f54 = params+36 (blend factor), %f48 = sin(angle), %f53 = cos(angle).
	// %f62 = c1 = k[+12]*p2 + k[+16]*p1 + k[+20]*p0
	// %f60 = c2 = k[+24]*p2 + k[+28]*p1 + k[+32]*p0
	ld.f32 	%f47, [%rd8+20];
	sin.approx.ftz.f32 	%f48, %f46;
	ld.const.f32 	%f49, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f50, %f49, %f12;
	ld.const.f32 	%f51, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f52, %f51, %f12;
	cos.approx.ftz.f32 	%f53, %f46;
	ld.f32 	%f54, [%rd8+36];
	ld.const.f32 	%f55, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f56, %f55, %f13, %f50;
	ld.const.f32 	%f57, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f58, %f57, %f13, %f52;
	ld.const.f32 	%f59, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f60, %f59, %f11, %f56;
	ld.const.f32 	%f61, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f62, %f61, %f11, %f58;
	// %f65 = c1*cos - c2*sin; blend toward params+28 by %f54; scale by %f47 -> %f69.
	mul.ftz.f32 	%f63, %f60, %f48;
	mul.ftz.f32 	%f64, %f62, %f53;
	sub.ftz.f32 	%f65, %f64, %f63;
	ld.f32 	%f66, [%rd8+28];
	sub.ftz.f32 	%f67, %f66, %f65;
	fma.rn.ftz.f32 	%f68, %f54, %f67, %f65;
	mul.ftz.f32 	%f69, %f47, %f68;
	.loc	31	90	0
	// %f71 = c1*sin + c2*cos; blend toward params+32 by %f54; scale by %f47 -> %f75.
	mul.ftz.f32 	%f70, %f60, %f53;
	fma.rn.ftz.f32 	%f71, %f62, %f48, %f70;
	ld.f32 	%f72, [%rd8+32];
	sub.ftz.f32 	%f73, %f72, %f71;
	fma.rn.ftz.f32 	%f74, %f54, %f73, %f71;
	mul.ftz.f32 	%f75, %f47, %f74;
	.loc	31	92	0
	// Multiply (Y', c1', c2') = (%f20, %f69, %f75) by the 3x3 table K:
	//   pixel+0 <- K[+24]*Y' + K[+28]*c1' + K[+32]*c2'
	//   pixel+4 <- K[+12]*Y' + K[+16]*c1' + K[+20]*c2'
	//   pixel+8 <- K[+0]*Y'  + K[+4]*c1'  + K[+8]*c2'
	ld.const.f32 	%f76, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f77, %f76, %f69;
	ld.const.f32 	%f78, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f79, %f78, %f20, %f77;
	ld.const.f32 	%f80, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f81, %f80, %f75, %f79;
	ld.const.f32 	%f82, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f83, %f82, %f69;
	ld.const.f32 	%f84, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f85, %f84, %f20, %f83;
	ld.const.f32 	%f86, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f87, %f86, %f75, %f85;
	ld.const.f32 	%f88, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f89, %f88, %f69;
	ld.const.f32 	%f90, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f91, %f90, %f20, %f89;
	ld.const.f32 	%f92, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f93, %f92, %f75, %f91;
	// pixel+12 (%f14) is stored back unchanged.
	st.v4.f32 	[%rd2+0], {%f81,%f87,%f93,%f14};
$Lt_35_7170:
$L_35_4354:
	.loc	31	94	0
	ret;
$LDWend__Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters:
	} // _Z28FastColorCorrector_CompositeR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters

	// float LERP<float>(float, float, float)
	// (signature demangled from the symbol name).
	//
	// Returns parmf1 + parmf3 * (parmf2 - parmf1), evaluated as one subtract followed
	// by one fused multiply-add.
	.visible .func (.param .f32 __cudaretf__Z4LERPIfET_S0_S0_S0_) _Z4LERPIfET_S0_S0_S0_ (.param .f32 __cudaparmf1__Z4LERPIfET_S0_S0_S0_, .param .f32 __cudaparmf2__Z4LERPIfET_S0_S0_S0_, .param .f32 __cudaparmf3__Z4LERPIfET_S0_S0_S0_)
	{
	.reg .f32 	%from, %to, %weight;    	// parmf1, parmf2, parmf3
	.reg .f32 	%span, %res;
	.loc	29	78	0
$LDWbegin__Z4LERPIfET_S0_S0_S0_:
	ld.param.f32 	%from, [__cudaparmf1__Z4LERPIfET_S0_S0_S0_];
	ld.param.f32 	%to, [__cudaparmf2__Z4LERPIfET_S0_S0_S0_];
	ld.param.f32 	%weight, [__cudaparmf3__Z4LERPIfET_S0_S0_S0_];
	.loc	29	79	0
	sub.ftz.f32 	%span, %to, %from;
	fma.rn.ftz.f32 	%res, %weight, %span, %from;
	st.param.f32 	[__cudaretf__Z4LERPIfET_S0_S0_S0_], %res;
	ret;
$LDWend__Z4LERPIfET_S0_S0_S0_:
	} // _Z4LERPIfET_S0_S0_S0_

	// FastColorCorrector_Luma(PixelRGB&, int&, int&, int, int,
	//                         const FastColorCorrectorParameters&)
	// (signature demangled from the symbol name; no return value).
	//
	// Gate (G = float at params+40). The pixel is processed only when
	//     G >= 0 and (float)*parmf2 <  (float)parmf4 * G,   or
	//     G <  0 and (float)*parmf3 < -((float)parmf5 * G);
	// otherwise the function returns without touching the pixel.
	//
	// Processing, with Y = weighted sum of the pixel (weights
	// kRGB32f_To_601YPbPr+0/+4/+8), inRange = params+0 - params+4,
	// outRange = params+8 - params+12, t = Y - params+4, e = params+16:
	//     e != 1 : v = outRange * (max(t, 0) / inRange)^e + params+12
	//     e == 1 : v = outRange * (t / inRange)           + params+12
	// The first three pixel floats are overwritten with v scaled by
	// k601YPbPr_To_RGB32f+24/+12/+0; pixel+12 is written back unchanged.
	// NOTE(review): Y is presumably BT.601 luma and e a gamma exponent — inferred
	// from names only; confirm.
	.visible .func _Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters (.param .u64 __cudaparmf1__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf2__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf3__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .s32 __cudaparmf4__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .s32 __cudaparmf5__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf6__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters)
	{
	.reg .u32 %r<8>;
	.reg .u64 %rd<10>;
	.reg .f32 %f<45>;
	.reg .pred %p<7>;
	.loc	31	106	0
$LDWbegin__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters:
	// %rd2 = pixel, %rd4 = parmf2 (int*), %rd6 = parmf3 (int*),
	// %r2 = parmf4, %r4 = parmf5, %rd8 = parameter block.
	ld.param.u64 	%rd1, [__cudaparmf1__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf2__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s64 	%rd4, %rd3;
	ld.param.u64 	%rd5, [__cudaparmf3__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s64 	%rd6, %rd5;
	ld.param.u32 	%r1, [__cudaparmf4__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s32 	%r2, %r1;
	ld.param.u32 	%r3, [__cudaparmf5__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s32 	%r4, %r3;
	ld.param.u64 	%rd7, [__cudaparmf6__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s64 	%rd8, %rd7;
	// Gate, first alternative: G >= 0 and (float)*parmf2 < (float)parmf4 * G.
	ld.f32 	%f1, [%rd8+40];
	mov.f32 	%f2, 0f00000000;     	// 0
	setp.ge.ftz.f32 	%p1, %f1, %f2;
	@!%p1 bra 	$L_37_3074;
	ld.s32 	%r5, [%rd4+0];
	cvt.rn.f32.s32 	%f3, %r5;
	cvt.rn.f32.s32 	%f4, %r2;
	mul.ftz.f32 	%f5, %f4, %f1;
	setp.lt.ftz.f32 	%p2, %f3, %f5;
	@%p2 bra 	$L_37_2818;
$L_37_3074:
	// Gate, second alternative: G < 0 and (float)*parmf3 < -((float)parmf5 * G).
	// Falling through both alternatives returns without touching the pixel.
	mov.f32 	%f6, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f1, %f6;
	@!%p3 bra 	$Lt_37_4354;
	ld.s32 	%r6, [%rd6+0];
	cvt.rn.f32.s32 	%f7, %r6;
	cvt.rn.f32.s32 	%f8, %r4;
	mul.ftz.f32 	%f9, %f8, %f1;
	neg.ftz.f32 	%f10, %f9;
	setp.lt.ftz.f32 	%p4, %f7, %f10;
	@!%p4 bra 	$Lt_37_4354;
$L_37_2818:
	// %f11..%f14 = pixel floats at +0/+4/+8/+12 (16-byte vector load).
	ld.v4.f32 	{%f11,%f12,%f13,%f14}, [%rd2+0];
	.loc	31	113	0
	// %f17 = inRange = params+0 - params+4
	ld.f32 	%f15, [%rd8+4];
	ld.f32 	%f16, [%rd8+0];
	sub.ftz.f32 	%f17, %f16, %f15;
	.loc	31	114	0
	// %f20 = outRange = params+8 - params+12
	ld.f32 	%f18, [%rd8+12];
	ld.f32 	%f19, [%rd8+8];
	sub.ftz.f32 	%f20, %f19, %f18;
	.loc	31	116	0
	// %f26 = Y = k[+8]*pixel+0 + k[+0]*pixel+8 + k[+4]*pixel+4; %f27 = t = Y - params+4.
	ld.const.f32 	%f21, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f22, %f12, %f21;
	ld.const.f32 	%f23, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f24, %f23, %f13, %f22;
	ld.const.f32 	%f25, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f26, %f25, %f11, %f24;
	sub.ftz.f32 	%f27, %f26, %f15;
	// %f28 = e = params+16; e == 1 takes the linear path at $Lt_37_3586.
	ld.f32 	%f28, [%rd8+16];
	mov.f32 	%f29, 0f3f800000;    	// 1
	setp.neu.ftz.f32 	%p5, %f28, %f29;
	@!%p5 bra 	$Lt_37_3586;
	.loc	31	120	0
	// v = outRange * (max(t, 0) / inRange)^e + params+12, with x^e evaluated as
	// ex2(e * lg2(x)); t is floored at 0 before the divide.
	mov.f32 	%f30, 0f00000000;    	// 0
	max.ftz.f32 	%f31, %f27, %f30;
	div.approx.ftz.f32 	%f32, %f31, %f17;
	lg2.approx.ftz.f32 	%f33, %f32;
	mul.ftz.f32 	%f34, %f28, %f33;
	ex2.approx.ftz.f32 	%f35, %f34;
	fma.rn.ftz.f32 	%f36, %f20, %f35, %f18;
	bra.uni 	$Lt_37_3330;
$Lt_37_3586:
	.loc	31	129	0
	// v = outRange * (t / inRange) + params+12 (t is not floored on this path).
	div.approx.ftz.f32 	%f37, %f27, %f17;
	fma.rn.ftz.f32 	%f36, %f20, %f37, %f18;
$Lt_37_3330:
	.loc	31	135	0
	// Scale v by k601YPbPr_To_RGB32f+24/+12/+0 for pixel+0/+4/+8; pixel+12 unchanged.
	ld.const.f32 	%f38, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f39, %f38, %f36;
	ld.const.f32 	%f40, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f41, %f40, %f36;
	ld.const.f32 	%f42, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f43, %f42, %f36;
	st.v4.f32 	[%rd2+0], {%f39,%f41,%f43,%f14};
$Lt_37_4354:
$L_37_2562:
	.loc	31	137	0
	ret;
$LDWend__Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters:
	} // _Z23FastColorCorrector_LumaR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters

	// FastColorCorrector_Mask(PixelRGB&, int&, int&, int, int, const FastColorCorrectorParameters&)
	// (signature demangled from the symbol name). Device function, no return value.
	//   parmf1 : address of four consecutive f32 values (read at +12, written at +0..+15)
	//   parmf2 : address of an s32 compared against the first threshold
	//   parmf3 : address of an s32 compared against the second threshold
	//   parmf4 : s32 scaled by the split value to form the first threshold
	//   parmf5 : s32 scaled by the split value to form the second threshold
	//   parmf6 : address of the parameter struct; only the f32 at byte offset +40 is read
	// With t = params[+40], the pixel is overwritten when
	//   (t >= 0 && float(*parmf2) < float(parmf4) * t) ||
	//   (t <  0 && float(*parmf3) < -(float(parmf5) * t))
	// and left untouched otherwise (including when t is NaN, since both ordered
	// comparisons are then false).
	// NOTE(review): parmf2/parmf3 are presumably the x/y coordinates and parmf4/parmf5
	// the image width/height -- confirm against the caller.
	.visible .func _Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters (.param .u64 __cudaparmf1__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf2__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf3__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .s32 __cudaparmf4__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .s32 __cudaparmf5__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters, .param .u64 __cudaparmf6__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters)
	{
	.reg .u32 %r<8>;
	.reg .u64 %rd<10>;
	.reg .f32 %f<16>;
	.reg .pred %p<6>;
	.loc	31	149	0
$LDWbegin__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters:
	// Copy all six arguments into registers:
	//   %rd2 = parmf1, %rd4 = parmf2, %rd6 = parmf3, %r2 = parmf4, %r4 = parmf5, %rd8 = parmf6
	ld.param.u64 	%rd1, [__cudaparmf1__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf2__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s64 	%rd4, %rd3;
	ld.param.u64 	%rd5, [__cudaparmf3__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s64 	%rd6, %rd5;
	ld.param.u32 	%r1, [__cudaparmf4__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s32 	%r2, %r1;
	ld.param.u32 	%r3, [__cudaparmf5__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s32 	%r4, %r3;
	ld.param.u64 	%rd7, [__cudaparmf6__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters];
	mov.s64 	%rd8, %rd7;
	// %f1 = t, the signed split value at params[+40]
	ld.f32 	%f1, [%rd8+40];
	mov.f32 	%f2, 0f00000000;     	// 0
	setp.ge.ftz.f32 	%p1, %f1, %f2;
	@!%p1 bra 	$L_38_2306;
	// t >= 0: overwrite when float(*parmf2) < float(parmf4) * t
	ld.s32 	%r5, [%rd4+0];
	cvt.rn.f32.s32 	%f3, %r5;
	cvt.rn.f32.s32 	%f4, %r2;
	mul.ftz.f32 	%f5, %f4, %f1;
	setp.lt.ftz.f32 	%p2, %f3, %f5;
	@%p2 bra 	$L_38_2050;
$L_38_2306:
	// Reached when t < 0, when t is NaN, or when the first test failed.
	// Only a negative t can pass the check below.
	mov.f32 	%f6, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f1, %f6;
	@!%p3 bra 	$Lt_38_3074;
	// t < 0: overwrite when float(*parmf3) < -(float(parmf5) * t)
	ld.s32 	%r6, [%rd6+0];
	cvt.rn.f32.s32 	%f7, %r6;
	cvt.rn.f32.s32 	%f8, %r4;
	mul.ftz.f32 	%f9, %f8, %f1;
	neg.ftz.f32 	%f10, %f9;
	setp.lt.ftz.f32 	%p4, %f7, %f10;
	@!%p4 bra 	$Lt_38_3074;
$L_38_2050:
	// Overwrite path: keep the fourth component (+12) and replace the first three
	// with the constants at byte offsets 24, 12 and 0 of k601YPbPr_To_RGB32f.
	.loc	31	153	0
	ld.f32 	%f11, [%rd2+12];
	.loc	31	160	0
	ld.const.f32 	%f12, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f13, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f14, [k601YPbPr_To_RGB32f+0];
	st.v4.f32 	[%rd2+0], {%f12,%f13,%f14,%f11};
$Lt_38_3074:
$L_38_1794:
	.loc	31	162	0
	ret;
$LDWend__Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters:
	} // _Z23FastColorCorrector_MaskR8PixelRGBRiS1_iiRK28FastColorCorrectorParameters

	// GammaCorrection(PixelRGB&, int&, int&, int, int, const GammaCorrectionParameters&)
	// (signature demangled from the symbol name). Device function, no return value.
	//   parmf1 : address of four consecutive f32 values, updated in place
	//   parmf6 : address of the parameter struct; only the f32 at byte offset +0 is read
	//   parmf2..parmf5 are declared but never loaded in this body.
	// With g = params[+0], each of the four components v is replaced by a
	// sign-preserving approximate power:
	//   v >= 0 :  ex2(g * lg2(v))        i.e. approximately  v^g
	//   else   : -ex2(g * lg2(-v))       i.e. approximately -(|v|^g)
	// A NaN component fails the ordered >= test and therefore takes the "else" arm.
	// All four components, including the one at +12, are processed and written back
	// with a single 16-byte vector store.
	.visible .func _Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters (.param .u64 __cudaparmf1__Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters, .param .u64 __cudaparmf2__Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters, .param .u64 __cudaparmf3__Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters, .param .s32 __cudaparmf4__Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters, .param .s32 __cudaparmf5__Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters, .param .u64 __cudaparmf6__Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters)
	{
	.reg .u64 %rd<6>;
	.reg .f32 %f<39>;
	.reg .pred %p<6>;
	.loc	32	41	0
$LDWbegin__Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters:
	// %rd2 = pixel address (parmf1), %rd4 = parameter struct address (parmf6)
	ld.param.u64 	%rd1, [__cudaparmf1__Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf6__Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters];
	mov.s64 	%rd4, %rd3;
	.loc	32	42	0
	// %f1 = g, the exponent shared by all four components
	ld.f32 	%f1, [%rd4+0];
	// ---- component at +0: result in %f6 ----
	ld.f32 	%f2, [%rd2+0];
	mov.f32 	%f3, 0f00000000;     	// 0
	setp.ge.ftz.f32 	%p1, %f2, %f3;
	@!%p1 bra 	$Lt_39_3586;
	.loc	22	292	0
	lg2.approx.ftz.f32 	%f4, %f2;
	mul.ftz.f32 	%f5, %f1, %f4;
	ex2.approx.ftz.f32 	%f6, %f5;
	bra.uni 	$Lt_39_3330;
$Lt_39_3586:
	// negative (or NaN) input: raise the negated value, then negate the result
	neg.ftz.f32 	%f7, %f2;
	lg2.approx.ftz.f32 	%f8, %f7;
	mul.ftz.f32 	%f9, %f1, %f8;
	ex2.approx.ftz.f32 	%f10, %f9;
	neg.ftz.f32 	%f6, %f10;
$Lt_39_3330:
	// ---- component at +4: result in %f15 ----
	ld.f32 	%f11, [%rd2+4];
	mov.f32 	%f12, 0f00000000;    	// 0
	setp.ge.ftz.f32 	%p2, %f11, %f12;
	@!%p2 bra 	$Lt_39_4098;
	.loc	22	293	0
	lg2.approx.ftz.f32 	%f13, %f11;
	mul.ftz.f32 	%f14, %f1, %f13;
	ex2.approx.ftz.f32 	%f15, %f14;
	bra.uni 	$Lt_39_3842;
$Lt_39_4098:
	neg.ftz.f32 	%f16, %f11;
	lg2.approx.ftz.f32 	%f17, %f16;
	mul.ftz.f32 	%f18, %f1, %f17;
	ex2.approx.ftz.f32 	%f19, %f18;
	neg.ftz.f32 	%f15, %f19;
$Lt_39_3842:
	// ---- component at +8: result in %f24 ----
	ld.f32 	%f20, [%rd2+8];
	mov.f32 	%f21, 0f00000000;    	// 0
	setp.ge.ftz.f32 	%p3, %f20, %f21;
	@!%p3 bra 	$Lt_39_4610;
	.loc	22	294	0
	lg2.approx.ftz.f32 	%f22, %f20;
	mul.ftz.f32 	%f23, %f1, %f22;
	ex2.approx.ftz.f32 	%f24, %f23;
	bra.uni 	$Lt_39_4354;
$Lt_39_4610:
	neg.ftz.f32 	%f25, %f20;
	lg2.approx.ftz.f32 	%f26, %f25;
	mul.ftz.f32 	%f27, %f1, %f26;
	ex2.approx.ftz.f32 	%f28, %f27;
	neg.ftz.f32 	%f24, %f28;
$Lt_39_4354:
	// ---- component at +12: result in %f33 ----
	ld.f32 	%f29, [%rd2+12];
	mov.f32 	%f30, 0f00000000;    	// 0
	setp.ge.ftz.f32 	%p4, %f29, %f30;
	@!%p4 bra 	$Lt_39_5122;
	.loc	22	295	0
	lg2.approx.ftz.f32 	%f31, %f29;
	mul.ftz.f32 	%f32, %f1, %f31;
	ex2.approx.ftz.f32 	%f33, %f32;
	bra.uni 	$Lt_39_4866;
$Lt_39_5122:
	neg.ftz.f32 	%f34, %f29;
	lg2.approx.ftz.f32 	%f35, %f34;
	mul.ftz.f32 	%f36, %f1, %f35;
	ex2.approx.ftz.f32 	%f37, %f36;
	neg.ftz.f32 	%f33, %f37;
$Lt_39_4866:
	// write all four results back over the input pixel
	st.v4.f32 	[%rd2+0], {%f6,%f15,%f24,%f33};
	.loc	32	43	0
	ret;
$LDWend__Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters:
	} // _Z15GammaCorrectionR8PixelRGBRiS1_iiRK25GammaCorrectionParameters

	// PixelRGB Power<PixelRGB>(const PixelRGB&, float)
	// (signature demangled from the symbol name).
	//   parmf1 : address of four consecutive f32 values (read only)
	//   parmf2 : f32 exponent e
	//   return : 16-byte aggregate holding the four results at byte offsets +0, +4, +8, +12
	// Each input component v is mapped to a sign-preserving approximate power:
	//   v >= 0 :  ex2(e * lg2(v))        i.e. approximately  v^e
	//   else   : -ex2(e * lg2(-v))       i.e. approximately -(|v|^e)
	// A NaN component fails the ordered >= test and therefore takes the "else" arm.
	// The input pixel is not modified.
	.visible .func (.param .align 16 .b8 __cudaretf__Z5PowerI8PixelRGBET_RKS1_f[16]) _Z5PowerI8PixelRGBET_RKS1_f (.param .u64 __cudaparmf1__Z5PowerI8PixelRGBET_RKS1_f, .param .f32 __cudaparmf2__Z5PowerI8PixelRGBET_RKS1_f)
	{
	.reg .u64 %rd<4>;
	.reg .f32 %f<44>;
	.reg .pred %p<6>;
	.loc	22	290	0
$LDWbegin__Z5PowerI8PixelRGBET_RKS1_f:
	// %rd2 = input pixel address, %f2 = exponent e
	ld.param.u64 	%rd1, [__cudaparmf1__Z5PowerI8PixelRGBET_RKS1_f];
	mov.s64 	%rd2, %rd1;
	ld.param.f32 	%f1, [__cudaparmf2__Z5PowerI8PixelRGBET_RKS1_f];
	mov.f32 	%f2, %f1;
	// ---- component at +0: result in %f7 ----
	ld.f32 	%f3, [%rd2+0];
	mov.f32 	%f4, 0f00000000;     	// 0
	setp.ge.ftz.f32 	%p1, %f3, %f4;
	@!%p1 bra 	$Lt_40_3586;
	.loc	22	292	0
	lg2.approx.ftz.f32 	%f5, %f3;
	mul.ftz.f32 	%f6, %f2, %f5;
	ex2.approx.ftz.f32 	%f7, %f6;
	bra.uni 	$Lt_40_3330;
$Lt_40_3586:
	// negative (or NaN) input: raise the negated value, then negate the result
	neg.ftz.f32 	%f8, %f3;
	lg2.approx.ftz.f32 	%f9, %f8;
	mul.ftz.f32 	%f10, %f2, %f9;
	ex2.approx.ftz.f32 	%f11, %f10;
	neg.ftz.f32 	%f7, %f11;
$Lt_40_3330:
	// ---- component at +4: result in %f16 ----
	ld.f32 	%f12, [%rd2+4];
	mov.f32 	%f13, 0f00000000;    	// 0
	setp.ge.ftz.f32 	%p2, %f12, %f13;
	@!%p2 bra 	$Lt_40_4098;
	.loc	22	293	0
	lg2.approx.ftz.f32 	%f14, %f12;
	mul.ftz.f32 	%f15, %f2, %f14;
	ex2.approx.ftz.f32 	%f16, %f15;
	bra.uni 	$Lt_40_3842;
$Lt_40_4098:
	neg.ftz.f32 	%f17, %f12;
	lg2.approx.ftz.f32 	%f18, %f17;
	mul.ftz.f32 	%f19, %f2, %f18;
	ex2.approx.ftz.f32 	%f20, %f19;
	neg.ftz.f32 	%f16, %f20;
$Lt_40_3842:
	// ---- component at +8: result in %f25 ----
	ld.f32 	%f21, [%rd2+8];
	mov.f32 	%f22, 0f00000000;    	// 0
	setp.ge.ftz.f32 	%p3, %f21, %f22;
	@!%p3 bra 	$Lt_40_4610;
	.loc	22	294	0
	lg2.approx.ftz.f32 	%f23, %f21;
	mul.ftz.f32 	%f24, %f2, %f23;
	ex2.approx.ftz.f32 	%f25, %f24;
	bra.uni 	$Lt_40_4354;
$Lt_40_4610:
	neg.ftz.f32 	%f26, %f21;
	lg2.approx.ftz.f32 	%f27, %f26;
	mul.ftz.f32 	%f28, %f2, %f27;
	ex2.approx.ftz.f32 	%f29, %f28;
	neg.ftz.f32 	%f25, %f29;
$Lt_40_4354:
	// ---- component at +12: result in %f34 ----
	ld.f32 	%f30, [%rd2+12];
	mov.f32 	%f31, 0f00000000;    	// 0
	setp.ge.ftz.f32 	%p4, %f30, %f31;
	@!%p4 bra 	$Lt_40_5122;
	.loc	22	295	0
	lg2.approx.ftz.f32 	%f32, %f30;
	mul.ftz.f32 	%f33, %f2, %f32;
	ex2.approx.ftz.f32 	%f34, %f33;
	bra.uni 	$Lt_40_4866;
$Lt_40_5122:
	neg.ftz.f32 	%f35, %f30;
	lg2.approx.ftz.f32 	%f36, %f35;
	mul.ftz.f32 	%f37, %f2, %f36;
	ex2.approx.ftz.f32 	%f38, %f37;
	neg.ftz.f32 	%f34, %f38;
$Lt_40_4866:
	.loc	22	296	0
	// store the four results into the 16-byte return aggregate, in component order
	mov.f32 	%f39, %f7;
	st.param.f32 	[__cudaretf__Z5PowerI8PixelRGBET_RKS1_f+0], %f39;
	mov.f32 	%f40, %f16;
	st.param.f32 	[__cudaretf__Z5PowerI8PixelRGBET_RKS1_f+4], %f40;
	mov.f32 	%f41, %f25;
	st.param.f32 	[__cudaretf__Z5PowerI8PixelRGBET_RKS1_f+8], %f41;
	mov.f32 	%f42, %f34;
	st.param.f32 	[__cudaretf__Z5PowerI8PixelRGBET_RKS1_f+12], %f42;
	ret;
$LDWend__Z5PowerI8PixelRGBET_RKS1_f:
	} // _Z5PowerI8PixelRGBET_RKS1_f

	// HorizontalFlip(PixelRGB&, int&, int&, int, int)
	// (signature demangled from the symbol name). Device function, no return value.
	// Mirrors the s32 addressed by parmf2 inside the extent passed in parmf4:
	//     *parmf2 = parmf4 - *parmf2 - 1
	// parmf1, parmf3 and parmf5 are declared but never read here.
	// NOTE(review): parmf2 is presumably the x coordinate and parmf4 the image
	// width -- confirm against the caller.
	.visible .func _Z14HorizontalFlipR8PixelRGBRiS1_ii (.param .u64 __cudaparmf1__Z14HorizontalFlipR8PixelRGBRiS1_ii, .param .u64 __cudaparmf2__Z14HorizontalFlipR8PixelRGBRiS1_ii, .param .u64 __cudaparmf3__Z14HorizontalFlipR8PixelRGBRiS1_ii, .param .s32 __cudaparmf4__Z14HorizontalFlipR8PixelRGBRiS1_ii, .param .s32 __cudaparmf5__Z14HorizontalFlipR8PixelRGBRiS1_ii)
	{
	.reg .u64 %coordPtr;
	.reg .u32 %extent, %coord, %mirrored;
	.loc	33	40	0
$LDWbegin__Z14HorizontalFlipR8PixelRGBRiS1_ii:
	ld.param.u64 	%coordPtr, [__cudaparmf2__Z14HorizontalFlipR8PixelRGBRiS1_ii];
	ld.param.u32 	%extent, [__cudaparmf4__Z14HorizontalFlipR8PixelRGBRiS1_ii];
	.loc	33	41	0
	// mirrored = (extent - coord) - 1, written back through the same address
	ld.s32 	%coord, [%coordPtr];
	sub.s32 	%mirrored, %extent, %coord;
	sub.s32 	%mirrored, %mirrored, 1;
	st.s32 	[%coordPtr], %mirrored;
	.loc	33	42	0
	ret;
$LDWend__Z14HorizontalFlipR8PixelRGBRiS1_ii:
	} // _Z14HorizontalFlipR8PixelRGBRiS1_ii

	// Noise(PixelRGB&, int&, int&, int, int, const NoiseParameters&)
	// (signature demangled from the symbol name). Device function, no return value.
	//   parmf1 : address of four consecutive f32 values, updated in place
	//   parmf2 : address of a u32 used as hash input "a"
	//   parmf3 : address of a u32 used as hash input "b"
	//   parmf6 : address of the parameter struct; f32 fields read at byte offsets:
	//              +0  amplitude A (noise is scaled by A and centred with A * 0.5)
	//              +4  mode flag: == 1.0 selects three independent noise values,
	//                  anything else selects one value shared by the three components
	//              +8  clamp flag: == 1.0 clamps all four components to [0, 1]
	//              +12 value truncated to u32 and used as hash input "c"
	//   parmf4 / parmf5 are declared but never loaded in this body.
	// Each noise value is produced by:
	//   1. a nine-step subtract/shift/xor mix of (a, b, c), keeping the final c;
	//   2. r1 = h * 1103515245 + 12345 and r2 = h * -1029531031 - 740551042
	//      (the second pair equals applying the first step twice, modulo 2^32);
	//   3. bits = (((r1 >> 16) & 255) << 7) ^ ((r2 >> 16) & 255)   -- range 0..32767
	//   4. noise = A * (bits / 32767) - A * 0.5
	// NOTE(review): step 1 presumably is the inlined body of Mix3 and steps 2-3 the
	// inlined Rand helper declared at the top of the file -- confirm.
	// NOTE(review): parmf2/parmf3 are presumably the x/y coordinates -- confirm.
	.visible .func _Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters (.param .u64 __cudaparmf1__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters, .param .u64 __cudaparmf2__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters, .param .u64 __cudaparmf3__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters, .param .s32 __cudaparmf4__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters, .param .s32 __cudaparmf5__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters, .param .u64 __cudaparmf6__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters)
	{
	.reg .u32 %r<185>;
	.reg .u64 %rd<10>;
	.reg .f32 %f<54>;
	.reg .pred %p<4>;
	.loc	34	41	0
$LDWbegin__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters:
	// %rd2 = parmf1 (pixel), %rd4 = parmf2, %rd6 = parmf3, %rd8 = parmf6 (parameters)
	ld.param.u64 	%rd1, [__cudaparmf1__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf2__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters];
	mov.s64 	%rd4, %rd3;
	ld.param.u64 	%rd5, [__cudaparmf3__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters];
	mov.s64 	%rd6, %rd5;
	ld.param.u64 	%rd7, [__cudaparmf6__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters];
	mov.s64 	%rd8, %rd7;
	// %r1 = *parmf3 (b), %f1 = params[+12] (becomes c), %r2 = *parmf2 (a),
	// %f2 = A = params[+0], %f4 = A * 0.5, %f5 = mode flag params[+4]
	ld.u32 	%r1, [%rd6+0];
	ld.f32 	%f1, [%rd8+12];
	ld.u32 	%r2, [%rd4+0];
	ld.f32 	%f2, [%rd8+0];
	mov.f32 	%f3, 0f3f000000;     	// 0.5
	mul.ftz.f32 	%f4, %f2, %f3;
	ld.f32 	%f5, [%rd8+4];
	mov.f32 	%f6, 0f3f800000;     	// 1
	setp.eq.ftz.f32 	%p1, %f5, %f6;
	@!%p1 bra 	$Lt_42_6146;
	// ===== mode flag == 1: three noise values, hashed from a = 3 * (*parmf2) + k =====
	.loc	34	48	0
	cvt.rzi.ftz.u32.f32 	%r3, %f1;
	// ---- k = 0: hash in %r40, noise in %f11 ----
	.loc	20	139	0
	mul.lo.s32 	%r4, %r2, 3;
	sub.u32 	%r5, %r4, %r1;
	sub.u32 	%r6, %r5, %r3;
	shr.u32 	%r7, %r3, 13;
	xor.b32 	%r8, %r6, %r7;
	.loc	20	140	0
	sub.u32 	%r9, %r1, %r3;
	sub.u32 	%r10, %r9, %r8;
	shl.b32 	%r11, %r8, 8;
	xor.b32 	%r12, %r10, %r11;
	.loc	20	141	0
	sub.u32 	%r13, %r3, %r8;
	sub.u32 	%r14, %r13, %r12;
	shr.u32 	%r15, %r12, 13;
	xor.b32 	%r16, %r14, %r15;
	.loc	20	142	0
	sub.u32 	%r17, %r8, %r12;
	sub.u32 	%r18, %r17, %r16;
	shr.u32 	%r19, %r16, 12;
	xor.b32 	%r20, %r18, %r19;
	.loc	20	143	0
	sub.u32 	%r21, %r12, %r16;
	sub.u32 	%r22, %r21, %r20;
	shl.b32 	%r23, %r20, 16;
	xor.b32 	%r24, %r22, %r23;
	.loc	20	144	0
	sub.u32 	%r25, %r16, %r20;
	sub.u32 	%r26, %r25, %r24;
	shr.u32 	%r27, %r24, 5;
	xor.b32 	%r28, %r26, %r27;
	.loc	20	145	0
	sub.u32 	%r29, %r20, %r24;
	sub.u32 	%r30, %r29, %r28;
	shr.u32 	%r31, %r28, 3;
	xor.b32 	%r32, %r30, %r31;
	.loc	20	146	0
	sub.u32 	%r33, %r24, %r28;
	sub.u32 	%r34, %r33, %r32;
	shl.b32 	%r35, %r32, 10;
	xor.b32 	%r36, %r34, %r35;
	.loc	20	147	0
	sub.u32 	%r37, %r28, %r32;
	sub.u32 	%r38, %r37, %r36;
	shr.u32 	%r39, %r36, 15;
	xor.b32 	%r40, %r38, %r39;
	.loc	34	48	0
	// turn the hash into a 15-bit value, then into noise = A * (bits / 32767) - A/2
	mul.lo.u32 	%r41, %r40, 1103515245;
	add.u32 	%r42, %r41, 12345;
	shr.u32 	%r43, %r42, 16;
	and.b32 	%r44, %r43, 255;
	shl.b32 	%r45, %r44, 7;
	mul.lo.u32 	%r46, %r40, -1029531031;
	sub.u32 	%r47, %r46, 740551042;
	shr.u32 	%r48, %r47, 16;
	and.b32 	%r49, %r48, 255;
	xor.b32 	%r50, %r45, %r49;
	cvt.rn.f32.s32 	%f7, %r50;
	mov.f32 	%f8, 0f46fffe00;     	// 32767
	div.approx.ftz.f32 	%f9, %f7, %f8;
	mul.ftz.f32 	%f10, %f2, %f9;
	sub.ftz.f32 	%f11, %f10, %f4;
	// ---- k = 1: reuses %r6 (= a - b - c), %r7 (= c >> 13) and %r9 (= b - c) from
	//      the first hash; hash in %r83, noise in %f16 ----
	.loc	20	139	0
	add.u32 	%r51, %r6, 1;
	xor.b32 	%r52, %r7, %r51;
	.loc	20	140	0
	sub.u32 	%r53, %r9, %r52;
	shl.b32 	%r54, %r52, 8;
	xor.b32 	%r55, %r53, %r54;
	.loc	20	141	0
	sub.u32 	%r56, %r3, %r52;
	sub.u32 	%r57, %r56, %r55;
	shr.u32 	%r58, %r55, 13;
	xor.b32 	%r59, %r57, %r58;
	.loc	20	142	0
	sub.u32 	%r60, %r52, %r55;
	sub.u32 	%r61, %r60, %r59;
	shr.u32 	%r62, %r59, 12;
	xor.b32 	%r63, %r61, %r62;
	.loc	20	143	0
	sub.u32 	%r64, %r55, %r59;
	sub.u32 	%r65, %r64, %r63;
	shl.b32 	%r66, %r63, 16;
	xor.b32 	%r67, %r65, %r66;
	.loc	20	144	0
	sub.u32 	%r68, %r59, %r63;
	sub.u32 	%r69, %r68, %r67;
	shr.u32 	%r70, %r67, 5;
	xor.b32 	%r71, %r69, %r70;
	.loc	20	145	0
	sub.u32 	%r72, %r63, %r67;
	sub.u32 	%r73, %r72, %r71;
	shr.u32 	%r74, %r71, 3;
	xor.b32 	%r75, %r73, %r74;
	.loc	20	146	0
	sub.u32 	%r76, %r67, %r71;
	sub.u32 	%r77, %r76, %r75;
	shl.b32 	%r78, %r75, 10;
	xor.b32 	%r79, %r77, %r78;
	.loc	20	147	0
	sub.u32 	%r80, %r71, %r75;
	sub.u32 	%r81, %r80, %r79;
	shr.u32 	%r82, %r79, 15;
	xor.b32 	%r83, %r81, %r82;
	.loc	34	49	0
	mul.lo.u32 	%r84, %r83, 1103515245;
	add.u32 	%r85, %r84, 12345;
	shr.u32 	%r86, %r85, 16;
	and.b32 	%r87, %r86, 255;
	shl.b32 	%r88, %r87, 7;
	mul.lo.u32 	%r89, %r83, -1029531031;
	sub.u32 	%r90, %r89, 740551042;
	shr.u32 	%r91, %r90, 16;
	and.b32 	%r92, %r91, 255;
	xor.b32 	%r93, %r88, %r92;
	cvt.rn.f32.s32 	%f12, %r93;
	mov.f32 	%f13, 0f46fffe00;    	// 32767
	div.approx.ftz.f32 	%f14, %f12, %f13;
	mul.ftz.f32 	%f15, %f2, %f14;
	sub.ftz.f32 	%f16, %f15, %f4;
	// ---- k = 2: hash in %r126, noise in %f21 ----
	.loc	20	139	0
	add.u32 	%r94, %r6, 2;
	xor.b32 	%r95, %r7, %r94;
	.loc	20	140	0
	sub.u32 	%r96, %r9, %r95;
	shl.b32 	%r97, %r95, 8;
	xor.b32 	%r98, %r96, %r97;
	.loc	20	141	0
	sub.u32 	%r99, %r3, %r95;
	sub.u32 	%r100, %r99, %r98;
	shr.u32 	%r101, %r98, 13;
	xor.b32 	%r102, %r100, %r101;
	.loc	20	142	0
	sub.u32 	%r103, %r95, %r98;
	sub.u32 	%r104, %r103, %r102;
	shr.u32 	%r105, %r102, 12;
	xor.b32 	%r106, %r104, %r105;
	.loc	20	143	0
	sub.u32 	%r107, %r98, %r102;
	sub.u32 	%r108, %r107, %r106;
	shl.b32 	%r109, %r106, 16;
	xor.b32 	%r110, %r108, %r109;
	.loc	20	144	0
	sub.u32 	%r111, %r102, %r106;
	sub.u32 	%r112, %r111, %r110;
	shr.u32 	%r113, %r110, 5;
	xor.b32 	%r114, %r112, %r113;
	.loc	20	145	0
	sub.u32 	%r115, %r106, %r110;
	sub.u32 	%r116, %r115, %r114;
	shr.u32 	%r117, %r114, 3;
	xor.b32 	%r118, %r116, %r117;
	.loc	20	146	0
	sub.u32 	%r119, %r110, %r114;
	sub.u32 	%r120, %r119, %r118;
	shl.b32 	%r121, %r118, 10;
	xor.b32 	%r122, %r120, %r121;
	.loc	20	147	0
	sub.u32 	%r123, %r114, %r118;
	sub.u32 	%r124, %r123, %r122;
	shr.u32 	%r125, %r122, 15;
	xor.b32 	%r126, %r124, %r125;
	.loc	34	50	0
	mul.lo.u32 	%r127, %r126, 1103515245;
	add.u32 	%r128, %r127, 12345;
	shr.u32 	%r129, %r128, 16;
	and.b32 	%r130, %r129, 255;
	shl.b32 	%r131, %r130, 7;
	mul.lo.u32 	%r132, %r126, -1029531031;
	sub.u32 	%r133, %r132, 740551042;
	shr.u32 	%r134, %r133, 16;
	and.b32 	%r135, %r134, 255;
	xor.b32 	%r136, %r131, %r135;
	cvt.rn.f32.s32 	%f17, %r136;
	mov.f32 	%f18, 0f46fffe00;    	// 32767
	div.approx.ftz.f32 	%f19, %f17, %f18;
	mul.ftz.f32 	%f20, %f2, %f19;
	sub.ftz.f32 	%f21, %f20, %f4;
	bra.uni 	$Lt_42_5890;
$Lt_42_6146:
	// ===== mode flag != 1: one noise value, hashed from a = *parmf2 (no scaling),
	//       copied into all three noise registers at the end of this arm =====
	.loc	34	54	0
	cvt.rzi.ftz.u32.f32 	%r137, %f1;
	.loc	20	139	0
	shr.u32 	%r138, %r137, 13;
	sub.u32 	%r139, %r2, %r1;
	sub.u32 	%r140, %r139, %r137;
	xor.b32 	%r141, %r138, %r140;
	.loc	20	140	0
	sub.u32 	%r142, %r1, %r137;
	sub.u32 	%r143, %r142, %r141;
	shl.b32 	%r144, %r141, 8;
	xor.b32 	%r145, %r143, %r144;
	.loc	20	141	0
	sub.u32 	%r146, %r137, %r141;
	sub.u32 	%r147, %r146, %r145;
	shr.u32 	%r148, %r145, 13;
	xor.b32 	%r149, %r147, %r148;
	.loc	20	142	0
	sub.u32 	%r150, %r141, %r145;
	sub.u32 	%r151, %r150, %r149;
	shr.u32 	%r152, %r149, 12;
	xor.b32 	%r153, %r151, %r152;
	.loc	20	143	0
	sub.u32 	%r154, %r145, %r149;
	sub.u32 	%r155, %r154, %r153;
	shl.b32 	%r156, %r153, 16;
	xor.b32 	%r157, %r155, %r156;
	.loc	20	144	0
	sub.u32 	%r158, %r149, %r153;
	sub.u32 	%r159, %r158, %r157;
	shr.u32 	%r160, %r157, 5;
	xor.b32 	%r161, %r159, %r160;
	.loc	20	145	0
	sub.u32 	%r162, %r153, %r157;
	sub.u32 	%r163, %r162, %r161;
	shr.u32 	%r164, %r161, 3;
	xor.b32 	%r165, %r163, %r164;
	.loc	20	146	0
	sub.u32 	%r166, %r157, %r161;
	sub.u32 	%r167, %r166, %r165;
	shl.b32 	%r168, %r165, 10;
	xor.b32 	%r169, %r167, %r168;
	.loc	20	147	0
	sub.u32 	%r170, %r161, %r165;
	sub.u32 	%r171, %r170, %r169;
	shr.u32 	%r172, %r169, 15;
	xor.b32 	%r173, %r171, %r172;
	.loc	34	54	0
	mul.lo.u32 	%r174, %r173, 1103515245;
	add.u32 	%r175, %r174, 12345;
	shr.u32 	%r176, %r175, 16;
	and.b32 	%r177, %r176, 255;
	shl.b32 	%r178, %r177, 7;
	mul.lo.u32 	%r179, %r173, -1029531031;
	sub.u32 	%r180, %r179, 740551042;
	shr.u32 	%r181, %r180, 16;
	and.b32 	%r182, %r181, 255;
	xor.b32 	%r183, %r178, %r182;
	cvt.rn.f32.s32 	%f22, %r183;
	mov.f32 	%f23, 0f46fffe00;    	// 32767
	div.approx.ftz.f32 	%f24, %f22, %f23;
	mul.ftz.f32 	%f25, %f2, %f24;
	sub.ftz.f32 	%f21, %f25, %f4;
	mov.f32 	%f16, %f21;
	mov.f32 	%f11, %f21;
$Lt_42_5890:
	// ===== merge: add the noise to the first three components =====
	// %f11 goes to the component at +8, %f16 to +4 and %f21 to +0; the fourth
	// component is not loaded here (sink operand "_").
	ld.v4.f32 	{%f26,%f27,%f28,_}, [%rd2+0];
	.loc	34	57	0
	add.ftz.f32 	%f29, %f28, %f11;
	st.f32 	[%rd2+8], %f29;
	.loc	34	58	0
	add.ftz.f32 	%f30, %f27, %f16;
	.loc	34	59	0
	add.ftz.f32 	%f31, %f26, %f21;
	st.v2.f32 	[%rd2+0], {%f31,%f30};
	// optional clamp, enabled when params[+8] == 1.0
	ld.f32 	%f32, [%rd8+8];
	mov.f32 	%f33, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p2, %f32, %f33;
	@!%p2 bra 	$Lt_42_6402;
	// Components +4, +8 and +12 are reloaded from memory; +0 is taken from %f31.
	.loc	22	345	0
	ld.f32 	%f34, [%rd2+4];
	.loc	22	346	0
	ld.f32 	%f35, [%rd2+8];
	.loc	22	347	0
	ld.f32 	%f36, [%rd2+12];
	.loc	34	63	0
	// clamp each of the four components to [0, 1] as min(max(v, 0), 1)
	mov.f32 	%f37, 0f00000000;    	// 0
	max.ftz.f32 	%f38, %f31, %f37;
	mov.f32 	%f39, 0f3f800000;    	// 1
	min.ftz.f32 	%f40, %f38, %f39;
	mov.f32 	%f41, 0f00000000;    	// 0
	max.ftz.f32 	%f42, %f34, %f41;
	mov.f32 	%f43, 0f3f800000;    	// 1
	min.ftz.f32 	%f44, %f42, %f43;
	mov.f32 	%f45, 0f00000000;    	// 0
	max.ftz.f32 	%f46, %f35, %f45;
	mov.f32 	%f47, 0f3f800000;    	// 1
	min.ftz.f32 	%f48, %f46, %f47;
	mov.f32 	%f49, 0f00000000;    	// 0
	max.ftz.f32 	%f50, %f36, %f49;
	mov.f32 	%f51, 0f3f800000;    	// 1
	min.ftz.f32 	%f52, %f50, %f51;
	st.v4.f32 	[%rd2+0], {%f40,%f44,%f48,%f52};
$Lt_42_6402:
	.loc	34	65	0
	ret;
$LDWend__Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters:
	} // _Z5NoiseR8PixelRGBRiS1_iiRK15NoiseParameters

	// ProcAmp(PixelRGB&, int&, int&, int, int, const ProcAmpParameters&)
	// (signature demangled from the symbol name). Device function, no return value.
	//   parmf1 : address of four consecutive f32 values, updated in place
	//   parmf2 : address of an s32 compared against the split threshold
	//   parmf4 : s32 scaled by params[+16] to form that threshold
	//   parmf6 : address of the parameter struct; f32 fields read at byte offsets
	//            +0 (offset added to the first transformed value), +4 (its gain),
	//            +8 and +12 (the two coefficients of the 2x2 mix below), +16 (split)
	//   parmf3 / parmf5 are declared but never loaded in this body.
	// The pixel is processed only when float(*parmf2) < float(parmf4) * params[+16].
	// Processing, with M = kRGB32f_To_601YPbPr and K = k601YPbPr_To_RGB32f (byte offsets
	// in brackets) and p0, p1, p2 the components at +0, +4, +8:
	//   y  = params[+4] * (M[8]*p0  + M[4]*p1  + M[0]*p2)  + params[+0]
	//   u  =               M[20]*p0 + M[16]*p1 + M[12]*p2
	//   v  =               M[32]*p0 + M[28]*p1 + M[24]*p2
	//   u' = params[+8]*u - params[+12]*v
	//   v' = params[+8]*v + params[+12]*u
	//   out0 = K[24]*y + K[28]*u' + K[32]*v'
	//   out1 = K[12]*y + K[16]*u' + K[20]*v'
	//   out2 = K[0]*y  + K[4]*u'  + K[8]*v'
	// The fourth component (+12) is stored back unchanged.
	// NOTE(review): params[+8]/[+12] presumably hold cos/sin of a hue angle pre-scaled
	// by saturation -- confirm against the host code that fills ProcAmpParameters.
	.visible .func _Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters (.param .u64 __cudaparmf1__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters, .param .u64 __cudaparmf2__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters, .param .u64 __cudaparmf3__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters, .param .s32 __cudaparmf4__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters, .param .s32 __cudaparmf5__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters, .param .u64 __cudaparmf6__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters)
	{
	.reg .u32 %r<5>;
	.reg .u64 %rd<8>;
	.reg .f32 %f<56>;
	.reg .pred %p<3>;
	.loc	35	45	0
$LDWbegin__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters:
	// %rd2 = parmf1 (pixel), %rd4 = parmf2, %r2 = parmf4, %rd6 = parmf6 (parameters)
	ld.param.u64 	%rd1, [__cudaparmf1__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf2__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters];
	mov.s64 	%rd4, %rd3;
	ld.param.u32 	%r1, [__cudaparmf4__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters];
	mov.s32 	%r2, %r1;
	ld.param.u64 	%rd5, [__cudaparmf6__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters];
	mov.s64 	%rd6, %rd5;
	// split test: skip everything unless float(*parmf2) < float(parmf4) * params[+16]
	ld.s32 	%r3, [%rd4+0];
	cvt.rn.f32.s32 	%f1, %r3;
	cvt.rn.f32.s32 	%f2, %r2;
	ld.f32 	%f3, [%rd6+16];
	mul.ftz.f32 	%f4, %f2, %f3;
	setp.lt.ftz.f32 	%p1, %f1, %f4;
	@!%p1 bra 	$Lt_43_1026;
	// %f5..%f8 = pixel components at +0, +4, +8, +12
	ld.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd2+0];
	.loc	35	52	0
	// %f16 = first matrix row applied to the pixel; %f17 = y = gain * %f16 + offset
	ld.f32 	%f9, [%rd6+0];
	ld.f32 	%f10, [%rd6+4];
	ld.const.f32 	%f11, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f12, %f11, %f6;
	ld.const.f32 	%f13, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f14, %f13, %f7, %f12;
	ld.const.f32 	%f15, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f16, %f15, %f5, %f14;
	fma.rn.ftz.f32 	%f17, %f10, %f16, %f9;
	.loc	35	53	0
	// %f31 = u (second matrix row), %f29 = v (third matrix row);
	// %f34 = u' = params[+8]*u - params[+12]*v
	ld.f32 	%f18, [%rd6+12];
	ld.const.f32 	%f19, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f20, %f19, %f6;
	ld.f32 	%f21, [%rd6+8];
	ld.const.f32 	%f22, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f23, %f22, %f6;
	ld.const.f32 	%f24, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f25, %f24, %f7, %f20;
	ld.const.f32 	%f26, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f27, %f26, %f7, %f23;
	ld.const.f32 	%f28, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f29, %f28, %f5, %f25;
	ld.const.f32 	%f30, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f31, %f30, %f5, %f27;
	mul.ftz.f32 	%f32, %f18, %f29;
	mul.ftz.f32 	%f33, %f21, %f31;
	sub.ftz.f32 	%f34, %f33, %f32;
	.loc	35	54	0
	// %f36 = v' = params[+8]*v + params[+12]*u
	mul.ftz.f32 	%f35, %f18, %f31;
	fma.rn.ftz.f32 	%f36, %f21, %f29, %f35;
	.loc	35	56	0
	// inverse matrix applied to (y, u', v'): %f42 -> +0, %f48 -> +4, %f54 -> +8
	ld.const.f32 	%f37, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f38, %f37, %f34;
	ld.const.f32 	%f39, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f40, %f39, %f17, %f38;
	ld.const.f32 	%f41, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f42, %f41, %f36, %f40;
	ld.const.f32 	%f43, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f44, %f43, %f34;
	ld.const.f32 	%f45, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f46, %f45, %f17, %f44;
	ld.const.f32 	%f47, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f48, %f47, %f36, %f46;
	ld.const.f32 	%f49, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f50, %f49, %f34;
	ld.const.f32 	%f51, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f52, %f51, %f17, %f50;
	ld.const.f32 	%f53, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f54, %f53, %f36, %f52;
	// write back; the fourth component %f8 is passed through unchanged
	st.v4.f32 	[%rd2+0], {%f42,%f48,%f54,%f8};
$Lt_43_1026:
	.loc	35	58	0
	ret;
$LDWend__Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters:
	} // _Z7ProcAmpR8PixelRGBRiS1_iiRK17ProcAmpParameters

	// Tint(PixelRGB&, int&, int&, int, int, const TintParameters&)
	// (signature demangled from the symbol name). Device function, no return value.
	//   parmf1 : address of four consecutive f32 values; the first three are updated
	//   parmf6 : address of the parameter struct; f32 fields read at byte offsets
	//            +0/+4/+8 (first triple), +12/+16/+20 (second triple), +24 (blend amount)
	//   parmf2..parmf5 are declared but never loaded in this body.
	// With M = kRGB32f_To_601YPbPr (byte offsets in brackets) and p0, p1, p2 the pixel
	// components at +0, +4, +8:
	//   w      = M[8]*p0 + M[4]*p1 + M[0]*p2
	//   tint_i = first_i + w * (second_i - first_i)            for i = 0, 1, 2
	//   out_i  = p_i + params[+24] * (tint_i - p_i)
	// The fourth component (+12) is neither read nor written.
	// NOTE(review): the two triples are presumably the colours mapped to black and to
	// white -- confirm against the host code that fills TintParameters.
	.visible .func _Z4TintR8PixelRGBRiS1_iiRK14TintParameters (.param .u64 __cudaparmf1__Z4TintR8PixelRGBRiS1_iiRK14TintParameters, .param .u64 __cudaparmf2__Z4TintR8PixelRGBRiS1_iiRK14TintParameters, .param .u64 __cudaparmf3__Z4TintR8PixelRGBRiS1_iiRK14TintParameters, .param .s32 __cudaparmf4__Z4TintR8PixelRGBRiS1_iiRK14TintParameters, .param .s32 __cudaparmf5__Z4TintR8PixelRGBRiS1_iiRK14TintParameters, .param .u64 __cudaparmf6__Z4TintR8PixelRGBRiS1_iiRK14TintParameters)
	{
	.reg .u64 %rd<6>;
	.reg .f32 %f<34>;
	.loc	36	42	0
$LDWbegin__Z4TintR8PixelRGBRiS1_iiRK14TintParameters:
	// %rd2 = parmf1 (pixel), %rd4 = parmf6 (parameters)
	ld.param.u64 	%rd1, [__cudaparmf1__Z4TintR8PixelRGBRiS1_iiRK14TintParameters];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf6__Z4TintR8PixelRGBRiS1_iiRK14TintParameters];
	mov.s64 	%rd4, %rd3;
	// %f1..%f3 = p0..p2; the fourth component is discarded (sink operand "_")
	ld.v4.f32 	{%f1,%f2,%f3,_}, [%rd2+0];
	.loc	36	46	0
	// %f10 = w; %f13 = tint_1 = params[+4] + w * (params[+16] - params[+4])
	ld.f32 	%f4, [%rd4+4];
	ld.const.f32 	%f5, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f6, %f5, %f2;
	ld.const.f32 	%f7, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f8, %f7, %f3, %f6;
	ld.const.f32 	%f9, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f10, %f9, %f1, %f8;
	ld.f32 	%f11, [%rd4+16];
	sub.ftz.f32 	%f12, %f11, %f4;
	fma.rn.ftz.f32 	%f13, %f10, %f12, %f4;
	.loc	36	47	0
	// %f17 = tint_2 = params[+8] + w * (params[+20] - params[+8])
	ld.f32 	%f14, [%rd4+8];
	ld.f32 	%f15, [%rd4+20];
	sub.ftz.f32 	%f16, %f15, %f14;
	fma.rn.ftz.f32 	%f17, %f10, %f16, %f14;
	.loc	36	49	0
	// %f22 = tint_0 = params[+0] + w * (params[+12] - params[+0]);
	// %f24 = out_0 = p0 + params[+24] * (tint_0 - p0)
	ld.f32 	%f18, [%rd4+0];
	ld.f32 	%f19, [%rd4+24];
	ld.f32 	%f20, [%rd4+12];
	sub.ftz.f32 	%f21, %f20, %f18;
	fma.rn.ftz.f32 	%f22, %f10, %f21, %f18;
	sub.ftz.f32 	%f23, %f22, %f1;
	fma.rn.ftz.f32 	%f24, %f19, %f23, %f1;
	.loc	36	50	0
	// out_1: p1 and the blend amount are reloaded from memory rather than reused
	ld.f32 	%f25, [%rd2+4];
	ld.f32 	%f26, [%rd4+24];
	sub.ftz.f32 	%f27, %f13, %f25;
	fma.rn.ftz.f32 	%f28, %f26, %f27, %f25;
	st.v2.f32 	[%rd2+0], {%f24,%f28};
	.loc	36	51	0
	// out_2: same blend for the component at +8
	ld.f32 	%f29, [%rd2+8];
	ld.f32 	%f30, [%rd4+24];
	sub.ftz.f32 	%f31, %f17, %f29;
	fma.rn.ftz.f32 	%f32, %f30, %f31, %f29;
	st.f32 	[%rd2+8], %f32;
	.loc	36	52	0
	ret;
$LDWend__Z4TintR8PixelRGBRiS1_iiRK14TintParameters:
	} // _Z4TintR8PixelRGBRiS1_iiRK14TintParameters

	// VerticalFlip(PixelRGB&, int&, int&, int, int)
	// (signature demangled from the symbol name). Device function, no return value.
	// Mirrors the s32 addressed by parmf3 inside the extent passed in parmf5:
	//     *parmf3 = parmf5 - *parmf3 - 1
	// parmf1, parmf2 and parmf4 are declared but never read here.
	// NOTE(review): parmf3 is presumably the y coordinate and parmf5 the image
	// height -- confirm against the caller.
	.visible .func _Z12VerticalFlipR8PixelRGBRiS1_ii (.param .u64 __cudaparmf1__Z12VerticalFlipR8PixelRGBRiS1_ii, .param .u64 __cudaparmf2__Z12VerticalFlipR8PixelRGBRiS1_ii, .param .u64 __cudaparmf3__Z12VerticalFlipR8PixelRGBRiS1_ii, .param .s32 __cudaparmf4__Z12VerticalFlipR8PixelRGBRiS1_ii, .param .s32 __cudaparmf5__Z12VerticalFlipR8PixelRGBRiS1_ii)
	{
	.reg .u64 %coordPtr;
	.reg .u32 %extent, %coord, %mirrored;
	.loc	37	40	0
$LDWbegin__Z12VerticalFlipR8PixelRGBRiS1_ii:
	ld.param.u64 	%coordPtr, [__cudaparmf3__Z12VerticalFlipR8PixelRGBRiS1_ii];
	ld.param.u32 	%extent, [__cudaparmf5__Z12VerticalFlipR8PixelRGBRiS1_ii];
	.loc	37	41	0
	// mirrored = (extent - coord) - 1, written back through the same address
	ld.s32 	%coord, [%coordPtr];
	sub.s32 	%mirrored, %extent, %coord;
	sub.s32 	%mirrored, %mirrored, 1;
	st.s32 	[%coordPtr], %mirrored;
	.loc	37	42	0
	ret;
$LDWend__Z12VerticalFlipR8PixelRGBRiS1_ii:
	} // _Z12VerticalFlipR8PixelRGBRiS1_ii

	.visible .func _Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter (.param .u64 __cudaparmf1__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter, .param .u64 __cudaparmf2__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter, .param .u64 __cudaparmf3__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter, .param .s32 __cudaparmf4__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter, .param .s32 __cudaparmf5__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter, .param .u64 __cudaparmf6__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter)
	{
	.reg .u32 %r<307>;
	.reg .u64 %rd<10>;
	.reg .f32 %f<519>;
	.reg .pred %p<61>;
	.loc	38	53	0
$LDWbegin__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter:
	ld.param.u64 	%rd1, [__cudaparmf1__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf2__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter];
	mov.s64 	%rd4, %rd3;
	ld.param.u64 	%rd5, [__cudaparmf3__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter];
	mov.s64 	%rd6, %rd5;
	ld.param.u32 	%r1, [__cudaparmf4__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter];
	mov.s32 	%r2, %r1;
	ld.param.u32 	%r3, [__cudaparmf5__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter];
	mov.s32 	%r4, %r3;
	ld.param.u64 	%rd7, [__cudaparmf6__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter];
	mov.s64 	%rd8, %rd7;
	.loc	38	54	0
	ld.s32 	%r5, [%rd8+0];
	mov.u32 	%r6, 0;
	setp.eq.s32 	%p1, %r5, %r6;
	@%p1 bra 	$Lt_46_258;
	mov.u32 	%r7, 1;
	setp.eq.s32 	%p2, %r5, %r7;
	@%p2 bra 	$Lt_46_770;
	mov.u32 	%r8, 2;
	setp.eq.s32 	%p3, %r5, %r8;
	@%p3 bra 	$Lt_46_1026;
	mov.u32 	%r9, 3;
	setp.eq.s32 	%p4, %r5, %r9;
	@%p4 bra 	$Lt_46_1282;
	mov.u32 	%r10, 4;
	setp.eq.s32 	%p5, %r5, %r10;
	@%p5 bra 	$Lt_46_1538;
	mov.u32 	%r11, 5;
	setp.eq.s32 	%p6, %r5, %r11;
	@%p6 bra 	$Lt_46_1794;
	mov.u32 	%r12, 6;
	setp.eq.s32 	%p7, %r5, %r12;
	@%p7 bra 	$Lt_46_2050;
	mov.u32 	%r13, 7;
	setp.eq.s32 	%p8, %r5, %r13;
	@%p8 bra 	$Lt_46_2306;
	mov.u32 	%r14, 8;
	setp.eq.s32 	%p9, %r5, %r14;
	@%p9 bra 	$Lt_46_2562;
	mov.u32 	%r15, 9;
	setp.eq.s32 	%p10, %r5, %r15;
	@%p10 bra 	$Lt_46_2818;
	mov.u32 	%r16, 10;
	setp.eq.s32 	%p11, %r5, %r16;
	@%p11 bra 	$Lt_46_3074;
	mov.u32 	%r17, 11;
	setp.eq.s32 	%p12, %r5, %r17;
	@%p12 bra 	$Lt_46_3330;
	mov.u32 	%r18, 12;
	setp.eq.s32 	%p13, %r5, %r18;
	@%p13 bra 	$Lt_46_3586;
	mov.u32 	%r19, 13;
	setp.eq.s32 	%p14, %r5, %r19;
	@%p14 bra 	$Lt_46_3842;
	mov.u32 	%r20, 14;
	setp.eq.s32 	%p15, %r5, %r20;
	@%p15 bra 	$Lt_46_4098;
	mov.u32 	%r21, 15;
	setp.eq.s32 	%p16, %r5, %r21;
	@%p16 bra 	$Lt_46_4354;
	mov.u32 	%r22, 16;
	setp.eq.s32 	%p17, %r5, %r22;
	@%p17 bra 	$Lt_46_4610;
	bra.uni 	$Lt_46_514;
$Lt_46_258:
	.loc	21	42	0
	ld.f32 	%f1, [%rd2+12];
	cvt.ftz.sat.f32.f32 	%f2, %f1;
	st.f32 	[%rd2+12], %f2;
	ld.f32 	%f3, [%rd8+8];
	mov.f32 	%f4, 0f3f800000;     	// 1
	setp.eq.ftz.f32 	%p18, %f3, %f4;
	ld.f32 	%f5, [%rd8+16];
	mov.f32 	%f6, 0f3f800000;     	// 1
	setp.eq.ftz.f32 	%p19, %f5, %f6;
	@!%p19 bra 	$Lt_46_51970;
	ld.f32 	%f7, [%rd8+4];
	@!%p18 bra 	$Lt_46_52482;
	.loc	21	47	0
	mov.f32 	%f2, %f7;
	st.f32 	[%rd2+12], %f7;
	bra.uni 	$Lt_46_52226;
$Lt_46_52482:
	.loc	21	51	0
	mul.ftz.f32 	%f2, %f2, %f7;
	st.f32 	[%rd2+12], %f2;
$Lt_46_52226:
	ld.f32 	%f8, [%rd8+12];
	mov.f32 	%f9, 0f3f800000;     	// 1
	setp.eq.ftz.f32 	%p20, %f8, %f9;
	@!%p20 bra 	$Lt_46_52738;
	.loc	21	55	0
	mov.f32 	%f10, 0f3f800000;    	// 1
	sub.ftz.f32 	%f2, %f10, %f2;
	st.f32 	[%rd2+12], %f2;
$Lt_46_52738:
	.loc	21	57	0
	mov.f32 	%f11, %f2;
	st.v2.f32 	[%rd2+0], {%f11,%f11};
	st.f32 	[%rd2+8], %f11;
	bra.uni 	$Lt_46_514;
$Lt_46_51970:
	@!%p18 bra 	$Lt_46_53506;
	.loc	21	61	0
	ld.f32 	%f12, [%rd8+4];
	st.f32 	[%rd2+12], %f12;
	ld.f32 	%f13, [%rd8+12];
	mov.f32 	%f14, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p21, %f13, %f14;
	@!%p21 bra 	$Lt_46_514;
	.loc	21	64	0
	mov.f32 	%f15, 0f3f800000;    	// 1
	sub.ftz.f32 	%f16, %f15, %f12;
	st.f32 	[%rd2+12], %f16;
	bra.uni 	$Lt_46_514;
$Lt_46_53506:
	ld.f32 	%f17, [%rd8+12];
	mov.f32 	%f18, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p22, %f17, %f18;
	@!%p22 bra 	$Lt_46_54274;
	.loc	21	71	0
	mov.f32 	%f19, 0f3f800000;    	// 1
	sub.ftz.f32 	%f2, %f19, %f2;
	st.f32 	[%rd2+12], %f2;
$Lt_46_54274:
	.loc	21	73	0
	ld.f32 	%f20, [%rd8+4];
	mul.ftz.f32 	%f21, %f20, %f2;
	st.f32 	[%rd2+12], %f21;
	bra.uni 	$Lt_46_514;
$Lt_46_770:
	ld.v4.f32 	{%f22,%f23,%f24,%f25}, [%rd2+0];
	.loc	23	44	0
	ld.const.f32 	%f26, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f27, %f26, %f23;
	ld.const.f32 	%f28, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f29, %f28, %f24, %f27;
	ld.const.f32 	%f30, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f31, %f30, %f22, %f29;
	ld.const.f32 	%f32, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f33, %f32, %f31;
	ld.const.f32 	%f34, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f35, %f34, %f31;
	ld.const.f32 	%f36, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f37, %f36, %f31;
	st.v4.f32 	[%rd2+0], {%f33,%f35,%f37,%f25};
	.loc	38	61	0
	bra.uni 	$Lt_46_514;
$Lt_46_1026:
	.loc	38	63	0
	ld.f32 	%f38, [%rd8+12];
	ld.f32 	%f24, [%rd2+8];
	ld.f32 	%f39, [%rd8+4];
	mov.f32 	%f40, 0f00000000;    	// 0
	setp.neu.ftz.f32 	%p23, %f39, %f40;
	@!%p23 bra 	$Lt_46_55042;
	.loc	24	44	0
	ld.f32 	%f41, [%rd8+20];
	ld.f32 	%f42, [%rd8+8];
	sub.ftz.f32 	%f43, %f24, %f38;
	fma.rn.ftz.f32 	%f44, %f42, %f43, %f41;
	st.f32 	[%rd2+8], %f44;
	.loc	24	45	0
	ld.f32 	%f45, [%rd8+20];
	ld.f32 	%f46, [%rd8+8];
	ld.v2.f32 	{%f47,%f48}, [%rd2+0];
	ld.f32 	%f49, [%rd8+12];
	sub.ftz.f32 	%f50, %f48, %f49;
	fma.rn.ftz.f32 	%f51, %f46, %f50, %f45;
	.loc	24	46	0
	ld.f32 	%f52, [%rd8+20];
	ld.f32 	%f53, [%rd8+8];
	ld.f32 	%f54, [%rd8+12];
	sub.ftz.f32 	%f55, %f47, %f54;
	fma.rn.ftz.f32 	%f56, %f53, %f55, %f52;
	st.v2.f32 	[%rd2+0], {%f56,%f51};
	bra.uni 	$Lt_46_514;
$Lt_46_55042:
	setp.lt.ftz.f32 	%p24, %f38, %f24;
	@!%p24 bra 	$Lt_46_55554;
	.loc	24	50	0
	ld.f32 	%f57, [%rd8+24];
	bra.uni 	$Lt_46_55298;
$Lt_46_55554:
	ld.f32 	%f57, [%rd8+20];
$Lt_46_55298:
	st.f32 	[%rd2+8], %f57;
	ld.f32 	%f58, [%rd2+4];
	ld.f32 	%f59, [%rd8+12];
	setp.gt.ftz.f32 	%p25, %f58, %f59;
	@!%p25 bra 	$Lt_46_56066;
	.loc	24	51	0
	ld.f32 	%f60, [%rd8+24];
	bra.uni 	$Lt_46_55810;
$Lt_46_56066:
	ld.f32 	%f60, [%rd8+20];
$Lt_46_55810:
	st.f32 	[%rd2+4], %f60;
	ld.f32 	%f61, [%rd2+0];
	ld.f32 	%f62, [%rd8+12];
	setp.gt.ftz.f32 	%p26, %f61, %f62;
	@!%p26 bra 	$Lt_46_56578;
	.loc	24	52	0
	ld.f32 	%f63, [%rd8+24];
	bra.uni 	$Lt_46_56322;
$Lt_46_56578:
	ld.f32 	%f63, [%rd8+20];
$Lt_46_56322:
	st.f32 	[%rd2+0], %f63;
	bra.uni 	$Lt_46_514;
$Lt_46_1282:
	.loc	22	280	0
	ld.f32 	%f64, [%rd2+4];
	ld.f32 	%f65, [%rd8+8];
	mul.ftz.f32 	%f66, %f64, %f65;
	.loc	22	281	0
	ld.f32 	%f67, [%rd2+8];
	ld.f32 	%f68, [%rd8+12];
	mul.ftz.f32 	%f69, %f67, %f68;
	.loc	22	282	0
	ld.f32 	%f25, [%rd2+12];
	.loc	25	47	0
	ld.f32 	%f70, [%rd2+0];
	ld.f32 	%f71, [%rd8+4];
	mul.ftz.f32 	%f72, %f70, %f71;
	st.v4.f32 	[%rd2+0], {%f72,%f66,%f69,%f25};
	.loc	38	67	0
	bra.uni 	$Lt_46_514;
$Lt_46_1538:
	.loc	26	48	0
	ld.f32 	%f25, [%rd2+12];
	ld.f32 	%f24, [%rd2+8];
	ld.f32 	%f23, [%rd2+4];
	ld.f32 	%f22, [%rd2+0];
	ld.f32 	%f73, [%rd8+4];
	mov.f32 	%f74, 0f00000000;    	// 0
	setp.eq.ftz.f32 	%p27, %f73, %f74;
	ld.f32 	%f75, [%rd8+8];
	mov.f32 	%f76, 0f00000000;    	// 0
	max.ftz.f32 	%f77, %f22, %f76;
	mov.f32 	%f78, 0f3f800000;    	// 1
	min.ftz.f32 	%f79, %f77, %f78;
	ld.f32 	%f80, [%rd8+12];
	sub.ftz.f32 	%f81, %f79, %f80;
	abs.ftz.f32 	%f82, %f81;
	mov.f32 	%f83, 0f00000000;    	// 0
	max.ftz.f32 	%f84, %f23, %f83;
	mov.f32 	%f85, 0f3f800000;    	// 1
	min.ftz.f32 	%f86, %f84, %f85;
	ld.f32 	%f87, [%rd8+16];
	sub.ftz.f32 	%f88, %f86, %f87;
	abs.ftz.f32 	%f89, %f88;
	mov.f32 	%f90, 0f00000000;    	// 0
	max.ftz.f32 	%f91, %f25, %f90;
	mov.f32 	%f92, 0f3f800000;    	// 1
	min.ftz.f32 	%f93, %f91, %f92;
	sub.ftz.f32 	%f94, %f93, %f25;
	abs.ftz.f32 	%f95, %f94;
	mov.f32 	%f96, 0f00000000;    	// 0
	max.ftz.f32 	%f97, %f24, %f96;
	mov.f32 	%f98, 0f3f800000;    	// 1
	min.ftz.f32 	%f99, %f97, %f98;
	ld.f32 	%f100, [%rd8+20];
	sub.ftz.f32 	%f101, %f99, %f100;
	abs.ftz.f32 	%f102, %f101;
	max.ftz.f32 	%f103, %f95, %f102;
	max.ftz.f32 	%f104, %f89, %f103;
	max.ftz.f32 	%f105, %f82, %f104;
	setp.ge.ftz.f32 	%p28, %f75, %f105;
	xor.pred 	%p29, %p27, %p28;
	@!%p29 bra 	$Lt_46_514;
	.loc	23	44	0
	ld.const.f32 	%f106, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f27, %f106, %f23;
	ld.const.f32 	%f107, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f29, %f107, %f24, %f27;
	ld.const.f32 	%f108, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f31, %f108, %f22, %f29;
	ld.const.f32 	%f109, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f110, %f109, %f31;
	ld.const.f32 	%f111, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f112, %f111, %f31;
	ld.const.f32 	%f113, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f114, %f113, %f31;
	st.v4.f32 	[%rd2+0], {%f110,%f112,%f114,%f25};
	bra.uni 	$Lt_46_514;
$Lt_46_1794:
	.loc	27	48	0
	ld.f32 	%f25, [%rd2+12];
	ld.f32 	%f24, [%rd2+8];
	ld.f32 	%f23, [%rd2+4];
	ld.f32 	%f22, [%rd2+0];
	ld.f32 	%f115, [%rd8+8];
	mov.f32 	%f116, 0f00000000;   	// 0
	max.ftz.f32 	%f117, %f22, %f116;
	mov.f32 	%f118, 0f3f800000;   	// 1
	min.ftz.f32 	%f119, %f117, %f118;
	ld.f32 	%f120, [%rd8+12];
	sub.ftz.f32 	%f121, %f119, %f120;
	abs.ftz.f32 	%f122, %f121;
	mov.f32 	%f123, 0f00000000;   	// 0
	max.ftz.f32 	%f124, %f23, %f123;
	mov.f32 	%f125, 0f3f800000;   	// 1
	min.ftz.f32 	%f126, %f124, %f125;
	ld.f32 	%f127, [%rd8+16];
	sub.ftz.f32 	%f128, %f126, %f127;
	abs.ftz.f32 	%f129, %f128;
	mov.f32 	%f130, 0f00000000;   	// 0
	max.ftz.f32 	%f131, %f25, %f130;
	mov.f32 	%f132, 0f3f800000;   	// 1
	min.ftz.f32 	%f133, %f131, %f132;
	sub.ftz.f32 	%f134, %f133, %f25;
	abs.ftz.f32 	%f135, %f134;
	mov.f32 	%f136, 0f00000000;   	// 0
	max.ftz.f32 	%f137, %f24, %f136;
	mov.f32 	%f138, 0f3f800000;   	// 1
	min.ftz.f32 	%f139, %f137, %f138;
	ld.f32 	%f140, [%rd8+20];
	sub.ftz.f32 	%f141, %f139, %f140;
	abs.ftz.f32 	%f142, %f141;
	max.ftz.f32 	%f143, %f135, %f142;
	max.ftz.f32 	%f144, %f129, %f143;
	max.ftz.f32 	%f145, %f122, %f144;
	setp.ge.ftz.f32 	%p30, %f115, %f145;
	@!%p30 bra 	$Lt_46_514;
	.loc	27	51	0
	ld.f32 	%f146, [%rd8+24];
	mov.f32 	%f147, %f146;
	ld.f32 	%f148, [%rd8+28];
	mov.f32 	%f149, %f148;
	ld.f32 	%f150, [%rd8+32];
	mov.f32 	%f151, %f150;
	ld.f32 	%f152, [%rd8+4];
	mov.f32 	%f153, 0f00000000;   	// 0
	setp.eq.ftz.f32 	%p31, %f152, %f153;
	@!%p31 bra 	$Lt_46_57858;
	.loc	27	60	0
	ld.const.f32 	%f154, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f27, %f154, %f23;
	ld.const.f32 	%f155, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f29, %f155, %f24, %f27;
	ld.const.f32 	%f156, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f31, %f156, %f22, %f29;
	mul.ftz.f32 	%f147, %f146, %f31;
	.loc	27	61	0
	mul.ftz.f32 	%f149, %f148, %f31;
	.loc	27	62	0
	mul.ftz.f32 	%f151, %f150, %f31;
$Lt_46_57858:
	st.v4.f32 	[%rd2+0], {%f147,%f149,%f151,%f25};
	bra.uni 	$Lt_46_514;
$Lt_46_2050:
	.loc	28	42	0
	ld.f32 	%f157, [%rd8+4];
	ld.s32 	%r23, [%rd4+0];
	cvt.rn.f32.s32 	%f158, %r23;
	sub.ftz.f32 	%f159, %f157, %f158;
	cvt.rzi.ftz.s32.f32 	%r24, %f159;
	.loc	28	43	0
	ld.s32 	%r25, [%rd6+0];
	cvt.rn.f32.s32 	%f160, %r25;
	sub.ftz.f32 	%f161, %f157, %f160;
	cvt.rzi.ftz.s32.f32 	%r26, %f161;
	.loc	28	44	0
	sub.s32 	%r27, %r2, %r23;
	cvt.rn.f32.s32 	%f162, %r27;
	sub.ftz.f32 	%f163, %f157, %f162;
	cvt.rzi.ftz.s32.f32 	%r28, %f163;
	.loc	28	45	0
	sub.s32 	%r29, %r4, %r25;
	cvt.rn.f32.s32 	%f164, %r29;
	sub.ftz.f32 	%f165, %f157, %f164;
	cvt.rzi.ftz.s32.f32 	%r30, %f165;
	.loc	28	47	0
	max.s32 	%r31, %r24, %r26;
	max.s32 	%r32, %r31, %r28;
	max.s32 	%r33, %r32, %r30;
	mov.u32 	%r34, 0;
	setp.le.s32 	%p32, %r33, %r34;
	@%p32 bra 	$Lt_46_514;
	.loc	28	51	0
	cvt.rn.f32.s32 	%f166, %r33;
	sub.ftz.f32 	%f167, %f157, %f166;
	ld.f32 	%f168, [%rd2+12];
	div.approx.ftz.f32 	%f169, %f167, %f157;
	mul.ftz.f32 	%f170, %f168, %f169;
	st.f32 	[%rd2+12], %f170;
	bra.uni 	$Lt_46_514;
$Lt_46_2306:
	ld.v4.f32 	{%f22,%f23,%f24,%f25}, [%rd2+0];
	.loc	30	44	0
	ld.f32 	%f171, [%rd8+8];
	ld.f32 	%f172, [%rd8+12];
	ld.f32 	%f173, [%rd8+16];
	sub.ftz.f32 	%f174, %f172, %f171;
	mul.ftz.f32 	%f175, %f173, %f174;
	.loc	30	46	0
	sub.ftz.f32 	%f176, %f171, %f175;
	.loc	30	47	0
	add.ftz.f32 	%f177, %f171, %f175;
	.loc	30	50	0
	add.ftz.f32 	%f178, %f172, %f175;
	ld.const.f32 	%f179, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f27, %f179, %f23;
	mov.f32 	%f180, 0f00000000;   	// 0
	max.ftz.f32 	%f181, %f178, %f180;
	mov.f32 	%f182, 0f00000000;   	// 0
	max.ftz.f32 	%f183, %f176, %f182;
	ld.const.f32 	%f184, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f29, %f184, %f24, %f27;
	mov.f32 	%f185, 0f3f800000;   	// 1
	min.ftz.f32 	%f186, %f181, %f185;
	mov.f32 	%f187, 0f3f800000;   	// 1
	min.ftz.f32 	%f188, %f183, %f187;
	ld.const.f32 	%f189, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f31, %f189, %f22, %f29;
	set.gt.ftz.u32.f32 	%r35, %f188, %f31;
	neg.s32 	%r36, %r35;
	set.le.ftz.u32.f32 	%r37, %f186, %f31;
	neg.s32 	%r38, %r37;
	or.b32 	%r39, %r36, %r38;
	mov.u32 	%r40, 0;
	setp.eq.s32 	%p33, %r39, %r40;
	@%p33 bra 	$Lt_46_59138;
	mov.f32 	%f190, 0f00000000;   	// 0
	bra.uni 	$Lt_46_59906;
$Lt_46_59138:
	mov.f32 	%f191, 0f00000000;   	// 0
	max.ftz.f32 	%f192, %f177, %f191;
	mov.f32 	%f193, 0f3f800000;   	// 1
	min.ftz.f32 	%f194, %f192, %f193;
	set.le.ftz.u32.f32 	%r41, %f194, %f31;
	neg.s32 	%r42, %r41;
	sub.ftz.f32 	%f195, %f172, %f175;
	mov.f32 	%f196, 0f00000000;   	// 0
	max.ftz.f32 	%f197, %f195, %f196;
	mov.f32 	%f198, 0f3f800000;   	// 1
	min.ftz.f32 	%f199, %f197, %f198;
	set.lt.ftz.u32.f32 	%r43, %f31, %f199;
	neg.s32 	%r44, %r43;
	and.b32 	%r45, %r42, %r44;
	mov.u32 	%r46, 0;
	setp.eq.s32 	%p34, %r45, %r46;
	@%p34 bra 	$Lt_46_59650;
	mov.f32 	%f190, 0f3f800000;   	// 1
	bra.uni 	$Lt_46_59906;
$Lt_46_59650:
	add.ftz.f32 	%f200, %f175, %f175;
	setp.gt.ftz.f32 	%p35, %f194, %f31;
	@!%p35 bra 	$Lt_46_60162;
	.loc	30	62	0
	sub.ftz.f32 	%f201, %f31, %f188;
	div.approx.ftz.f32 	%f190, %f201, %f200;
	bra.uni 	$Lt_46_59906;
$Lt_46_60162:
	.loc	30	66	0
	sub.ftz.f32 	%f202, %f186, %f31;
	div.approx.ftz.f32 	%f190, %f202, %f200;
$Lt_46_59906:
$Lt_46_59394:
$Lt_46_58882:
	.loc	30	69	0
	mov.f32 	%f203, 0f3f800000;   	// 1
	sub.ftz.f32 	%f204, %f203, %f190;
	ld.f32 	%f205, [%rd8+4];
	mov.f32 	%f206, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p36, %f205, %f206;
	selp.f32 	%f190, %f204, %f190, %p36;
	.loc	30	77	0
	ld.const.f32 	%f207, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f208, %f207, %f190;
	ld.const.f32 	%f209, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f210, %f209, %f190;
	ld.const.f32 	%f211, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f212, %f211, %f190;
	st.v4.f32 	[%rd2+0], {%f208,%f210,%f212,%f25};
	.loc	38	79	0
	bra.uni 	$Lt_46_514;
$Lt_46_2562:
	.loc	38	80	0
	ld.f32 	%f213, [%rd8+44];
	mov.f32 	%f214, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p37, %f213, %f214;
	@!%p37 bra 	$L_46_49922;
	ld.s32 	%r47, [%rd4+0];
	cvt.rn.f32.s32 	%f215, %r47;
	cvt.rn.f32.s32 	%f216, %r2;
	mul.ftz.f32 	%f217, %f216, %f213;
	setp.lt.ftz.f32 	%p38, %f215, %f217;
	@%p38 bra 	$L_46_49666;
$L_46_49922:
	mov.f32 	%f218, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p39, %f213, %f218;
	@!%p39 bra 	$Lt_46_514;
	ld.s32 	%r48, [%rd6+0];
	cvt.rn.f32.s32 	%f219, %r48;
	cvt.rn.f32.s32 	%f220, %r4;
	mul.ftz.f32 	%f221, %f220, %f213;
	neg.ftz.f32 	%f222, %f221;
	setp.lt.ftz.f32 	%p40, %f219, %f222;
	@!%p40 bra 	$Lt_46_514;
$L_46_49666:
	ld.v4.f32 	{%f22,%f23,%f24,%f25}, [%rd2+0];
	.loc	31	47	0
	ld.const.f32 	%f223, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f224, %f223, %f23;
	ld.const.f32 	%f225, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f226, %f225, %f24, %f224;
	ld.const.f32 	%f227, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f31, %f227, %f22, %f226;
	mov.f32 	%f228, %f31;
	.loc	31	49	0
	ld.f32 	%f229, [%rd8+8];
	ld.f32 	%f230, [%rd8+4];
	sub.ftz.f32 	%f231, %f230, %f229;
	.loc	31	50	0
	ld.f32 	%f232, [%rd8+16];
	ld.f32 	%f233, [%rd8+12];
	sub.ftz.f32 	%f234, %f233, %f232;
	mov.f32 	%f235, 0f3f800000;   	// 1
	set.neu.ftz.u32.f32 	%r49, %f234, %f235;
	neg.s32 	%r50, %r49;
	mov.f32 	%f236, 0f3f800000;   	// 1
	set.neu.ftz.u32.f32 	%r51, %f231, %f236;
	neg.s32 	%r52, %r51;
	or.b32 	%r53, %r50, %r52;
	mov.u32 	%r54, 0;
	setp.eq.s32 	%p41, %r53, %r54;
	@%p41 bra 	$Lt_46_60418;
	.loc	20	143	0
	cvt.s32.u32 	%r55, %ctaid.y;
	cvt.s32.u32 	%r56, %ntid.y;
	mul.lo.s32 	%r57, %r55, %r56;
	cvt.s32.u32 	%r58, %ctaid.x;
	cvt.s32.u32 	%r59, %ntid.x;
	mul.lo.s32 	%r60, %r58, %r59;
	mov.u32 	%r61, %tid.y;
	add.u32 	%r62, %r57, %r61;
	mov.u32 	%r63, %tid.x;
	add.u32 	%r64, %r60, %r63;
	shr.u32 	%r65, %r62, 13;
	mov.s32 	%r66, 1;
	sub.s32 	%r67, %r66, %r64;
	sub.u32 	%r68, %r64, %r62;
	sub.u32 	%r69, %r67, %r62;
	xor.b32 	%r70, %r65, %r69;
	shl.b32 	%r71, %r70, 8;
	sub.u32 	%r72, %r68, %r70;
	sub.u32 	%r73, %r62, %r70;
	xor.b32 	%r74, %r71, %r72;
	shr.u32 	%r75, %r74, 13;
	sub.u32 	%r76, %r73, %r74;
	sub.u32 	%r77, %r70, %r74;
	xor.b32 	%r78, %r75, %r76;
	shr.u32 	%r79, %r78, 12;
	sub.u32 	%r80, %r77, %r78;
	xor.b32 	%r81, %r79, %r80;
	sub.u32 	%r82, %r74, %r78;
	sub.u32 	%r83, %r82, %r81;
	shl.b32 	%r84, %r81, 16;
	xor.b32 	%r85, %r83, %r84;
	.loc	20	144	0
	sub.u32 	%r86, %r78, %r81;
	sub.u32 	%r87, %r86, %r85;
	shr.u32 	%r88, %r85, 5;
	xor.b32 	%r89, %r87, %r88;
	.loc	20	145	0
	sub.u32 	%r90, %r81, %r85;
	sub.u32 	%r91, %r90, %r89;
	shr.u32 	%r92, %r89, 3;
	xor.b32 	%r93, %r91, %r92;
	.loc	20	146	0
	sub.u32 	%r94, %r85, %r89;
	sub.u32 	%r95, %r94, %r93;
	shl.b32 	%r96, %r93, 10;
	xor.b32 	%r97, %r95, %r96;
	.loc	20	147	0
	sub.u32 	%r98, %r89, %r93;
	sub.u32 	%r99, %r98, %r97;
	shr.u32 	%r100, %r97, 15;
	xor.b32 	%r101, %r99, %r100;
	.loc	31	57	0
	mov.f32 	%f237, 0f3b270d73;   	// 0.00254902
	mul.lo.u32 	%r102, %r101, 1103515245;
	add.u32 	%r103, %r102, 12345;
	shr.u32 	%r104, %r103, 16;
	and.b32 	%r105, %r104, 255;
	shl.b32 	%r106, %r105, 7;
	mul.lo.u32 	%r107, %r101, -1029531031;
	sub.u32 	%r108, %r107, 740551042;
	shr.u32 	%r109, %r108, 16;
	and.b32 	%r110, %r109, 255;
	xor.b32 	%r111, %r106, %r110;
	cvt.rn.f32.s32 	%f238, %r111;
	mov.f32 	%f239, 0f467ffe00;   	// 16383.5
	div.approx.ftz.f32 	%f240, %f238, %f239;
	mov.f32 	%f241, 0fbf800000;   	// -1
	add.ftz.f32 	%f242, %f240, %f241;
	fma.rn.ftz.f32 	%f228, %f237, %f242, %f31;
$Lt_46_60418:
	.loc	31	60	0
	sub.ftz.f32 	%f243, %f228, %f229;
	ld.f32 	%f244, [%rd8+20];
	mov.f32 	%f245, 0f3f800000;   	// 1
	setp.neu.ftz.f32 	%p42, %f244, %f245;
	@!%p42 bra 	$Lt_46_61186;
	mov.f32 	%f246, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p43, %f243, %f246;
	@!%p43 bra 	$Lt_46_61698;
	.loc	31	66	0
	mov.f32 	%f228, %f232;
	bra.uni 	$Lt_46_60930;
$Lt_46_61698:
	.loc	31	70	0
	rcp.approx.ftz.f32 	%f247, %f231;
	mul.ftz.f32 	%f248, %f243, %f247;
	lg2.approx.ftz.f32 	%f249, %f248;
	mul.ftz.f32 	%f250, %f244, %f249;
	ex2.approx.ftz.f32 	%f251, %f250;
	fma.rn.ftz.f32 	%f228, %f234, %f251, %f232;
	bra.uni 	$Lt_46_60930;
$Lt_46_61186:
	.loc	31	77	0
	rcp.approx.ftz.f32 	%f252, %f231;
	mul.ftz.f32 	%f253, %f243, %f252;
	fma.rn.ftz.f32 	%f228, %f234, %f253, %f232;
$Lt_46_60930:
	.loc	31	81	0
	ld.f32 	%f254, [%rd8+28];
	.loc	31	89	0
	ld.f32 	%f255, [%rd8+24];
	sin.approx.ftz.f32 	%f256, %f254;
	ld.const.f32 	%f257, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f258, %f257, %f23;
	ld.const.f32 	%f259, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f260, %f259, %f23;
	cos.approx.ftz.f32 	%f261, %f254;
	ld.f32 	%f262, [%rd8+40];
	ld.const.f32 	%f263, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f264, %f263, %f24, %f258;
	ld.const.f32 	%f265, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f266, %f265, %f24, %f260;
	ld.const.f32 	%f267, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f268, %f267, %f22, %f264;
	ld.const.f32 	%f269, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f270, %f269, %f22, %f266;
	mul.ftz.f32 	%f271, %f256, %f268;
	mul.ftz.f32 	%f272, %f270, %f261;
	sub.ftz.f32 	%f273, %f272, %f271;
	ld.f32 	%f274, [%rd8+32];
	sub.ftz.f32 	%f275, %f274, %f273;
	fma.rn.ftz.f32 	%f276, %f262, %f275, %f273;
	mul.ftz.f32 	%f277, %f255, %f276;
	.loc	31	90	0
	mul.ftz.f32 	%f278, %f261, %f268;
	fma.rn.ftz.f32 	%f279, %f270, %f256, %f278;
	ld.f32 	%f280, [%rd8+36];
	sub.ftz.f32 	%f281, %f280, %f279;
	fma.rn.ftz.f32 	%f282, %f262, %f281, %f279;
	mul.ftz.f32 	%f283, %f255, %f282;
	.loc	31	92	0
	ld.const.f32 	%f284, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f285, %f284, %f277;
	ld.const.f32 	%f286, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f287, %f286, %f228, %f285;
	ld.const.f32 	%f288, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f289, %f288, %f283, %f287;
	ld.const.f32 	%f290, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f291, %f290, %f277;
	ld.const.f32 	%f292, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f293, %f292, %f228, %f291;
	ld.const.f32 	%f294, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f295, %f294, %f283, %f293;
	ld.const.f32 	%f296, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f297, %f296, %f277;
	ld.const.f32 	%f298, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f299, %f298, %f228, %f297;
	ld.const.f32 	%f300, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f301, %f300, %f283, %f299;
	st.v4.f32 	[%rd2+0], {%f289,%f295,%f301,%f25};
	bra.uni 	$Lt_46_514;
$Lt_46_2818:
	.loc	38	83	0
	ld.f32 	%f302, [%rd8+44];
	mov.f32 	%f303, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p44, %f302, %f303;
	@!%p44 bra 	$L_46_50690;
	ld.s32 	%r112, [%rd4+0];
	cvt.rn.f32.s32 	%f304, %r112;
	cvt.rn.f32.s32 	%f305, %r2;
	mul.ftz.f32 	%f306, %f305, %f302;
	setp.lt.ftz.f32 	%p45, %f304, %f306;
	@%p45 bra 	$L_46_50434;
$L_46_50690:
	mov.f32 	%f307, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p46, %f302, %f307;
	@!%p46 bra 	$Lt_46_514;
	ld.s32 	%r113, [%rd6+0];
	cvt.rn.f32.s32 	%f308, %r113;
	cvt.rn.f32.s32 	%f309, %r4;
	mul.ftz.f32 	%f310, %f309, %f302;
	neg.ftz.f32 	%f311, %f310;
	setp.lt.ftz.f32 	%p47, %f308, %f311;
	@!%p47 bra 	$Lt_46_514;
$L_46_50434:
	ld.v4.f32 	{%f312,%f313,%f314,%f25}, [%rd2+0];
	.loc	31	113	0
	ld.f32 	%f315, [%rd8+8];
	ld.f32 	%f316, [%rd8+4];
	sub.ftz.f32 	%f317, %f316, %f315;
	.loc	31	114	0
	ld.f32 	%f318, [%rd8+16];
	ld.f32 	%f319, [%rd8+12];
	sub.ftz.f32 	%f320, %f319, %f318;
	.loc	31	116	0
	ld.const.f32 	%f321, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f322, %f313, %f321;
	ld.const.f32 	%f323, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f324, %f323, %f314, %f322;
	ld.const.f32 	%f325, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f326, %f325, %f312, %f324;
	sub.ftz.f32 	%f327, %f326, %f315;
	ld.f32 	%f328, [%rd8+20];
	mov.f32 	%f329, 0f3f800000;   	// 1
	setp.neu.ftz.f32 	%p48, %f328, %f329;
	@!%p48 bra 	$Lt_46_62210;
	.loc	31	120	0
	mov.f32 	%f330, 0f00000000;   	// 0
	max.ftz.f32 	%f331, %f327, %f330;
	div.approx.ftz.f32 	%f332, %f331, %f317;
	lg2.approx.ftz.f32 	%f333, %f332;
	mul.ftz.f32 	%f334, %f328, %f333;
	ex2.approx.ftz.f32 	%f335, %f334;
	fma.rn.ftz.f32 	%f336, %f320, %f335, %f318;
	bra.uni 	$Lt_46_61954;
$Lt_46_62210:
	.loc	31	129	0
	div.approx.ftz.f32 	%f337, %f327, %f317;
	fma.rn.ftz.f32 	%f336, %f320, %f337, %f318;
$Lt_46_61954:
	.loc	31	135	0
	ld.const.f32 	%f338, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f339, %f338, %f336;
	ld.const.f32 	%f340, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f341, %f340, %f336;
	ld.const.f32 	%f342, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f343, %f342, %f336;
	st.v4.f32 	[%rd2+0], {%f339,%f341,%f343,%f25};
	bra.uni 	$Lt_46_514;
$Lt_46_3074:
	.loc	38	86	0
	ld.f32 	%f344, [%rd8+44];
	mov.f32 	%f345, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p49, %f344, %f345;
	@!%p49 bra 	$L_46_51458;
	ld.s32 	%r114, [%rd4+0];
	cvt.rn.f32.s32 	%f346, %r114;
	cvt.rn.f32.s32 	%f347, %r2;
	mul.ftz.f32 	%f348, %f347, %f344;
	setp.lt.ftz.f32 	%p50, %f346, %f348;
	@%p50 bra 	$L_46_51202;
$L_46_51458:
	mov.f32 	%f349, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p51, %f344, %f349;
	@!%p51 bra 	$Lt_46_514;
	ld.s32 	%r115, [%rd6+0];
	cvt.rn.f32.s32 	%f350, %r115;
	cvt.rn.f32.s32 	%f351, %r4;
	mul.ftz.f32 	%f352, %f351, %f344;
	neg.ftz.f32 	%f353, %f352;
	setp.lt.ftz.f32 	%p52, %f350, %f353;
	@!%p52 bra 	$Lt_46_514;
$L_46_51202:
	.loc	31	153	0
	ld.f32 	%f25, [%rd2+12];
	.loc	31	160	0
	ld.const.f32 	%f354, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f355, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f356, [k601YPbPr_To_RGB32f+0];
	st.v4.f32 	[%rd2+0], {%f354,%f355,%f356,%f25};
	bra.uni 	$Lt_46_514;
$Lt_46_3330:
	.loc	32	42	0
	ld.f32 	%f357, [%rd8+4];
	ld.f32 	%f22, [%rd2+0];
	mov.f32 	%f358, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p53, %f22, %f358;
	@!%p53 bra 	$Lt_46_62722;
	.loc	22	292	0
	lg2.approx.ftz.f32 	%f359, %f22;
	mul.ftz.f32 	%f360, %f357, %f359;
	ex2.approx.ftz.f32 	%f361, %f360;
	bra.uni 	$Lt_46_62466;
$Lt_46_62722:
	neg.ftz.f32 	%f362, %f22;
	lg2.approx.ftz.f32 	%f363, %f362;
	mul.ftz.f32 	%f364, %f357, %f363;
	ex2.approx.ftz.f32 	%f365, %f364;
	neg.ftz.f32 	%f361, %f365;
$Lt_46_62466:
	ld.f32 	%f23, [%rd2+4];
	mov.f32 	%f366, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p54, %f23, %f366;
	@!%p54 bra 	$Lt_46_63234;
	.loc	22	293	0
	lg2.approx.ftz.f32 	%f367, %f23;
	mul.ftz.f32 	%f368, %f357, %f367;
	ex2.approx.ftz.f32 	%f369, %f368;
	bra.uni 	$Lt_46_62978;
$Lt_46_63234:
	neg.ftz.f32 	%f370, %f23;
	lg2.approx.ftz.f32 	%f371, %f370;
	mul.ftz.f32 	%f372, %f357, %f371;
	ex2.approx.ftz.f32 	%f373, %f372;
	neg.ftz.f32 	%f369, %f373;
$Lt_46_62978:
	ld.f32 	%f24, [%rd2+8];
	mov.f32 	%f374, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p55, %f24, %f374;
	@!%p55 bra 	$Lt_46_63746;
	.loc	22	294	0
	lg2.approx.ftz.f32 	%f375, %f24;
	mul.ftz.f32 	%f376, %f357, %f375;
	ex2.approx.ftz.f32 	%f377, %f376;
	bra.uni 	$Lt_46_63490;
$Lt_46_63746:
	neg.ftz.f32 	%f378, %f24;
	lg2.approx.ftz.f32 	%f379, %f378;
	mul.ftz.f32 	%f380, %f357, %f379;
	ex2.approx.ftz.f32 	%f381, %f380;
	neg.ftz.f32 	%f377, %f381;
$Lt_46_63490:
	ld.f32 	%f25, [%rd2+12];
	mov.f32 	%f382, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p56, %f25, %f382;
	@!%p56 bra 	$Lt_46_64258;
	.loc	22	295	0
	lg2.approx.ftz.f32 	%f383, %f25;
	mul.ftz.f32 	%f384, %f357, %f383;
	ex2.approx.ftz.f32 	%f385, %f384;
	bra.uni 	$Lt_46_64002;
$Lt_46_64258:
	neg.ftz.f32 	%f386, %f25;
	lg2.approx.ftz.f32 	%f387, %f386;
	mul.ftz.f32 	%f388, %f357, %f387;
	ex2.approx.ftz.f32 	%f389, %f388;
	neg.ftz.f32 	%f385, %f389;
$Lt_46_64002:
	st.v4.f32 	[%rd2+0], {%f361,%f369,%f377,%f385};
	.loc	38	91	0
	bra.uni 	$Lt_46_514;
$Lt_46_3586:
	.loc	33	41	0
	ld.s32 	%r116, [%rd4+0];
	sub.s32 	%r117, %r2, %r116;
	sub.s32 	%r118, %r117, 1;
	st.s32 	[%rd4+0], %r118;
	.loc	38	94	0
	bra.uni 	$Lt_46_514;
$Lt_46_3842:
	.loc	38	96	0
	ld.f32 	%f390, [%rd8+16];
	ld.f32 	%f391, [%rd8+4];
	ld.s32 	%r119, [%rd4+0];
	ld.s32 	%r120, [%rd6+0];
	mov.f32 	%f392, 0f3f000000;   	// 0.5
	mul.ftz.f32 	%f393, %f391, %f392;
	ld.f32 	%f394, [%rd8+8];
	mov.f32 	%f395, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p57, %f394, %f395;
	@!%p57 bra 	$Lt_46_64770;
	.loc	34	48	0
	cvt.rzi.ftz.u32.f32 	%r121, %f390;
	.loc	20	139	0
	mul.lo.s32 	%r122, %r119, 3;
	sub.u32 	%r123, %r122, %r120;
	sub.u32 	%r124, %r123, %r121;
	shr.u32 	%r125, %r121, 13;
	xor.b32 	%r126, %r124, %r125;
	.loc	20	140	0
	sub.u32 	%r127, %r120, %r121;
	sub.u32 	%r128, %r127, %r126;
	shl.b32 	%r129, %r126, 8;
	xor.b32 	%r130, %r128, %r129;
	.loc	20	141	0
	sub.u32 	%r131, %r121, %r126;
	sub.u32 	%r132, %r131, %r130;
	shr.u32 	%r133, %r130, 13;
	xor.b32 	%r134, %r132, %r133;
	.loc	20	142	0
	sub.u32 	%r135, %r126, %r130;
	sub.u32 	%r136, %r135, %r134;
	shr.u32 	%r137, %r134, 12;
	xor.b32 	%r138, %r136, %r137;
	.loc	20	143	0
	sub.u32 	%r139, %r130, %r134;
	sub.u32 	%r140, %r139, %r138;
	shl.b32 	%r141, %r138, 16;
	xor.b32 	%r142, %r140, %r141;
	.loc	20	144	0
	sub.u32 	%r143, %r134, %r138;
	sub.u32 	%r144, %r143, %r142;
	shr.u32 	%r145, %r142, 5;
	xor.b32 	%r146, %r144, %r145;
	.loc	20	145	0
	sub.u32 	%r147, %r138, %r142;
	sub.u32 	%r148, %r147, %r146;
	shr.u32 	%r149, %r146, 3;
	xor.b32 	%r150, %r148, %r149;
	.loc	20	146	0
	sub.u32 	%r151, %r142, %r146;
	sub.u32 	%r152, %r151, %r150;
	shl.b32 	%r153, %r150, 10;
	xor.b32 	%r154, %r152, %r153;
	.loc	20	147	0
	sub.u32 	%r155, %r146, %r150;
	sub.u32 	%r156, %r155, %r154;
	shr.u32 	%r157, %r154, 15;
	xor.b32 	%r158, %r156, %r157;
	.loc	34	48	0
	mul.lo.u32 	%r159, %r158, 1103515245;
	add.u32 	%r160, %r159, 12345;
	shr.u32 	%r161, %r160, 16;
	and.b32 	%r162, %r161, 255;
	shl.b32 	%r163, %r162, 7;
	mul.lo.u32 	%r164, %r158, -1029531031;
	sub.u32 	%r165, %r164, 740551042;
	shr.u32 	%r166, %r165, 16;
	and.b32 	%r167, %r166, 255;
	xor.b32 	%r168, %r163, %r167;
	cvt.rn.f32.s32 	%f396, %r168;
	mov.f32 	%f397, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f398, %f396, %f397;
	mul.ftz.f32 	%f399, %f391, %f398;
	sub.ftz.f32 	%f400, %f399, %f393;
	.loc	20	139	0
	add.u32 	%r169, %r124, 1;
	xor.b32 	%r170, %r125, %r169;
	.loc	20	140	0
	sub.u32 	%r171, %r127, %r170;
	shl.b32 	%r172, %r170, 8;
	xor.b32 	%r173, %r171, %r172;
	.loc	20	141	0
	sub.u32 	%r174, %r121, %r170;
	sub.u32 	%r175, %r174, %r173;
	shr.u32 	%r176, %r173, 13;
	xor.b32 	%r177, %r175, %r176;
	.loc	20	142	0
	sub.u32 	%r178, %r170, %r173;
	sub.u32 	%r179, %r178, %r177;
	shr.u32 	%r180, %r177, 12;
	xor.b32 	%r181, %r179, %r180;
	.loc	20	143	0
	sub.u32 	%r182, %r173, %r177;
	sub.u32 	%r183, %r182, %r181;
	shl.b32 	%r184, %r181, 16;
	xor.b32 	%r185, %r183, %r184;
	.loc	20	144	0
	sub.u32 	%r186, %r177, %r181;
	sub.u32 	%r187, %r186, %r185;
	shr.u32 	%r188, %r185, 5;
	xor.b32 	%r189, %r187, %r188;
	.loc	20	145	0
	sub.u32 	%r190, %r181, %r185;
	sub.u32 	%r191, %r190, %r189;
	shr.u32 	%r192, %r189, 3;
	xor.b32 	%r193, %r191, %r192;
	.loc	20	146	0
	sub.u32 	%r194, %r185, %r189;
	sub.u32 	%r195, %r194, %r193;
	shl.b32 	%r196, %r193, 10;
	xor.b32 	%r197, %r195, %r196;
	.loc	20	147	0
	sub.u32 	%r198, %r189, %r193;
	sub.u32 	%r199, %r198, %r197;
	shr.u32 	%r200, %r197, 15;
	xor.b32 	%r201, %r199, %r200;
	.loc	34	49	0
	mul.lo.u32 	%r202, %r201, 1103515245;
	add.u32 	%r203, %r202, 12345;
	shr.u32 	%r204, %r203, 16;
	and.b32 	%r205, %r204, 255;
	shl.b32 	%r206, %r205, 7;
	mul.lo.u32 	%r207, %r201, -1029531031;
	sub.u32 	%r208, %r207, 740551042;
	shr.u32 	%r209, %r208, 16;
	and.b32 	%r210, %r209, 255;
	xor.b32 	%r211, %r206, %r210;
	cvt.rn.f32.s32 	%f401, %r211;
	mov.f32 	%f402, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f403, %f401, %f402;
	mul.ftz.f32 	%f404, %f391, %f403;
	sub.ftz.f32 	%f405, %f404, %f393;
	.loc	20	139	0
	add.u32 	%r212, %r124, 2;
	xor.b32 	%r213, %r125, %r212;
	.loc	20	140	0
	sub.u32 	%r214, %r127, %r213;
	shl.b32 	%r215, %r213, 8;
	xor.b32 	%r216, %r214, %r215;
	.loc	20	141	0
	sub.u32 	%r217, %r121, %r213;
	sub.u32 	%r218, %r217, %r216;
	shr.u32 	%r219, %r216, 13;
	xor.b32 	%r220, %r218, %r219;
	.loc	20	142	0
	sub.u32 	%r221, %r213, %r216;
	sub.u32 	%r222, %r221, %r220;
	shr.u32 	%r223, %r220, 12;
	xor.b32 	%r224, %r222, %r223;
	.loc	20	143	0
	sub.u32 	%r225, %r216, %r220;
	sub.u32 	%r226, %r225, %r224;
	shl.b32 	%r227, %r224, 16;
	xor.b32 	%r228, %r226, %r227;
	.loc	20	144	0
	sub.u32 	%r229, %r220, %r224;
	sub.u32 	%r230, %r229, %r228;
	shr.u32 	%r231, %r228, 5;
	xor.b32 	%r232, %r230, %r231;
	.loc	20	145	0
	sub.u32 	%r233, %r224, %r228;
	sub.u32 	%r234, %r233, %r232;
	shr.u32 	%r235, %r232, 3;
	xor.b32 	%r236, %r234, %r235;
	.loc	20	146	0
	sub.u32 	%r237, %r228, %r232;
	sub.u32 	%r238, %r237, %r236;
	shl.b32 	%r239, %r236, 10;
	xor.b32 	%r240, %r238, %r239;
	.loc	20	147	0
	sub.u32 	%r241, %r232, %r236;
	sub.u32 	%r242, %r241, %r240;
	shr.u32 	%r243, %r240, 15;
	xor.b32 	%r244, %r242, %r243;
	.loc	34	50	0
	mul.lo.u32 	%r245, %r244, 1103515245;
	add.u32 	%r246, %r245, 12345;
	shr.u32 	%r247, %r246, 16;
	and.b32 	%r248, %r247, 255;
	shl.b32 	%r249, %r248, 7;
	mul.lo.u32 	%r250, %r244, -1029531031;
	sub.u32 	%r251, %r250, 740551042;
	shr.u32 	%r252, %r251, 16;
	and.b32 	%r253, %r252, 255;
	xor.b32 	%r254, %r249, %r253;
	cvt.rn.f32.s32 	%f406, %r254;
	mov.f32 	%f407, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f408, %f406, %f407;
	mul.ftz.f32 	%f409, %f391, %f408;
	sub.ftz.f32 	%f410, %f409, %f393;
	bra.uni 	$Lt_46_64514;
$Lt_46_64770:
	.loc	34	54	0
	cvt.rzi.ftz.u32.f32 	%r255, %f390;
	.loc	20	139	0
	shr.u32 	%r256, %r255, 13;
	sub.u32 	%r257, %r119, %r120;
	sub.u32 	%r258, %r257, %r255;
	xor.b32 	%r259, %r256, %r258;
	.loc	20	140	0
	sub.u32 	%r260, %r120, %r255;
	sub.u32 	%r261, %r260, %r259;
	shl.b32 	%r262, %r259, 8;
	xor.b32 	%r263, %r261, %r262;
	.loc	20	141	0
	sub.u32 	%r264, %r255, %r259;
	sub.u32 	%r265, %r264, %r263;
	shr.u32 	%r266, %r263, 13;
	xor.b32 	%r267, %r265, %r266;
	.loc	20	142	0
	sub.u32 	%r268, %r259, %r263;
	sub.u32 	%r269, %r268, %r267;
	shr.u32 	%r270, %r267, 12;
	xor.b32 	%r271, %r269, %r270;
	.loc	20	143	0
	sub.u32 	%r272, %r263, %r267;
	sub.u32 	%r273, %r272, %r271;
	shl.b32 	%r274, %r271, 16;
	xor.b32 	%r275, %r273, %r274;
	.loc	20	144	0
	sub.u32 	%r276, %r267, %r271;
	sub.u32 	%r277, %r276, %r275;
	shr.u32 	%r278, %r275, 5;
	xor.b32 	%r279, %r277, %r278;
	.loc	20	145	0
	sub.u32 	%r280, %r271, %r275;
	sub.u32 	%r281, %r280, %r279;
	shr.u32 	%r282, %r279, 3;
	xor.b32 	%r283, %r281, %r282;
	.loc	20	146	0
	sub.u32 	%r284, %r275, %r279;
	sub.u32 	%r285, %r284, %r283;
	shl.b32 	%r286, %r283, 10;
	xor.b32 	%r287, %r285, %r286;
	.loc	20	147	0
	sub.u32 	%r288, %r279, %r283;
	sub.u32 	%r289, %r288, %r287;
	shr.u32 	%r290, %r287, 15;
	xor.b32 	%r291, %r289, %r290;
	.loc	34	54	0
	mul.lo.u32 	%r292, %r291, 1103515245;
	add.u32 	%r293, %r292, 12345;
	shr.u32 	%r294, %r293, 16;
	and.b32 	%r295, %r294, 255;
	shl.b32 	%r296, %r295, 7;
	mul.lo.u32 	%r297, %r291, -1029531031;
	sub.u32 	%r298, %r297, 740551042;
	shr.u32 	%r299, %r298, 16;
	and.b32 	%r300, %r299, 255;
	xor.b32 	%r301, %r296, %r300;
	cvt.rn.f32.s32 	%f411, %r301;
	mov.f32 	%f412, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f413, %f411, %f412;
	mul.ftz.f32 	%f414, %f391, %f413;
	sub.ftz.f32 	%f410, %f414, %f393;
	mov.f32 	%f405, %f410;
	mov.f32 	%f400, %f410;
$Lt_46_64514:
	ld.v4.f32 	{%f415,%f416,%f417,_}, [%rd2+0];
	.loc	34	57	0
	add.ftz.f32 	%f418, %f417, %f400;
	st.f32 	[%rd2+8], %f418;
	.loc	34	58	0
	add.ftz.f32 	%f419, %f416, %f405;
	.loc	34	59	0
	add.ftz.f32 	%f420, %f415, %f410;
	st.v2.f32 	[%rd2+0], {%f420,%f419};
	ld.f32 	%f421, [%rd8+12];
	mov.f32 	%f422, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p58, %f421, %f422;
	@!%p58 bra 	$Lt_46_514;
	.loc	22	345	0
	ld.f32 	%f423, [%rd2+4];
	.loc	22	346	0
	ld.f32 	%f424, [%rd2+8];
	.loc	22	347	0
	ld.f32 	%f425, [%rd2+12];
	.loc	34	63	0
	mov.f32 	%f426, 0f00000000;   	// 0
	max.ftz.f32 	%f427, %f420, %f426;
	mov.f32 	%f428, 0f3f800000;   	// 1
	min.ftz.f32 	%f429, %f427, %f428;
	mov.f32 	%f430, 0f00000000;   	// 0
	max.ftz.f32 	%f431, %f423, %f430;
	mov.f32 	%f432, 0f3f800000;   	// 1
	min.ftz.f32 	%f433, %f431, %f432;
	mov.f32 	%f434, 0f00000000;   	// 0
	max.ftz.f32 	%f435, %f424, %f434;
	mov.f32 	%f436, 0f3f800000;   	// 1
	min.ftz.f32 	%f437, %f435, %f436;
	mov.f32 	%f438, 0f00000000;   	// 0
	max.ftz.f32 	%f439, %f425, %f438;
	mov.f32 	%f440, 0f3f800000;   	// 1
	min.ftz.f32 	%f441, %f439, %f440;
	st.v4.f32 	[%rd2+0], {%f429,%f433,%f437,%f441};
	bra.uni 	$Lt_46_514;
$Lt_46_4098:
	.loc	38	99	0
	ld.s32 	%r302, [%rd4+0];
	cvt.rn.f32.s32 	%f442, %r302;
	cvt.rn.f32.s32 	%f443, %r2;
	ld.f32 	%f444, [%rd8+20];
	mul.ftz.f32 	%f445, %f443, %f444;
	setp.lt.ftz.f32 	%p59, %f442, %f445;
	@!%p59 bra 	$Lt_46_514;
	ld.v4.f32 	{%f22,%f23,%f24,%f25}, [%rd2+0];
	.loc	35	52	0
	ld.f32 	%f446, [%rd8+4];
	ld.f32 	%f447, [%rd8+8];
	ld.const.f32 	%f448, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f449, %f448, %f23;
	ld.const.f32 	%f450, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f451, %f450, %f24, %f449;
	ld.const.f32 	%f452, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f453, %f452, %f22, %f451;
	fma.rn.ftz.f32 	%f454, %f447, %f453, %f446;
	.loc	35	53	0
	ld.f32 	%f455, [%rd8+16];
	ld.f32 	%f456, [%rd8+12];
	ld.const.f32 	%f457, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f458, %f457, %f23;
	ld.const.f32 	%f459, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f460, %f459, %f23;
	ld.const.f32 	%f461, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f462, %f461, %f24, %f458;
	ld.const.f32 	%f463, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f464, %f463, %f24, %f460;
	ld.const.f32 	%f465, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f466, %f465, %f22, %f462;
	ld.const.f32 	%f467, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f468, %f467, %f22, %f464;
	mul.ftz.f32 	%f469, %f466, %f455;
	mul.ftz.f32 	%f470, %f456, %f468;
	sub.ftz.f32 	%f471, %f470, %f469;
	.loc	35	54	0
	mul.ftz.f32 	%f472, %f468, %f455;
	fma.rn.ftz.f32 	%f473, %f456, %f466, %f472;
	.loc	35	56	0
	ld.const.f32 	%f474, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f475, %f474, %f471;
	ld.const.f32 	%f476, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f477, %f476, %f454, %f475;
	ld.const.f32 	%f478, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f479, %f478, %f473, %f477;
	ld.const.f32 	%f480, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f481, %f480, %f471;
	ld.const.f32 	%f482, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f483, %f482, %f454, %f481;
	ld.const.f32 	%f484, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f485, %f484, %f473, %f483;
	ld.const.f32 	%f486, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f487, %f486, %f471;
	ld.const.f32 	%f488, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f489, %f488, %f454, %f487;
	ld.const.f32 	%f490, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f491, %f490, %f473, %f489;
	st.v4.f32 	[%rd2+0], {%f479,%f485,%f491,%f25};
	bra.uni 	$Lt_46_514;
$Lt_46_4354:
	ld.v4.f32 	{%f22,%f23,%f24,_}, [%rd2+0];
	.loc	36	46	0
	ld.f32 	%f492, [%rd8+8];
	ld.const.f32 	%f493, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f27, %f493, %f23;
	ld.const.f32 	%f494, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f29, %f494, %f24, %f27;
	ld.const.f32 	%f495, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f31, %f495, %f22, %f29;
	ld.f32 	%f496, [%rd8+20];
	sub.ftz.f32 	%f497, %f496, %f492;
	fma.rn.ftz.f32 	%f498, %f31, %f497, %f492;
	.loc	36	47	0
	ld.f32 	%f499, [%rd8+12];
	ld.f32 	%f500, [%rd8+24];
	sub.ftz.f32 	%f501, %f500, %f499;
	fma.rn.ftz.f32 	%f502, %f31, %f501, %f499;
	.loc	36	49	0
	ld.f32 	%f503, [%rd8+4];
	ld.f32 	%f504, [%rd8+28];
	ld.f32 	%f505, [%rd8+16];
	sub.ftz.f32 	%f506, %f505, %f503;
	fma.rn.ftz.f32 	%f507, %f31, %f506, %f503;
	sub.ftz.f32 	%f508, %f507, %f22;
	fma.rn.ftz.f32 	%f509, %f504, %f508, %f22;
	.loc	36	50	0
	ld.f32 	%f510, [%rd2+4];
	ld.f32 	%f511, [%rd8+28];
	sub.ftz.f32 	%f512, %f498, %f510;
	fma.rn.ftz.f32 	%f513, %f511, %f512, %f510;
	st.v2.f32 	[%rd2+0], {%f509,%f513};
	.loc	36	51	0
	ld.f32 	%f514, [%rd2+8];
	ld.f32 	%f515, [%rd8+28];
	sub.ftz.f32 	%f516, %f502, %f514;
	fma.rn.ftz.f32 	%f517, %f515, %f516, %f514;
	st.f32 	[%rd2+8], %f517;
	.loc	38	103	0
	bra.uni 	$Lt_46_514;
$Lt_46_4610:
	.loc	37	41	0
	ld.s32 	%r303, [%rd6+0];
	sub.s32 	%r304, %r4, %r303;
	sub.s32 	%r305, %r304, 1;
	st.s32 	[%rd6+0], %r305;
$Lt_46_514:
	.loc	38	108	0
	ret;
$LDWend__Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter:
	} // _Z11ApplyFilterR8PixelRGBRiS1_iiRK15PointwiseFilter

	.entry PointwiseFilterHostKernel1 (
		.param .u64 __cudaparm_PointwiseFilterHostKernel1_inSrcImage,
		.param .s32 __cudaparm_PointwiseFilterHostKernel1_inSrcPitch,
		.param .u64 __cudaparm_PointwiseFilterHostKernel1_inDestImage,
		.param .s32 __cudaparm_PointwiseFilterHostKernel1_inDestPitch,
		.param .u32 __cudaparm_PointwiseFilterHostKernel1_inDeviceFormat,
		.param .s32 __cudaparm_PointwiseFilterHostKernel1_inWidth,
		.param .s32 __cudaparm_PointwiseFilterHostKernel1_inHeight,
		.param .align 4 .b8 __cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0[48])
	{
	.reg .u32 %r<316>;
	.reg .u64 %rd<15>;
	.reg .f32 %f<450>;
	.reg .pred %p<61>;
	.loc	38	122	0
$LDWbegin_PointwiseFilterHostKernel1:
	.loc	38	124	0
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	mov.u32 	%r4, %tid.x;
	add.u32 	%r5, %r3, %r4;
	mov.s32 	%r6, %r5;
	.loc	38	125	0
	cvt.s32.u32 	%r7, %ctaid.y;
	cvt.s32.u32 	%r8, %ntid.y;
	mul.lo.s32 	%r9, %r7, %r8;
	mov.u32 	%r10, %tid.y;
	add.u32 	%r11, %r9, %r10;
	mov.s32 	%r12, %r11;
	ld.param.s32 	%r13, [__cudaparm_PointwiseFilterHostKernel1_inHeight];
	ld.param.s32 	%r14, [__cudaparm_PointwiseFilterHostKernel1_inWidth];
	set.gt.u32.s32 	%r15, %r13, %r11;
	neg.s32 	%r16, %r15;
	set.gt.u32.s32 	%r17, %r14, %r5;
	neg.s32 	%r18, %r17;
	and.b32 	%r19, %r16, %r18;
	mov.u32 	%r20, 0;
	setp.eq.s32 	%p1, %r19, %r20;
	@%p1 bra 	$Lt_47_66050;
	ld.param.s32 	%r21, [__cudaparm_PointwiseFilterHostKernel1_inDeviceFormat];
	mov.s32 	%r22, 0;
	setp.eq.s32 	%p2, %r21, %r22;
	ld.param.u64 	%rd1, [__cudaparm_PointwiseFilterHostKernel1_inSrcImage];
	ld.param.s32 	%r23, [__cudaparm_PointwiseFilterHostKernel1_inSrcPitch];
	@!%p2 bra 	$Lt_47_55042;
	.loc	20	115	0
	mul.lo.s32 	%r24, %r23, %r11;
	add.s32 	%r25, %r5, %r24;
	cvt.s64.s32 	%rd2, %r25;
	mul.wide.s32 	%rd3, %r25, 8;
	add.u64 	%rd4, %rd1, %rd3;
	ld.global.v4.u16 	{%r26,%r27,%r28,%r29}, [%rd4+0];
	.loc	38	129	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r28;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r29;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_47_54786;
$Lt_47_55042:
	mul.lo.s32 	%r30, %r23, %r11;
	add.s32 	%r31, %r5, %r30;
	cvt.s64.s32 	%rd5, %r31;
	mul.wide.s32 	%rd6, %r31, 16;
	add.u64 	%rd7, %rd1, %rd6;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd7+0];
$Lt_47_54786:
	mov.f32 	%f5, %f1;
	mov.f32 	%f6, %f2;
	mov.f32 	%f7, %f3;
	mov.f32 	%f8, %f4;
	mov.f32 	%f9, %f8;
	.loc	38	54	0
	ld.param.u32 	%r32, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+0];
	mov.u32 	%r33, 0;
	setp.eq.s32 	%p3, %r32, %r33;
	@%p3 bra 	$Lt_47_258;
	mov.u32 	%r34, 1;
	setp.eq.s32 	%p4, %r32, %r34;
	@%p4 bra 	$Lt_47_770;
	mov.u32 	%r35, 2;
	setp.eq.s32 	%p5, %r32, %r35;
	@%p5 bra 	$Lt_47_1026;
	mov.u32 	%r36, 3;
	setp.eq.s32 	%p6, %r32, %r36;
	@%p6 bra 	$Lt_47_1282;
	mov.u32 	%r37, 4;
	setp.eq.s32 	%p7, %r32, %r37;
	@%p7 bra 	$Lt_47_1538;
	mov.u32 	%r38, 5;
	setp.eq.s32 	%p8, %r32, %r38;
	@%p8 bra 	$Lt_47_1794;
	mov.u32 	%r39, 6;
	setp.eq.s32 	%p9, %r32, %r39;
	@%p9 bra 	$Lt_47_2050;
	mov.u32 	%r40, 7;
	setp.eq.s32 	%p10, %r32, %r40;
	@%p10 bra 	$Lt_47_2306;
	mov.u32 	%r41, 8;
	setp.eq.s32 	%p11, %r32, %r41;
	@%p11 bra 	$Lt_47_2562;
	mov.u32 	%r42, 9;
	setp.eq.s32 	%p12, %r32, %r42;
	@%p12 bra 	$Lt_47_2818;
	mov.u32 	%r43, 10;
	setp.eq.s32 	%p13, %r32, %r43;
	@%p13 bra 	$Lt_47_3074;
	mov.u32 	%r44, 11;
	setp.eq.s32 	%p14, %r32, %r44;
	@%p14 bra 	$Lt_47_3330;
	mov.u32 	%r45, 12;
	setp.eq.s32 	%p15, %r32, %r45;
	@%p15 bra 	$Lt_47_3586;
	mov.u32 	%r46, 13;
	setp.eq.s32 	%p16, %r32, %r46;
	@%p16 bra 	$Lt_47_3842;
	mov.u32 	%r47, 14;
	setp.eq.s32 	%p17, %r32, %r47;
	@%p17 bra 	$Lt_47_4098;
	mov.u32 	%r48, 15;
	setp.eq.s32 	%p18, %r32, %r48;
	@%p18 bra 	$Lt_47_4354;
	mov.u32 	%r49, 16;
	setp.eq.s32 	%p19, %r32, %r49;
	@%p19 bra 	$Lt_47_4610;
	bra.uni 	$Lt_47_514;
$Lt_47_258:
	.loc	38	57	0
	ld.param.f32 	%f10, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+8];
	mov.f32 	%f11, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p20, %f10, %f11;
	ld.param.f32 	%f12, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+12];
	mov.f32 	%f13, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p21, %f12, %f13;
	ld.param.f32 	%f14, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+4];
	ld.param.f32 	%f15, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+16];
	mov.f32 	%f16, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p22, %f15, %f16;
	@!%p22 bra 	$Lt_47_55554;
	.loc	21	53	0
	cvt.ftz.sat.f32.f32 	%f17, %f8;
	mov.f32 	%f18, %f14;
	mul.ftz.f32 	%f19, %f17, %f18;
	selp.f32 	%f20, %f18, %f19, %p20;
	mov.f32 	%f21, 0f3f800000;    	// 1
	sub.ftz.f32 	%f22, %f21, %f20;
	selp.f32 	%f23, %f22, %f20, %p21;
	mov.f32 	%f9, %f23;
	.loc	21	57	0
	mov.f32 	%f5, %f23;
	mov.f32 	%f6, %f23;
	mov.f32 	%f7, %f23;
	bra.uni 	$Lt_47_514;
$Lt_47_55554:
	@!%p20 bra 	$Lt_47_56066;
	.loc	21	61	0
	mov.f32 	%f18, %f14;
	mov.f32 	%f24, 0f3f800000;    	// 1
	sub.ftz.f32 	%f25, %f24, %f18;
	selp.f32 	%f9, %f25, %f18, %p21;
	bra.uni 	$Lt_47_514;
$Lt_47_56066:
	.loc	21	73	0
	cvt.ftz.sat.f32.f32 	%f26, %f8;
	mov.f32 	%f27, 0f3f800000;    	// 1
	sub.ftz.f32 	%f28, %f27, %f26;
	selp.f32 	%f29, %f28, %f26, %p21;
	mul.ftz.f32 	%f9, %f29, %f14;
	bra.uni 	$Lt_47_514;
$Lt_47_770:
	.loc	22	267	0
	ld.const.f32 	%f30, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f30, %f6;
	ld.const.f32 	%f32, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f32, %f7, %f31;
	ld.const.f32 	%f34, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f34, %f5, %f33;
	ld.const.f32 	%f36, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f37, %f36, %f35;
	.loc	22	268	0
	ld.const.f32 	%f38, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f39, %f38, %f35;
	.loc	23	44	0
	ld.const.f32 	%f40, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f40, %f35;
	mov.f32 	%f6, %f37;
	mov.f32 	%f7, %f39;
	mov.f32 	%f9, %f8;
	.loc	38	61	0
	bra.uni 	$Lt_47_514;
$Lt_47_1026:
	.loc	38	63	0
	ld.param.f32 	%f41, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+20];
	ld.param.f32 	%f42, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+12];
	ld.param.f32 	%f43, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+4];
	mov.f32 	%f44, 0f00000000;    	// 0
	setp.neu.ftz.f32 	%p23, %f43, %f44;
	@!%p23 bra 	$Lt_47_56578;
	.loc	24	44	0
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+8];
	sub.ftz.f32 	%f46, %f7, %f42;
	fma.rn.ftz.f32 	%f7, %f45, %f46, %f41;
	.loc	24	45	0
	sub.ftz.f32 	%f47, %f6, %f42;
	fma.rn.ftz.f32 	%f6, %f45, %f47, %f41;
	.loc	24	46	0
	sub.ftz.f32 	%f48, %f5, %f42;
	fma.rn.ftz.f32 	%f5, %f45, %f48, %f41;
	bra.uni 	$Lt_47_514;
$Lt_47_56578:
	.loc	24	50	0
	ld.param.f32 	%f49, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+24];
	setp.gt.ftz.f32 	%p24, %f7, %f42;
	selp.f32 	%f7, %f49, %f41, %p24;
	.loc	24	51	0
	setp.gt.ftz.f32 	%p25, %f6, %f42;
	selp.f32 	%f6, %f49, %f41, %p25;
	.loc	24	52	0
	setp.gt.ftz.f32 	%p26, %f5, %f42;
	selp.f32 	%f5, %f49, %f41, %p26;
	bra.uni 	$Lt_47_514;
$Lt_47_1282:
	.loc	25	47	0
	ld.param.f32 	%f50, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+4];
	mul.ftz.f32 	%f5, %f50, %f5;
	ld.param.f32 	%f51, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+8];
	mul.ftz.f32 	%f6, %f51, %f6;
	ld.param.f32 	%f52, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+12];
	mul.ftz.f32 	%f7, %f52, %f7;
	.loc	38	67	0
	bra.uni 	$Lt_47_514;
$Lt_47_1538:
	.loc	26	48	0
	ld.param.f32 	%f53, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+4];
	mov.f32 	%f54, 0f00000000;    	// 0
	setp.eq.ftz.f32 	%p27, %f53, %f54;
	ld.param.f32 	%f55, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+8];
	mov.f32 	%f56, 0f00000000;    	// 0
	max.ftz.f32 	%f57, %f5, %f56;
	mov.f32 	%f58, 0f3f800000;    	// 1
	min.ftz.f32 	%f59, %f57, %f58;
	ld.param.f32 	%f60, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+12];
	sub.ftz.f32 	%f61, %f59, %f60;
	abs.ftz.f32 	%f62, %f61;
	mov.f32 	%f63, 0f00000000;    	// 0
	max.ftz.f32 	%f64, %f6, %f63;
	mov.f32 	%f65, 0f3f800000;    	// 1
	min.ftz.f32 	%f66, %f64, %f65;
	ld.param.f32 	%f67, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+16];
	sub.ftz.f32 	%f68, %f66, %f67;
	abs.ftz.f32 	%f69, %f68;
	mov.f32 	%f70, 0f00000000;    	// 0
	max.ftz.f32 	%f71, %f8, %f70;
	mov.f32 	%f72, 0f3f800000;    	// 1
	min.ftz.f32 	%f73, %f71, %f72;
	sub.ftz.f32 	%f74, %f73, %f8;
	abs.ftz.f32 	%f75, %f74;
	mov.f32 	%f76, 0f00000000;    	// 0
	max.ftz.f32 	%f77, %f7, %f76;
	mov.f32 	%f78, 0f3f800000;    	// 1
	min.ftz.f32 	%f79, %f77, %f78;
	ld.param.f32 	%f80, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+20];
	sub.ftz.f32 	%f81, %f79, %f80;
	abs.ftz.f32 	%f82, %f81;
	max.ftz.f32 	%f83, %f75, %f82;
	max.ftz.f32 	%f84, %f69, %f83;
	max.ftz.f32 	%f85, %f62, %f84;
	setp.ge.ftz.f32 	%p28, %f55, %f85;
	xor.pred 	%p29, %p27, %p28;
	@!%p29 bra 	$Lt_47_514;
	.loc	22	267	0
	ld.const.f32 	%f86, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f86, %f6;
	ld.const.f32 	%f87, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f87, %f7, %f31;
	ld.const.f32 	%f88, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f88, %f5, %f33;
	ld.const.f32 	%f89, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f90, %f89, %f35;
	.loc	22	268	0
	ld.const.f32 	%f91, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f92, %f91, %f35;
	.loc	23	44	0
	ld.const.f32 	%f93, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f93, %f35;
	mov.f32 	%f6, %f90;
	mov.f32 	%f7, %f92;
	bra.uni 	$Lt_47_514;
$Lt_47_1794:
	.loc	27	48	0
	ld.param.f32 	%f94, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+8];
	mov.f32 	%f95, 0f00000000;    	// 0
	max.ftz.f32 	%f96, %f5, %f95;
	mov.f32 	%f97, 0f3f800000;    	// 1
	min.ftz.f32 	%f98, %f96, %f97;
	ld.param.f32 	%f99, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+12];
	sub.ftz.f32 	%f100, %f98, %f99;
	abs.ftz.f32 	%f101, %f100;
	mov.f32 	%f102, 0f00000000;   	// 0
	max.ftz.f32 	%f103, %f6, %f102;
	mov.f32 	%f104, 0f3f800000;   	// 1
	min.ftz.f32 	%f105, %f103, %f104;
	ld.param.f32 	%f106, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+16];
	sub.ftz.f32 	%f107, %f105, %f106;
	abs.ftz.f32 	%f108, %f107;
	mov.f32 	%f109, 0f00000000;   	// 0
	max.ftz.f32 	%f110, %f8, %f109;
	mov.f32 	%f111, 0f3f800000;   	// 1
	min.ftz.f32 	%f112, %f110, %f111;
	sub.ftz.f32 	%f113, %f112, %f8;
	abs.ftz.f32 	%f114, %f113;
	mov.f32 	%f115, 0f00000000;   	// 0
	max.ftz.f32 	%f116, %f7, %f115;
	mov.f32 	%f117, 0f3f800000;   	// 1
	min.ftz.f32 	%f118, %f116, %f117;
	ld.param.f32 	%f119, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+20];
	sub.ftz.f32 	%f120, %f118, %f119;
	abs.ftz.f32 	%f121, %f120;
	max.ftz.f32 	%f122, %f114, %f121;
	max.ftz.f32 	%f123, %f108, %f122;
	max.ftz.f32 	%f124, %f101, %f123;
	setp.ge.ftz.f32 	%p30, %f94, %f124;
	@!%p30 bra 	$Lt_47_514;
	.loc	27	51	0
	ld.param.f32 	%f125, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+24];
	mov.f32 	%f126, %f125;
	ld.param.f32 	%f127, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+28];
	mov.f32 	%f128, %f127;
	ld.param.f32 	%f129, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+32];
	mov.f32 	%f130, %f129;
	ld.param.f32 	%f131, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+4];
	mov.f32 	%f132, 0f00000000;   	// 0
	setp.eq.ftz.f32 	%p31, %f131, %f132;
	@!%p31 bra 	$Lt_47_57858;
	.loc	27	60	0
	ld.const.f32 	%f133, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f133, %f6;
	ld.const.f32 	%f134, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f134, %f7, %f31;
	ld.const.f32 	%f135, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f135, %f5, %f33;
	mul.ftz.f32 	%f126, %f35, %f125;
	.loc	27	61	0
	mul.ftz.f32 	%f128, %f35, %f127;
	.loc	27	62	0
	mul.ftz.f32 	%f130, %f35, %f129;
$Lt_47_57858:
	.loc	27	65	0
	mov.f32 	%f5, %f126;
	mov.f32 	%f6, %f128;
	mov.f32 	%f7, %f130;
	bra.uni 	$Lt_47_514;
$Lt_47_2050:
	.loc	28	47	0
	sub.s32 	%r50, %r13, %r11;
	sub.s32 	%r51, %r14, %r5;
	cvt.rn.f32.s32 	%f136, %r5;
	cvt.rn.f32.s32 	%f137, %r11;
	cvt.rn.f32.s32 	%f138, %r50;
	cvt.rn.f32.s32 	%f139, %r51;
	ld.param.f32 	%f18, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+4];
	sub.ftz.f32 	%f140, %f18, %f136;
	sub.ftz.f32 	%f141, %f18, %f137;
	sub.ftz.f32 	%f142, %f18, %f138;
	sub.ftz.f32 	%f143, %f18, %f139;
	cvt.rzi.ftz.s32.f32 	%r52, %f140;
	cvt.rzi.ftz.s32.f32 	%r53, %f141;
	cvt.rzi.ftz.s32.f32 	%r54, %f142;
	cvt.rzi.ftz.s32.f32 	%r55, %f143;
	max.s32 	%r56, %r52, %r53;
	max.s32 	%r57, %r55, %r56;
	max.s32 	%r58, %r54, %r57;
	mov.u32 	%r59, 0;
	setp.le.s32 	%p32, %r58, %r59;
	@%p32 bra 	$Lt_47_514;
	.loc	28	51	0
	cvt.rn.f32.s32 	%f144, %r58;
	sub.ftz.f32 	%f145, %f18, %f144;
	div.approx.ftz.f32 	%f146, %f145, %f18;
	mul.ftz.f32 	%f9, %f8, %f146;
	bra.uni 	$Lt_47_514;
$Lt_47_2306:
	.loc	30	50	0
	ld.const.f32 	%f147, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f147, %f6;
	ld.param.f32 	%f148, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+12];
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+8];
	sub.ftz.f32 	%f149, %f148, %f45;
	ld.const.f32 	%f150, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f150, %f7, %f31;
	ld.param.f32 	%f151, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+16];
	mul.ftz.f32 	%f152, %f151, %f149;
	ld.const.f32 	%f153, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f153, %f5, %f33;
	add.ftz.f32 	%f154, %f152, %f148;
	sub.ftz.f32 	%f155, %f45, %f152;
	mov.f32 	%f156, 0f00000000;   	// 0
	max.ftz.f32 	%f157, %f154, %f156;
	mov.f32 	%f158, 0f00000000;   	// 0
	max.ftz.f32 	%f159, %f155, %f158;
	mov.f32 	%f160, 0f3f800000;   	// 1
	min.ftz.f32 	%f161, %f157, %f160;
	mov.f32 	%f162, 0f3f800000;   	// 1
	min.ftz.f32 	%f163, %f159, %f162;
	set.gt.ftz.u32.f32 	%r60, %f163, %f35;
	neg.s32 	%r61, %r60;
	set.le.ftz.u32.f32 	%r62, %f161, %f35;
	neg.s32 	%r63, %r62;
	or.b32 	%r64, %r61, %r63;
	mov.u32 	%r65, 0;
	setp.eq.s32 	%p33, %r64, %r65;
	@%p33 bra 	$Lt_47_59138;
	mov.f32 	%f164, 0f00000000;   	// 0
	bra.uni 	$Lt_47_59906;
$Lt_47_59138:
	add.ftz.f32 	%f165, %f152, %f45;
	mov.f32 	%f166, 0f00000000;   	// 0
	max.ftz.f32 	%f167, %f165, %f166;
	mov.f32 	%f168, 0f3f800000;   	// 1
	min.ftz.f32 	%f169, %f167, %f168;
	set.le.ftz.u32.f32 	%r66, %f169, %f35;
	neg.s32 	%r67, %r66;
	sub.ftz.f32 	%f170, %f148, %f152;
	mov.f32 	%f171, 0f00000000;   	// 0
	max.ftz.f32 	%f172, %f170, %f171;
	mov.f32 	%f173, 0f3f800000;   	// 1
	min.ftz.f32 	%f174, %f172, %f173;
	set.lt.ftz.u32.f32 	%r68, %f35, %f174;
	neg.s32 	%r69, %r68;
	and.b32 	%r70, %r67, %r69;
	mov.u32 	%r71, 0;
	setp.eq.s32 	%p34, %r70, %r71;
	@%p34 bra 	$Lt_47_59650;
	mov.f32 	%f164, 0f3f800000;   	// 1
	bra.uni 	$Lt_47_59906;
$Lt_47_59650:
	add.ftz.f32 	%f175, %f152, %f152;
	setp.gt.ftz.f32 	%p35, %f169, %f35;
	@!%p35 bra 	$Lt_47_60162;
	.loc	30	62	0
	sub.ftz.f32 	%f176, %f35, %f163;
	div.approx.ftz.f32 	%f164, %f176, %f175;
	bra.uni 	$Lt_47_59906;
$Lt_47_60162:
	.loc	30	66	0
	sub.ftz.f32 	%f177, %f161, %f35;
	div.approx.ftz.f32 	%f164, %f177, %f175;
$Lt_47_59906:
$Lt_47_59394:
$Lt_47_58882:
	.loc	30	69	0
	mov.f32 	%f178, 0f3f800000;   	// 1
	sub.ftz.f32 	%f179, %f178, %f164;
	ld.param.f32 	%f180, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+4];
	mov.f32 	%f181, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p36, %f180, %f181;
	selp.f32 	%f164, %f179, %f164, %p36;
	.loc	30	77	0
	ld.const.f32 	%f182, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f182, %f164;
	ld.const.f32 	%f183, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f183, %f164;
	ld.const.f32 	%f184, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f184, %f164;
	.loc	38	79	0
	bra.uni 	$Lt_47_514;
$Lt_47_2562:
	.loc	38	80	0
	ld.param.f32 	%f185, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+44];
	mov.f32 	%f186, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p37, %f185, %f186;
	@!%p37 bra 	$L_47_52482;
	cvt.rn.f32.s32 	%f187, %r5;
	cvt.rn.f32.s32 	%f188, %r14;
	mul.ftz.f32 	%f189, %f188, %f185;
	setp.lt.ftz.f32 	%p38, %f187, %f189;
	@%p38 bra 	$L_47_52226;
$L_47_52482:
	mov.f32 	%f190, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p39, %f185, %f190;
	@!%p39 bra 	$Lt_47_514;
	cvt.rn.f32.s32 	%f191, %r11;
	cvt.rn.f32.s32 	%f192, %r13;
	mul.ftz.f32 	%f193, %f192, %f185;
	neg.ftz.f32 	%f194, %f193;
	setp.lt.ftz.f32 	%p40, %f191, %f194;
	@!%p40 bra 	$Lt_47_514;
$L_47_52226:
	.loc	31	47	0
	ld.const.f32 	%f195, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f196, %f195, %f6;
	ld.const.f32 	%f197, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f198, %f197, %f7, %f196;
	ld.const.f32 	%f199, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f199, %f5, %f198;
	mov.f32 	%f200, %f35;
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+8];
	ld.param.f32 	%f201, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+4];
	sub.ftz.f32 	%f202, %f201, %f45;
	ld.param.f32 	%f203, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+16];
	ld.param.f32 	%f204, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+12];
	sub.ftz.f32 	%f205, %f204, %f203;
	mov.f32 	%f206, 0f3f800000;   	// 1
	set.neu.ftz.u32.f32 	%r72, %f205, %f206;
	neg.s32 	%r73, %r72;
	mov.f32 	%f207, 0f3f800000;   	// 1
	set.neu.ftz.u32.f32 	%r74, %f202, %f207;
	neg.s32 	%r75, %r74;
	or.b32 	%r76, %r73, %r75;
	mov.u32 	%r77, 0;
	setp.eq.s32 	%p41, %r76, %r77;
	@%p41 bra 	$Lt_47_60418;
	.loc	20	143	0
	mov.s32 	%r78, 1;
	sub.s32 	%r79, %r78, %r5;
	shr.u32 	%r80, %r11, 13;
	sub.u32 	%r81, %r5, %r11;
	sub.u32 	%r82, %r79, %r11;
	xor.b32 	%r83, %r80, %r82;
	shl.b32 	%r84, %r83, 8;
	sub.u32 	%r85, %r81, %r83;
	sub.u32 	%r86, %r11, %r83;
	xor.b32 	%r87, %r84, %r85;
	shr.u32 	%r88, %r87, 13;
	sub.u32 	%r89, %r86, %r87;
	sub.u32 	%r90, %r83, %r87;
	xor.b32 	%r91, %r88, %r89;
	shr.u32 	%r92, %r91, 12;
	sub.u32 	%r93, %r90, %r91;
	xor.b32 	%r94, %r92, %r93;
	sub.u32 	%r95, %r87, %r91;
	sub.u32 	%r96, %r95, %r94;
	shl.b32 	%r97, %r94, 16;
	xor.b32 	%r98, %r96, %r97;
	.loc	20	144	0
	sub.u32 	%r99, %r91, %r94;
	sub.u32 	%r100, %r99, %r98;
	shr.u32 	%r101, %r98, 5;
	xor.b32 	%r102, %r100, %r101;
	.loc	20	145	0
	sub.u32 	%r103, %r94, %r98;
	sub.u32 	%r104, %r103, %r102;
	shr.u32 	%r105, %r102, 3;
	xor.b32 	%r106, %r104, %r105;
	.loc	20	146	0
	sub.u32 	%r107, %r98, %r102;
	sub.u32 	%r108, %r107, %r106;
	shl.b32 	%r109, %r106, 10;
	xor.b32 	%r110, %r108, %r109;
	.loc	20	147	0
	sub.u32 	%r111, %r102, %r106;
	sub.u32 	%r112, %r111, %r110;
	shr.u32 	%r113, %r110, 15;
	xor.b32 	%r114, %r112, %r113;
	.loc	31	57	0
	mov.f32 	%f208, 0f3b270d73;   	// 0.00254902
	mul.lo.u32 	%r115, %r114, 1103515245;
	add.u32 	%r116, %r115, 12345;
	shr.u32 	%r117, %r116, 16;
	and.b32 	%r118, %r117, 255;
	shl.b32 	%r119, %r118, 7;
	mul.lo.u32 	%r120, %r114, -1029531031;
	sub.u32 	%r121, %r120, 740551042;
	shr.u32 	%r122, %r121, 16;
	and.b32 	%r123, %r122, 255;
	xor.b32 	%r124, %r119, %r123;
	cvt.rn.f32.s32 	%f209, %r124;
	mov.f32 	%f210, 0f467ffe00;   	// 16383.5
	div.approx.ftz.f32 	%f211, %f209, %f210;
	mov.f32 	%f212, 0fbf800000;   	// -1
	add.ftz.f32 	%f213, %f211, %f212;
	fma.rn.ftz.f32 	%f200, %f208, %f213, %f35;
$Lt_47_60418:
	sub.ftz.f32 	%f214, %f200, %f45;
	ld.param.f32 	%f215, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+20];
	mov.f32 	%f216, 0f3f800000;   	// 1
	setp.neu.ftz.f32 	%p42, %f215, %f216;
	@!%p42 bra 	$Lt_47_61186;
	mov.f32 	%f217, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p43, %f214, %f217;
	@!%p43 bra 	$Lt_47_61698;
	.loc	31	66	0
	mov.f32 	%f200, %f203;
	bra.uni 	$Lt_47_60930;
$Lt_47_61698:
	.loc	31	70	0
	rcp.approx.ftz.f32 	%f218, %f202;
	mul.ftz.f32 	%f219, %f214, %f218;
	lg2.approx.ftz.f32 	%f220, %f219;
	mul.ftz.f32 	%f221, %f215, %f220;
	ex2.approx.ftz.f32 	%f222, %f221;
	fma.rn.ftz.f32 	%f200, %f205, %f222, %f203;
	bra.uni 	$Lt_47_60930;
$Lt_47_61186:
	.loc	31	77	0
	rcp.approx.ftz.f32 	%f223, %f202;
	mul.ftz.f32 	%f224, %f214, %f223;
	fma.rn.ftz.f32 	%f200, %f205, %f224, %f203;
$Lt_47_60930:
	.loc	22	267	0
	ld.param.f32 	%f225, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+28];
	sin.approx.ftz.f32 	%f226, %f225;
	ld.const.f32 	%f227, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f228, %f227, %f6;
	ld.const.f32 	%f229, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f230, %f229, %f6;
	cos.approx.ftz.f32 	%f231, %f225;
	ld.const.f32 	%f232, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f233, %f232, %f7, %f228;
	ld.const.f32 	%f234, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f235, %f234, %f7, %f230;
	ld.const.f32 	%f236, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f237, %f236, %f5, %f233;
	ld.const.f32 	%f238, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f239, %f238, %f5, %f235;
	mul.ftz.f32 	%f240, %f226, %f237;
	mul.ftz.f32 	%f241, %f231, %f237;
	mul.ftz.f32 	%f242, %f239, %f231;
	sub.ftz.f32 	%f243, %f242, %f240;
	fma.rn.ftz.f32 	%f244, %f239, %f226, %f241;
	ld.param.f32 	%f245, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+32];
	sub.ftz.f32 	%f246, %f245, %f243;
	ld.param.f32 	%f247, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+36];
	sub.ftz.f32 	%f248, %f247, %f244;
	ld.param.f32 	%f249, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+40];
	fma.rn.ftz.f32 	%f250, %f249, %f246, %f243;
	fma.rn.ftz.f32 	%f251, %f249, %f248, %f244;
	ld.param.f32 	%f252, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+24];
	mul.ftz.f32 	%f253, %f250, %f252;
	mul.ftz.f32 	%f254, %f251, %f252;
	ld.const.f32 	%f255, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f256, %f255, %f253;
	ld.const.f32 	%f257, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f258, %f257, %f200, %f256;
	ld.const.f32 	%f259, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f260, %f259, %f254, %f258;
	.loc	22	268	0
	ld.const.f32 	%f261, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f262, %f261, %f253;
	ld.const.f32 	%f263, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f264, %f263, %f200, %f262;
	ld.const.f32 	%f265, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f266, %f265, %f254, %f264;
	.loc	31	92	0
	ld.const.f32 	%f267, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f268, %f267, %f253;
	ld.const.f32 	%f269, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f270, %f269, %f200, %f268;
	ld.const.f32 	%f271, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f271, %f254, %f270;
	mov.f32 	%f6, %f260;
	mov.f32 	%f7, %f266;
	bra.uni 	$Lt_47_514;
$Lt_47_2818:
	.loc	38	83	0
	ld.param.f32 	%f272, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+44];
	mov.f32 	%f273, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p44, %f272, %f273;
	@!%p44 bra 	$L_47_53250;
	cvt.rn.f32.s32 	%f274, %r5;
	cvt.rn.f32.s32 	%f275, %r14;
	mul.ftz.f32 	%f276, %f275, %f272;
	setp.lt.ftz.f32 	%p45, %f274, %f276;
	@%p45 bra 	$L_47_52994;
$L_47_53250:
	mov.f32 	%f277, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p46, %f272, %f277;
	@!%p46 bra 	$Lt_47_514;
	cvt.rn.f32.s32 	%f278, %r11;
	cvt.rn.f32.s32 	%f279, %r13;
	mul.ftz.f32 	%f280, %f279, %f272;
	neg.ftz.f32 	%f281, %f280;
	setp.lt.ftz.f32 	%p47, %f278, %f281;
	@!%p47 bra 	$Lt_47_514;
$L_47_52994:
	.loc	31	110	0
	ld.const.f32 	%f282, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f282, %f6;
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+8];
	ld.param.f32 	%f283, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+4];
	sub.ftz.f32 	%f284, %f283, %f45;
	ld.param.f32 	%f285, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+16];
	ld.param.f32 	%f286, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+12];
	sub.ftz.f32 	%f287, %f286, %f285;
	ld.const.f32 	%f288, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f288, %f7, %f31;
	ld.const.f32 	%f289, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f289, %f5, %f33;
	sub.ftz.f32 	%f290, %f35, %f45;
	ld.param.f32 	%f291, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+20];
	mov.f32 	%f292, 0f3f800000;   	// 1
	setp.neu.ftz.f32 	%p48, %f291, %f292;
	@!%p48 bra 	$Lt_47_62210;
	.loc	42	523	0
	mov.f32 	%f293, 0f00000000;   	// 0
	max.ftz.f32 	%f294, %f290, %f293;
	div.approx.ftz.f32 	%f295, %f294, %f284;
	lg2.approx.ftz.f32 	%f296, %f295;
	mul.ftz.f32 	%f297, %f291, %f296;
	ex2.approx.ftz.f32 	%f298, %f297;
	.loc	31	120	0
	fma.rn.ftz.f32 	%f299, %f287, %f298, %f285;
	bra.uni 	$Lt_47_61954;
$Lt_47_62210:
	.loc	31	129	0
	div.approx.ftz.f32 	%f300, %f290, %f284;
	fma.rn.ftz.f32 	%f299, %f287, %f300, %f285;
$Lt_47_61954:
	.loc	31	135	0
	ld.const.f32 	%f301, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f301, %f299;
	ld.const.f32 	%f302, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f302, %f299;
	ld.const.f32 	%f303, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f303, %f299;
	bra.uni 	$Lt_47_514;
$Lt_47_3074:
	.loc	38	86	0
	ld.param.f32 	%f304, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+44];
	mov.f32 	%f305, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p49, %f304, %f305;
	@!%p49 bra 	$L_47_54018;
	cvt.rn.f32.s32 	%f306, %r5;
	cvt.rn.f32.s32 	%f307, %r14;
	mul.ftz.f32 	%f308, %f307, %f304;
	setp.lt.ftz.f32 	%p50, %f306, %f308;
	@%p50 bra 	$L_47_53762;
$L_47_54018:
	mov.f32 	%f309, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p51, %f304, %f309;
	@!%p51 bra 	$Lt_47_514;
	cvt.rn.f32.s32 	%f310, %r11;
	cvt.rn.f32.s32 	%f311, %r13;
	mul.ftz.f32 	%f312, %f311, %f304;
	neg.ftz.f32 	%f313, %f312;
	setp.lt.ftz.f32 	%p52, %f310, %f313;
	@!%p52 bra 	$Lt_47_514;
$L_47_53762:
	.loc	31	160	0
	ld.const.f32 	%f5, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f6, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f7, [k601YPbPr_To_RGB32f+0];
	bra.uni 	$Lt_47_514;
$Lt_47_3330:
	.loc	32	42	0
	ld.param.f32 	%f314, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+4];
	mov.f32 	%f315, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p53, %f5, %f315;
	@!%p53 bra 	$Lt_47_62722;
	.loc	22	292	0
	mov.f32 	%f18, %f314;
	lg2.approx.ftz.f32 	%f316, %f5;
	mul.ftz.f32 	%f317, %f18, %f316;
	ex2.approx.ftz.f32 	%f318, %f317;
	bra.uni 	$Lt_47_62466;
$Lt_47_62722:
	mov.f32 	%f18, %f314;
	neg.ftz.f32 	%f319, %f5;
	lg2.approx.ftz.f32 	%f320, %f319;
	mul.ftz.f32 	%f321, %f18, %f320;
	ex2.approx.ftz.f32 	%f322, %f321;
	neg.ftz.f32 	%f318, %f322;
$Lt_47_62466:
	mov.f32 	%f323, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p54, %f6, %f323;
	@!%p54 bra 	$Lt_47_63234;
	.loc	22	293	0
	lg2.approx.ftz.f32 	%f324, %f6;
	mul.ftz.f32 	%f325, %f18, %f324;
	ex2.approx.ftz.f32 	%f326, %f325;
	bra.uni 	$Lt_47_62978;
$Lt_47_63234:
	neg.ftz.f32 	%f327, %f6;
	lg2.approx.ftz.f32 	%f328, %f327;
	mul.ftz.f32 	%f329, %f18, %f328;
	ex2.approx.ftz.f32 	%f330, %f329;
	neg.ftz.f32 	%f326, %f330;
$Lt_47_62978:
	mov.f32 	%f331, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p55, %f7, %f331;
	@!%p55 bra 	$Lt_47_63746;
	.loc	22	294	0
	lg2.approx.ftz.f32 	%f332, %f7;
	mul.ftz.f32 	%f333, %f18, %f332;
	ex2.approx.ftz.f32 	%f334, %f333;
	bra.uni 	$Lt_47_63490;
$Lt_47_63746:
	neg.ftz.f32 	%f335, %f7;
	lg2.approx.ftz.f32 	%f336, %f335;
	mul.ftz.f32 	%f337, %f18, %f336;
	ex2.approx.ftz.f32 	%f338, %f337;
	neg.ftz.f32 	%f334, %f338;
$Lt_47_63490:
	mov.f32 	%f339, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p56, %f8, %f339;
	@!%p56 bra 	$Lt_47_64258;
	.loc	22	295	0
	lg2.approx.ftz.f32 	%f340, %f8;
	mul.ftz.f32 	%f341, %f18, %f340;
	ex2.approx.ftz.f32 	%f342, %f341;
	bra.uni 	$Lt_47_64002;
$Lt_47_64258:
	neg.ftz.f32 	%f343, %f8;
	lg2.approx.ftz.f32 	%f344, %f343;
	mul.ftz.f32 	%f345, %f18, %f344;
	ex2.approx.ftz.f32 	%f346, %f345;
	neg.ftz.f32 	%f342, %f346;
$Lt_47_64002:
	.loc	32	42	0
	mov.f32 	%f5, %f318;
	mov.f32 	%f6, %f326;
	mov.f32 	%f7, %f334;
	mov.f32 	%f9, %f342;
	.loc	38	91	0
	bra.uni 	$Lt_47_514;
$Lt_47_3586:
	.loc	33	41	0
	sub.s32 	%r125, %r14, %r5;
	sub.s32 	%r6, %r125, 1;
	.loc	38	94	0
	bra.uni 	$Lt_47_514;
$Lt_47_3842:
	.loc	38	96	0
	ld.param.f32 	%f18, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+4];
	mov.f32 	%f347, 0f3f000000;   	// 0.5
	mul.ftz.f32 	%f348, %f18, %f347;
	ld.param.f32 	%f349, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+16];
	ld.param.f32 	%f350, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+8];
	mov.f32 	%f351, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p57, %f350, %f351;
	@!%p57 bra 	$Lt_47_64770;
	.loc	20	143	0
	cvt.rzi.ftz.u32.f32 	%r126, %f349;
	mul.lo.s32 	%r127, %r5, 3;
	shr.u32 	%r128, %r126, 13;
	sub.u32 	%r129, %r11, %r126;
	sub.u32 	%r130, %r127, %r11;
	sub.u32 	%r131, %r130, %r126;
	xor.b32 	%r132, %r131, %r128;
	shl.b32 	%r133, %r132, 8;
	sub.u32 	%r134, %r129, %r132;
	sub.u32 	%r135, %r126, %r132;
	xor.b32 	%r136, %r133, %r134;
	shr.u32 	%r137, %r136, 13;
	sub.u32 	%r138, %r135, %r136;
	sub.u32 	%r139, %r132, %r136;
	xor.b32 	%r140, %r137, %r138;
	shr.u32 	%r141, %r140, 12;
	sub.u32 	%r142, %r139, %r140;
	xor.b32 	%r143, %r141, %r142;
	shl.b32 	%r144, %r143, 16;
	sub.u32 	%r145, %r136, %r140;
	sub.u32 	%r146, %r145, %r143;
	xor.b32 	%r147, %r144, %r146;
	.loc	20	144	0
	sub.u32 	%r148, %r140, %r143;
	sub.u32 	%r149, %r148, %r147;
	shr.u32 	%r150, %r147, 5;
	xor.b32 	%r151, %r149, %r150;
	.loc	20	145	0
	sub.u32 	%r152, %r143, %r147;
	sub.u32 	%r153, %r152, %r151;
	shr.u32 	%r154, %r151, 3;
	xor.b32 	%r155, %r153, %r154;
	.loc	20	146	0
	sub.u32 	%r156, %r147, %r151;
	sub.u32 	%r157, %r156, %r155;
	shl.b32 	%r158, %r155, 10;
	xor.b32 	%r159, %r157, %r158;
	.loc	20	147	0
	sub.u32 	%r160, %r151, %r155;
	sub.u32 	%r161, %r160, %r159;
	shr.u32 	%r162, %r159, 15;
	xor.b32 	%r163, %r161, %r162;
	.loc	34	48	0
	mul.lo.u32 	%r164, %r163, 1103515245;
	add.u32 	%r165, %r164, 12345;
	shr.u32 	%r166, %r165, 16;
	and.b32 	%r167, %r166, 255;
	shl.b32 	%r168, %r167, 7;
	mul.lo.u32 	%r169, %r163, -1029531031;
	sub.u32 	%r170, %r169, 740551042;
	shr.u32 	%r171, %r170, 16;
	and.b32 	%r172, %r171, 255;
	xor.b32 	%r173, %r168, %r172;
	cvt.rn.f32.s32 	%f352, %r173;
	mov.f32 	%f353, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f354, %f352, %f353;
	mul.ftz.f32 	%f355, %f18, %f354;
	sub.ftz.f32 	%f356, %f355, %f348;
	.loc	20	143	0
	add.u32 	%r174, %r131, 1;
	xor.b32 	%r175, %r174, %r128;
	shl.b32 	%r176, %r175, 8;
	sub.u32 	%r177, %r129, %r175;
	sub.u32 	%r178, %r126, %r175;
	xor.b32 	%r179, %r176, %r177;
	shr.u32 	%r180, %r179, 13;
	sub.u32 	%r181, %r178, %r179;
	sub.u32 	%r182, %r175, %r179;
	xor.b32 	%r183, %r180, %r181;
	shr.u32 	%r184, %r183, 12;
	sub.u32 	%r185, %r182, %r183;
	xor.b32 	%r186, %r184, %r185;
	sub.u32 	%r187, %r179, %r183;
	sub.u32 	%r188, %r187, %r186;
	shl.b32 	%r189, %r186, 16;
	xor.b32 	%r190, %r188, %r189;
	.loc	20	144	0
	sub.u32 	%r191, %r183, %r186;
	sub.u32 	%r192, %r191, %r190;
	shr.u32 	%r193, %r190, 5;
	xor.b32 	%r194, %r192, %r193;
	.loc	20	145	0
	sub.u32 	%r195, %r186, %r190;
	sub.u32 	%r196, %r195, %r194;
	shr.u32 	%r197, %r194, 3;
	xor.b32 	%r198, %r196, %r197;
	.loc	20	146	0
	sub.u32 	%r199, %r190, %r194;
	sub.u32 	%r200, %r199, %r198;
	shl.b32 	%r201, %r198, 10;
	xor.b32 	%r202, %r200, %r201;
	.loc	20	147	0
	sub.u32 	%r203, %r194, %r198;
	sub.u32 	%r204, %r203, %r202;
	shr.u32 	%r205, %r202, 15;
	xor.b32 	%r206, %r204, %r205;
	.loc	34	49	0
	mul.lo.u32 	%r207, %r206, 1103515245;
	add.u32 	%r208, %r207, 12345;
	shr.u32 	%r209, %r208, 16;
	and.b32 	%r210, %r209, 255;
	shl.b32 	%r211, %r210, 7;
	mul.lo.u32 	%r212, %r206, -1029531031;
	sub.u32 	%r213, %r212, 740551042;
	shr.u32 	%r214, %r213, 16;
	and.b32 	%r215, %r214, 255;
	xor.b32 	%r216, %r211, %r215;
	cvt.rn.f32.s32 	%f357, %r216;
	mov.f32 	%f358, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f359, %f357, %f358;
	mul.ftz.f32 	%f360, %f18, %f359;
	sub.ftz.f32 	%f361, %f360, %f348;
	.loc	20	143	0
	add.u32 	%r217, %r131, 2;
	xor.b32 	%r218, %r217, %r128;
	shl.b32 	%r219, %r218, 8;
	sub.u32 	%r220, %r129, %r218;
	sub.u32 	%r221, %r126, %r218;
	xor.b32 	%r222, %r219, %r220;
	shr.u32 	%r223, %r222, 13;
	sub.u32 	%r224, %r221, %r222;
	sub.u32 	%r225, %r218, %r222;
	xor.b32 	%r226, %r223, %r224;
	shr.u32 	%r227, %r226, 12;
	sub.u32 	%r228, %r225, %r226;
	xor.b32 	%r229, %r227, %r228;
	sub.u32 	%r230, %r222, %r226;
	sub.u32 	%r231, %r230, %r229;
	shl.b32 	%r232, %r229, 16;
	xor.b32 	%r233, %r231, %r232;
	.loc	20	144	0
	sub.u32 	%r234, %r226, %r229;
	sub.u32 	%r235, %r234, %r233;
	shr.u32 	%r236, %r233, 5;
	xor.b32 	%r237, %r235, %r236;
	.loc	20	145	0
	sub.u32 	%r238, %r229, %r233;
	sub.u32 	%r239, %r238, %r237;
	shr.u32 	%r240, %r237, 3;
	xor.b32 	%r241, %r239, %r240;
	.loc	20	146	0
	sub.u32 	%r242, %r233, %r237;
	sub.u32 	%r243, %r242, %r241;
	shl.b32 	%r244, %r241, 10;
	xor.b32 	%r245, %r243, %r244;
	.loc	20	147	0
	sub.u32 	%r246, %r237, %r241;
	sub.u32 	%r247, %r246, %r245;
	shr.u32 	%r248, %r245, 15;
	xor.b32 	%r249, %r247, %r248;
	.loc	34	50	0
	mul.lo.u32 	%r250, %r249, 1103515245;
	add.u32 	%r251, %r250, 12345;
	shr.u32 	%r252, %r251, 16;
	and.b32 	%r253, %r252, 255;
	shl.b32 	%r254, %r253, 7;
	mul.lo.u32 	%r255, %r249, -1029531031;
	sub.u32 	%r256, %r255, 740551042;
	shr.u32 	%r257, %r256, 16;
	and.b32 	%r258, %r257, 255;
	xor.b32 	%r259, %r254, %r258;
	cvt.rn.f32.s32 	%f362, %r259;
	mov.f32 	%f363, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f364, %f362, %f363;
	mul.ftz.f32 	%f365, %f18, %f364;
	sub.ftz.f32 	%f366, %f365, %f348;
	bra.uni 	$Lt_47_64514;
// Noise path that derives ONE pseudo-random value and uses it for all three
// per-channel noise registers (%f356, %f361, %f366).
// Inputs %f349, %f348, %f18, %r5 and %r11 are set outside this excerpt.
$Lt_47_64770:
	.loc	20	143	0
	// Seed: %f349 truncated toward zero to an unsigned integer, then mixed with
	// %r5 and %r11 through a subtract / xor / shift sequence.
	// NOTE(review): looks like the inlined Mix3 helper declared at the top of
	// the file -- confirm against the CUDA source.
	cvt.rzi.ftz.u32.f32 	%r260, %f349;
	sub.u32 	%r261, %r5, %r11;
	shr.u32 	%r262, %r260, 13;
	sub.u32 	%r263, %r11, %r260;
	sub.u32 	%r264, %r261, %r260;
	xor.b32 	%r265, %r264, %r262;
	shl.b32 	%r266, %r265, 8;
	sub.u32 	%r267, %r263, %r265;
	sub.u32 	%r268, %r260, %r265;
	xor.b32 	%r269, %r266, %r267;
	shr.u32 	%r270, %r269, 13;
	sub.u32 	%r271, %r268, %r269;
	sub.u32 	%r272, %r265, %r269;
	xor.b32 	%r273, %r270, %r271;
	shr.u32 	%r274, %r273, 12;
	sub.u32 	%r275, %r272, %r273;
	xor.b32 	%r276, %r274, %r275;
	shl.b32 	%r277, %r276, 16;
	sub.u32 	%r278, %r269, %r273;
	sub.u32 	%r279, %r278, %r276;
	xor.b32 	%r280, %r277, %r279;
	.loc	20	144	0
	sub.u32 	%r281, %r273, %r276;
	sub.u32 	%r282, %r281, %r280;
	shr.u32 	%r283, %r280, 5;
	xor.b32 	%r284, %r282, %r283;
	.loc	20	145	0
	sub.u32 	%r285, %r276, %r280;
	sub.u32 	%r286, %r285, %r284;
	shr.u32 	%r287, %r284, 3;
	xor.b32 	%r288, %r286, %r287;
	.loc	20	146	0
	sub.u32 	%r289, %r280, %r284;
	sub.u32 	%r290, %r289, %r288;
	shl.b32 	%r291, %r288, 10;
	xor.b32 	%r292, %r290, %r291;
	.loc	20	147	0
	sub.u32 	%r293, %r284, %r288;
	sub.u32 	%r294, %r293, %r292;
	shr.u32 	%r295, %r292, 15;
	xor.b32 	%r296, %r294, %r295;
	.loc	34	54	0
	// Two multiply-add results from the mixed seed %r296. Bits 16..23 of each
	// are combined as (first << 7) ^ second, giving an integer in 0..32767.
	// NOTE(review): the second constant pair looks like two successive steps
	// of the first linear-congruential step -- confirm.
	mul.lo.u32 	%r297, %r296, 1103515245;
	mul.lo.u32 	%r298, %r296, -1029531031;
	add.u32 	%r299, %r297, 12345;
	sub.u32 	%r300, %r298, 740551042;
	shr.u32 	%r301, %r299, 16;
	shr.u32 	%r302, %r300, 16;
	and.b32 	%r303, %r301, 255;
	and.b32 	%r304, %r302, 255;
	shl.b32 	%r305, %r303, 7;
	xor.b32 	%r306, %r305, %r304;
	// noise = %f18 * (value / 32767) - %f348
	cvt.rn.f32.s32 	%f367, %r306;
	mov.f32 	%f368, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f369, %f367, %f368;
	mul.ftz.f32 	%f370, %f18, %f369;
	sub.ftz.f32 	%f371, %f370, %f348;
	// Same noise value for all three channels; falls through to $Lt_47_64514.
	mov.f32 	%f366, %f371;
	mov.f32 	%f361, %f371;
	mov.f32 	%f356, %f371;
// Join point of the noise paths: adds the per-channel noise terms to the
// pixel registers, then optionally clamps the result to [0, 1].
$Lt_47_64514:
	.loc	34	57	0
	add.ftz.f32 	%f7, %f356, %f7;
	.loc	34	58	0
	add.ftz.f32 	%f6, %f361, %f6;
	.loc	34	59	0
	add.ftz.f32 	%f5, %f5, %f366;
	// Clamp only when the filter parameter at byte offset 12 equals 1.0;
	// otherwise go straight to the store at $Lt_47_514.
	ld.param.f32 	%f372, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+12];
	mov.f32 	%f373, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p58, %f372, %f373;
	@!%p58 bra 	$Lt_47_514;
	.loc	34	63	0
	// %f5, %f6, %f7 = min(max(value, 0), 1)
	mov.f32 	%f374, 0f00000000;   	// 0
	max.ftz.f32 	%f375, %f5, %f374;
	mov.f32 	%f376, 0f3f800000;   	// 1
	min.ftz.f32 	%f5, %f375, %f376;
	mov.f32 	%f377, 0f00000000;   	// 0
	max.ftz.f32 	%f378, %f6, %f377;
	mov.f32 	%f379, 0f3f800000;   	// 1
	min.ftz.f32 	%f6, %f378, %f379;
	mov.f32 	%f380, 0f00000000;   	// 0
	max.ftz.f32 	%f381, %f7, %f380;
	mov.f32 	%f382, 0f3f800000;   	// 1
	min.ftz.f32 	%f7, %f381, %f382;
	// The fourth component is clamped from %f8 into the output register %f9.
	mov.f32 	%f383, 0f00000000;   	// 0
	max.ftz.f32 	%f384, %f8, %f383;
	mov.f32 	%f385, 0f3f800000;   	// 1
	min.ftz.f32 	%f9, %f384, %f385;
	bra.uni 	$Lt_47_514;
// Filter case: transform (%f7, %f6, %f5) with the 3x3 constant matrix
// kRGB32f_To_601YPbPr, adjust the three results with filter parameters, then
// transform back with k601YPbPr_To_RGB32f.
// Applied only when float(%r5) < float(%r14) * param[+20]; otherwise the pixel
// is left untouched. %r5 and %r14 are set outside this excerpt.
$Lt_47_4098:
	.loc	38	99	0
	cvt.rn.f32.s32 	%f386, %r5;
	cvt.rn.f32.s32 	%f387, %r14;
	ld.param.f32 	%f388, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+20];
	mul.ftz.f32 	%f389, %f387, %f388;
	setp.lt.ftz.f32 	%p59, %f386, %f389;
	@!%p59 bra 	$Lt_47_514;
	.loc	22	267	0
	// Forward matrix, one row per result:
	//   offsets  0/ 4/ 8 -> %f35
	//   offsets 12/16/20 -> %f404
	//   offsets 24/28/32 -> %f402
	// Each row is dotted with (%f7, %f6, %f5) in that order.
	ld.const.f32 	%f390, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f390, %f6;
	ld.const.f32 	%f391, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f392, %f391, %f6;
	ld.const.f32 	%f393, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f394, %f393, %f6;
	ld.const.f32 	%f395, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f395, %f7, %f31;
	ld.const.f32 	%f396, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f397, %f396, %f7, %f392;
	ld.const.f32 	%f398, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f399, %f398, %f7, %f394;
	ld.const.f32 	%f400, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f400, %f5, %f33;
	ld.const.f32 	%f401, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f402, %f401, %f5, %f397;
	ld.const.f32 	%f403, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f404, %f403, %f5, %f399;
	// First component: %f407 = %f35 * param[+8] + param[+4]
	ld.param.f32 	%f405, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+4];
	ld.param.f32 	%f406, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+8];
	fma.rn.ftz.f32 	%f407, %f35, %f406, %f405;
	// Other two components are mixed as a 2x2 transform:
	//   %f413 = %f404 * param[+12] - %f402 * param[+16]
	//   %f414 = %f404 * param[+16] + %f402 * param[+12]
	// NOTE(review): this is a rotation if param[+12]/param[+16] hold cos/sin
	// of an angle -- confirm on the host side.
	ld.param.f32 	%f408, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+16];
	mul.ftz.f32 	%f409, %f402, %f408;
	ld.param.f32 	%f410, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+12];
	mul.ftz.f32 	%f411, %f402, %f410;
	mul.ftz.f32 	%f412, %f404, %f410;
	sub.ftz.f32 	%f413, %f412, %f409;
	fma.rn.ftz.f32 	%f414, %f404, %f408, %f411;
	// Inverse matrix applied to (%f407, %f413, %f414):
	//   row 12/16/20 -> %f420 (becomes %f6)
	ld.const.f32 	%f415, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f416, %f415, %f413;
	ld.const.f32 	%f417, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f418, %f417, %f407, %f416;
	ld.const.f32 	%f419, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f420, %f419, %f414, %f418;
	.loc	22	268	0
	//   row 0/4/8 -> %f426 (becomes %f7)
	ld.const.f32 	%f421, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f422, %f421, %f413;
	ld.const.f32 	%f423, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f424, %f423, %f407, %f422;
	ld.const.f32 	%f425, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f426, %f425, %f414, %f424;
	.loc	35	56	0
	//   row 24/28/32 -> %f5
	ld.const.f32 	%f427, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f428, %f427, %f413;
	ld.const.f32 	%f429, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f430, %f429, %f407, %f428;
	ld.const.f32 	%f431, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f431, %f414, %f430;
	mov.f32 	%f6, %f420;
	mov.f32 	%f7, %f426;
	bra.uni 	$Lt_47_514;
// Filter case: builds a per-channel target value by interpolating between two
// filter parameters using the weighted sum %f35 as the factor, then blends
// each channel toward its target by the amount param[+28].
$Lt_47_4354:
	.loc	36	46	0
	// %f35 = first row of kRGB32f_To_601YPbPr dotted with (%f7, %f6, %f5)
	ld.const.f32 	%f432, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f432, %f6;
	ld.const.f32 	%f433, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f433, %f7, %f31;
	ld.const.f32 	%f434, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f434, %f5, %f33;
	// target for %f6: param[+8] + %f35 * (param[+20] - param[+8])
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+8];
	ld.param.f32 	%f435, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+20];
	sub.ftz.f32 	%f436, %f435, %f45;
	fma.rn.ftz.f32 	%f437, %f35, %f436, %f45;
	.loc	36	47	0
	// target for %f7: param[+12] + %f35 * (param[+24] - param[+12])
	ld.param.f32 	%f438, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+12];
	ld.param.f32 	%f439, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+24];
	sub.ftz.f32 	%f440, %f439, %f438;
	fma.rn.ftz.f32 	%f441, %f35, %f440, %f438;
	.loc	36	49	0
	// target for %f5: param[+4] + %f35 * (param[+16] - param[+4])
	// Blend: channel = channel + param[+28] * (target - channel)
	ld.param.f32 	%f442, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+28];
	ld.param.f32 	%f18, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+4];
	ld.param.f32 	%f443, [__cudaparm_PointwiseFilterHostKernel1___val_paraminFilter0+16];
	sub.ftz.f32 	%f444, %f443, %f18;
	fma.rn.ftz.f32 	%f445, %f35, %f444, %f18;
	sub.ftz.f32 	%f446, %f445, %f5;
	fma.rn.ftz.f32 	%f5, %f442, %f446, %f5;
	.loc	36	50	0
	sub.ftz.f32 	%f447, %f437, %f6;
	fma.rn.ftz.f32 	%f6, %f442, %f447, %f6;
	.loc	36	51	0
	sub.ftz.f32 	%f448, %f441, %f7;
	fma.rn.ftz.f32 	%f7, %f442, %f448, %f7;
	.loc	38	103	0
	bra.uni 	$Lt_47_514;
// Filter case: leaves the pixel values alone and replaces the row index %r12
// (used for the destination address at $Lt_47_514) with %r13 - %r11 - 1.
// NOTE(review): %r13/%r11 are set outside this excerpt; presumably height and
// row, which would make this a vertical flip -- confirm.
$Lt_47_4610:
	.loc	37	41	0
	sub.s32 	%r307, %r13, %r11;
	sub.s32 	%r12, %r307, 1;
// Common exit of every filter case: writes (%f5, %f6, %f7, %f9) to the
// destination image and ends the thread.
$Lt_47_514:
	.loc	38	131	0
	// Element index = inDestPitch * %r12 + %r6. The pitch is counted in
	// pixels, since the index is scaled by the pixel size (8 or 16 bytes) below.
	ld.param.s32 	%r308, [__cudaparm_PointwiseFilterHostKernel1_inDestPitch];
	mul.lo.s32 	%r309, %r308, %r12;
	add.s32 	%r310, %r6, %r309;
	cvt.s64.s32 	%rd8, %r310;
	ld.param.u64 	%rd9, [__cudaparm_PointwiseFilterHostKernel1_inDestImage];
	// %p2 is set outside this excerpt and selects the pixel format.
	// NOTE(review): presumably inDeviceFormat == 0, as in PointwiseFilterHostKernel2.
	@!%p2 bra 	$Lt_47_66306;
	.loc	20	126	0
	// Half-float path: 8 bytes per pixel, each component rounded to f16.
	mul.lo.u64 	%rd10, %rd8, 8;
	add.u64 	%rd11, %rd9, %rd10;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f5;
	mov.b32		%r311, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f6;
	mov.b32		%r312, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f7;
	mov.b32		%r313, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r314, %b1; }
	st.global.v4.u16 	[%rd11+0], {%r311,%r312,%r313,%r314};
	.loc	38	133	0
	bra.uni 	$Lt_47_66050;
$Lt_47_66306:
	.loc	20	126	0
	// Full-float path: 16 bytes per pixel, stored as-is.
	mul.lo.u64 	%rd12, %rd8, 16;
	add.u64 	%rd13, %rd9, %rd12;
	st.global.v4.f32 	[%rd13+0], {%f5,%f6,%f7,%f9};
$Lt_47_66050:
$Lt_47_54274:
	.loc	38	135	0
	exit;
$LDWend_PointwiseFilterHostKernel1:
	} // PointwiseFilterHostKernel1

	// PointwiseFilterHostKernel2: per-pixel kernel taking two 48-byte filter
	// parameter blocks (paraminFilter0, paraminFilter1). The part below computes
	// the thread's pixel coordinate, bounds-checks it and loads the source pixel
	// into %f5..%f9; the filter cases follow.
	//   inSrcImage / inDestImage : 64-bit global addresses of the images
	//   inSrcPitch / inDestPitch : row pitch in pixels (the index is scaled by
	//                              the pixel size after the pitch multiply)
	//   inDeviceFormat           : 0 selects 4 x f16 pixels (8 bytes), any other
	//                              value 4 x f32 pixels (16 bytes)
	//   inWidth / inHeight       : bounds for the x / y coordinate
	.entry PointwiseFilterHostKernel2 (
		.param .u64 __cudaparm_PointwiseFilterHostKernel2_inSrcImage,
		.param .s32 __cudaparm_PointwiseFilterHostKernel2_inSrcPitch,
		.param .u64 __cudaparm_PointwiseFilterHostKernel2_inDestImage,
		.param .s32 __cudaparm_PointwiseFilterHostKernel2_inDestPitch,
		.param .u32 __cudaparm_PointwiseFilterHostKernel2_inDeviceFormat,
		.param .s32 __cudaparm_PointwiseFilterHostKernel2_inWidth,
		.param .s32 __cudaparm_PointwiseFilterHostKernel2_inHeight,
		.param .align 4 .b8 __cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0[48],
		.param .align 4 .b8 __cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1[48])
	{
	.reg .u32 %r<590>;
	.reg .u64 %rd<15>;
	.reg .f32 %f<877>;
	.reg .pred %p<118>;
	.loc	38	145	0
$LDWbegin_PointwiseFilterHostKernel2:
	.loc	38	147	0
	// x = ctaid.x * ntid.x + tid.x  -> %r5, with a working copy in %r6
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	mov.u32 	%r4, %tid.x;
	add.u32 	%r5, %r3, %r4;
	mov.s32 	%r6, %r5;
	.loc	38	148	0
	// y = ctaid.y * ntid.y + tid.y  -> %r11, with a working copy in %r12
	cvt.s32.u32 	%r7, %ctaid.y;
	cvt.s32.u32 	%r8, %ntid.y;
	mul.lo.s32 	%r9, %r7, %r8;
	mov.u32 	%r10, %tid.y;
	add.u32 	%r11, %r9, %r10;
	mov.s32 	%r12, %r11;
	// Bounds check: continue only if inHeight > y AND inWidth > x (signed
	// compares); otherwise branch to $Lt_48_128258 (target outside this excerpt).
	ld.param.s32 	%r13, [__cudaparm_PointwiseFilterHostKernel2_inHeight];
	ld.param.s32 	%r14, [__cudaparm_PointwiseFilterHostKernel2_inWidth];
	set.gt.u32.s32 	%r15, %r13, %r11;
	neg.s32 	%r16, %r15;
	set.gt.u32.s32 	%r17, %r14, %r5;
	neg.s32 	%r18, %r17;
	and.b32 	%r19, %r16, %r18;
	mov.u32 	%r20, 0;
	setp.eq.s32 	%p1, %r19, %r20;
	@%p1 bra 	$Lt_48_128258;
	// %p2 = (inDeviceFormat == 0): true selects the half-float pixel layout.
	ld.param.s32 	%r21, [__cudaparm_PointwiseFilterHostKernel2_inDeviceFormat];
	mov.s32 	%r22, 0;
	setp.eq.s32 	%p2, %r21, %r22;
	ld.param.u64 	%rd1, [__cudaparm_PointwiseFilterHostKernel2_inSrcImage];
	ld.param.s32 	%r23, [__cudaparm_PointwiseFilterHostKernel2_inSrcPitch];
	@!%p2 bra 	$Lt_48_106498;
	.loc	20	115	0
	// Half-float load: pixel index = inSrcPitch * y + x, 8 bytes per pixel.
	mul.lo.s32 	%r24, %r23, %r11;
	add.s32 	%r25, %r5, %r24;
	cvt.s64.s32 	%rd2, %r25;
	mul.wide.s32 	%rd3, %r25, 8;
	add.u64 	%rd4, %rd1, %rd3;
	ld.global.v4.u16 	{%r26,%r27,%r28,%r29}, [%rd4+0];
	.loc	38	152	0
	// Widen the four f16 components to f32 in %f1..%f4.
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r28;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r29;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_48_106242;
$Lt_48_106498:
	// Full-float load: same pixel index, 16 bytes per pixel.
	mul.lo.s32 	%r30, %r23, %r11;
	add.s32 	%r31, %r5, %r30;
	cvt.s64.s32 	%rd5, %r31;
	mul.wide.s32 	%rd6, %r31, 16;
	add.u64 	%rd7, %rd1, %rd6;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd7+0];
$Lt_48_106242:
	// Working pixel: %f5, %f6, %f7 are the first three components. %f8 keeps
	// the loaded fourth component and %f9 is the fourth component to output.
	mov.f32 	%f5, %f1;
	mov.f32 	%f6, %f2;
	mov.f32 	%f7, %f3;
	mov.f32 	%f8, %f4;
	mov.f32 	%f9, %f8;
	// Dispatch on the filter type: the u32 at byte offset 0 of paraminFilter0.
	// Values 0..16 jump to their own case label, compared one by one in
	// ascending order; any other value goes straight to $Lt_48_5634 with the
	// pixel unchanged.
	.loc	38	54	0
	ld.param.u32 	%r32, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+0];
	mov.u32 	%r33, 0;
	setp.eq.s32 	%p3, %r32, %r33;
	@%p3 bra 	$Lt_48_5378;
	mov.u32 	%r34, 1;
	setp.eq.s32 	%p4, %r32, %r34;
	@%p4 bra 	$Lt_48_5890;
	mov.u32 	%r35, 2;
	setp.eq.s32 	%p5, %r32, %r35;
	@%p5 bra 	$Lt_48_6146;
	mov.u32 	%r36, 3;
	setp.eq.s32 	%p6, %r32, %r36;
	@%p6 bra 	$Lt_48_6402;
	mov.u32 	%r37, 4;
	setp.eq.s32 	%p7, %r32, %r37;
	@%p7 bra 	$Lt_48_6658;
	mov.u32 	%r38, 5;
	setp.eq.s32 	%p8, %r32, %r38;
	@%p8 bra 	$Lt_48_6914;
	mov.u32 	%r39, 6;
	setp.eq.s32 	%p9, %r32, %r39;
	@%p9 bra 	$Lt_48_7170;
	mov.u32 	%r40, 7;
	setp.eq.s32 	%p10, %r32, %r40;
	@%p10 bra 	$Lt_48_7426;
	mov.u32 	%r41, 8;
	setp.eq.s32 	%p11, %r32, %r41;
	@%p11 bra 	$Lt_48_7682;
	mov.u32 	%r42, 9;
	setp.eq.s32 	%p12, %r32, %r42;
	@%p12 bra 	$Lt_48_7938;
	mov.u32 	%r43, 10;
	setp.eq.s32 	%p13, %r32, %r43;
	@%p13 bra 	$Lt_48_8194;
	mov.u32 	%r44, 11;
	setp.eq.s32 	%p14, %r32, %r44;
	@%p14 bra 	$Lt_48_8450;
	mov.u32 	%r45, 12;
	setp.eq.s32 	%p15, %r32, %r45;
	@%p15 bra 	$Lt_48_8706;
	mov.u32 	%r46, 13;
	setp.eq.s32 	%p16, %r32, %r46;
	@%p16 bra 	$Lt_48_8962;
	mov.u32 	%r47, 14;
	setp.eq.s32 	%p17, %r32, %r47;
	@%p17 bra 	$Lt_48_9218;
	mov.u32 	%r48, 15;
	setp.eq.s32 	%p18, %r32, %r48;
	@%p18 bra 	$Lt_48_9474;
	mov.u32 	%r49, 16;
	setp.eq.s32 	%p19, %r32, %r49;
	@%p19 bra 	$Lt_48_9730;
	bra.uni 	$Lt_48_5634;
// Filter type 0: rewrites the fourth component (%f9) from the loaded one (%f8)
// and a level param[+4], controlled by three flags compared against 1.0:
//   %p20 = (param[+8]  == 1)   use the level alone, ignoring %f8
//   %p21 = (param[+12] == 1)   invert (1 - value)
//   %p22 = (param[+16] == 1)   also copy the result into %f5, %f6, %f7
$Lt_48_5378:
	.loc	38	57	0
	ld.param.f32 	%f10, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+8];
	mov.f32 	%f11, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p20, %f10, %f11;
	ld.param.f32 	%f12, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+12];
	mov.f32 	%f13, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p21, %f12, %f13;
	ld.param.f32 	%f14, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+4];
	ld.param.f32 	%f15, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+16];
	mov.f32 	%f16, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p22, %f15, %f16;
	@!%p22 bra 	$Lt_48_107010;
	.loc	21	53	0
	// %p22 set: v = %p20 ? level : saturate(%f8) * level, inverted if %p21;
	// v is written to all four output components.
	cvt.ftz.sat.f32.f32 	%f17, %f8;
	mov.f32 	%f18, %f14;
	mul.ftz.f32 	%f19, %f17, %f18;
	selp.f32 	%f20, %f18, %f19, %p20;
	mov.f32 	%f21, 0f3f800000;    	// 1
	sub.ftz.f32 	%f22, %f21, %f20;
	selp.f32 	%f23, %f22, %f20, %p21;
	mov.f32 	%f9, %f23;
	.loc	21	57	0
	mov.f32 	%f5, %f23;
	mov.f32 	%f6, %f23;
	mov.f32 	%f7, %f23;
	bra.uni 	$Lt_48_5634;
$Lt_48_107010:
	@!%p20 bra 	$Lt_48_107522;
	.loc	21	61	0
	// %p22 clear, %p20 set: %f9 = %p21 ? 1 - level : level
	mov.f32 	%f18, %f14;
	mov.f32 	%f24, 0f3f800000;    	// 1
	sub.ftz.f32 	%f25, %f24, %f18;
	selp.f32 	%f9, %f25, %f18, %p21;
	bra.uni 	$Lt_48_5634;
$Lt_48_107522:
	.loc	21	73	0
	// Both clear: %f9 = (%p21 ? 1 - saturate(%f8) : saturate(%f8)) * level.
	// The inversion here is applied before the multiply by the level.
	cvt.ftz.sat.f32.f32 	%f26, %f8;
	mov.f32 	%f27, 0f3f800000;    	// 1
	sub.ftz.f32 	%f28, %f27, %f26;
	selp.f32 	%f29, %f28, %f26, %p21;
	mul.ftz.f32 	%f9, %f29, %f14;
	bra.uni 	$Lt_48_5634;
// Filter type 1: replaces the three colour components with values derived
// from a single weighted sum of the pixel; the fourth component is passed
// through unchanged.
$Lt_48_5890:
	.loc	22	267	0
	// %f35 = first row of kRGB32f_To_601YPbPr dotted with (%f7, %f6, %f5)
	ld.const.f32 	%f30, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f30, %f6;
	ld.const.f32 	%f32, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f32, %f7, %f31;
	ld.const.f32 	%f34, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f34, %f5, %f33;
	// Each output is %f35 times the first element of one row of
	// k601YPbPr_To_RGB32f (offsets 12, 0 and 24); the other row elements are
	// not used, i.e. the remaining two transformed components are taken as 0.
	ld.const.f32 	%f36, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f37, %f36, %f35;
	.loc	22	268	0
	ld.const.f32 	%f38, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f39, %f38, %f35;
	.loc	23	44	0
	ld.const.f32 	%f40, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f40, %f35;
	mov.f32 	%f6, %f37;
	mov.f32 	%f7, %f39;
	mov.f32 	%f9, %f8;
	.loc	38	61	0
	bra.uni 	$Lt_48_5634;
// Filter type 2: per-channel remap around the pivot param[+12].
//   param[+4] != 0 : channel = param[+8] * (channel - pivot) + param[+20]
//   param[+4] == 0 : channel = (channel > pivot) ? param[+24] : param[+20]
// The fourth component is not touched.
$Lt_48_6146:
	.loc	38	63	0
	ld.param.f32 	%f41, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+20];
	ld.param.f32 	%f42, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+12];
	ld.param.f32 	%f43, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+4];
	mov.f32 	%f44, 0f00000000;    	// 0
	setp.neu.ftz.f32 	%p23, %f43, %f44;
	@!%p23 bra 	$Lt_48_108034;
	.loc	24	44	0
	// Linear form: scale the distance from the pivot, then add the offset.
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+8];
	sub.ftz.f32 	%f46, %f7, %f42;
	fma.rn.ftz.f32 	%f7, %f45, %f46, %f41;
	.loc	24	45	0
	sub.ftz.f32 	%f47, %f6, %f42;
	fma.rn.ftz.f32 	%f6, %f45, %f47, %f41;
	.loc	24	46	0
	sub.ftz.f32 	%f48, %f5, %f42;
	fma.rn.ftz.f32 	%f5, %f45, %f48, %f41;
	bra.uni 	$Lt_48_5634;
$Lt_48_108034:
	.loc	24	50	0
	// Two-level form: pick one of two constants by comparing with the pivot.
	ld.param.f32 	%f49, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+24];
	setp.gt.ftz.f32 	%p24, %f7, %f42;
	selp.f32 	%f7, %f49, %f41, %p24;
	.loc	24	51	0
	setp.gt.ftz.f32 	%p25, %f6, %f42;
	selp.f32 	%f6, %f49, %f41, %p25;
	.loc	24	52	0
	setp.gt.ftz.f32 	%p26, %f5, %f42;
	selp.f32 	%f5, %f49, %f41, %p26;
	bra.uni 	$Lt_48_5634;
// Filter type 3: independent per-channel gain.
//   %f5 *= param[+4],  %f6 *= param[+8],  %f7 *= param[+12]
// The fourth component is not touched.
$Lt_48_6402:
	.loc	25	47	0
	ld.param.f32 	%f50, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+4];
	mul.ftz.f32 	%f5, %f50, %f5;
	ld.param.f32 	%f51, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+8];
	mul.ftz.f32 	%f6, %f51, %f6;
	ld.param.f32 	%f52, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+12];
	mul.ftz.f32 	%f7, %f52, %f7;
	.loc	38	67	0
	bra.uni 	$Lt_48_5634;
// Filter type 4: measures how far the pixel is from a reference colour and,
// depending on that distance and a mode flag, replaces the colour with the
// weighted-sum (grey) form used by filter type 1.
//   distance = max over components of |clamp(component, 0, 1) - reference|
//   references: %f5 -> param[+12], %f6 -> param[+16], %f7 -> param[+20]
//   the fourth component is compared with itself, so it only contributes when
//   %f8 lies outside [0, 1]
$Lt_48_6658:
	.loc	26	48	0
	// %p27 = (param[+4] == 0) is the mode flag; param[+8] is the tolerance.
	ld.param.f32 	%f53, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+4];
	mov.f32 	%f54, 0f00000000;    	// 0
	setp.eq.ftz.f32 	%p27, %f53, %f54;
	ld.param.f32 	%f55, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+8];
	mov.f32 	%f56, 0f00000000;    	// 0
	max.ftz.f32 	%f57, %f5, %f56;
	mov.f32 	%f58, 0f3f800000;    	// 1
	min.ftz.f32 	%f59, %f57, %f58;
	ld.param.f32 	%f60, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+12];
	sub.ftz.f32 	%f61, %f59, %f60;
	abs.ftz.f32 	%f62, %f61;
	mov.f32 	%f63, 0f00000000;    	// 0
	max.ftz.f32 	%f64, %f6, %f63;
	mov.f32 	%f65, 0f3f800000;    	// 1
	min.ftz.f32 	%f66, %f64, %f65;
	ld.param.f32 	%f67, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+16];
	sub.ftz.f32 	%f68, %f66, %f67;
	abs.ftz.f32 	%f69, %f68;
	mov.f32 	%f70, 0f00000000;    	// 0
	max.ftz.f32 	%f71, %f8, %f70;
	mov.f32 	%f72, 0f3f800000;    	// 1
	min.ftz.f32 	%f73, %f71, %f72;
	sub.ftz.f32 	%f74, %f73, %f8;
	abs.ftz.f32 	%f75, %f74;
	mov.f32 	%f76, 0f00000000;    	// 0
	max.ftz.f32 	%f77, %f7, %f76;
	mov.f32 	%f78, 0f3f800000;    	// 1
	min.ftz.f32 	%f79, %f77, %f78;
	ld.param.f32 	%f80, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+20];
	sub.ftz.f32 	%f81, %f79, %f80;
	abs.ftz.f32 	%f82, %f81;
	max.ftz.f32 	%f83, %f75, %f82;
	max.ftz.f32 	%f84, %f69, %f83;
	max.ftz.f32 	%f85, %f62, %f84;
	// %p28 = (tolerance >= distance). The pixel is converted only when exactly
	// one of %p27 / %p28 holds (xor); otherwise it is left unchanged.
	setp.ge.ftz.f32 	%p28, %f55, %f85;
	xor.pred 	%p29, %p27, %p28;
	@!%p29 bra 	$Lt_48_5634;
	.loc	22	267	0
	// Same weighted-sum replacement as filter type 1; %f9 is left as is.
	ld.const.f32 	%f86, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f86, %f6;
	ld.const.f32 	%f87, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f87, %f7, %f31;
	ld.const.f32 	%f88, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f88, %f5, %f33;
	ld.const.f32 	%f89, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f90, %f89, %f35;
	.loc	22	268	0
	ld.const.f32 	%f91, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f92, %f91, %f35;
	.loc	23	44	0
	ld.const.f32 	%f93, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f93, %f35;
	mov.f32 	%f6, %f90;
	mov.f32 	%f7, %f92;
	bra.uni 	$Lt_48_5634;
// Filter type 5: if the pixel is within tolerance of a reference colour,
// replace it with a second colour (param[+24], param[+28], param[+32]),
// optionally scaled by the pixel's weighted sum.
// The distance is computed exactly as in filter type 4:
//   max over components of |clamp(component, 0, 1) - reference|, with
//   references param[+12], param[+16], param[+20] for %f5, %f6, %f7.
$Lt_48_6914:
	.loc	27	48	0
	ld.param.f32 	%f94, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+8];
	mov.f32 	%f95, 0f00000000;    	// 0
	max.ftz.f32 	%f96, %f5, %f95;
	mov.f32 	%f97, 0f3f800000;    	// 1
	min.ftz.f32 	%f98, %f96, %f97;
	ld.param.f32 	%f99, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+12];
	sub.ftz.f32 	%f100, %f98, %f99;
	abs.ftz.f32 	%f101, %f100;
	mov.f32 	%f102, 0f00000000;   	// 0
	max.ftz.f32 	%f103, %f6, %f102;
	mov.f32 	%f104, 0f3f800000;   	// 1
	min.ftz.f32 	%f105, %f103, %f104;
	ld.param.f32 	%f106, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+16];
	sub.ftz.f32 	%f107, %f105, %f106;
	abs.ftz.f32 	%f108, %f107;
	mov.f32 	%f109, 0f00000000;   	// 0
	max.ftz.f32 	%f110, %f8, %f109;
	mov.f32 	%f111, 0f3f800000;   	// 1
	min.ftz.f32 	%f112, %f110, %f111;
	sub.ftz.f32 	%f113, %f112, %f8;
	abs.ftz.f32 	%f114, %f113;
	mov.f32 	%f115, 0f00000000;   	// 0
	max.ftz.f32 	%f116, %f7, %f115;
	mov.f32 	%f117, 0f3f800000;   	// 1
	min.ftz.f32 	%f118, %f116, %f117;
	ld.param.f32 	%f119, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+20];
	sub.ftz.f32 	%f120, %f118, %f119;
	abs.ftz.f32 	%f121, %f120;
	max.ftz.f32 	%f122, %f114, %f121;
	max.ftz.f32 	%f123, %f108, %f122;
	max.ftz.f32 	%f124, %f101, %f123;
	// Pixels farther than the tolerance param[+8] are left unchanged.
	setp.ge.ftz.f32 	%p30, %f94, %f124;
	@!%p30 bra 	$Lt_48_5634;
	.loc	27	51	0
	// Default replacement: the three parameters taken verbatim.
	ld.param.f32 	%f125, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+24];
	mov.f32 	%f126, %f125;
	ld.param.f32 	%f127, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+28];
	mov.f32 	%f128, %f127;
	ld.param.f32 	%f129, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+32];
	mov.f32 	%f130, %f129;
	ld.param.f32 	%f131, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+4];
	mov.f32 	%f132, 0f00000000;   	// 0
	setp.eq.ftz.f32 	%p31, %f131, %f132;
	@!%p31 bra 	$Lt_48_109314;
	.loc	27	60	0
	// param[+4] == 0: scale the replacement colour by the weighted sum %f35 of
	// the original pixel (first row of kRGB32f_To_601YPbPr).
	ld.const.f32 	%f133, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f133, %f6;
	ld.const.f32 	%f134, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f134, %f7, %f31;
	ld.const.f32 	%f135, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f135, %f5, %f33;
	mul.ftz.f32 	%f126, %f35, %f125;
	.loc	27	61	0
	mul.ftz.f32 	%f128, %f35, %f127;
	.loc	27	62	0
	mul.ftz.f32 	%f130, %f35, %f129;
$Lt_48_109314:
	.loc	27	65	0
	mov.f32 	%f5, %f126;
	mov.f32 	%f6, %f128;
	mov.f32 	%f7, %f130;
	bra.uni 	$Lt_48_5634;
// Filter type 6: attenuates the fourth component near the image borders.
// With amount = param[+4] (%f18), x = %r5, y = %r11, w = inWidth (%r14) and
// h = inHeight (%r13), it takes
//   d = max( trunc(amount - x), trunc(amount - y),
//            trunc(amount - (h - y)), trunc(amount - (w - x)) )
// and, when d > 0, sets %f9 = %f8 * (amount - d) / amount.
// Pixels with d <= 0 are left unchanged.
$Lt_48_7170:
	.loc	28	47	0
	sub.s32 	%r50, %r13, %r11;
	sub.s32 	%r51, %r14, %r5;
	cvt.rn.f32.s32 	%f136, %r5;
	cvt.rn.f32.s32 	%f137, %r11;
	cvt.rn.f32.s32 	%f138, %r50;
	cvt.rn.f32.s32 	%f139, %r51;
	ld.param.f32 	%f18, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+4];
	sub.ftz.f32 	%f140, %f18, %f136;
	sub.ftz.f32 	%f141, %f18, %f137;
	sub.ftz.f32 	%f142, %f18, %f138;
	sub.ftz.f32 	%f143, %f18, %f139;
	// Truncate toward zero before taking the maximum over the four edges.
	cvt.rzi.ftz.s32.f32 	%r52, %f140;
	cvt.rzi.ftz.s32.f32 	%r53, %f141;
	cvt.rzi.ftz.s32.f32 	%r54, %f142;
	cvt.rzi.ftz.s32.f32 	%r55, %f143;
	max.s32 	%r56, %r52, %r53;
	max.s32 	%r57, %r55, %r56;
	max.s32 	%r58, %r54, %r57;
	mov.u32 	%r59, 0;
	setp.le.s32 	%p32, %r58, %r59;
	@%p32 bra 	$Lt_48_5634;
	.loc	28	51	0
	// d > 0 requires amount > 0 for in-bounds pixels, so the divide below is
	// not reached with amount == 0 given the bounds check in the prologue.
	cvt.rn.f32.s32 	%f144, %r58;
	sub.ftz.f32 	%f145, %f18, %f144;
	div.approx.ftz.f32 	%f146, %f145, %f18;
	mul.ftz.f32 	%f9, %f8, %f146;
	bra.uni 	$Lt_48_5634;
// Filter type 7: builds a 0..1 mask from the pixel's weighted sum L (%f35)
// and a [low, high] band with soft edges, then outputs it as a grey pixel.
//   low  = param[+8] (%f45),  high = param[+12] (%f148)
//   s    = param[+16] * (high - low)              (%f152)
//   outer edges: lo_out = clamp01(low - s)  (%f163), hi_out = clamp01(high + s) (%f161)
//   inner edges: lo_in  = clamp01(low + s)  (%f169), hi_in  = clamp01(high - s) (%f174)
//   mask = 0                       if L < lo_out or L >= hi_out
//        = 1                       if lo_in <= L < hi_in
//        = (L - lo_out) / (2 s)    if L < lo_in
//        = (hi_out - L) / (2 s)    otherwise
// param[+4] != 0 inverts the mask (1 - mask). The fourth component is untouched.
$Lt_48_7426:
	.loc	30	50	0
	ld.const.f32 	%f147, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f147, %f6;
	ld.param.f32 	%f148, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+12];
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+8];
	sub.ftz.f32 	%f149, %f148, %f45;
	ld.const.f32 	%f150, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f150, %f7, %f31;
	ld.param.f32 	%f151, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+16];
	mul.ftz.f32 	%f152, %f151, %f149;
	ld.const.f32 	%f153, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f153, %f5, %f33;
	add.ftz.f32 	%f154, %f152, %f148;
	sub.ftz.f32 	%f155, %f45, %f152;
	mov.f32 	%f156, 0f00000000;   	// 0
	max.ftz.f32 	%f157, %f154, %f156;
	mov.f32 	%f158, 0f00000000;   	// 0
	max.ftz.f32 	%f159, %f155, %f158;
	mov.f32 	%f160, 0f3f800000;   	// 1
	min.ftz.f32 	%f161, %f157, %f160;
	mov.f32 	%f162, 0f3f800000;   	// 1
	min.ftz.f32 	%f163, %f159, %f162;
	// Outside the outer edges (lo_out > L or hi_out <= L): mask = 0.
	set.gt.ftz.u32.f32 	%r60, %f163, %f35;
	neg.s32 	%r61, %r60;
	set.le.ftz.u32.f32 	%r62, %f161, %f35;
	neg.s32 	%r63, %r62;
	or.b32 	%r64, %r61, %r63;
	mov.u32 	%r65, 0;
	setp.eq.s32 	%p33, %r64, %r65;
	@%p33 bra 	$Lt_48_110594;
	mov.f32 	%f164, 0f00000000;   	// 0
	bra.uni 	$Lt_48_111362;
$Lt_48_110594:
	// Inside the inner edges (lo_in <= L and L < hi_in): mask = 1.
	add.ftz.f32 	%f165, %f152, %f45;
	mov.f32 	%f166, 0f00000000;   	// 0
	max.ftz.f32 	%f167, %f165, %f166;
	mov.f32 	%f168, 0f3f800000;   	// 1
	min.ftz.f32 	%f169, %f167, %f168;
	set.le.ftz.u32.f32 	%r66, %f169, %f35;
	neg.s32 	%r67, %r66;
	sub.ftz.f32 	%f170, %f148, %f152;
	mov.f32 	%f171, 0f00000000;   	// 0
	max.ftz.f32 	%f172, %f170, %f171;
	mov.f32 	%f173, 0f3f800000;   	// 1
	min.ftz.f32 	%f174, %f172, %f173;
	set.lt.ftz.u32.f32 	%r68, %f35, %f174;
	neg.s32 	%r69, %r68;
	and.b32 	%r70, %r67, %r69;
	mov.u32 	%r71, 0;
	setp.eq.s32 	%p34, %r70, %r71;
	@%p34 bra 	$Lt_48_111106;
	mov.f32 	%f164, 0f3f800000;   	// 1
	bra.uni 	$Lt_48_111362;
$Lt_48_111106:
	// In a soft edge: linear ramp over a width of 2 s (%f175).
	add.ftz.f32 	%f175, %f152, %f152;
	setp.gt.ftz.f32 	%p35, %f169, %f35;
	@!%p35 bra 	$Lt_48_111618;
	.loc	30	62	0
	sub.ftz.f32 	%f176, %f35, %f163;
	div.approx.ftz.f32 	%f164, %f176, %f175;
	bra.uni 	$Lt_48_111362;
$Lt_48_111618:
	.loc	30	66	0
	sub.ftz.f32 	%f177, %f161, %f35;
	div.approx.ftz.f32 	%f164, %f177, %f175;
$Lt_48_111362:
$Lt_48_110850:
$Lt_48_110338:
	.loc	30	69	0
	// Optional inversion, selected by param[+4] != 0.
	mov.f32 	%f178, 0f3f800000;   	// 1
	sub.ftz.f32 	%f179, %f178, %f164;
	ld.param.f32 	%f180, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+4];
	mov.f32 	%f181, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p36, %f180, %f181;
	selp.f32 	%f164, %f179, %f164, %p36;
	.loc	30	77	0
	// Output the mask as grey: each channel = mask * first element of one row
	// of k601YPbPr_To_RGB32f (offsets 24, 12, 0).
	ld.const.f32 	%f182, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f182, %f164;
	ld.const.f32 	%f183, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f183, %f164;
	ld.const.f32 	%f184, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f184, %f164;
	.loc	38	79	0
	bra.uni 	$Lt_48_5634;
// Filter type 8: levels-style adjustment of the first transformed component
// plus a rotate / blend / scale of the other two, limited to part of the image.
// Region gate on split = param[+44]:
//   split >= 0 : apply when float(x) < inWidth  * split
//   split <  0 : apply when float(y) < -(inHeight * split)
// Pixels outside the region go to $Lt_48_5634 unchanged.
$Lt_48_7682:
	.loc	38	80	0
	ld.param.f32 	%f185, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+44];
	mov.f32 	%f186, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p37, %f185, %f186;
	@!%p37 bra 	$L_48_101634;
	cvt.rn.f32.s32 	%f187, %r5;
	cvt.rn.f32.s32 	%f188, %r14;
	mul.ftz.f32 	%f189, %f188, %f185;
	setp.lt.ftz.f32 	%p38, %f187, %f189;
	@%p38 bra 	$L_48_101378;
$L_48_101634:
	mov.f32 	%f190, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p39, %f185, %f190;
	@!%p39 bra 	$Lt_48_5634;
	cvt.rn.f32.s32 	%f191, %r11;
	cvt.rn.f32.s32 	%f192, %r13;
	mul.ftz.f32 	%f193, %f192, %f185;
	neg.ftz.f32 	%f194, %f193;
	setp.lt.ftz.f32 	%p40, %f191, %f194;
	@!%p40 bra 	$Lt_48_5634;
$L_48_101378:
	.loc	31	47	0
	// L (%f35, working copy %f200) = first row of kRGB32f_To_601YPbPr dotted
	// with (%f7, %f6, %f5).
	ld.const.f32 	%f195, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f196, %f195, %f6;
	ld.const.f32 	%f197, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f198, %f197, %f7, %f196;
	ld.const.f32 	%f199, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f199, %f5, %f198;
	mov.f32 	%f200, %f35;
	// in_range  = param[+4]  - param[+8]    (%f202)
	// out_range = param[+12] - param[+16]   (%f205)
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+8];
	ld.param.f32 	%f201, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+4];
	sub.ftz.f32 	%f202, %f201, %f45;
	ld.param.f32 	%f203, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+16];
	ld.param.f32 	%f204, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+12];
	sub.ftz.f32 	%f205, %f204, %f203;
	// Pseudo-random dither is added to L unless both ranges are exactly 1.
	mov.f32 	%f206, 0f3f800000;   	// 1
	set.neu.ftz.u32.f32 	%r72, %f205, %f206;
	neg.s32 	%r73, %r72;
	mov.f32 	%f207, 0f3f800000;   	// 1
	set.neu.ftz.u32.f32 	%r74, %f202, %f207;
	neg.s32 	%r75, %r74;
	or.b32 	%r76, %r73, %r75;
	mov.u32 	%r77, 0;
	setp.eq.s32 	%p41, %r76, %r77;
	@%p41 bra 	$Lt_48_111874;
	.loc	20	143	0
	// Integer mixing of the pixel coordinates %r5 and %r11 into a hash %r114.
	// NOTE(review): looks like the inlined Mix3/Rand2D helpers declared at the
	// top of the file -- confirm against the CUDA source.
	mov.s32 	%r78, 1;
	sub.s32 	%r79, %r78, %r5;
	shr.u32 	%r80, %r11, 13;
	sub.u32 	%r81, %r5, %r11;
	sub.u32 	%r82, %r79, %r11;
	xor.b32 	%r83, %r82, %r80;
	shl.b32 	%r84, %r83, 8;
	sub.u32 	%r85, %r81, %r83;
	sub.u32 	%r86, %r11, %r83;
	xor.b32 	%r87, %r85, %r84;
	shr.u32 	%r88, %r87, 13;
	sub.u32 	%r89, %r86, %r87;
	sub.u32 	%r90, %r83, %r87;
	xor.b32 	%r91, %r89, %r88;
	shr.u32 	%r92, %r91, 12;
	sub.u32 	%r93, %r90, %r91;
	xor.b32 	%r94, %r93, %r92;
	sub.u32 	%r95, %r87, %r91;
	sub.u32 	%r96, %r95, %r94;
	shl.b32 	%r97, %r94, 16;
	xor.b32 	%r98, %r96, %r97;
	.loc	20	144	0
	sub.u32 	%r99, %r91, %r94;
	sub.u32 	%r100, %r99, %r98;
	shr.u32 	%r101, %r98, 5;
	xor.b32 	%r102, %r100, %r101;
	.loc	20	145	0
	sub.u32 	%r103, %r94, %r98;
	sub.u32 	%r104, %r103, %r102;
	shr.u32 	%r105, %r102, 3;
	xor.b32 	%r106, %r104, %r105;
	.loc	20	146	0
	sub.u32 	%r107, %r98, %r102;
	sub.u32 	%r108, %r107, %r106;
	shl.b32 	%r109, %r106, 10;
	xor.b32 	%r110, %r108, %r109;
	.loc	20	147	0
	sub.u32 	%r111, %r102, %r106;
	sub.u32 	%r112, %r111, %r110;
	shr.u32 	%r113, %r110, 15;
	xor.b32 	%r114, %r112, %r113;
	.loc	31	57	0
	// Hash -> integer in 0..32767 (bits 16..23 of two multiply-add results,
	// combined as (first << 7) ^ second) -> value / 16383.5 - 1, roughly in
	// [-1, 1] -> scaled by 0.00254902 and added to L.
	mov.f32 	%f208, 0f3b270d73;   	// 0.00254902
	mul.lo.u32 	%r115, %r114, 1103515245;
	add.u32 	%r116, %r115, 12345;
	shr.u32 	%r117, %r116, 16;
	and.b32 	%r118, %r117, 255;
	shl.b32 	%r119, %r118, 7;
	mul.lo.u32 	%r120, %r114, -1029531031;
	sub.u32 	%r121, %r120, 740551042;
	shr.u32 	%r122, %r121, 16;
	and.b32 	%r123, %r122, 255;
	xor.b32 	%r124, %r119, %r123;
	cvt.rn.f32.s32 	%f209, %r124;
	mov.f32 	%f210, 0f467ffe00;   	// 16383.5
	div.approx.ftz.f32 	%f211, %f209, %f210;
	mov.f32 	%f212, 0fbf800000;   	// -1
	add.ftz.f32 	%f213, %f211, %f212;
	fma.rn.ftz.f32 	%f200, %f208, %f213, %f35;
$Lt_48_111874:
	// Remap of L, with t = (L - param[+8]) / in_range and g = param[+20]:
	//   g == 1         : L' = out_range * t + param[+16]
	//   g != 1, t >= 0 : L' = out_range * 2^(g * log2(t)) + param[+16]   (t^g)
	//   g != 1, L < param[+8] : L' = param[+16]
	sub.ftz.f32 	%f214, %f200, %f45;
	ld.param.f32 	%f215, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+20];
	mov.f32 	%f216, 0f3f800000;   	// 1
	setp.neu.ftz.f32 	%p42, %f215, %f216;
	@!%p42 bra 	$Lt_48_112642;
	mov.f32 	%f217, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p43, %f214, %f217;
	@!%p43 bra 	$Lt_48_113154;
	.loc	31	66	0
	mov.f32 	%f200, %f203;
	bra.uni 	$Lt_48_112386;
$Lt_48_113154:
	.loc	31	70	0
	rcp.approx.ftz.f32 	%f218, %f202;
	mul.ftz.f32 	%f219, %f214, %f218;
	lg2.approx.ftz.f32 	%f220, %f219;
	mul.ftz.f32 	%f221, %f215, %f220;
	ex2.approx.ftz.f32 	%f222, %f221;
	fma.rn.ftz.f32 	%f200, %f205, %f222, %f203;
	bra.uni 	$Lt_48_112386;
$Lt_48_112642:
	.loc	31	77	0
	rcp.approx.ftz.f32 	%f223, %f202;
	mul.ftz.f32 	%f224, %f214, %f223;
	fma.rn.ftz.f32 	%f200, %f205, %f224, %f203;
$Lt_48_112386:
	.loc	22	267	0
	// Remaining two transformed components, from the ORIGINAL pixel:
	//   c1 (%f239) = row 12/16/20 of kRGB32f_To_601YPbPr . (%f7, %f6, %f5)
	//   c2 (%f237) = row 24/28/32 of kRGB32f_To_601YPbPr . (%f7, %f6, %f5)
	// rotated by the angle param[+28] (radians, as passed to sin/cos):
	//   r1 (%f243) = c1 * cos - c2 * sin
	//   r2 (%f244) = c1 * sin + c2 * cos
	ld.const.f32 	%f225, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f226, %f225, %f6;
	ld.const.f32 	%f227, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f228, %f227, %f6;
	ld.param.f32 	%f229, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+28];
	sin.approx.ftz.f32 	%f230, %f229;
	cos.approx.ftz.f32 	%f231, %f229;
	ld.const.f32 	%f232, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f233, %f232, %f7, %f226;
	ld.const.f32 	%f234, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f235, %f234, %f7, %f228;
	ld.const.f32 	%f236, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f237, %f236, %f5, %f233;
	ld.const.f32 	%f238, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f239, %f238, %f5, %f235;
	mul.ftz.f32 	%f240, %f230, %f237;
	mul.ftz.f32 	%f241, %f231, %f237;
	mul.ftz.f32 	%f242, %f239, %f231;
	sub.ftz.f32 	%f243, %f242, %f240;
	fma.rn.ftz.f32 	%f244, %f239, %f230, %f241;
	// Blend each rotated component toward a target (param[+32], param[+36]) by
	// the amount param[+40], then scale both by param[+24].
	ld.param.f32 	%f245, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+32];
	sub.ftz.f32 	%f246, %f245, %f243;
	ld.param.f32 	%f247, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+36];
	sub.ftz.f32 	%f248, %f247, %f244;
	ld.param.f32 	%f249, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+40];
	fma.rn.ftz.f32 	%f250, %f249, %f246, %f243;
	fma.rn.ftz.f32 	%f251, %f249, %f248, %f244;
	ld.param.f32 	%f252, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+24];
	mul.ftz.f32 	%f253, %f250, %f252;
	mul.ftz.f32 	%f254, %f251, %f252;
	// Back-transform (L', %f253, %f254) with k601YPbPr_To_RGB32f:
	//   row 12/16/20 -> %f260 (becomes %f6)
	ld.const.f32 	%f255, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f256, %f255, %f253;
	ld.const.f32 	%f257, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f258, %f257, %f200, %f256;
	ld.const.f32 	%f259, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f260, %f259, %f254, %f258;
	.loc	22	268	0
	//   row 0/4/8 -> %f266 (becomes %f7)
	ld.const.f32 	%f261, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f262, %f261, %f253;
	ld.const.f32 	%f263, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f264, %f263, %f200, %f262;
	ld.const.f32 	%f265, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f266, %f265, %f254, %f264;
	.loc	31	92	0
	//   row 24/28/32 -> %f5
	ld.const.f32 	%f267, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f268, %f267, %f253;
	ld.const.f32 	%f269, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f270, %f269, %f200, %f268;
	ld.const.f32 	%f271, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f271, %f254, %f270;
	mov.f32 	%f6, %f260;
	mov.f32 	%f7, %f266;
	bra.uni 	$Lt_48_5634;
// Filter type 9: grey-only variant of filter type 8. It remaps the weighted
// sum L of the pixel and outputs it as grey; there is no dither and no
// handling of the other two transformed components.
// Region gate on split = param[+44]:
//   split >= 0 : apply when float(x) < inWidth  * split
//   split <  0 : apply when float(y) < -(inHeight * split)
$Lt_48_7938:
	.loc	38	83	0
	ld.param.f32 	%f272, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+44];
	mov.f32 	%f273, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p44, %f272, %f273;
	@!%p44 bra 	$L_48_102402;
	cvt.rn.f32.s32 	%f274, %r5;
	cvt.rn.f32.s32 	%f275, %r14;
	mul.ftz.f32 	%f276, %f275, %f272;
	setp.lt.ftz.f32 	%p45, %f274, %f276;
	@%p45 bra 	$L_48_102146;
$L_48_102402:
	mov.f32 	%f277, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p46, %f272, %f277;
	@!%p46 bra 	$Lt_48_5634;
	cvt.rn.f32.s32 	%f278, %r11;
	cvt.rn.f32.s32 	%f279, %r13;
	mul.ftz.f32 	%f280, %f279, %f272;
	neg.ftz.f32 	%f281, %f280;
	setp.lt.ftz.f32 	%p47, %f278, %f281;
	@!%p47 bra 	$Lt_48_5634;
$L_48_102146:
	.loc	31	110	0
	// in_range  = param[+4]  - param[+8]    (%f284)
	// out_range = param[+12] - param[+16]   (%f287)
	// L (%f35)  = first row of kRGB32f_To_601YPbPr . (%f7, %f6, %f5)
	ld.const.f32 	%f282, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f282, %f6;
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+8];
	ld.param.f32 	%f283, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+4];
	sub.ftz.f32 	%f284, %f283, %f45;
	ld.param.f32 	%f285, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+16];
	ld.param.f32 	%f286, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+12];
	sub.ftz.f32 	%f287, %f286, %f285;
	ld.const.f32 	%f288, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f288, %f7, %f31;
	ld.const.f32 	%f289, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f289, %f5, %f33;
	sub.ftz.f32 	%f290, %f35, %f45;
	ld.param.f32 	%f291, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+20];
	mov.f32 	%f292, 0f3f800000;   	// 1
	setp.neu.ftz.f32 	%p48, %f291, %f292;
	@!%p48 bra 	$Lt_48_113666;
	.loc	42	523	0
	// g = param[+20] != 1:
	//   L' = out_range * (max(L - param[+8], 0) / in_range)^g + param[+16]
	// The power is computed as 2^(g * log2(.)).
	mov.f32 	%f293, 0f00000000;   	// 0
	max.ftz.f32 	%f294, %f290, %f293;
	div.approx.ftz.f32 	%f295, %f294, %f284;
	lg2.approx.ftz.f32 	%f296, %f295;
	mul.ftz.f32 	%f297, %f291, %f296;
	ex2.approx.ftz.f32 	%f298, %f297;
	.loc	31	120	0
	fma.rn.ftz.f32 	%f299, %f287, %f298, %f285;
	bra.uni 	$Lt_48_113410;
$Lt_48_113666:
	.loc	31	129	0
	// g == 1: plain linear remap, with no clamp of the numerator.
	div.approx.ftz.f32 	%f300, %f290, %f284;
	fma.rn.ftz.f32 	%f299, %f287, %f300, %f285;
$Lt_48_113410:
	.loc	31	135	0
	// Output L' as grey: each channel = L' * first element of one row of
	// k601YPbPr_To_RGB32f (offsets 24, 12, 0).
	ld.const.f32 	%f301, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f301, %f299;
	ld.const.f32 	%f302, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f302, %f299;
	ld.const.f32 	%f303, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f303, %f299;
	bra.uni 	$Lt_48_5634;
// Case $Lt_48_8194 (first filter, parameter block inFilter0).
// Reads a signed float t from inFilter0+44 and applies the case body only when
//   t >= 0 and float(%r5)  <  float(%r14) * t,   or
//   t <  0 and float(%r11) < -(float(%r13) * t).
// Otherwise (including t = NaN, where both compares are false) control goes
// straight to $Lt_48_5634 with %f5/%f6/%f7 untouched.
// NOTE(review): %r5/%r11 and %r14/%r13 are defined outside this view; they look
// like pixel coordinates and image extents -- confirm against the kernel prologue.
$Lt_48_8194:
	.loc	38	86	0
	ld.param.f32 	%f304, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+44];
	mov.f32 	%f305, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p49, %f304, %f305;
	@!%p49 bra 	$L_48_103170;
	// t >= 0: compare float(%r5) against float(%r14) * t
	cvt.rn.f32.s32 	%f306, %r5;
	cvt.rn.f32.s32 	%f307, %r14;
	mul.ftz.f32 	%f308, %f307, %f304;
	setp.lt.ftz.f32 	%p50, %f306, %f308;
	@%p50 bra 	$L_48_102914;
$L_48_103170:
	// t < 0: compare float(%r11) against -(float(%r13) * t)
	mov.f32 	%f309, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p51, %f304, %f309;
	@!%p51 bra 	$Lt_48_5634;
	cvt.rn.f32.s32 	%f310, %r11;
	cvt.rn.f32.s32 	%f311, %r13;
	mul.ftz.f32 	%f312, %f311, %f304;
	neg.ftz.f32 	%f313, %f312;
	setp.lt.ftz.f32 	%p52, %f310, %f313;
	@!%p52 bra 	$Lt_48_5634;
$L_48_102914:
	// Inside the selected region: overwrite the three channels with the constants
	// stored at byte offsets 24, 12 and 0 of k601YPbPr_To_RGB32f.
	.loc	31	160	0
	ld.const.f32 	%f5, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f6, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f7, [k601YPbPr_To_RGB32f+0];
	bra.uni 	$Lt_48_5634;
// Case $Lt_48_8450 (first filter): sign-preserving power function.
// With e = float at inFilter0+4, each input x in (%f5, %f6, %f7, %f8) is mapped to
//   x >= 0 :  ex2(e * lg2(x))
//   else   : -ex2(e * lg2(-x))      (also taken when x is NaN, since setp.ge is false)
// using the approximate lg2/ex2 instructions. Results are written to
// %f5, %f6, %f7 and %f9; note that the fourth channel is read from %f8 but
// written to %f9.
$Lt_48_8450:
	.loc	32	42	0
	ld.param.f32 	%f314, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+4];
	mov.f32 	%f315, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p53, %f5, %f315;
	@!%p53 bra 	$Lt_48_114178;
	.loc	22	292	0
	// %f5 >= 0
	mov.f32 	%f18, %f314;
	lg2.approx.ftz.f32 	%f316, %f5;
	mul.ftz.f32 	%f317, %f18, %f316;
	ex2.approx.ftz.f32 	%f318, %f317;
	bra.uni 	$Lt_48_113922;
$Lt_48_114178:
	// %f5 < 0 (or NaN): operate on the negated value and negate the result
	mov.f32 	%f18, %f314;
	neg.ftz.f32 	%f319, %f5;
	lg2.approx.ftz.f32 	%f320, %f319;
	mul.ftz.f32 	%f321, %f18, %f320;
	ex2.approx.ftz.f32 	%f322, %f321;
	neg.ftz.f32 	%f318, %f322;
$Lt_48_113922:
	// Same treatment for %f6 -> %f326
	mov.f32 	%f323, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p54, %f6, %f323;
	@!%p54 bra 	$Lt_48_114690;
	.loc	22	293	0
	lg2.approx.ftz.f32 	%f324, %f6;
	mul.ftz.f32 	%f325, %f18, %f324;
	ex2.approx.ftz.f32 	%f326, %f325;
	bra.uni 	$Lt_48_114434;
$Lt_48_114690:
	neg.ftz.f32 	%f327, %f6;
	lg2.approx.ftz.f32 	%f328, %f327;
	mul.ftz.f32 	%f329, %f18, %f328;
	ex2.approx.ftz.f32 	%f330, %f329;
	neg.ftz.f32 	%f326, %f330;
$Lt_48_114434:
	// Same treatment for %f7 -> %f334
	mov.f32 	%f331, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p55, %f7, %f331;
	@!%p55 bra 	$Lt_48_115202;
	.loc	22	294	0
	lg2.approx.ftz.f32 	%f332, %f7;
	mul.ftz.f32 	%f333, %f18, %f332;
	ex2.approx.ftz.f32 	%f334, %f333;
	bra.uni 	$Lt_48_114946;
$Lt_48_115202:
	neg.ftz.f32 	%f335, %f7;
	lg2.approx.ftz.f32 	%f336, %f335;
	mul.ftz.f32 	%f337, %f18, %f336;
	ex2.approx.ftz.f32 	%f338, %f337;
	neg.ftz.f32 	%f334, %f338;
$Lt_48_114946:
	// Same treatment for %f8 -> %f342
	mov.f32 	%f339, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p56, %f8, %f339;
	@!%p56 bra 	$Lt_48_115714;
	.loc	22	295	0
	lg2.approx.ftz.f32 	%f340, %f8;
	mul.ftz.f32 	%f341, %f18, %f340;
	ex2.approx.ftz.f32 	%f342, %f341;
	bra.uni 	$Lt_48_115458;
$Lt_48_115714:
	neg.ftz.f32 	%f343, %f8;
	lg2.approx.ftz.f32 	%f344, %f343;
	mul.ftz.f32 	%f345, %f18, %f344;
	ex2.approx.ftz.f32 	%f346, %f345;
	neg.ftz.f32 	%f342, %f346;
$Lt_48_115458:
	// Commit the four results to the working channel registers
	.loc	32	42	0
	mov.f32 	%f5, %f318;
	mov.f32 	%f6, %f326;
	mov.f32 	%f7, %f334;
	mov.f32 	%f9, %f342;
	.loc	38	91	0
	bra.uni 	$Lt_48_5634;
// Case $Lt_48_8706 (first filter): sets %r6 = (%r14 - %r5) - 1, then continues
// with the second filter at $Lt_48_5634. No float channel is modified here.
// NOTE(review): this has the shape of a coordinate reflection (extent - x - 1);
// %r14/%r5 are defined outside this view -- confirm.
$Lt_48_8706:
	.loc	33	41	0
	sub.s32 	%r125, %r14, %r5;
	sub.s32 	%r6, %r125, 1;
	.loc	38	94	0
	bra.uni 	$Lt_48_5634;
// Case $Lt_48_8962 (first filter): adds hash-derived pseudo-random offsets to
// %f7, %f6 and %f5.
//   a    = float at inFilter0+4   (%f18); each offset is a*u - a/2 with u = v/32767
//   seed = float at inFilter0+16  (%f349), truncated to u32
//   If the float at inFilter0+8 equals 1.0, three independent hashes are
//   evaluated (first mixing word w, w+1, w+2 with w = 3*%r5 - %r11 - seed) and
//   each channel gets its own offset. Otherwise a single hash (first mixing word
//   %r5 - %r11 - seed) is used for all three channels.
//   Each hash runs the integer mixing sequence tagged .loc 20 143..147, then
//   builds v = (((h*1103515245 + 12345) >> 16) & 255) << 7
//              xor (((h*-1029531031 - 740551042) >> 16) & 255),
//   which is at most 32767.
//   Finally, if the float at inFilter0+12 equals 1.0, %f5/%f6/%f7 are clamped
//   to [0,1] and %f9 is set to clamp(%f8, 0, 1); otherwise %f9 is not written
//   by this case.
// NOTE(review): %r5/%r11 are defined outside this view (look like pixel
// coordinates) -- confirm.
$Lt_48_8962:
	.loc	38	96	0
	ld.param.f32 	%f18, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+4];
	mov.f32 	%f347, 0f3f000000;   	// 0.5
	mul.ftz.f32 	%f348, %f18, %f347;
	ld.param.f32 	%f349, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+16];
	ld.param.f32 	%f350, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+8];
	mov.f32 	%f351, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p57, %f350, %f351;
	@!%p57 bra 	$Lt_48_116226;
	// --- Per-channel path, hash #1: first mixing word %r131 = 3*%r5 - %r11 - seed ---
	.loc	20	143	0
	mul.lo.s32 	%r126, %r5, 3;
	cvt.rzi.ftz.u32.f32 	%r127, %f349;
	sub.u32 	%r128, %r126, %r11;
	shr.u32 	%r129, %r127, 13;
	sub.u32 	%r130, %r11, %r127;
	sub.u32 	%r131, %r128, %r127;
	xor.b32 	%r132, %r131, %r129;
	shl.b32 	%r133, %r132, 8;
	sub.u32 	%r134, %r130, %r132;
	sub.u32 	%r135, %r127, %r132;
	xor.b32 	%r136, %r133, %r134;
	shr.u32 	%r137, %r136, 13;
	sub.u32 	%r138, %r135, %r136;
	sub.u32 	%r139, %r132, %r136;
	xor.b32 	%r140, %r137, %r138;
	shr.u32 	%r141, %r140, 12;
	sub.u32 	%r142, %r139, %r140;
	xor.b32 	%r143, %r141, %r142;
	shl.b32 	%r144, %r143, 16;
	sub.u32 	%r145, %r136, %r140;
	sub.u32 	%r146, %r145, %r143;
	xor.b32 	%r147, %r144, %r146;
	.loc	20	144	0
	sub.u32 	%r148, %r140, %r143;
	sub.u32 	%r149, %r148, %r147;
	shr.u32 	%r150, %r147, 5;
	xor.b32 	%r151, %r149, %r150;
	.loc	20	145	0
	sub.u32 	%r152, %r143, %r147;
	sub.u32 	%r153, %r152, %r151;
	shr.u32 	%r154, %r151, 3;
	xor.b32 	%r155, %r153, %r154;
	.loc	20	146	0
	sub.u32 	%r156, %r147, %r151;
	sub.u32 	%r157, %r156, %r155;
	shl.b32 	%r158, %r155, 10;
	xor.b32 	%r159, %r157, %r158;
	.loc	20	147	0
	sub.u32 	%r160, %r151, %r155;
	sub.u32 	%r161, %r160, %r159;
	shr.u32 	%r162, %r159, 15;
	xor.b32 	%r163, %r161, %r162;
	// Hash %r163 -> 15-bit value -> offset %f356 = a*(v/32767) - a/2
	.loc	34	48	0
	mul.lo.u32 	%r164, %r163, 1103515245;
	add.u32 	%r165, %r164, 12345;
	shr.u32 	%r166, %r165, 16;
	and.b32 	%r167, %r166, 255;
	shl.b32 	%r168, %r167, 7;
	mul.lo.u32 	%r169, %r163, -1029531031;
	sub.u32 	%r170, %r169, 740551042;
	shr.u32 	%r171, %r170, 16;
	and.b32 	%r172, %r171, 255;
	xor.b32 	%r173, %r168, %r172;
	cvt.rn.f32.s32 	%f352, %r173;
	mov.f32 	%f353, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f354, %f352, %f353;
	mul.ftz.f32 	%f355, %f18, %f354;
	sub.ftz.f32 	%f356, %f355, %f348;
	// --- Hash #2: same mixing with first word %r131 + 1 -> offset %f361 ---
	.loc	20	143	0
	add.u32 	%r174, %r131, 1;
	xor.b32 	%r175, %r174, %r129;
	shl.b32 	%r176, %r175, 8;
	sub.u32 	%r177, %r130, %r175;
	sub.u32 	%r178, %r127, %r175;
	xor.b32 	%r179, %r176, %r177;
	shr.u32 	%r180, %r179, 13;
	sub.u32 	%r181, %r178, %r179;
	sub.u32 	%r182, %r175, %r179;
	xor.b32 	%r183, %r180, %r181;
	shr.u32 	%r184, %r183, 12;
	sub.u32 	%r185, %r182, %r183;
	xor.b32 	%r186, %r184, %r185;
	sub.u32 	%r187, %r179, %r183;
	sub.u32 	%r188, %r187, %r186;
	shl.b32 	%r189, %r186, 16;
	xor.b32 	%r190, %r188, %r189;
	.loc	20	144	0
	sub.u32 	%r191, %r183, %r186;
	sub.u32 	%r192, %r191, %r190;
	shr.u32 	%r193, %r190, 5;
	xor.b32 	%r194, %r192, %r193;
	.loc	20	145	0
	sub.u32 	%r195, %r186, %r190;
	sub.u32 	%r196, %r195, %r194;
	shr.u32 	%r197, %r194, 3;
	xor.b32 	%r198, %r196, %r197;
	.loc	20	146	0
	sub.u32 	%r199, %r190, %r194;
	sub.u32 	%r200, %r199, %r198;
	shl.b32 	%r201, %r198, 10;
	xor.b32 	%r202, %r200, %r201;
	.loc	20	147	0
	sub.u32 	%r203, %r194, %r198;
	sub.u32 	%r204, %r203, %r202;
	shr.u32 	%r205, %r202, 15;
	xor.b32 	%r206, %r204, %r205;
	.loc	34	49	0
	mul.lo.u32 	%r207, %r206, 1103515245;
	add.u32 	%r208, %r207, 12345;
	shr.u32 	%r209, %r208, 16;
	and.b32 	%r210, %r209, 255;
	shl.b32 	%r211, %r210, 7;
	mul.lo.u32 	%r212, %r206, -1029531031;
	sub.u32 	%r213, %r212, 740551042;
	shr.u32 	%r214, %r213, 16;
	and.b32 	%r215, %r214, 255;
	xor.b32 	%r216, %r211, %r215;
	cvt.rn.f32.s32 	%f357, %r216;
	mov.f32 	%f358, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f359, %f357, %f358;
	mul.ftz.f32 	%f360, %f18, %f359;
	sub.ftz.f32 	%f361, %f360, %f348;
	// --- Hash #3: same mixing with first word %r131 + 2 -> offset %f366 ---
	.loc	20	143	0
	add.u32 	%r217, %r131, 2;
	xor.b32 	%r218, %r217, %r129;
	shl.b32 	%r219, %r218, 8;
	sub.u32 	%r220, %r130, %r218;
	sub.u32 	%r221, %r127, %r218;
	xor.b32 	%r222, %r219, %r220;
	shr.u32 	%r223, %r222, 13;
	sub.u32 	%r224, %r221, %r222;
	sub.u32 	%r225, %r218, %r222;
	xor.b32 	%r226, %r223, %r224;
	shr.u32 	%r227, %r226, 12;
	sub.u32 	%r228, %r225, %r226;
	xor.b32 	%r229, %r227, %r228;
	sub.u32 	%r230, %r222, %r226;
	sub.u32 	%r231, %r230, %r229;
	shl.b32 	%r232, %r229, 16;
	xor.b32 	%r233, %r231, %r232;
	.loc	20	144	0
	sub.u32 	%r234, %r226, %r229;
	sub.u32 	%r235, %r234, %r233;
	shr.u32 	%r236, %r233, 5;
	xor.b32 	%r237, %r235, %r236;
	.loc	20	145	0
	sub.u32 	%r238, %r229, %r233;
	sub.u32 	%r239, %r238, %r237;
	shr.u32 	%r240, %r237, 3;
	xor.b32 	%r241, %r239, %r240;
	.loc	20	146	0
	sub.u32 	%r242, %r233, %r237;
	sub.u32 	%r243, %r242, %r241;
	shl.b32 	%r244, %r241, 10;
	xor.b32 	%r245, %r243, %r244;
	.loc	20	147	0
	sub.u32 	%r246, %r237, %r241;
	sub.u32 	%r247, %r246, %r245;
	shr.u32 	%r248, %r245, 15;
	xor.b32 	%r249, %r247, %r248;
	.loc	34	50	0
	mul.lo.u32 	%r250, %r249, 1103515245;
	add.u32 	%r251, %r250, 12345;
	shr.u32 	%r252, %r251, 16;
	and.b32 	%r253, %r252, 255;
	shl.b32 	%r254, %r253, 7;
	mul.lo.u32 	%r255, %r249, -1029531031;
	sub.u32 	%r256, %r255, 740551042;
	shr.u32 	%r257, %r256, 16;
	and.b32 	%r258, %r257, 255;
	xor.b32 	%r259, %r254, %r258;
	cvt.rn.f32.s32 	%f362, %r259;
	mov.f32 	%f363, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f364, %f362, %f363;
	mul.ftz.f32 	%f365, %f18, %f364;
	sub.ftz.f32 	%f366, %f365, %f348;
	bra.uni 	$Lt_48_115970;
$Lt_48_116226:
	// --- Shared path: one hash with first mixing word (%r5 - %r11) - seed,
	//     the resulting offset %f371 is reused for all three channels ---
	.loc	20	143	0
	sub.u32 	%r81, %r5, %r11;
	cvt.rzi.ftz.u32.f32 	%r260, %f349;
	shr.u32 	%r261, %r260, 13;
	sub.u32 	%r262, %r81, %r260;
	sub.u32 	%r263, %r11, %r260;
	xor.b32 	%r264, %r262, %r261;
	shl.b32 	%r265, %r264, 8;
	sub.u32 	%r266, %r263, %r264;
	sub.u32 	%r267, %r260, %r264;
	xor.b32 	%r268, %r265, %r266;
	shr.u32 	%r269, %r268, 13;
	sub.u32 	%r270, %r267, %r268;
	sub.u32 	%r271, %r264, %r268;
	xor.b32 	%r272, %r269, %r270;
	shr.u32 	%r273, %r272, 12;
	sub.u32 	%r274, %r271, %r272;
	xor.b32 	%r275, %r273, %r274;
	shl.b32 	%r276, %r275, 16;
	sub.u32 	%r277, %r268, %r272;
	sub.u32 	%r278, %r277, %r275;
	xor.b32 	%r279, %r276, %r278;
	.loc	20	144	0
	sub.u32 	%r280, %r272, %r275;
	sub.u32 	%r281, %r280, %r279;
	shr.u32 	%r282, %r279, 5;
	xor.b32 	%r283, %r281, %r282;
	.loc	20	145	0
	sub.u32 	%r284, %r275, %r279;
	sub.u32 	%r285, %r284, %r283;
	shr.u32 	%r286, %r283, 3;
	xor.b32 	%r287, %r285, %r286;
	.loc	20	146	0
	sub.u32 	%r288, %r279, %r283;
	sub.u32 	%r289, %r288, %r287;
	shl.b32 	%r290, %r287, 10;
	xor.b32 	%r291, %r289, %r290;
	.loc	20	147	0
	sub.u32 	%r292, %r283, %r287;
	sub.u32 	%r293, %r292, %r291;
	shr.u32 	%r294, %r291, 15;
	xor.b32 	%r295, %r293, %r294;
	.loc	34	54	0
	mul.lo.u32 	%r296, %r295, 1103515245;
	mul.lo.u32 	%r297, %r295, -1029531031;
	add.u32 	%r298, %r296, 12345;
	sub.u32 	%r299, %r297, 740551042;
	shr.u32 	%r300, %r298, 16;
	shr.u32 	%r301, %r299, 16;
	and.b32 	%r302, %r300, 255;
	and.b32 	%r303, %r301, 255;
	shl.b32 	%r304, %r302, 7;
	xor.b32 	%r305, %r304, %r303;
	cvt.rn.f32.s32 	%f367, %r305;
	mov.f32 	%f368, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f369, %f367, %f368;
	mul.ftz.f32 	%f370, %f18, %f369;
	sub.ftz.f32 	%f371, %f370, %f348;
	mov.f32 	%f366, %f371;
	mov.f32 	%f361, %f371;
	mov.f32 	%f356, %f371;
$Lt_48_115970:
	// Apply the offsets: %f7 += %f356, %f6 += %f361, %f5 += %f366
	.loc	34	57	0
	add.ftz.f32 	%f7, %f356, %f7;
	.loc	34	58	0
	add.ftz.f32 	%f6, %f361, %f6;
	.loc	34	59	0
	add.ftz.f32 	%f5, %f5, %f366;
	// Optional clamp, enabled when the float at inFilter0+12 equals 1.0
	ld.param.f32 	%f372, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+12];
	mov.f32 	%f373, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p58, %f372, %f373;
	@!%p58 bra 	$Lt_48_5634;
	.loc	34	63	0
	mov.f32 	%f374, 0f00000000;   	// 0
	max.ftz.f32 	%f375, %f5, %f374;
	mov.f32 	%f376, 0f3f800000;   	// 1
	min.ftz.f32 	%f5, %f375, %f376;
	mov.f32 	%f377, 0f00000000;   	// 0
	max.ftz.f32 	%f378, %f6, %f377;
	mov.f32 	%f379, 0f3f800000;   	// 1
	min.ftz.f32 	%f6, %f378, %f379;
	mov.f32 	%f380, 0f00000000;   	// 0
	max.ftz.f32 	%f381, %f7, %f380;
	mov.f32 	%f382, 0f3f800000;   	// 1
	min.ftz.f32 	%f7, %f381, %f382;
	// Fourth channel: read from %f8, clamped result written to %f9
	mov.f32 	%f383, 0f00000000;   	// 0
	max.ftz.f32 	%f384, %f8, %f383;
	mov.f32 	%f385, 0f3f800000;   	// 1
	min.ftz.f32 	%f9, %f384, %f385;
	bra.uni 	$Lt_48_5634;
// Case $Lt_48_9218 (first filter): applied only when
//   float(%r5) < float(%r14) * (float at inFilter0+20).
// Steps:
//   1. (c0, c1, c2) = the 3x3 matrix kRGB32f_To_601YPbPr (rows at byte offsets
//      0, 12, 24) times the vector (%f7, %f6, %f5)  -> %f35, %f404, %f402.
//   2. c0' = c0 * p8 + p4                            -> %f407
//      c1' = c1 * p12 - c2 * p16                     -> %f413
//      c2' = c1 * p16 + c2 * p12                     -> %f414
//      where pN is the float at inFilter0+N.
//   3. (%f7, %f6, %f5) = k601YPbPr_To_RGB32f (rows at 0, 12, 24) times (c0', c1', c2').
// NOTE(review): step 2 has the form of a gain/offset on the first component and
// a 2-D rotation of the other two, which would hold if (p12, p16) are a
// cosine/sine pair -- confirm against the host-side parameter setup.
$Lt_48_9218:
	.loc	38	99	0
	cvt.rn.f32.s32 	%f386, %r5;
	cvt.rn.f32.s32 	%f387, %r14;
	ld.param.f32 	%f388, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+20];
	mul.ftz.f32 	%f389, %f387, %f388;
	setp.lt.ftz.f32 	%p59, %f386, %f389;
	@!%p59 bra 	$Lt_48_5634;
	// Step 1: forward matrix, three interleaved dot products
	.loc	22	267	0
	ld.const.f32 	%f390, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f390, %f6;
	ld.const.f32 	%f391, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f392, %f391, %f6;
	ld.const.f32 	%f393, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f394, %f393, %f6;
	ld.const.f32 	%f395, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f395, %f7, %f31;
	ld.const.f32 	%f396, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f397, %f396, %f7, %f392;
	ld.const.f32 	%f398, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f399, %f398, %f7, %f394;
	ld.const.f32 	%f400, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f400, %f5, %f33;
	ld.const.f32 	%f401, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f402, %f401, %f5, %f397;
	ld.const.f32 	%f403, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f404, %f403, %f5, %f399;
	// Step 2: adjust the three components with the filter parameters
	ld.param.f32 	%f405, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+4];
	ld.param.f32 	%f406, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+8];
	fma.rn.ftz.f32 	%f407, %f35, %f406, %f405;
	ld.param.f32 	%f408, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+16];
	mul.ftz.f32 	%f409, %f402, %f408;
	ld.param.f32 	%f410, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+12];
	mul.ftz.f32 	%f411, %f402, %f410;
	mul.ftz.f32 	%f412, %f404, %f410;
	sub.ftz.f32 	%f413, %f412, %f409;
	fma.rn.ftz.f32 	%f414, %f404, %f408, %f411;
	// Step 3: inverse matrix, row at offset 12 -> %f420 (becomes %f6)
	ld.const.f32 	%f415, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f416, %f415, %f413;
	ld.const.f32 	%f417, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f418, %f417, %f407, %f416;
	ld.const.f32 	%f419, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f420, %f419, %f414, %f418;
	// Row at offset 0 -> %f426 (becomes %f7)
	.loc	22	268	0
	ld.const.f32 	%f421, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f422, %f421, %f413;
	ld.const.f32 	%f423, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f424, %f423, %f407, %f422;
	ld.const.f32 	%f425, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f426, %f425, %f414, %f424;
	// Row at offset 24 -> written directly to %f5
	.loc	35	56	0
	ld.const.f32 	%f427, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f428, %f427, %f413;
	ld.const.f32 	%f429, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f430, %f429, %f407, %f428;
	ld.const.f32 	%f431, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f431, %f414, %f430;
	mov.f32 	%f6, %f420;
	mov.f32 	%f7, %f426;
	bra.uni 	$Lt_48_5634;
// Case $Lt_48_9474 (first filter): blends each channel toward a value taken
// from a two-point linear ramp indexed by y, where
//   y = kRGB32f_To_601YPbPr[+0]*%f7 + [+4]*%f6 + [+8]*%f5      (%f35).
// With pN = float at inFilter0+N and k = p28:
//   target5 = p4  + y * (p16 - p4)     ; %f5 = %f5 + k * (target5 - %f5)
//   target6 = p8  + y * (p20 - p8)     ; %f6 = %f6 + k * (target6 - %f6)
//   target7 = p12 + y * (p24 - p12)    ; %f7 = %f7 + k * (target7 - %f7)
// y is computed once from the incoming channel values, before any is overwritten.
$Lt_48_9474:
	.loc	36	46	0
	ld.const.f32 	%f432, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f432, %f6;
	ld.const.f32 	%f433, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f433, %f7, %f31;
	ld.const.f32 	%f434, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f434, %f5, %f33;
	// target for %f6
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+8];
	ld.param.f32 	%f435, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+20];
	sub.ftz.f32 	%f436, %f435, %f45;
	fma.rn.ftz.f32 	%f437, %f35, %f436, %f45;
	// target for %f7
	.loc	36	47	0
	ld.param.f32 	%f438, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+12];
	ld.param.f32 	%f439, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+24];
	sub.ftz.f32 	%f440, %f439, %f438;
	fma.rn.ftz.f32 	%f441, %f35, %f440, %f438;
	// blend factor k (%f442), target for %f5, then blend %f5
	.loc	36	49	0
	ld.param.f32 	%f442, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+28];
	ld.param.f32 	%f18, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+4];
	ld.param.f32 	%f443, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter0+16];
	sub.ftz.f32 	%f444, %f443, %f18;
	fma.rn.ftz.f32 	%f445, %f35, %f444, %f18;
	sub.ftz.f32 	%f446, %f445, %f5;
	fma.rn.ftz.f32 	%f5, %f442, %f446, %f5;
	.loc	36	50	0
	sub.ftz.f32 	%f447, %f437, %f6;
	fma.rn.ftz.f32 	%f6, %f442, %f447, %f6;
	.loc	36	51	0
	sub.ftz.f32 	%f448, %f441, %f7;
	fma.rn.ftz.f32 	%f7, %f442, %f448, %f7;
	.loc	38	103	0
	bra.uni 	$Lt_48_5634;
// Case $Lt_48_9730 (first filter): sets %r12 = (%r13 - %r11) - 1.
// There is no branch at the end: control falls through into the label that
// follows this case in the file.
// NOTE(review): same shape as the %r6 = %r14 - %r5 - 1 case, i.e. presumably a
// reflection along the other axis -- %r13/%r11 are defined outside this view.
$Lt_48_9730:
	.loc	37	41	0
	sub.s32 	%r306, %r13, %r11;
	sub.s32 	%r12, %r306, 1;
// $Lt_48_5634: join point of all first-filter cases and dispatch for the
// second filter. Loads the 32-bit selector at inFilter1+0 and compares it
// against 0..16 in order, branching to the matching case label. Any other
// value goes to $Lt_48_514.
// The case labels for selectors 9..16 and $Lt_48_514 lie outside this view.
$Lt_48_5634:
	.loc	38	54	0
	ld.param.u32 	%r307, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+0];
	mov.u32 	%r308, 0;
	setp.eq.s32 	%p60, %r307, %r308;
	@%p60 bra 	$Lt_48_258;
	mov.u32 	%r309, 1;
	setp.eq.s32 	%p61, %r307, %r309;
	@%p61 bra 	$Lt_48_770;
	mov.u32 	%r310, 2;
	setp.eq.s32 	%p62, %r307, %r310;
	@%p62 bra 	$Lt_48_1026;
	mov.u32 	%r311, 3;
	setp.eq.s32 	%p63, %r307, %r311;
	@%p63 bra 	$Lt_48_1282;
	mov.u32 	%r312, 4;
	setp.eq.s32 	%p64, %r307, %r312;
	@%p64 bra 	$Lt_48_1538;
	mov.u32 	%r313, 5;
	setp.eq.s32 	%p65, %r307, %r313;
	@%p65 bra 	$Lt_48_1794;
	mov.u32 	%r314, 6;
	setp.eq.s32 	%p66, %r307, %r314;
	@%p66 bra 	$Lt_48_2050;
	mov.u32 	%r315, 7;
	setp.eq.s32 	%p67, %r307, %r315;
	@%p67 bra 	$Lt_48_2306;
	mov.u32 	%r316, 8;
	setp.eq.s32 	%p68, %r307, %r316;
	@%p68 bra 	$Lt_48_2562;
	mov.u32 	%r317, 9;
	setp.eq.s32 	%p69, %r307, %r317;
	@%p69 bra 	$Lt_48_2818;
	mov.u32 	%r318, 10;
	setp.eq.s32 	%p70, %r307, %r318;
	@%p70 bra 	$Lt_48_3074;
	mov.u32 	%r319, 11;
	setp.eq.s32 	%p71, %r307, %r319;
	@%p71 bra 	$Lt_48_3330;
	mov.u32 	%r320, 12;
	setp.eq.s32 	%p72, %r307, %r320;
	@%p72 bra 	$Lt_48_3586;
	mov.u32 	%r321, 13;
	setp.eq.s32 	%p73, %r307, %r321;
	@%p73 bra 	$Lt_48_3842;
	mov.u32 	%r322, 14;
	setp.eq.s32 	%p74, %r307, %r322;
	@%p74 bra 	$Lt_48_4098;
	mov.u32 	%r323, 15;
	setp.eq.s32 	%p75, %r307, %r323;
	@%p75 bra 	$Lt_48_4354;
	mov.u32 	%r324, 16;
	setp.eq.s32 	%p76, %r307, %r324;
	@%p76 bra 	$Lt_48_4610;
	// No selector matched: skip the second filter body
	bra.uni 	$Lt_48_514;
// Second filter, selector 0. Works on %f9, first saturated to [0,1].
// With v = float at inFilter1+4 and three flags, each "on" when the
// corresponding float equals 1.0:
//   A = inFilter1+8  (%p77),  B = inFilter1+12 (%p78),  C = inFilter1+16 (%p79)
// the result is
//   C on         : t = A ? v : v * %f9 ;  %f9 = B ? 1 - t : t ;
//                  and %f5, %f6, %f7 are all set to the new %f9
//   C off, A on  : %f9 = B ? 1 - v : v
//   C off, A off : %f9 = (B ? 1 - %f9 : %f9) * v
$Lt_48_258:
	.loc	21	42	0
	cvt.ftz.sat.f32.f32 	%f9, %f9;
	ld.param.f32 	%f449, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+8];
	mov.f32 	%f450, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p77, %f449, %f450;
	ld.param.f32 	%f451, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+12];
	mov.f32 	%f452, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p78, %f451, %f452;
	ld.param.f32 	%f453, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+4];
	ld.param.f32 	%f454, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+16];
	mov.f32 	%f455, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p79, %f454, %f455;
	@!%p79 bra 	$Lt_48_117762;
	// C on: the result is also copied into %f5/%f6/%f7
	.loc	21	45	0
	mov.f32 	%f456, %f453;
	mul.ftz.f32 	%f457, %f456, %f9;
	selp.f32 	%f458, %f456, %f457, %p77;
	.loc	21	53	0
	mov.f32 	%f459, 0f3f800000;   	// 1
	sub.ftz.f32 	%f460, %f459, %f458;
	selp.f32 	%f9, %f460, %f458, %p78;
	.loc	21	57	0
	mov.f32 	%f5, %f9;
	mov.f32 	%f6, %f9;
	mov.f32 	%f7, %f9;
	bra.uni 	$Lt_48_514;
$Lt_48_117762:
	@!%p77 bra 	$Lt_48_118274;
	// C off, A on: %f9 is replaced by v (or 1 - v when B is on)
	.loc	21	61	0
	mov.f32 	%f456, %f453;
	mov.f32 	%f461, 0f3f800000;   	// 1
	sub.ftz.f32 	%f462, %f461, %f456;
	selp.f32 	%f9, %f462, %f456, %p78;
	bra.uni 	$Lt_48_514;
$Lt_48_118274:
	// C off, A off: optionally invert %f9, then scale by v
	.loc	21	69	0
	mov.f32 	%f463, 0f3f800000;   	// 1
	sub.ftz.f32 	%f464, %f463, %f9;
	selp.f32 	%f465, %f464, %f9, %p78;
	.loc	21	73	0
	mul.ftz.f32 	%f9, %f465, %f453;
	bra.uni 	$Lt_48_514;
// Second filter, selector 1.
//   y = kRGB32f_To_601YPbPr[+0]*%f7 + [+4]*%f6 + [+8]*%f5      (%f471)
//   %f7 = k601YPbPr_To_RGB32f[+0]  * y
//   %f6 = k601YPbPr_To_RGB32f[+12] * y
//   %f5 = k601YPbPr_To_RGB32f[+24] * y
// NOTE(review): going by the table names this is presumably a luma-only
// (grayscale) conversion -- the table contents are not visible here.
$Lt_48_770:
	.loc	22	267	0
	ld.const.f32 	%f466, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f466, %f6;
	ld.const.f32 	%f468, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f468, %f7, %f467;
	ld.const.f32 	%f470, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f470, %f5, %f469;
	ld.const.f32 	%f472, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f473, %f472, %f471;
	.loc	22	268	0
	ld.const.f32 	%f474, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f475, %f474, %f471;
	.loc	23	44	0
	ld.const.f32 	%f476, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f476, %f471;
	mov.f32 	%f6, %f473;
	mov.f32 	%f7, %f475;
	.loc	38	61	0
	bra.uni 	$Lt_48_514;
// Second filter, selector 2. With pN = float at inFilter1+N, for each channel
// c in (%f7, %f6, %f5):
//   p4 != 0 (or NaN, since the compare is setp.neu):  c = p8 * (c - p12) + p20
//   p4 == 0                                        :  c = (c > p12) ? p24 : p20
// i.e. a linear remap around p12, or a two-level threshold at p12.
$Lt_48_1026:
	.loc	38	63	0
	ld.param.f32 	%f477, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+20];
	ld.param.f32 	%f478, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+12];
	ld.param.f32 	%f479, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+4];
	mov.f32 	%f480, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p80, %f479, %f480;
	@!%p80 bra 	$Lt_48_118786;
	// Linear remap path
	.loc	24	44	0
	ld.param.f32 	%f481, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+8];
	sub.ftz.f32 	%f482, %f7, %f478;
	fma.rn.ftz.f32 	%f7, %f481, %f482, %f477;
	.loc	24	45	0
	sub.ftz.f32 	%f483, %f6, %f478;
	fma.rn.ftz.f32 	%f6, %f481, %f483, %f477;
	.loc	24	46	0
	sub.ftz.f32 	%f484, %f5, %f478;
	fma.rn.ftz.f32 	%f5, %f481, %f484, %f477;
	bra.uni 	$Lt_48_514;
$Lt_48_118786:
	// Threshold path: selp picks p24 when the channel is above p12, else p20
	.loc	24	50	0
	ld.param.f32 	%f485, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+24];
	setp.gt.ftz.f32 	%p81, %f7, %f478;
	selp.f32 	%f7, %f485, %f477, %p81;
	.loc	24	51	0
	setp.gt.ftz.f32 	%p82, %f6, %f478;
	selp.f32 	%f6, %f485, %f477, %p82;
	.loc	24	52	0
	setp.gt.ftz.f32 	%p83, %f5, %f478;
	selp.f32 	%f5, %f485, %f477, %p83;
	bra.uni 	$Lt_48_514;
// Second filter, selector 3: independent per-channel gain.
//   %f5 *= float at inFilter1+4
//   %f6 *= float at inFilter1+8
//   %f7 *= float at inFilter1+12
$Lt_48_1282:
	.loc	25	47	0
	ld.param.f32 	%f486, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+4];
	mul.ftz.f32 	%f5, %f486, %f5;
	ld.param.f32 	%f487, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+8];
	mul.ftz.f32 	%f6, %f487, %f6;
	ld.param.f32 	%f488, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+12];
	mul.ftz.f32 	%f7, %f488, %f7;
	.loc	38	67	0
	bra.uni 	$Lt_48_514;
// Second filter, selector 4. With pN = float at inFilter1+N and
// clamp01(x) = min(max(x, 0), 1):
//   d = max( |clamp01(%f5) - p12|, |clamp01(%f6) - p16|,
//            |clamp01(%f7) - p20|, |clamp01(%f9) - %f9| )
//   near = (p8 >= d)
// The pixel is rewritten only when (p4 == 0) XOR near is true. So p4 == 0
// rewrites pixels outside the tolerance p8, and p4 != 0 rewrites pixels
// within it. The rewrite is the same single-dot-product conversion as in
// the selector-1 case:
//   y = kRGB32f_To_601YPbPr[+0]*%f7 + [+4]*%f6 + [+8]*%f5
//   %f7, %f6, %f5 = k601YPbPr_To_RGB32f[+0], [+12], [+24] times y
$Lt_48_1538:
	.loc	26	48	0
	ld.param.f32 	%f489, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+4];
	mov.f32 	%f490, 0f00000000;   	// 0
	setp.eq.ftz.f32 	%p84, %f489, %f490;
	ld.param.f32 	%f491, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+8];
	// |clamp01(%f5) - p12|
	mov.f32 	%f492, 0f00000000;   	// 0
	max.ftz.f32 	%f493, %f5, %f492;
	mov.f32 	%f494, 0f3f800000;   	// 1
	min.ftz.f32 	%f495, %f493, %f494;
	ld.param.f32 	%f496, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+12];
	sub.ftz.f32 	%f497, %f495, %f496;
	abs.ftz.f32 	%f498, %f497;
	// |clamp01(%f6) - p16|
	mov.f32 	%f499, 0f00000000;   	// 0
	max.ftz.f32 	%f500, %f6, %f499;
	mov.f32 	%f501, 0f3f800000;   	// 1
	min.ftz.f32 	%f502, %f500, %f501;
	ld.param.f32 	%f503, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+16];
	sub.ftz.f32 	%f504, %f502, %f503;
	abs.ftz.f32 	%f505, %f504;
	// |clamp01(%f7) - p20|
	mov.f32 	%f506, 0f00000000;   	// 0
	max.ftz.f32 	%f507, %f7, %f506;
	mov.f32 	%f508, 0f3f800000;   	// 1
	min.ftz.f32 	%f509, %f507, %f508;
	ld.param.f32 	%f510, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+20];
	sub.ftz.f32 	%f511, %f509, %f510;
	abs.ftz.f32 	%f512, %f511;
	// |clamp01(%f9) - %f9|: non-zero only when %f9 lies outside [0,1]
	mov.f32 	%f513, 0f00000000;   	// 0
	max.ftz.f32 	%f514, %f9, %f513;
	mov.f32 	%f515, 0f3f800000;   	// 1
	min.ftz.f32 	%f516, %f514, %f515;
	sub.ftz.f32 	%f517, %f516, %f9;
	abs.ftz.f32 	%f518, %f517;
	// d = maximum of the four distances; compare against the tolerance p8
	max.ftz.f32 	%f519, %f512, %f518;
	max.ftz.f32 	%f520, %f505, %f519;
	max.ftz.f32 	%f521, %f498, %f520;
	setp.ge.ftz.f32 	%p85, %f491, %f521;
	xor.pred 	%p86, %p84, %p85;
	@!%p86 bra 	$Lt_48_514;
	.loc	22	267	0
	ld.const.f32 	%f522, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f522, %f6;
	ld.const.f32 	%f523, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f523, %f7, %f467;
	ld.const.f32 	%f524, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f524, %f5, %f469;
	ld.const.f32 	%f525, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f526, %f525, %f471;
	.loc	22	268	0
	ld.const.f32 	%f527, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f528, %f527, %f471;
	.loc	23	44	0
	ld.const.f32 	%f529, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f529, %f471;
	mov.f32 	%f6, %f526;
	mov.f32 	%f7, %f528;
	bra.uni 	$Lt_48_514;
// Second filter, selector 5. With pN = float at inFilter1+N and
// clamp01(x) = min(max(x, 0), 1):
//   d = max( |clamp01(%f5) - p12|, |clamp01(%f6) - p16|,
//            |clamp01(%f7) - p20|, |clamp01(%f9) - %f9| )
// If p8 >= d, the pixel is replaced by the triple (p24, p28, p32) written to
// (%f5, %f6, %f7). When p4 == 0 the triple is first multiplied by
//   y = kRGB32f_To_601YPbPr[+0]*%f7 + [+4]*%f6 + [+8]*%f5
// computed from the original channel values. Pixels with p8 < d are untouched.
$Lt_48_1794:
	.loc	27	48	0
	ld.param.f32 	%f530, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+8];
	// |clamp01(%f5) - p12|
	mov.f32 	%f531, 0f00000000;   	// 0
	max.ftz.f32 	%f532, %f5, %f531;
	mov.f32 	%f533, 0f3f800000;   	// 1
	min.ftz.f32 	%f534, %f532, %f533;
	ld.param.f32 	%f535, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+12];
	sub.ftz.f32 	%f536, %f534, %f535;
	abs.ftz.f32 	%f537, %f536;
	// |clamp01(%f6) - p16|
	mov.f32 	%f538, 0f00000000;   	// 0
	max.ftz.f32 	%f539, %f6, %f538;
	mov.f32 	%f540, 0f3f800000;   	// 1
	min.ftz.f32 	%f541, %f539, %f540;
	ld.param.f32 	%f542, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+16];
	sub.ftz.f32 	%f543, %f541, %f542;
	abs.ftz.f32 	%f544, %f543;
	// |clamp01(%f7) - p20|
	mov.f32 	%f545, 0f00000000;   	// 0
	max.ftz.f32 	%f546, %f7, %f545;
	mov.f32 	%f547, 0f3f800000;   	// 1
	min.ftz.f32 	%f548, %f546, %f547;
	ld.param.f32 	%f549, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+20];
	sub.ftz.f32 	%f550, %f548, %f549;
	abs.ftz.f32 	%f551, %f550;
	// |clamp01(%f9) - %f9|
	mov.f32 	%f552, 0f00000000;   	// 0
	max.ftz.f32 	%f553, %f9, %f552;
	mov.f32 	%f554, 0f3f800000;   	// 1
	min.ftz.f32 	%f555, %f553, %f554;
	sub.ftz.f32 	%f556, %f555, %f9;
	abs.ftz.f32 	%f557, %f556;
	// d = maximum of the four distances; skip the pixel unless p8 >= d
	max.ftz.f32 	%f558, %f551, %f557;
	max.ftz.f32 	%f559, %f544, %f558;
	max.ftz.f32 	%f560, %f537, %f559;
	setp.ge.ftz.f32 	%p87, %f530, %f560;
	@!%p87 bra 	$Lt_48_514;
	// Default replacement triple: (p24, p28, p32)
	.loc	27	51	0
	ld.param.f32 	%f561, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+24];
	mov.f32 	%f126, %f561;
	ld.param.f32 	%f562, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+28];
	mov.f32 	%f128, %f562;
	ld.param.f32 	%f563, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+32];
	mov.f32 	%f130, %f563;
	ld.param.f32 	%f564, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+4];
	mov.f32 	%f565, 0f00000000;   	// 0
	setp.eq.ftz.f32 	%p88, %f564, %f565;
	@!%p88 bra 	$Lt_48_120066;
	// p4 == 0: scale the replacement triple by y
	.loc	27	60	0
	ld.const.f32 	%f566, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f566, %f6;
	ld.const.f32 	%f567, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f567, %f7, %f467;
	ld.const.f32 	%f568, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f568, %f5, %f469;
	mul.ftz.f32 	%f126, %f471, %f561;
	.loc	27	61	0
	mul.ftz.f32 	%f128, %f471, %f562;
	.loc	27	62	0
	mul.ftz.f32 	%f130, %f471, %f563;
$Lt_48_120066:
	.loc	27	65	0
	mov.f32 	%f5, %f126;
	mov.f32 	%f6, %f128;
	mov.f32 	%f7, %f130;
	bra.uni 	$Lt_48_514;
// Second filter, selector 6: attenuates %f9 based on four integer distances.
// With w = float at inFilter1+4, the four values
//   %r6, %r12, %r13 - %r12, %r14 - %r6
// are each converted to float, subtracted from w and truncated toward zero.
// m is the largest of the four results. If m <= 0 nothing changes; otherwise
//   %f9 = %f9 * (w - float(m)) / w
// NOTE(review): this has the shape of an edge feather of width w, with
// %r6/%r12 as pixel coordinates and %r14/%r13 as extents; those registers are
// set outside this block -- confirm.
$Lt_48_2050:
	.loc	28	47	0
	sub.s32 	%r325, %r13, %r12;
	sub.s32 	%r326, %r14, %r6;
	cvt.rn.f32.s32 	%f569, %r6;
	cvt.rn.f32.s32 	%f570, %r12;
	cvt.rn.f32.s32 	%f571, %r325;
	cvt.rn.f32.s32 	%f572, %r326;
	ld.param.f32 	%f456, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+4];
	sub.ftz.f32 	%f573, %f456, %f569;
	sub.ftz.f32 	%f574, %f456, %f570;
	sub.ftz.f32 	%f575, %f456, %f571;
	sub.ftz.f32 	%f576, %f456, %f572;
	cvt.rzi.ftz.s32.f32 	%r327, %f573;
	cvt.rzi.ftz.s32.f32 	%r328, %f574;
	cvt.rzi.ftz.s32.f32 	%r329, %f575;
	cvt.rzi.ftz.s32.f32 	%r330, %f576;
	// m = max of the four truncated differences
	max.s32 	%r331, %r327, %r328;
	max.s32 	%r332, %r330, %r331;
	max.s32 	%r333, %r329, %r332;
	mov.u32 	%r334, 0;
	setp.le.s32 	%p89, %r333, %r334;
	@%p89 bra 	$Lt_48_514;
	.loc	28	51	0
	cvt.rn.f32.s32 	%f577, %r333;
	sub.ftz.f32 	%f578, %f456, %f577;
	div.approx.ftz.f32 	%f579, %f578, %f456;
	mul.ftz.f32 	%f9, %f9, %f579;
	bra.uni 	$Lt_48_514;
// Second filter, selector 7: builds a soft band mask from y and writes it to
// all three channels. With pN = float at inFilter1+N and
// clamp01(x) = min(max(x, 0), 1):
//   y       = kRGB32f_To_601YPbPr[+0]*%f7 + [+4]*%f6 + [+8]*%f5     (%f471)
//   s       = p16 * (p12 - p8)                                      (%f585)
//   outerLo = clamp01(p8  - s)   (%f596)   innerLo = clamp01(p8  + s)  (%f601)
//   innerHi = clamp01(p12 - s)   (%f606)   outerHi = clamp01(p12 + s)  (%f594)
//   m = 0                        if y < outerLo or y >= outerHi
//   m = 1                        if innerLo <= y < innerHi
//   m = (y - outerLo) / (2*s)    if y < innerLo      (rising edge)
//   m = (outerHi - y) / (2*s)    otherwise           (falling edge)
// If p4 != 0 (setp.neu, so NaN also counts) m is replaced by 1 - m. Finally
//   %f5, %f6, %f7 = k601YPbPr_To_RGB32f[+24], [+12], [+0] times m.
$Lt_48_2306:
	.loc	30	50	0
	ld.const.f32 	%f580, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f580, %f6;
	ld.param.f32 	%f581, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+12];
	ld.param.f32 	%f481, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+8];
	sub.ftz.f32 	%f582, %f581, %f481;
	ld.const.f32 	%f583, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f583, %f7, %f467;
	ld.param.f32 	%f584, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+16];
	mul.ftz.f32 	%f585, %f584, %f582;
	ld.const.f32 	%f586, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f586, %f5, %f469;
	// Outer band limits: %f594 = clamp01(p12 + s), %f596 = clamp01(p8 - s)
	add.ftz.f32 	%f587, %f585, %f581;
	sub.ftz.f32 	%f588, %f481, %f585;
	mov.f32 	%f589, 0f00000000;   	// 0
	max.ftz.f32 	%f590, %f587, %f589;
	mov.f32 	%f591, 0f00000000;   	// 0
	max.ftz.f32 	%f592, %f588, %f591;
	mov.f32 	%f593, 0f3f800000;   	// 1
	min.ftz.f32 	%f594, %f590, %f593;
	mov.f32 	%f595, 0f3f800000;   	// 1
	min.ftz.f32 	%f596, %f592, %f595;
	// Outside the outer band -> m = 0
	// (set.* yields all-ones when true; neg.s32 turns that into 1)
	set.gt.ftz.u32.f32 	%r335, %f596, %f471;
	neg.s32 	%r336, %r335;
	set.le.ftz.u32.f32 	%r337, %f594, %f471;
	neg.s32 	%r338, %r337;
	or.b32 	%r339, %r336, %r338;
	mov.u32 	%r340, 0;
	setp.eq.s32 	%p90, %r339, %r340;
	@%p90 bra 	$Lt_48_121346;
	mov.f32 	%f164, 0f00000000;   	// 0
	bra.uni 	$Lt_48_122114;
$Lt_48_121346:
	// Inner band limits: %f601 = clamp01(p8 + s), %f606 = clamp01(p12 - s)
	add.ftz.f32 	%f597, %f585, %f481;
	mov.f32 	%f598, 0f00000000;   	// 0
	max.ftz.f32 	%f599, %f597, %f598;
	mov.f32 	%f600, 0f3f800000;   	// 1
	min.ftz.f32 	%f601, %f599, %f600;
	set.le.ftz.u32.f32 	%r341, %f601, %f471;
	neg.s32 	%r342, %r341;
	sub.ftz.f32 	%f602, %f581, %f585;
	mov.f32 	%f603, 0f00000000;   	// 0
	max.ftz.f32 	%f604, %f602, %f603;
	mov.f32 	%f605, 0f3f800000;   	// 1
	min.ftz.f32 	%f606, %f604, %f605;
	set.lt.ftz.u32.f32 	%r343, %f471, %f606;
	neg.s32 	%r344, %r343;
	and.b32 	%r345, %r342, %r344;
	mov.u32 	%r346, 0;
	setp.eq.s32 	%p91, %r345, %r346;
	@%p91 bra 	$Lt_48_121858;
	// Inside the inner band -> m = 1
	mov.f32 	%f164, 0f3f800000;   	// 1
	bra.uni 	$Lt_48_122114;
$Lt_48_121858:
	// Transition zones: linear ramp over a width of 2*s
	add.ftz.f32 	%f607, %f585, %f585;
	setp.gt.ftz.f32 	%p92, %f601, %f471;
	@!%p92 bra 	$Lt_48_122370;
	.loc	30	62	0
	sub.ftz.f32 	%f608, %f471, %f596;
	div.approx.ftz.f32 	%f164, %f608, %f607;
	bra.uni 	$Lt_48_122114;
$Lt_48_122370:
	.loc	30	66	0
	sub.ftz.f32 	%f609, %f594, %f471;
	div.approx.ftz.f32 	%f164, %f609, %f607;
$Lt_48_122114:
$Lt_48_121602:
$Lt_48_121090:
	// Optional inversion of the mask, then broadcast to the three channels
	.loc	30	69	0
	mov.f32 	%f610, 0f3f800000;   	// 1
	sub.ftz.f32 	%f611, %f610, %f164;
	ld.param.f32 	%f612, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+4];
	mov.f32 	%f613, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p93, %f612, %f613;
	selp.f32 	%f164, %f611, %f164, %p93;
	.loc	30	77	0
	ld.const.f32 	%f614, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f614, %f164;
	ld.const.f32 	%f615, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f615, %f164;
	ld.const.f32 	%f616, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f616, %f164;
	.loc	38	79	0
	bra.uni 	$Lt_48_514;
$Lt_48_2562:
	.loc	38	80	0
	ld.param.f32 	%f617, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+44];
	mov.f32 	%f618, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p94, %f617, %f618;
	@!%p94 bra 	$L_48_103938;
	cvt.rn.f32.s32 	%f619, %r6;
	cvt.rn.f32.s32 	%f620, %r14;
	mul.ftz.f32 	%f621, %f620, %f617;
	setp.lt.ftz.f32 	%p95, %f619, %f621;
	@%p95 bra 	$L_48_103682;
$L_48_103938:
	mov.f32 	%f622, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p96, %f617, %f622;
	@!%p96 bra 	$Lt_48_514;
	cvt.rn.f32.s32 	%f623, %r12;
	cvt.rn.f32.s32 	%f624, %r13;
	mul.ftz.f32 	%f625, %f624, %f617;
	neg.ftz.f32 	%f626, %f625;
	setp.lt.ftz.f32 	%p97, %f623, %f626;
	@!%p97 bra 	$Lt_48_514;
$L_48_103682:
	.loc	31	47	0
	ld.const.f32 	%f627, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f628, %f627, %f6;
	ld.const.f32 	%f629, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f630, %f629, %f7, %f628;
	ld.const.f32 	%f631, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f631, %f5, %f630;
	mov.f32 	%f200, %f471;
	ld.param.f32 	%f481, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+8];
	ld.param.f32 	%f632, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+4];
	sub.ftz.f32 	%f633, %f632, %f481;
	ld.param.f32 	%f634, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+16];
	ld.param.f32 	%f635, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+12];
	sub.ftz.f32 	%f636, %f635, %f634;
	mov.f32 	%f637, 0f3f800000;   	// 1
	set.neu.ftz.u32.f32 	%r347, %f636, %f637;
	neg.s32 	%r348, %r347;
	mov.f32 	%f638, 0f3f800000;   	// 1
	set.neu.ftz.u32.f32 	%r349, %f633, %f638;
	neg.s32 	%r350, %r349;
	or.b32 	%r351, %r348, %r350;
	mov.u32 	%r352, 0;
	setp.eq.s32 	%p98, %r351, %r352;
	@%p98 bra 	$Lt_48_122626;
	.loc	20	143	0
	mov.s32 	%r353, 1;
	sub.s32 	%r354, %r353, %r5;
	shr.u32 	%r355, %r11, 13;
	sub.u32 	%r81, %r5, %r11;
	sub.u32 	%r356, %r354, %r11;
	xor.b32 	%r357, %r356, %r355;
	shl.b32 	%r358, %r357, 8;
	sub.u32 	%r359, %r81, %r357;
	sub.u32 	%r360, %r11, %r357;
	xor.b32 	%r361, %r359, %r358;
	shr.u32 	%r362, %r361, 13;
	sub.u32 	%r363, %r360, %r361;
	sub.u32 	%r364, %r357, %r361;
	xor.b32 	%r365, %r363, %r362;
	shr.u32 	%r366, %r365, 12;
	sub.u32 	%r367, %r364, %r365;
	xor.b32 	%r368, %r367, %r366;
	sub.u32 	%r369, %r361, %r365;
	sub.u32 	%r370, %r369, %r368;
	shl.b32 	%r371, %r368, 16;
	xor.b32 	%r372, %r370, %r371;
	.loc	20	144	0
	sub.u32 	%r373, %r365, %r368;
	sub.u32 	%r374, %r373, %r372;
	shr.u32 	%r375, %r372, 5;
	xor.b32 	%r376, %r374, %r375;
	.loc	20	145	0
	sub.u32 	%r377, %r368, %r372;
	sub.u32 	%r378, %r377, %r376;
	shr.u32 	%r379, %r376, 3;
	xor.b32 	%r380, %r378, %r379;
	.loc	20	146	0
	sub.u32 	%r381, %r372, %r376;
	sub.u32 	%r382, %r381, %r380;
	shl.b32 	%r383, %r380, 10;
	xor.b32 	%r384, %r382, %r383;
	.loc	20	147	0
	sub.u32 	%r385, %r376, %r380;
	sub.u32 	%r386, %r385, %r384;
	shr.u32 	%r387, %r384, 15;
	xor.b32 	%r388, %r386, %r387;
	.loc	31	57	0
	mov.f32 	%f639, 0f3b270d73;   	// 0.00254902
	mul.lo.u32 	%r389, %r388, 1103515245;
	add.u32 	%r390, %r389, 12345;
	shr.u32 	%r391, %r390, 16;
	and.b32 	%r392, %r391, 255;
	shl.b32 	%r393, %r392, 7;
	mul.lo.u32 	%r394, %r388, -1029531031;
	sub.u32 	%r395, %r394, 740551042;
	shr.u32 	%r396, %r395, 16;
	and.b32 	%r397, %r396, 255;
	xor.b32 	%r398, %r393, %r397;
	cvt.rn.f32.s32 	%f640, %r398;
	mov.f32 	%f641, 0f467ffe00;   	// 16383.5
	div.approx.ftz.f32 	%f642, %f640, %f641;
	mov.f32 	%f643, 0fbf800000;   	// -1
	add.ftz.f32 	%f644, %f642, %f643;
	fma.rn.ftz.f32 	%f200, %f639, %f644, %f471;
$Lt_48_122626:
	sub.ftz.f32 	%f645, %f200, %f481;
	ld.param.f32 	%f646, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+20];
	mov.f32 	%f647, 0f3f800000;   	// 1
	setp.neu.ftz.f32 	%p99, %f646, %f647;
	@!%p99 bra 	$Lt_48_123394;
	mov.f32 	%f648, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p100, %f645, %f648;
	@!%p100 bra 	$Lt_48_123906;
	.loc	31	66	0
	mov.f32 	%f200, %f634;
	bra.uni 	$Lt_48_123138;
$Lt_48_123906:
	.loc	31	70	0
	rcp.approx.ftz.f32 	%f649, %f633;
	mul.ftz.f32 	%f650, %f645, %f649;
	lg2.approx.ftz.f32 	%f651, %f650;
	mul.ftz.f32 	%f652, %f646, %f651;
	ex2.approx.ftz.f32 	%f653, %f652;
	fma.rn.ftz.f32 	%f200, %f636, %f653, %f634;
	bra.uni 	$Lt_48_123138;
$Lt_48_123394:
	.loc	31	77	0
	rcp.approx.ftz.f32 	%f654, %f633;
	mul.ftz.f32 	%f655, %f645, %f654;
	fma.rn.ftz.f32 	%f200, %f636, %f655, %f634;
$Lt_48_123138:
	.loc	22	267	0
	ld.param.f32 	%f656, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+28];
	sin.approx.ftz.f32 	%f657, %f656;
	ld.const.f32 	%f658, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f659, %f658, %f6;
	ld.const.f32 	%f660, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f661, %f660, %f6;
	cos.approx.ftz.f32 	%f662, %f656;
	ld.const.f32 	%f663, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f664, %f663, %f7, %f659;
	ld.const.f32 	%f665, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f666, %f665, %f7, %f661;
	ld.const.f32 	%f667, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f668, %f667, %f5, %f664;
	ld.const.f32 	%f669, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f670, %f669, %f5, %f666;
	mul.ftz.f32 	%f671, %f657, %f668;
	mul.ftz.f32 	%f672, %f662, %f668;
	mul.ftz.f32 	%f673, %f670, %f662;
	sub.ftz.f32 	%f674, %f673, %f671;
	fma.rn.ftz.f32 	%f675, %f670, %f657, %f672;
	ld.param.f32 	%f676, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+32];
	sub.ftz.f32 	%f677, %f676, %f674;
	ld.param.f32 	%f678, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+36];
	sub.ftz.f32 	%f679, %f678, %f675;
	ld.param.f32 	%f680, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+40];
	fma.rn.ftz.f32 	%f681, %f680, %f677, %f674;
	fma.rn.ftz.f32 	%f682, %f680, %f679, %f675;
	ld.param.f32 	%f683, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+24];
	mul.ftz.f32 	%f684, %f681, %f683;
	mul.ftz.f32 	%f685, %f682, %f683;
	ld.const.f32 	%f686, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f687, %f686, %f684;
	ld.const.f32 	%f688, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f689, %f688, %f200, %f687;
	ld.const.f32 	%f690, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f691, %f690, %f685, %f689;
	.loc	22	268	0
	ld.const.f32 	%f692, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f693, %f692, %f684;
	ld.const.f32 	%f694, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f695, %f694, %f200, %f693;
	ld.const.f32 	%f696, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f697, %f696, %f685, %f695;
	.loc	31	92	0
	ld.const.f32 	%f698, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f699, %f698, %f684;
	ld.const.f32 	%f700, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f701, %f700, %f200, %f699;
	ld.const.f32 	%f702, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f702, %f685, %f701;
	mov.f32 	%f6, %f691;
	mov.f32 	%f7, %f697;
	bra.uni 	$Lt_48_514;
$Lt_48_2818:
	// Filter-1 arm: region-gated luma remap.
	// Gate (t = float at paraminFilter1+44):
	//   t >= 0 : apply when float(%r6)  <  float(%r14) * t
	//   t <  0 : apply when float(%r12) < -(float(%r13) * t)
	//   otherwise leave the pixel untouched and jump to the store at $Lt_48_514.
	// %r6/%r12 are the column/row used for the destination index at $Lt_48_514.
	// NOTE(review): %r14/%r13 are presumably inWidth/inHeight (that is how
	// Kernel3 loads them); their definition in Kernel2 is outside this view.
	.loc	38	83	0
	ld.param.f32 	%f703, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+44];
	mov.f32 	%f704, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p101, %f703, %f704;
	@!%p101 bra 	$L_48_104706;
	cvt.rn.f32.s32 	%f705, %r6;
	cvt.rn.f32.s32 	%f706, %r14;
	mul.ftz.f32 	%f707, %f706, %f703;
	setp.lt.ftz.f32 	%p102, %f705, %f707;
	@%p102 bra 	$L_48_104450;
$L_48_104706:
	// Negative-t variant of the gate, tested against the row instead.
	mov.f32 	%f708, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p103, %f703, %f708;
	@!%p103 bra 	$Lt_48_514;
	cvt.rn.f32.s32 	%f709, %r12;
	cvt.rn.f32.s32 	%f710, %r13;
	mul.ftz.f32 	%f711, %f710, %f703;
	neg.ftz.f32 	%f712, %f711;
	setp.lt.ftz.f32 	%p104, %f709, %f712;
	@!%p104 bra 	$Lt_48_514;
$L_48_104450:
	// Y (%f471) = M[+4]*%f6 + M[+0]*%f7 + M[+8]*%f5, M = kRGB32f_To_601YPbPr.
	// inRange  (%f715) = param[+4]  - param[+8]
	// outRange (%f718) = param[+12] - param[+16]
	// d        (%f721) = Y - param[+8]
	.loc	31	110	0
	ld.const.f32 	%f713, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f713, %f6;
	ld.param.f32 	%f481, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+8];
	ld.param.f32 	%f714, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+4];
	sub.ftz.f32 	%f715, %f714, %f481;
	ld.param.f32 	%f716, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+16];
	ld.param.f32 	%f717, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+12];
	sub.ftz.f32 	%f718, %f717, %f716;
	ld.const.f32 	%f719, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f719, %f7, %f467;
	ld.const.f32 	%f720, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f720, %f5, %f469;
	sub.ftz.f32 	%f721, %f471, %f481;
	ld.param.f32 	%f722, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+20];
	mov.f32 	%f723, 0f3f800000;   	// 1
	setp.neu.ftz.f32 	%p105, %f722, %f723;
	@!%p105 bra 	$Lt_48_124418;
	// Exponent g = param[+20] is not 1:
	//   %f299 = outRange * 2^(g * log2(max(d, 0) / inRange)) + param[+16]
	.loc	42	523	0
	mov.f32 	%f724, 0f00000000;   	// 0
	max.ftz.f32 	%f725, %f721, %f724;
	div.approx.ftz.f32 	%f726, %f725, %f715;
	lg2.approx.ftz.f32 	%f727, %f726;
	mul.ftz.f32 	%f728, %f722, %f727;
	ex2.approx.ftz.f32 	%f729, %f728;
	.loc	31	120	0
	fma.rn.ftz.f32 	%f299, %f718, %f729, %f716;
	bra.uni 	$Lt_48_124162;
$Lt_48_124418:
	// Exponent is exactly 1: linear remap, with no clamp of d at zero.
	//   %f299 = outRange * (d / inRange) + param[+16]
	.loc	31	129	0
	div.approx.ftz.f32 	%f730, %f721, %f715;
	fma.rn.ftz.f32 	%f299, %f718, %f730, %f716;
$Lt_48_124162:
	// Write the remapped value back to all three colour registers, each scaled
	// by one coefficient of k601YPbPr_To_RGB32f (+24 -> %f5, +12 -> %f6, +0 -> %f7).
	.loc	31	135	0
	ld.const.f32 	%f731, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f731, %f299;
	ld.const.f32 	%f732, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f732, %f299;
	ld.const.f32 	%f733, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f733, %f299;
	bra.uni 	$Lt_48_514;
$Lt_48_3074:
	// Filter-1 arm: region-gated constant fill.
	// Gate (t = float at paraminFilter1+44):
	//   t >= 0 : apply when float(%r6)  <  float(%r14) * t
	//   t <  0 : apply when float(%r12) < -(float(%r13) * t)
	//   otherwise jump straight to the store at $Lt_48_514.
	// NOTE(review): %r14/%r13 are presumably inWidth/inHeight; they are set
	// outside the visible part of Kernel2.
	.loc	38	86	0
	ld.param.f32 	%f734, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+44];
	mov.f32 	%f735, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p106, %f734, %f735;
	@!%p106 bra 	$L_48_105474;
	cvt.rn.f32.s32 	%f736, %r6;
	cvt.rn.f32.s32 	%f737, %r14;
	mul.ftz.f32 	%f738, %f737, %f734;
	setp.lt.ftz.f32 	%p107, %f736, %f738;
	@%p107 bra 	$L_48_105218;
$L_48_105474:
	// Negative-t variant of the gate, tested against the row instead.
	mov.f32 	%f739, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p108, %f734, %f739;
	@!%p108 bra 	$Lt_48_514;
	cvt.rn.f32.s32 	%f740, %r12;
	cvt.rn.f32.s32 	%f741, %r13;
	mul.ftz.f32 	%f742, %f741, %f734;
	neg.ftz.f32 	%f743, %f742;
	setp.lt.ftz.f32 	%p109, %f740, %f743;
	@!%p109 bra 	$Lt_48_514;
$L_48_105218:
	// Inside the region: overwrite the three colour registers with fixed
	// coefficients of k601YPbPr_To_RGB32f (+24, +12, +0). %f9 is not modified.
	.loc	31	160	0
	ld.const.f32 	%f5, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f6, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f7, [k601YPbPr_To_RGB32f+0];
	bra.uni 	$Lt_48_514;
$Lt_48_3330:
	// Filter-1 arm: sign-preserving power curve on %f5, %f6, %f7 and %f9.
	// With p = float at paraminFilter1+4 (kept in %f456), each channel x becomes
	//   x >= 0    :  2^(p * log2( x))
	//   otherwise : -2^(p * log2(-x))
	// using the approximate, flush-to-zero lg2/ex2 instructions.
	// Results are staged in %f748/%f756/%f764/%f772 and committed together at
	// $Lt_48_126210.
	.loc	32	42	0
	ld.param.f32 	%f744, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+4];
	mov.f32 	%f745, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p110, %f5, %f745;
	@!%p110 bra 	$Lt_48_124930;
	.loc	22	292	0
	mov.f32 	%f456, %f744;
	lg2.approx.ftz.f32 	%f746, %f5;
	mul.ftz.f32 	%f747, %f456, %f746;
	ex2.approx.ftz.f32 	%f748, %f747;
	bra.uni 	$Lt_48_124674;
$Lt_48_124930:
	// %f5 failed the >= 0 test: work on -%f5 and negate the result.
	mov.f32 	%f456, %f744;
	neg.ftz.f32 	%f749, %f5;
	lg2.approx.ftz.f32 	%f750, %f749;
	mul.ftz.f32 	%f751, %f456, %f750;
	ex2.approx.ftz.f32 	%f752, %f751;
	neg.ftz.f32 	%f748, %f752;
$Lt_48_124674:
	// Same treatment for %f6 -> %f756.
	mov.f32 	%f753, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p111, %f6, %f753;
	@!%p111 bra 	$Lt_48_125442;
	.loc	22	293	0
	lg2.approx.ftz.f32 	%f754, %f6;
	mul.ftz.f32 	%f755, %f456, %f754;
	ex2.approx.ftz.f32 	%f756, %f755;
	bra.uni 	$Lt_48_125186;
$Lt_48_125442:
	neg.ftz.f32 	%f757, %f6;
	lg2.approx.ftz.f32 	%f758, %f757;
	mul.ftz.f32 	%f759, %f456, %f758;
	ex2.approx.ftz.f32 	%f760, %f759;
	neg.ftz.f32 	%f756, %f760;
$Lt_48_125186:
	// Same treatment for %f7 -> %f764.
	mov.f32 	%f761, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p112, %f7, %f761;
	@!%p112 bra 	$Lt_48_125954;
	.loc	22	294	0
	lg2.approx.ftz.f32 	%f762, %f7;
	mul.ftz.f32 	%f763, %f456, %f762;
	ex2.approx.ftz.f32 	%f764, %f763;
	bra.uni 	$Lt_48_125698;
$Lt_48_125954:
	neg.ftz.f32 	%f765, %f7;
	lg2.approx.ftz.f32 	%f766, %f765;
	mul.ftz.f32 	%f767, %f456, %f766;
	ex2.approx.ftz.f32 	%f768, %f767;
	neg.ftz.f32 	%f764, %f768;
$Lt_48_125698:
	// Same treatment for %f9 (the fourth stored component) -> %f772.
	mov.f32 	%f769, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p113, %f9, %f769;
	@!%p113 bra 	$Lt_48_126466;
	.loc	22	295	0
	lg2.approx.ftz.f32 	%f770, %f9;
	mul.ftz.f32 	%f771, %f456, %f770;
	ex2.approx.ftz.f32 	%f772, %f771;
	bra.uni 	$Lt_48_126210;
$Lt_48_126466:
	neg.ftz.f32 	%f773, %f9;
	lg2.approx.ftz.f32 	%f774, %f773;
	mul.ftz.f32 	%f775, %f456, %f774;
	ex2.approx.ftz.f32 	%f776, %f775;
	neg.ftz.f32 	%f772, %f776;
$Lt_48_126210:
	// Commit all four results to the pixel registers.
	.loc	32	42	0
	mov.f32 	%f5, %f748;
	mov.f32 	%f6, %f756;
	mov.f32 	%f7, %f764;
	mov.f32 	%f9, %f772;
	.loc	38	91	0
	bra.uni 	$Lt_48_514;
$Lt_48_3586:
	// Filter-1 arm: mirror the destination column, %r6 = (%r14 - %r6) - 1.
	// Pixel values are untouched; only the column index used by the store at
	// $Lt_48_514 changes.
	// NOTE(review): %r14 is presumably inWidth — it is set outside this view.
	.loc	33	41	0
	sub.s32 	%r399, %r14, %r6;
	sub.s32 	%r6, %r399, 1;
	.loc	38	94	0
	bra.uni 	$Lt_48_514;
$Lt_48_3842:
	// Filter-1 arm: add hashed pseudo-random noise to %f7, %f6 and %f5.
	//   amp  (%f456) = param[+4]; half (%f778) = 0.5 * amp
	//   seed         = param[+16], truncated to u32
	//   param[+8]  == 1 : three independent noise values, one per channel
	//   otherwise       : one noise value shared by all three channels
	//   param[+12] == 1 : afterwards clamp %f5, %f6, %f7 and %f9 to [0, 1]
	// Each noise value is built the same way:
	//   1. a sub/xor/shift integer mixing sequence over the seed and the pixel
	//      coordinates %r6/%r12 (NOTE(review): this looks like the inlined
	//      Mix3/Rand2D helpers declared at the top of the file — confirm),
	//   2. two linear-congruential steps whose bits 16..23 are combined as
	//      (a << 7) ^ b,
	//   3. conversion to float, division by 32767, then amp * value - half.
	.loc	38	96	0
	ld.param.f32 	%f456, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+4];
	mov.f32 	%f777, 0f3f000000;   	// 0.5
	mul.ftz.f32 	%f778, %f456, %f777;
	ld.param.f32 	%f779, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+16];
	ld.param.f32 	%f780, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+8];
	mov.f32 	%f781, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p114, %f780, %f781;
	@!%p114 bra 	$Lt_48_126978;
	// --- Per-channel noise. First value (-> %f356) mixes (%r6*3 - %r12 - seed).
	.loc	20	143	0
	cvt.rzi.ftz.u32.f32 	%r400, %f779;
	mul.lo.s32 	%r401, %r6, 3;
	shr.u32 	%r402, %r400, 13;
	sub.u32 	%r403, %r12, %r400;
	sub.u32 	%r404, %r401, %r12;
	sub.u32 	%r405, %r404, %r400;
	xor.b32 	%r406, %r405, %r402;
	shl.b32 	%r407, %r406, 8;
	sub.u32 	%r408, %r403, %r406;
	sub.u32 	%r409, %r400, %r406;
	xor.b32 	%r410, %r407, %r408;
	shr.u32 	%r411, %r410, 13;
	sub.u32 	%r412, %r409, %r410;
	sub.u32 	%r413, %r406, %r410;
	xor.b32 	%r414, %r411, %r412;
	shr.u32 	%r415, %r414, 12;
	sub.u32 	%r416, %r413, %r414;
	xor.b32 	%r417, %r415, %r416;
	shl.b32 	%r418, %r417, 16;
	sub.u32 	%r419, %r410, %r414;
	sub.u32 	%r420, %r419, %r417;
	xor.b32 	%r421, %r418, %r420;
	.loc	20	144	0
	sub.u32 	%r422, %r414, %r417;
	sub.u32 	%r423, %r422, %r421;
	shr.u32 	%r424, %r421, 5;
	xor.b32 	%r425, %r423, %r424;
	.loc	20	145	0
	sub.u32 	%r426, %r417, %r421;
	sub.u32 	%r427, %r426, %r425;
	shr.u32 	%r428, %r425, 3;
	xor.b32 	%r429, %r427, %r428;
	.loc	20	146	0
	sub.u32 	%r430, %r421, %r425;
	sub.u32 	%r431, %r430, %r429;
	shl.b32 	%r432, %r429, 10;
	xor.b32 	%r433, %r431, %r432;
	.loc	20	147	0
	sub.u32 	%r434, %r425, %r429;
	sub.u32 	%r435, %r434, %r433;
	shr.u32 	%r436, %r433, 15;
	xor.b32 	%r437, %r435, %r436;
	// LCG steps on the mixed value, then scale to amp * v - half.
	.loc	34	48	0
	mul.lo.u32 	%r438, %r437, 1103515245;
	add.u32 	%r439, %r438, 12345;
	shr.u32 	%r440, %r439, 16;
	and.b32 	%r441, %r440, 255;
	shl.b32 	%r442, %r441, 7;
	mul.lo.u32 	%r443, %r437, -1029531031;
	sub.u32 	%r444, %r443, 740551042;
	shr.u32 	%r445, %r444, 16;
	and.b32 	%r446, %r445, 255;
	xor.b32 	%r447, %r442, %r446;
	cvt.rn.f32.s32 	%f782, %r447;
	mov.f32 	%f783, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f784, %f782, %f783;
	mul.ftz.f32 	%f785, %f456, %f784;
	sub.ftz.f32 	%f356, %f785, %f778;
	// --- Second value (-> %f361): same mix, starting from %r405 + 1.
	.loc	20	143	0
	add.u32 	%r448, %r405, 1;
	xor.b32 	%r449, %r448, %r402;
	shl.b32 	%r450, %r449, 8;
	sub.u32 	%r451, %r403, %r449;
	sub.u32 	%r452, %r400, %r449;
	xor.b32 	%r453, %r450, %r451;
	shr.u32 	%r454, %r453, 13;
	sub.u32 	%r455, %r452, %r453;
	sub.u32 	%r456, %r449, %r453;
	xor.b32 	%r457, %r454, %r455;
	shr.u32 	%r458, %r457, 12;
	sub.u32 	%r459, %r456, %r457;
	xor.b32 	%r460, %r458, %r459;
	sub.u32 	%r461, %r453, %r457;
	sub.u32 	%r462, %r461, %r460;
	shl.b32 	%r463, %r460, 16;
	xor.b32 	%r464, %r462, %r463;
	.loc	20	144	0
	sub.u32 	%r465, %r457, %r460;
	sub.u32 	%r466, %r465, %r464;
	shr.u32 	%r467, %r464, 5;
	xor.b32 	%r468, %r466, %r467;
	.loc	20	145	0
	sub.u32 	%r469, %r460, %r464;
	sub.u32 	%r470, %r469, %r468;
	shr.u32 	%r471, %r468, 3;
	xor.b32 	%r472, %r470, %r471;
	.loc	20	146	0
	sub.u32 	%r473, %r464, %r468;
	sub.u32 	%r474, %r473, %r472;
	shl.b32 	%r475, %r472, 10;
	xor.b32 	%r476, %r474, %r475;
	.loc	20	147	0
	sub.u32 	%r477, %r468, %r472;
	sub.u32 	%r478, %r477, %r476;
	shr.u32 	%r479, %r476, 15;
	xor.b32 	%r480, %r478, %r479;
	.loc	34	49	0
	mul.lo.u32 	%r481, %r480, 1103515245;
	add.u32 	%r482, %r481, 12345;
	shr.u32 	%r483, %r482, 16;
	and.b32 	%r484, %r483, 255;
	shl.b32 	%r485, %r484, 7;
	mul.lo.u32 	%r486, %r480, -1029531031;
	sub.u32 	%r487, %r486, 740551042;
	shr.u32 	%r488, %r487, 16;
	and.b32 	%r489, %r488, 255;
	xor.b32 	%r490, %r485, %r489;
	cvt.rn.f32.s32 	%f786, %r490;
	mov.f32 	%f787, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f788, %f786, %f787;
	mul.ftz.f32 	%f789, %f456, %f788;
	sub.ftz.f32 	%f361, %f789, %f778;
	// --- Third value (-> %f366): same mix, starting from %r405 + 2.
	.loc	20	143	0
	add.u32 	%r491, %r405, 2;
	xor.b32 	%r492, %r491, %r402;
	shl.b32 	%r493, %r492, 8;
	sub.u32 	%r494, %r403, %r492;
	sub.u32 	%r495, %r400, %r492;
	xor.b32 	%r496, %r493, %r494;
	shr.u32 	%r497, %r496, 13;
	sub.u32 	%r498, %r495, %r496;
	sub.u32 	%r499, %r492, %r496;
	xor.b32 	%r500, %r497, %r498;
	shr.u32 	%r501, %r500, 12;
	sub.u32 	%r502, %r499, %r500;
	xor.b32 	%r503, %r501, %r502;
	sub.u32 	%r504, %r496, %r500;
	sub.u32 	%r505, %r504, %r503;
	shl.b32 	%r506, %r503, 16;
	xor.b32 	%r507, %r505, %r506;
	.loc	20	144	0
	sub.u32 	%r508, %r500, %r503;
	sub.u32 	%r509, %r508, %r507;
	shr.u32 	%r510, %r507, 5;
	xor.b32 	%r511, %r509, %r510;
	.loc	20	145	0
	sub.u32 	%r512, %r503, %r507;
	sub.u32 	%r513, %r512, %r511;
	shr.u32 	%r514, %r511, 3;
	xor.b32 	%r515, %r513, %r514;
	.loc	20	146	0
	sub.u32 	%r516, %r507, %r511;
	sub.u32 	%r517, %r516, %r515;
	shl.b32 	%r518, %r515, 10;
	xor.b32 	%r519, %r517, %r518;
	.loc	20	147	0
	sub.u32 	%r520, %r511, %r515;
	sub.u32 	%r521, %r520, %r519;
	shr.u32 	%r522, %r519, 15;
	xor.b32 	%r523, %r521, %r522;
	.loc	34	50	0
	mul.lo.u32 	%r524, %r523, 1103515245;
	add.u32 	%r525, %r524, 12345;
	shr.u32 	%r526, %r525, 16;
	and.b32 	%r527, %r526, 255;
	shl.b32 	%r528, %r527, 7;
	mul.lo.u32 	%r529, %r523, -1029531031;
	sub.u32 	%r530, %r529, 740551042;
	shr.u32 	%r531, %r530, 16;
	and.b32 	%r532, %r531, 255;
	xor.b32 	%r533, %r528, %r532;
	cvt.rn.f32.s32 	%f790, %r533;
	mov.f32 	%f791, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f792, %f790, %f791;
	mul.ftz.f32 	%f793, %f456, %f792;
	sub.ftz.f32 	%f366, %f793, %f778;
	bra.uni 	$Lt_48_126722;
$Lt_48_126978:
	// --- Shared noise: one value from (%r6 - %r12 - seed), copied to all three
	// offset registers (%f366, %f361, %f356).
	.loc	20	143	0
	cvt.rzi.ftz.u32.f32 	%r534, %f779;
	sub.u32 	%r535, %r6, %r12;
	shr.u32 	%r536, %r534, 13;
	sub.u32 	%r537, %r12, %r534;
	sub.u32 	%r538, %r535, %r534;
	xor.b32 	%r539, %r538, %r536;
	shl.b32 	%r540, %r539, 8;
	sub.u32 	%r541, %r537, %r539;
	sub.u32 	%r542, %r534, %r539;
	xor.b32 	%r543, %r540, %r541;
	shr.u32 	%r544, %r543, 13;
	sub.u32 	%r545, %r542, %r543;
	sub.u32 	%r546, %r539, %r543;
	xor.b32 	%r547, %r544, %r545;
	shr.u32 	%r548, %r547, 12;
	sub.u32 	%r549, %r546, %r547;
	xor.b32 	%r550, %r548, %r549;
	shl.b32 	%r551, %r550, 16;
	sub.u32 	%r552, %r543, %r547;
	sub.u32 	%r553, %r552, %r550;
	xor.b32 	%r554, %r551, %r553;
	.loc	20	144	0
	sub.u32 	%r555, %r547, %r550;
	sub.u32 	%r556, %r555, %r554;
	shr.u32 	%r557, %r554, 5;
	xor.b32 	%r558, %r556, %r557;
	.loc	20	145	0
	sub.u32 	%r559, %r550, %r554;
	sub.u32 	%r560, %r559, %r558;
	shr.u32 	%r561, %r558, 3;
	xor.b32 	%r562, %r560, %r561;
	.loc	20	146	0
	sub.u32 	%r563, %r554, %r558;
	sub.u32 	%r564, %r563, %r562;
	shl.b32 	%r565, %r562, 10;
	xor.b32 	%r566, %r564, %r565;
	.loc	20	147	0
	sub.u32 	%r567, %r558, %r562;
	sub.u32 	%r568, %r567, %r566;
	shr.u32 	%r569, %r566, 15;
	xor.b32 	%r570, %r568, %r569;
	.loc	34	54	0
	mul.lo.u32 	%r571, %r570, 1103515245;
	mul.lo.u32 	%r572, %r570, -1029531031;
	add.u32 	%r573, %r571, 12345;
	sub.u32 	%r574, %r572, 740551042;
	shr.u32 	%r575, %r573, 16;
	shr.u32 	%r576, %r574, 16;
	and.b32 	%r577, %r575, 255;
	and.b32 	%r578, %r576, 255;
	shl.b32 	%r579, %r577, 7;
	xor.b32 	%r580, %r579, %r578;
	cvt.rn.f32.s32 	%f794, %r580;
	mov.f32 	%f795, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f796, %f794, %f795;
	mul.ftz.f32 	%f797, %f456, %f796;
	sub.ftz.f32 	%f798, %f797, %f778;
	mov.f32 	%f366, %f798;
	mov.f32 	%f361, %f798;
	mov.f32 	%f356, %f798;
$Lt_48_126722:
	// Add the offsets: %f356 -> %f7, %f361 -> %f6, %f366 -> %f5.
	.loc	34	57	0
	add.ftz.f32 	%f7, %f356, %f7;
	.loc	34	58	0
	add.ftz.f32 	%f6, %f361, %f6;
	.loc	34	59	0
	add.ftz.f32 	%f5, %f5, %f366;
	ld.param.f32 	%f799, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+12];
	mov.f32 	%f800, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p115, %f799, %f800;
	@!%p115 bra 	$Lt_48_514;
	// param[+12] == 1: clamp all four stored components to [0, 1].
	.loc	34	63	0
	mov.f32 	%f801, 0f00000000;   	// 0
	max.ftz.f32 	%f802, %f5, %f801;
	mov.f32 	%f803, 0f3f800000;   	// 1
	min.ftz.f32 	%f5, %f802, %f803;
	mov.f32 	%f804, 0f00000000;   	// 0
	max.ftz.f32 	%f805, %f6, %f804;
	mov.f32 	%f806, 0f3f800000;   	// 1
	min.ftz.f32 	%f6, %f805, %f806;
	mov.f32 	%f807, 0f00000000;   	// 0
	max.ftz.f32 	%f808, %f7, %f807;
	mov.f32 	%f809, 0f3f800000;   	// 1
	min.ftz.f32 	%f7, %f808, %f809;
	mov.f32 	%f810, 0f00000000;   	// 0
	max.ftz.f32 	%f811, %f9, %f810;
	mov.f32 	%f812, 0f3f800000;   	// 1
	min.ftz.f32 	%f9, %f811, %f812;
	bra.uni 	$Lt_48_514;
$Lt_48_4098:
	// Filter-1 arm: region-gated adjustment in the YPbPr domain.
	// Applied only when float(%r6) < float(%r14) * param[+20]; otherwise the
	// pixel goes to the store at $Lt_48_514 unchanged.
	// Steps (M = kRGB32f_To_601YPbPr, K = k601YPbPr_To_RGB32f):
	//   y (%f471) = M[+4]*%f6  + M[+0]*%f7  + M[+8]*%f5
	//   c1 (%f831) = M[+16]*%f6 + M[+12]*%f7 + M[+20]*%f5
	//   c2 (%f829) = M[+28]*%f6 + M[+24]*%f7 + M[+32]*%f5
	//   y'  (%f834) = y * param[+8] + param[+4]
	//   c1' (%f840) = c1 * param[+12] - c2 * param[+16]
	//   c2' (%f841) = c1 * param[+16] + c2 * param[+12]
	//   then (y', c1', c2') is multiplied back through K into %f6, %f7, %f5.
	// NOTE(review): param[+12]/param[+16] look like a precomputed cos/sin pair
	// for a chroma rotation — confirm against the host-side setup.
	.loc	38	99	0
	cvt.rn.f32.s32 	%f813, %r6;
	cvt.rn.f32.s32 	%f814, %r14;
	ld.param.f32 	%f815, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+20];
	mul.ftz.f32 	%f816, %f814, %f815;
	setp.lt.ftz.f32 	%p116, %f813, %f816;
	@!%p116 bra 	$Lt_48_514;
	// Forward 3x3 transform; the three rows are interleaved by the compiler.
	.loc	22	267	0
	ld.const.f32 	%f817, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f817, %f6;
	ld.const.f32 	%f818, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f819, %f818, %f6;
	ld.const.f32 	%f820, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f821, %f820, %f6;
	ld.const.f32 	%f822, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f822, %f7, %f467;
	ld.const.f32 	%f823, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f824, %f823, %f7, %f819;
	ld.const.f32 	%f825, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f826, %f825, %f7, %f821;
	ld.const.f32 	%f827, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f827, %f5, %f469;
	ld.const.f32 	%f828, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f829, %f828, %f5, %f824;
	ld.const.f32 	%f830, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f831, %f830, %f5, %f826;
	// Gain/offset on y, 2x2 mix of the two chroma terms.
	ld.param.f32 	%f832, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+4];
	ld.param.f32 	%f833, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+8];
	fma.rn.ftz.f32 	%f834, %f471, %f833, %f832;
	ld.param.f32 	%f835, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+16];
	mul.ftz.f32 	%f836, %f829, %f835;
	ld.param.f32 	%f837, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+12];
	mul.ftz.f32 	%f838, %f829, %f837;
	mul.ftz.f32 	%f839, %f831, %f837;
	sub.ftz.f32 	%f840, %f839, %f836;
	fma.rn.ftz.f32 	%f841, %f831, %f835, %f838;
	// Inverse transform, K row at +12 -> %f847 (later %f6).
	ld.const.f32 	%f842, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f843, %f842, %f840;
	ld.const.f32 	%f844, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f845, %f844, %f834, %f843;
	ld.const.f32 	%f846, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f847, %f846, %f841, %f845;
	// K row at +0 -> %f853 (later %f7).
	.loc	22	268	0
	ld.const.f32 	%f848, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f849, %f848, %f840;
	ld.const.f32 	%f850, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f851, %f850, %f834, %f849;
	ld.const.f32 	%f852, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f853, %f852, %f841, %f851;
	// K row at +24 -> %f5 directly.
	.loc	35	56	0
	ld.const.f32 	%f854, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f855, %f854, %f840;
	ld.const.f32 	%f856, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f857, %f856, %f834, %f855;
	ld.const.f32 	%f858, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f858, %f841, %f857;
	mov.f32 	%f6, %f847;
	mov.f32 	%f7, %f853;
	bra.uni 	$Lt_48_514;
$Lt_48_4354:
	// Filter-1 arm: luma-driven blend towards a two-point colour ramp.
	//   y (%f471) = M[+4]*%f6 + M[+0]*%f7 + M[+8]*%f5, M = kRGB32f_To_601YPbPr
	//   target for %f6 (%f864) = param[+8]  + y * (param[+20] - param[+8])
	//   target for %f7 (%f868) = param[+12] + y * (param[+24] - param[+12])
	//   target for %f5 (%f872) = param[+4]  + y * (param[+16] - param[+4])
	//   each channel c is then replaced by c + param[+28] * (target - c).
	// %f9 is not modified.
	.loc	36	46	0
	ld.const.f32 	%f859, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f859, %f6;
	ld.const.f32 	%f860, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f860, %f7, %f467;
	ld.const.f32 	%f861, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f861, %f5, %f469;
	ld.param.f32 	%f481, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+8];
	ld.param.f32 	%f862, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+20];
	sub.ftz.f32 	%f863, %f862, %f481;
	fma.rn.ftz.f32 	%f864, %f471, %f863, %f481;
	.loc	36	47	0
	ld.param.f32 	%f865, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+12];
	ld.param.f32 	%f866, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+24];
	sub.ftz.f32 	%f867, %f866, %f865;
	fma.rn.ftz.f32 	%f868, %f471, %f867, %f865;
	.loc	36	49	0
	ld.param.f32 	%f869, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+28];
	ld.param.f32 	%f456, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+4];
	ld.param.f32 	%f870, [__cudaparm_PointwiseFilterHostKernel2___val_paraminFilter1+16];
	sub.ftz.f32 	%f871, %f870, %f456;
	fma.rn.ftz.f32 	%f872, %f471, %f871, %f456;
	// Blend each channel towards its target by the amount in %f869.
	sub.ftz.f32 	%f873, %f872, %f5;
	fma.rn.ftz.f32 	%f5, %f869, %f873, %f5;
	.loc	36	50	0
	sub.ftz.f32 	%f874, %f864, %f6;
	fma.rn.ftz.f32 	%f6, %f869, %f874, %f6;
	.loc	36	51	0
	sub.ftz.f32 	%f875, %f868, %f7;
	fma.rn.ftz.f32 	%f7, %f869, %f875, %f7;
	.loc	38	103	0
	bra.uni 	$Lt_48_514;
$Lt_48_4610:
	// Filter-1 arm: mirror the destination row, %r12 = (%r13 - %r12) - 1.
	// This is the last arm, so it falls through into the store at $Lt_48_514
	// instead of branching.
	// NOTE(review): %r13 is presumably inHeight — it is set outside this view.
	.loc	37	41	0
	sub.s32 	%r581, %r13, %r12;
	sub.s32 	%r12, %r581, 1;
$Lt_48_514:
	// Common exit of every filter-1 arm: write the pixel to the destination.
	// Element index = inDestPitch * %r12 + %r6, so the pitch is counted in
	// pixels here, not bytes. The index is sign-extended to 64 bits and scaled
	// by the pixel size chosen by %p2.
	// NOTE(review): %p2 is set outside this view; in Kernel3 the same predicate
	// is (inDeviceFormat == 0) and selects the 16-bit-per-component format.
	.loc	38	155	0
	ld.param.s32 	%r582, [__cudaparm_PointwiseFilterHostKernel2_inDestPitch];
	mul.lo.s32 	%r583, %r582, %r12;
	add.s32 	%r584, %r6, %r583;
	cvt.s64.s32 	%rd8, %r584;
	ld.param.u64 	%rd9, [__cudaparm_PointwiseFilterHostKernel2_inDestImage];
	@!%p2 bra 	$Lt_48_128514;
	// %p2 true: 8 bytes per pixel. Convert each component f32 -> f16
	// (round-to-nearest, flush-to-zero) and store them as one v4.u16.
	.loc	20	126	0
	mul.lo.u64 	%rd10, %rd8, 8;
	add.u64 	%rd11, %rd9, %rd10;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f5;
	mov.b32		%r585, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f6;
	mov.b32		%r586, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f7;
	mov.b32		%r587, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r588, %b1; }
	st.global.v4.u16 	[%rd11+0], {%r585,%r586,%r587,%r588};
	.loc	38	157	0
	bra.uni 	$Lt_48_128258;
$Lt_48_128514:
	// %p2 false: 16 bytes per pixel, components stored as one v4.f32.
	.loc	20	126	0
	mul.lo.u64 	%rd12, %rd8, 16;
	add.u64 	%rd13, %rd9, %rd12;
	st.global.v4.f32 	[%rd13+0], {%f5,%f6,%f7,%f9};
$Lt_48_128258:
$Lt_48_105730:
	// Both store paths end here. $Lt_48_105730 is also targeted from outside
	// this view.
	.loc	38	159	0
	exit;
$LDWend_PointwiseFilterHostKernel2:
	} // PointwiseFilterHostKernel2

	// PointwiseFilterHostKernel3 — kernel entry taking a source image, a
	// destination image and three 48-byte filter parameter blocks
	// (paraminFilter0..2). Only the prologue and the first filter-0 arms are
	// documented here.
	// Prologue summary:
	//   %r5/%r6   = ctaid.x * ntid.x + tid.x   (column)
	//   %r11/%r12 = ctaid.y * ntid.y + tid.y   (row)
	//   %r13 = inHeight, %r14 = inWidth
	//   threads with row >= inHeight or column >= inWidth leave via $Lt_49_190466
	//   %p2 = (inDeviceFormat == 0) selects 8-byte (4 x f16) pixels, else 16-byte
	//         (4 x f32) pixels
	//   the loaded pixel ends up in %f5, %f6, %f7 and %f8, with %f9 a copy of %f8
	// NOTE(review): each filter block appears to start with a u32 opcode
	// followed by floats; this is visible only for paraminFilter0 here.
	.entry PointwiseFilterHostKernel3 (
		.param .u64 __cudaparm_PointwiseFilterHostKernel3_inSrcImage,
		.param .s32 __cudaparm_PointwiseFilterHostKernel3_inSrcPitch,
		.param .u64 __cudaparm_PointwiseFilterHostKernel3_inDestImage,
		.param .s32 __cudaparm_PointwiseFilterHostKernel3_inDestPitch,
		.param .u32 __cudaparm_PointwiseFilterHostKernel3_inDeviceFormat,
		.param .s32 __cudaparm_PointwiseFilterHostKernel3_inWidth,
		.param .s32 __cudaparm_PointwiseFilterHostKernel3_inHeight,
		.param .align 4 .b8 __cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0[48],
		.param .align 4 .b8 __cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1[48],
		.param .align 4 .b8 __cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2[48])
	{
	.reg .u32 %r<759>;
	.reg .u64 %rd<15>;
	.reg .f32 %f<1282>;
	.reg .pred %p<175>;
	.loc	38	170	0
$LDWbegin_PointwiseFilterHostKernel3:
	// Global column index of this thread.
	.loc	38	172	0
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	mov.u32 	%r4, %tid.x;
	add.u32 	%r5, %r3, %r4;
	mov.s32 	%r6, %r5;
	// Global row index of this thread.
	.loc	38	173	0
	cvt.s32.u32 	%r7, %ctaid.y;
	cvt.s32.u32 	%r8, %ntid.y;
	mul.lo.s32 	%r9, %r7, %r8;
	mov.u32 	%r10, %tid.y;
	add.u32 	%r11, %r9, %r10;
	mov.s32 	%r12, %r11;
	// Bounds test: continue only if inHeight > row AND inWidth > column
	// (signed compares; set.* yields an all-ones mask that is negated to 0/1).
	ld.param.s32 	%r13, [__cudaparm_PointwiseFilterHostKernel3_inHeight];
	ld.param.s32 	%r14, [__cudaparm_PointwiseFilterHostKernel3_inWidth];
	set.gt.u32.s32 	%r15, %r13, %r11;
	neg.s32 	%r16, %r15;
	set.gt.u32.s32 	%r17, %r14, %r5;
	neg.s32 	%r18, %r17;
	and.b32 	%r19, %r16, %r18;
	mov.u32 	%r20, 0;
	setp.eq.s32 	%p1, %r19, %r20;
	@%p1 bra 	$Lt_49_190466;
	// %p2 stays live beyond this block (the Kernel2 store path uses its own
	// %p2 in the same way).
	ld.param.s32 	%r21, [__cudaparm_PointwiseFilterHostKernel3_inDeviceFormat];
	mov.s32 	%r22, 0;
	setp.eq.s32 	%p2, %r21, %r22;
	ld.param.u64 	%rd1, [__cudaparm_PointwiseFilterHostKernel3_inSrcImage];
	ld.param.s32 	%r23, [__cudaparm_PointwiseFilterHostKernel3_inSrcPitch];
	@!%p2 bra 	$Lt_49_157954;
	// 16-bit path: element index = inSrcPitch * row + column, 8 bytes per
	// pixel; load four u16 values and widen each f16 -> f32.
	.loc	20	115	0
	mul.lo.s32 	%r24, %r23, %r11;
	add.s32 	%r25, %r5, %r24;
	cvt.s64.s32 	%rd2, %r25;
	mul.wide.s32 	%rd3, %r25, 8;
	add.u64 	%rd4, %rd1, %rd3;
	ld.global.v4.u16 	{%r26,%r27,%r28,%r29}, [%rd4+0];
	.loc	38	177	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r28;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r29;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_49_157698;
$Lt_49_157954:
	// 32-bit path: same element index, 16 bytes per pixel, direct v4.f32 load.
	mul.lo.s32 	%r30, %r23, %r11;
	add.s32 	%r31, %r5, %r30;
	cvt.s64.s32 	%rd5, %r31;
	mul.wide.s32 	%rd6, %r31, 16;
	add.u64 	%rd7, %rd1, %rd6;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd7+0];
$Lt_49_157698:
	// Working copy of the pixel. %f8 keeps the original fourth component;
	// %f9 is the copy that the filter arms modify.
	mov.f32 	%f5, %f1;
	mov.f32 	%f6, %f2;
	mov.f32 	%f7, %f3;
	mov.f32 	%f8, %f4;
	mov.f32 	%f9, %f8;
	ld.param.u32 	%r32, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+0];
	mov.u32 	%r33, 0;
	setp.eq.s32 	%p3, %r32, %r33;
	@%p3 bra 	$Lt_49_10498;
	mov.u32 	%r34, 1;
	setp.eq.s32 	%p4, %r32, %r34;
	@%p4 bra 	$Lt_49_11010;
	mov.u32 	%r35, 2;
	setp.eq.s32 	%p5, %r32, %r35;
	@%p5 bra 	$Lt_49_11266;
	mov.u32 	%r36, 3;
	setp.eq.s32 	%p6, %r32, %r36;
	@%p6 bra 	$Lt_49_11522;
	mov.u32 	%r37, 4;
	setp.eq.s32 	%p7, %r32, %r37;
	@%p7 bra 	$Lt_49_11778;
	mov.u32 	%r38, 5;
	setp.eq.s32 	%p8, %r32, %r38;
	@%p8 bra 	$Lt_49_12034;
	mov.u32 	%r39, 6;
	setp.eq.s32 	%p9, %r32, %r39;
	@%p9 bra 	$Lt_49_12290;
	mov.u32 	%r40, 7;
	setp.eq.s32 	%p10, %r32, %r40;
	@%p10 bra 	$Lt_49_12546;
	mov.u32 	%r41, 8;
	setp.eq.s32 	%p11, %r32, %r41;
	@%p11 bra 	$Lt_49_12802;
	mov.u32 	%r42, 9;
	setp.eq.s32 	%p12, %r32, %r42;
	@%p12 bra 	$Lt_49_13058;
	mov.u32 	%r43, 10;
	setp.eq.s32 	%p13, %r32, %r43;
	@%p13 bra 	$Lt_49_13314;
	mov.u32 	%r44, 11;
	setp.eq.s32 	%p14, %r32, %r44;
	@%p14 bra 	$Lt_49_13570;
	mov.u32 	%r45, 12;
	setp.eq.s32 	%p15, %r32, %r45;
	@%p15 bra 	$Lt_49_13826;
	mov.u32 	%r46, 13;
	setp.eq.s32 	%p16, %r32, %r46;
	@%p16 bra 	$Lt_49_14082;
	mov.u32 	%r47, 14;
	setp.eq.s32 	%p17, %r32, %r47;
	@%p17 bra 	$Lt_49_14338;
	mov.u32 	%r48, 15;
	setp.eq.s32 	%p18, %r32, %r48;
	@%p18 bra 	$Lt_49_14594;
	mov.u32 	%r49, 16;
	setp.eq.s32 	%p19, %r32, %r49;
	@%p19 bra 	$Lt_49_14850;
	bra.uni 	$Lt_49_10754;
$Lt_49_10498:
	// Filter-0 opcode 0: rewrite the fourth component (%f9) from three flags.
	//   v        = param[+4]                     (%f14)
	//   ignoreIn = (param[+8]  == 1)  -> %p20    use v alone, ignoring the input
	//   invert   = (param[+12] == 1)  -> %p21    replace x by 1 - x
	//   toColour = (param[+16] == 1)  -> %p22    also copy the result to %f5..%f7
	// sat(x) below means x clamped to [0, 1] (cvt.sat).
	// NOTE(review): the flag names are descriptive guesses from the data flow,
	// not names taken from the source.
	.loc	38	57	0
	ld.param.f32 	%f10, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+8];
	mov.f32 	%f11, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p20, %f10, %f11;
	ld.param.f32 	%f12, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+12];
	mov.f32 	%f13, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p21, %f12, %f13;
	ld.param.f32 	%f14, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+4];
	ld.param.f32 	%f15, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+16];
	mov.f32 	%f16, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p22, %f15, %f16;
	@!%p22 bra 	$Lt_49_158466;
	// toColour: a = ignoreIn ? v : sat(%f8) * v;  r = invert ? 1 - a : a;
	// r is written to %f9 and to all three colour registers.
	.loc	21	53	0
	cvt.ftz.sat.f32.f32 	%f17, %f8;
	mov.f32 	%f18, %f14;
	mul.ftz.f32 	%f19, %f17, %f18;
	selp.f32 	%f20, %f18, %f19, %p20;
	mov.f32 	%f21, 0f3f800000;    	// 1
	sub.ftz.f32 	%f22, %f21, %f20;
	selp.f32 	%f23, %f22, %f20, %p21;
	mov.f32 	%f9, %f23;
	.loc	21	57	0
	mov.f32 	%f5, %f23;
	mov.f32 	%f6, %f23;
	mov.f32 	%f7, %f23;
	bra.uni 	$Lt_49_10754;
$Lt_49_158466:
	@!%p20 bra 	$Lt_49_158978;
	// !toColour && ignoreIn: %f9 = invert ? 1 - v : v.
	.loc	21	61	0
	mov.f32 	%f18, %f14;
	mov.f32 	%f24, 0f3f800000;    	// 1
	sub.ftz.f32 	%f25, %f24, %f18;
	selp.f32 	%f9, %f25, %f18, %p21;
	bra.uni 	$Lt_49_10754;
$Lt_49_158978:
	// !toColour && !ignoreIn: a = sat(%f8); %f9 = (invert ? 1 - a : a) * v.
	// Here the inversion happens before the multiply by v, unlike the
	// toColour path above.
	.loc	21	73	0
	cvt.ftz.sat.f32.f32 	%f26, %f8;
	mov.f32 	%f27, 0f3f800000;    	// 1
	sub.ftz.f32 	%f28, %f27, %f26;
	selp.f32 	%f29, %f28, %f26, %p21;
	mul.ftz.f32 	%f9, %f29, %f14;
	bra.uni 	$Lt_49_10754;
$Lt_49_11010:
	// Filter-0 opcode 1: collapse the colour to a single luma-derived value.
	//   y (%f35) = M[+4]*%f6 + M[+0]*%f7 + M[+8]*%f5, M = kRGB32f_To_601YPbPr
	//   %f6 = K[+12]*y, %f7 = K[+0]*y, %f5 = K[+24]*y, K = k601YPbPr_To_RGB32f
	// %f9 is reset to the original fourth component (%f8).
	.loc	22	267	0
	ld.const.f32 	%f30, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f30, %f6;
	ld.const.f32 	%f32, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f32, %f7, %f31;
	ld.const.f32 	%f34, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f34, %f5, %f33;
	ld.const.f32 	%f36, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f37, %f36, %f35;
	.loc	22	268	0
	ld.const.f32 	%f38, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f39, %f38, %f35;
	.loc	23	44	0
	ld.const.f32 	%f40, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f40, %f35;
	mov.f32 	%f6, %f37;
	mov.f32 	%f7, %f39;
	mov.f32 	%f9, %f8;
	.loc	38	61	0
	bra.uni 	$Lt_49_10754;
$Lt_49_11266:
	// Filter-0 opcode 2: per-channel scale about a pivot, or a hard threshold.
	//   pivot = param[+12] (%f42), low = param[+20] (%f41)
	//   param[+4] != 0 : c = param[+8] * (c - pivot) + low      for %f7, %f6, %f5
	//   param[+4] == 0 : c = (c > pivot) ? param[+24] : low     for %f7, %f6, %f5
	// %f9 is not modified.
	.loc	38	63	0
	ld.param.f32 	%f41, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+20];
	ld.param.f32 	%f42, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+12];
	ld.param.f32 	%f43, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+4];
	mov.f32 	%f44, 0f00000000;    	// 0
	setp.neu.ftz.f32 	%p23, %f43, %f44;
	@!%p23 bra 	$Lt_49_159490;
	// Linear form: slope in %f45.
	.loc	24	44	0
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+8];
	sub.ftz.f32 	%f46, %f7, %f42;
	fma.rn.ftz.f32 	%f7, %f45, %f46, %f41;
	.loc	24	45	0
	sub.ftz.f32 	%f47, %f6, %f42;
	fma.rn.ftz.f32 	%f6, %f45, %f47, %f41;
	.loc	24	46	0
	sub.ftz.f32 	%f48, %f5, %f42;
	fma.rn.ftz.f32 	%f5, %f45, %f48, %f41;
	bra.uni 	$Lt_49_10754;
$Lt_49_159490:
	// Threshold form: selp picks %f49 when the channel exceeds the pivot.
	.loc	24	50	0
	ld.param.f32 	%f49, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+24];
	setp.gt.ftz.f32 	%p24, %f7, %f42;
	selp.f32 	%f7, %f49, %f41, %p24;
	.loc	24	51	0
	setp.gt.ftz.f32 	%p25, %f6, %f42;
	selp.f32 	%f6, %f49, %f41, %p25;
	.loc	24	52	0
	setp.gt.ftz.f32 	%p26, %f5, %f42;
	selp.f32 	%f5, %f49, %f41, %p26;
	bra.uni 	$Lt_49_10754;
$Lt_49_11522:
	// Filter-0 opcode 3: independent per-channel gain.
	//   %f5 *= param[+4], %f6 *= param[+8], %f7 *= param[+12]
	// %f9 is not modified.
	.loc	25	47	0
	ld.param.f32 	%f50, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+4];
	mul.ftz.f32 	%f5, %f50, %f5;
	ld.param.f32 	%f51, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+8];
	mul.ftz.f32 	%f6, %f51, %f6;
	ld.param.f32 	%f52, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+12];
	mul.ftz.f32 	%f7, %f52, %f7;
	.loc	38	67	0
	bra.uni 	$Lt_49_10754;
$Lt_49_11778:
	// Filter-0 opcode 4: convert to luma-only colour depending on the distance
	// to a reference colour.
	//   clamp01(x) = min(max(x, 0), 1)
	//   d = max( |clamp01(%f5) - param[+12]|,
	//            |clamp01(%f6) - param[+16]|,
	//            |clamp01(%f7) - param[+20]|,
	//            |clamp01(%f8) - %f8| )
	//   near = (param[+8] >= d)          -> %p28
	//   flag = (param[+4] == 0)          -> %p27
	//   the conversion runs only when flag XOR near is true; otherwise the
	//   pixel is left as is.
	// Note that the fourth term compares the fourth component with its own
	// clamped value, so it is non-zero only when %f8 lies outside [0, 1].
	.loc	26	48	0
	ld.param.f32 	%f53, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+4];
	mov.f32 	%f54, 0f00000000;    	// 0
	setp.eq.ftz.f32 	%p27, %f53, %f54;
	ld.param.f32 	%f55, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+8];
	mov.f32 	%f56, 0f00000000;    	// 0
	max.ftz.f32 	%f57, %f5, %f56;
	mov.f32 	%f58, 0f3f800000;    	// 1
	min.ftz.f32 	%f59, %f57, %f58;
	ld.param.f32 	%f60, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+12];
	sub.ftz.f32 	%f61, %f59, %f60;
	abs.ftz.f32 	%f62, %f61;
	mov.f32 	%f63, 0f00000000;    	// 0
	max.ftz.f32 	%f64, %f6, %f63;
	mov.f32 	%f65, 0f3f800000;    	// 1
	min.ftz.f32 	%f66, %f64, %f65;
	ld.param.f32 	%f67, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+16];
	sub.ftz.f32 	%f68, %f66, %f67;
	abs.ftz.f32 	%f69, %f68;
	mov.f32 	%f70, 0f00000000;    	// 0
	max.ftz.f32 	%f71, %f8, %f70;
	mov.f32 	%f72, 0f3f800000;    	// 1
	min.ftz.f32 	%f73, %f71, %f72;
	sub.ftz.f32 	%f74, %f73, %f8;
	abs.ftz.f32 	%f75, %f74;
	mov.f32 	%f76, 0f00000000;    	// 0
	max.ftz.f32 	%f77, %f7, %f76;
	mov.f32 	%f78, 0f3f800000;    	// 1
	min.ftz.f32 	%f79, %f77, %f78;
	ld.param.f32 	%f80, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+20];
	sub.ftz.f32 	%f81, %f79, %f80;
	abs.ftz.f32 	%f82, %f81;
	// Largest of the four per-component distances.
	max.ftz.f32 	%f83, %f75, %f82;
	max.ftz.f32 	%f84, %f69, %f83;
	max.ftz.f32 	%f85, %f62, %f84;
	setp.ge.ftz.f32 	%p28, %f55, %f85;
	xor.pred 	%p29, %p27, %p28;
	@!%p29 bra 	$Lt_49_10754;
	// Selected: same luma-only conversion as opcode 1, except that %f9 is
	// left untouched here.
	.loc	22	267	0
	ld.const.f32 	%f86, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f86, %f6;
	ld.const.f32 	%f87, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f87, %f7, %f31;
	ld.const.f32 	%f88, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f88, %f5, %f33;
	ld.const.f32 	%f89, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f90, %f89, %f35;
	.loc	22	268	0
	ld.const.f32 	%f91, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f92, %f91, %f35;
	.loc	23	44	0
	ld.const.f32 	%f93, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f93, %f35;
	mov.f32 	%f6, %f90;
	mov.f32 	%f7, %f92;
	bra.uni 	$Lt_49_10754;
$Lt_49_12034:
	// Filter-0 case: tolerance-gated replacement of %f5/%f6/%f7.
	// "P0[+N]" below means the f32 at __cudaparm_..._paraminFilter0+N.
	// NOTE(review): %f5/%f6/%f7/%f8 look like the B/G/R/A channels of the
	// current pixel (see the matrix-row pairing further down) -- confirm
	// against the kernel prologue, which is outside this section.
	.loc	27	48	0
	ld.param.f32 	%f94, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+8];
	// %f101 = |clamp(%f5, 0, 1) - P0[+12]|
	mov.f32 	%f95, 0f00000000;    	// 0
	max.ftz.f32 	%f96, %f5, %f95;
	mov.f32 	%f97, 0f3f800000;    	// 1
	min.ftz.f32 	%f98, %f96, %f97;
	ld.param.f32 	%f99, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+12];
	sub.ftz.f32 	%f100, %f98, %f99;
	abs.ftz.f32 	%f101, %f100;
	// %f108 = |clamp(%f6, 0, 1) - P0[+16]|
	mov.f32 	%f102, 0f00000000;   	// 0
	max.ftz.f32 	%f103, %f6, %f102;
	mov.f32 	%f104, 0f3f800000;   	// 1
	min.ftz.f32 	%f105, %f103, %f104;
	ld.param.f32 	%f106, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+16];
	sub.ftz.f32 	%f107, %f105, %f106;
	abs.ftz.f32 	%f108, %f107;
	// %f114 = |clamp(%f8, 0, 1) - %f8|: compared with itself, so this term
	// is zero whenever %f8 already lies in [0, 1].
	mov.f32 	%f109, 0f00000000;   	// 0
	max.ftz.f32 	%f110, %f8, %f109;
	mov.f32 	%f111, 0f3f800000;   	// 1
	min.ftz.f32 	%f112, %f110, %f111;
	sub.ftz.f32 	%f113, %f112, %f8;
	abs.ftz.f32 	%f114, %f113;
	// %f121 = |clamp(%f7, 0, 1) - P0[+20]|
	mov.f32 	%f115, 0f00000000;   	// 0
	max.ftz.f32 	%f116, %f7, %f115;
	mov.f32 	%f117, 0f3f800000;   	// 1
	min.ftz.f32 	%f118, %f116, %f117;
	ld.param.f32 	%f119, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+20];
	sub.ftz.f32 	%f120, %f118, %f119;
	abs.ftz.f32 	%f121, %f120;
	// %f124 = largest of the four distances; leave the pixel untouched
	// unless P0[+8] >= %f124.
	max.ftz.f32 	%f122, %f114, %f121;
	max.ftz.f32 	%f123, %f108, %f122;
	max.ftz.f32 	%f124, %f101, %f123;
	setp.ge.ftz.f32 	%p30, %f94, %f124;
	@!%p30 bra 	$Lt_49_10754;
	.loc	27	51	0
	// Default replacement values: P0[+24], P0[+28], P0[+32].
	ld.param.f32 	%f125, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+24];
	mov.f32 	%f126, %f125;
	ld.param.f32 	%f127, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+28];
	mov.f32 	%f128, %f127;
	ld.param.f32 	%f129, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+32];
	mov.f32 	%f130, %f129;
	// Only when P0[+4] == 0 are the replacement values scaled (below).
	ld.param.f32 	%f131, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+4];
	mov.f32 	%f132, 0f00000000;   	// 0
	setp.eq.ftz.f32 	%p31, %f131, %f132;
	@!%p31 bra 	$Lt_49_160770;
	.loc	27	60	0
	// %f35 = K[+0]*%f7 + K[+4]*%f6 + K[+8]*%f5, K = kRGB32f_To_601YPbPr
	// (presumably the luma row of an RGB->YPbPr matrix -- inferred from the
	// symbol name only).
	ld.const.f32 	%f133, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f133, %f6;
	ld.const.f32 	%f134, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f134, %f7, %f31;
	ld.const.f32 	%f135, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f135, %f5, %f33;
	mul.ftz.f32 	%f126, %f35, %f125;
	.loc	27	61	0
	mul.ftz.f32 	%f128, %f35, %f127;
	.loc	27	62	0
	mul.ftz.f32 	%f130, %f35, %f129;
$Lt_49_160770:
	.loc	27	65	0
	// Commit the (possibly scaled) replacement values and rejoin.
	mov.f32 	%f5, %f126;
	mov.f32 	%f6, %f128;
	mov.f32 	%f7, %f130;
	bra.uni 	$Lt_49_10754;
$Lt_49_12290:
	// Filter-0 case: attenuate %f8 into %f9 from four integer distances.
	// With w = the f32 at paraminFilter0+4 and
	//   m = max(trunc(w - %r5), trunc(w - %r11),
	//           trunc(w - (%r13 - %r11)), trunc(w - (%r14 - %r5))),
	// writes %f9 = %f8 * (w - m) / w when m > 0; otherwise nothing changes.
	// NOTE(review): this reads like an edge feather of width w, assuming
	// %r5/%r11 are pixel coordinates and %r14/%r13 the matching extents --
	// confirm against the kernel prologue (outside this section).
	.loc	28	47	0
	sub.s32 	%r50, %r13, %r11;
	sub.s32 	%r51, %r14, %r5;
	cvt.rn.f32.s32 	%f136, %r5;
	cvt.rn.f32.s32 	%f137, %r11;
	cvt.rn.f32.s32 	%f138, %r50;
	cvt.rn.f32.s32 	%f139, %r51;
	ld.param.f32 	%f18, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+4];
	sub.ftz.f32 	%f140, %f18, %f136;
	sub.ftz.f32 	%f141, %f18, %f137;
	sub.ftz.f32 	%f142, %f18, %f138;
	sub.ftz.f32 	%f143, %f18, %f139;
	// Truncate toward zero (.rzi) before taking the signed maximum.
	cvt.rzi.ftz.s32.f32 	%r52, %f140;
	cvt.rzi.ftz.s32.f32 	%r53, %f141;
	cvt.rzi.ftz.s32.f32 	%r54, %f142;
	cvt.rzi.ftz.s32.f32 	%r55, %f143;
	max.s32 	%r56, %r52, %r53;
	max.s32 	%r57, %r55, %r56;
	max.s32 	%r58, %r54, %r57;
	// m <= 0: every distance is at least w, so skip the attenuation.
	mov.u32 	%r59, 0;
	setp.le.s32 	%p32, %r58, %r59;
	@%p32 bra 	$Lt_49_10754;
	.loc	28	51	0
	// %f9 = %f8 * (w - m) / w, using the approximate divide.
	cvt.rn.f32.s32 	%f144, %r58;
	sub.ftz.f32 	%f145, %f18, %f144;
	div.approx.ftz.f32 	%f146, %f145, %f18;
	mul.ftz.f32 	%f9, %f8, %f146;
	bra.uni 	$Lt_49_10754;
$Lt_49_12546:
	// Filter-0 case: band mask with soft edges.
	// "P0[+N]" = f32 at paraminFilter0+N; K = kRGB32f_To_601YPbPr.
	//   y = K[+0]*%f7 + K[+4]*%f6 + K[+8]*%f5              (%f35)
	//   w = P0[+16] * (P0[+12] - P0[+8])                   (%f152)
	//   k = 0                        if y <  clamp(P0[+8]-w) or y >= clamp(P0[+12]+w)
	//       1                        if clamp(P0[+8]+w) <= y < clamp(P0[+12]-w)
	//       (y - clamp(P0[+8]-w))/2w if y < clamp(P0[+8]+w)
	//       (clamp(P0[+12]+w) - y)/2w otherwise
	// (clamp is to [0, 1]). k is replaced by 1-k when P0[+4] != 0, then the
	// channels are overwritten with k times three k601YPbPr_To_RGB32f entries.
	.loc	30	50	0
	ld.const.f32 	%f147, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f147, %f6;
	ld.param.f32 	%f148, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+12];
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+8];
	sub.ftz.f32 	%f149, %f148, %f45;
	ld.const.f32 	%f150, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f150, %f7, %f31;
	ld.param.f32 	%f151, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+16];
	mul.ftz.f32 	%f152, %f151, %f149;
	ld.const.f32 	%f153, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f153, %f5, %f33;
	// Outer band limits: %f161 = clamp(P0[+12] + w), %f163 = clamp(P0[+8] - w).
	add.ftz.f32 	%f154, %f152, %f148;
	sub.ftz.f32 	%f155, %f45, %f152;
	mov.f32 	%f156, 0f00000000;   	// 0
	max.ftz.f32 	%f157, %f154, %f156;
	mov.f32 	%f158, 0f00000000;   	// 0
	max.ftz.f32 	%f159, %f155, %f158;
	mov.f32 	%f160, 0f3f800000;   	// 1
	min.ftz.f32 	%f161, %f157, %f160;
	mov.f32 	%f162, 0f3f800000;   	// 1
	min.ftz.f32 	%f163, %f159, %f162;
	// set.*.u32 yields all-ones/zero; neg turns all-ones into 1. The OR is
	// non-zero when y is outside the outer band.
	set.gt.ftz.u32.f32 	%r60, %f163, %f35;
	neg.s32 	%r61, %r60;
	set.le.ftz.u32.f32 	%r62, %f161, %f35;
	neg.s32 	%r63, %r62;
	or.b32 	%r64, %r61, %r63;
	mov.u32 	%r65, 0;
	setp.eq.s32 	%p33, %r64, %r65;
	@%p33 bra 	$Lt_49_162050;
	// Outside the outer band: k = 0.
	mov.f32 	%f164, 0f00000000;   	// 0
	bra.uni 	$Lt_49_162818;
$Lt_49_162050:
	// Inner band limits: %f169 = clamp(P0[+8] + w), %f174 = clamp(P0[+12] - w).
	add.ftz.f32 	%f165, %f152, %f45;
	mov.f32 	%f166, 0f00000000;   	// 0
	max.ftz.f32 	%f167, %f165, %f166;
	mov.f32 	%f168, 0f3f800000;   	// 1
	min.ftz.f32 	%f169, %f167, %f168;
	set.le.ftz.u32.f32 	%r66, %f169, %f35;
	neg.s32 	%r67, %r66;
	sub.ftz.f32 	%f170, %f148, %f152;
	mov.f32 	%f171, 0f00000000;   	// 0
	max.ftz.f32 	%f172, %f170, %f171;
	mov.f32 	%f173, 0f3f800000;   	// 1
	min.ftz.f32 	%f174, %f172, %f173;
	set.lt.ftz.u32.f32 	%r68, %f35, %f174;
	neg.s32 	%r69, %r68;
	and.b32 	%r70, %r67, %r69;
	mov.u32 	%r71, 0;
	setp.eq.s32 	%p34, %r70, %r71;
	@%p34 bra 	$Lt_49_162562;
	// Inside the inner band: k = 1.
	mov.f32 	%f164, 0f3f800000;   	// 1
	bra.uni 	$Lt_49_162818;
$Lt_49_162562:
	// In one of the two ramps; both have width 2w (%f175).
	add.ftz.f32 	%f175, %f152, %f152;
	setp.gt.ftz.f32 	%p35, %f169, %f35;
	@!%p35 bra 	$Lt_49_163074;
	.loc	30	62	0
	// Lower ramp: k = (y - lower outer limit) / 2w.
	sub.ftz.f32 	%f176, %f35, %f163;
	div.approx.ftz.f32 	%f164, %f176, %f175;
	bra.uni 	$Lt_49_162818;
$Lt_49_163074:
	.loc	30	66	0
	// Upper ramp: k = (upper outer limit - y) / 2w.
	sub.ftz.f32 	%f177, %f161, %f35;
	div.approx.ftz.f32 	%f164, %f177, %f175;
$Lt_49_162818:
$Lt_49_162306:
$Lt_49_161794:
	.loc	30	69	0
	// Invert the mask (k = 1 - k) when P0[+4] != 0; setp.neu is also true
	// for an unordered (NaN) compare.
	mov.f32 	%f178, 0f3f800000;   	// 1
	sub.ftz.f32 	%f179, %f178, %f164;
	ld.param.f32 	%f180, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+4];
	mov.f32 	%f181, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p36, %f180, %f181;
	selp.f32 	%f164, %f179, %f164, %p36;
	.loc	30	77	0
	// Output: each channel = k * k601YPbPr_To_RGB32f[+24 / +12 / +0].
	ld.const.f32 	%f182, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f182, %f164;
	ld.const.f32 	%f183, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f183, %f164;
	ld.const.f32 	%f184, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f184, %f164;
	.loc	38	79	0
	bra.uni 	$Lt_49_10754;
$Lt_49_12802:
	// Filter-0 case: gated level remap of the weighted sum y plus a
	// rotate / blend / scale of the other two matrix-row products, followed
	// by the inverse matrix. "P0[+N]" = f32 at paraminFilter0+N,
	// K = kRGB32f_To_601YPbPr, M = k601YPbPr_To_RGB32f.
	//
	// Gate: the body runs only when
	//   (P0[+44] >= 0 and float(%r5)  <   float(%r14) * P0[+44])  or
	//   (P0[+44] <  0 and float(%r11) < -(float(%r13) * P0[+44])).
	// NOTE(review): looks like a split-view / partial-frame preview gate --
	// confirm what %r5/%r11/%r13/%r14 hold (set outside this section).
	.loc	38	80	0
	ld.param.f32 	%f185, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+44];
	mov.f32 	%f186, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p37, %f185, %f186;
	@!%p37 bra 	$L_49_150786;
	cvt.rn.f32.s32 	%f187, %r5;
	cvt.rn.f32.s32 	%f188, %r14;
	mul.ftz.f32 	%f189, %f188, %f185;
	setp.lt.ftz.f32 	%p38, %f187, %f189;
	@%p38 bra 	$L_49_150530;
$L_49_150786:
	mov.f32 	%f190, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p39, %f185, %f190;
	@!%p39 bra 	$Lt_49_10754;
	cvt.rn.f32.s32 	%f191, %r11;
	cvt.rn.f32.s32 	%f192, %r13;
	mul.ftz.f32 	%f193, %f192, %f185;
	neg.ftz.f32 	%f194, %f193;
	setp.lt.ftz.f32 	%p40, %f191, %f194;
	@!%p40 bra 	$Lt_49_10754;
$L_49_150530:
	.loc	31	47	0
	// y = K[+0]*%f7 + K[+4]*%f6 + K[+8]*%f5 -> %f35, working copy in %f200.
	ld.const.f32 	%f195, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f196, %f195, %f6;
	ld.const.f32 	%f197, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f198, %f197, %f7, %f196;
	ld.const.f32 	%f199, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f199, %f5, %f198;
	mov.f32 	%f200, %f35;
	// Input range  = P0[+4]  - P0[+8]   (%f202)
	// Output range = P0[+12] - P0[+16]  (%f205)
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+8];
	ld.param.f32 	%f201, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+4];
	sub.ftz.f32 	%f202, %f201, %f45;
	ld.param.f32 	%f203, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+16];
	ld.param.f32 	%f204, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+12];
	sub.ftz.f32 	%f205, %f204, %f203;
	// Skip the dither below when both ranges equal exactly 1.
	mov.f32 	%f206, 0f3f800000;   	// 1
	set.neu.ftz.u32.f32 	%r72, %f205, %f206;
	neg.s32 	%r73, %r72;
	mov.f32 	%f207, 0f3f800000;   	// 1
	set.neu.ftz.u32.f32 	%r74, %f202, %f207;
	neg.s32 	%r75, %r74;
	or.b32 	%r76, %r73, %r75;
	mov.u32 	%r77, 0;
	setp.eq.s32 	%p41, %r76, %r77;
	@%p41 bra 	$Lt_49_163330;
	.loc	20	143	0
	// Integer mixing of three words seeded with (1, %r5, %r11): repeated
	// subtract / subtract / xor-with-shift rounds using shifts
	// 13, 8, 13, 12, 16, 5, 3, 10, 15. Presumably the inlined body of the
	// Mix3/Rand2D helpers declared at the top of the file -- TODO confirm.
	mov.s32 	%r78, 1;
	sub.s32 	%r79, %r78, %r5;
	shr.u32 	%r80, %r11, 13;
	sub.u32 	%r81, %r5, %r11;
	sub.u32 	%r82, %r79, %r11;
	xor.b32 	%r83, %r82, %r80;
	shl.b32 	%r84, %r83, 8;
	sub.u32 	%r85, %r81, %r83;
	sub.u32 	%r86, %r11, %r83;
	xor.b32 	%r87, %r85, %r84;
	shr.u32 	%r88, %r87, 13;
	sub.u32 	%r89, %r86, %r87;
	sub.u32 	%r90, %r83, %r87;
	xor.b32 	%r91, %r89, %r88;
	shr.u32 	%r92, %r91, 12;
	sub.u32 	%r93, %r90, %r91;
	xor.b32 	%r94, %r93, %r92;
	sub.u32 	%r95, %r87, %r91;
	sub.u32 	%r96, %r95, %r94;
	shl.b32 	%r97, %r94, 16;
	xor.b32 	%r98, %r96, %r97;
	.loc	20	144	0
	sub.u32 	%r99, %r91, %r94;
	sub.u32 	%r100, %r99, %r98;
	shr.u32 	%r101, %r98, 5;
	xor.b32 	%r102, %r100, %r101;
	.loc	20	145	0
	sub.u32 	%r103, %r94, %r98;
	sub.u32 	%r104, %r103, %r102;
	shr.u32 	%r105, %r102, 3;
	xor.b32 	%r106, %r104, %r105;
	.loc	20	146	0
	sub.u32 	%r107, %r98, %r102;
	sub.u32 	%r108, %r107, %r106;
	shl.b32 	%r109, %r106, 10;
	xor.b32 	%r110, %r108, %r109;
	.loc	20	147	0
	sub.u32 	%r111, %r102, %r106;
	sub.u32 	%r112, %r111, %r110;
	shr.u32 	%r113, %r110, 15;
	xor.b32 	%r114, %r112, %r113;
	.loc	31	57	0
	// Build a 15-bit value (0..32767) from bits 16..23 of two affine
	// functions of the mixed word: (hi8 << 7) ^ lo8. Map it to [-1, 1] via
	// /16383.5 - 1, then add 0.00254902 times that to y.
	mov.f32 	%f208, 0f3b270d73;   	// 0.00254902
	mul.lo.u32 	%r115, %r114, 1103515245;
	add.u32 	%r116, %r115, 12345;
	shr.u32 	%r117, %r116, 16;
	and.b32 	%r118, %r117, 255;
	shl.b32 	%r119, %r118, 7;
	mul.lo.u32 	%r120, %r114, -1029531031;
	sub.u32 	%r121, %r120, 740551042;
	shr.u32 	%r122, %r121, 16;
	and.b32 	%r123, %r122, 255;
	xor.b32 	%r124, %r119, %r123;
	cvt.rn.f32.s32 	%f209, %r124;
	mov.f32 	%f210, 0f467ffe00;   	// 16383.5
	div.approx.ftz.f32 	%f211, %f209, %f210;
	mov.f32 	%f212, 0fbf800000;   	// -1
	add.ftz.f32 	%f213, %f211, %f212;
	fma.rn.ftz.f32 	%f200, %f208, %f213, %f35;
$Lt_49_163330:
	// t = y - P0[+8]. P0[+20] is the exponent; exactly 1 selects the
	// linear path at $Lt_49_164098.
	sub.ftz.f32 	%f214, %f200, %f45;
	ld.param.f32 	%f215, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+20];
	mov.f32 	%f216, 0f3f800000;   	// 1
	setp.neu.ftz.f32 	%p42, %f215, %f216;
	@!%p42 bra 	$Lt_49_164098;
	mov.f32 	%f217, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p43, %f214, %f217;
	@!%p43 bra 	$Lt_49_164610;
	.loc	31	66	0
	// t < 0 with a non-unit exponent: pin the result to P0[+16].
	mov.f32 	%f200, %f203;
	bra.uni 	$Lt_49_163842;
$Lt_49_164610:
	.loc	31	70	0
	// y' = out_range * 2^(P0[+20] * log2(t / in_range)) + P0[+16]
	rcp.approx.ftz.f32 	%f218, %f202;
	mul.ftz.f32 	%f219, %f214, %f218;
	lg2.approx.ftz.f32 	%f220, %f219;
	mul.ftz.f32 	%f221, %f215, %f220;
	ex2.approx.ftz.f32 	%f222, %f221;
	fma.rn.ftz.f32 	%f200, %f205, %f222, %f203;
	bra.uni 	$Lt_49_163842;
$Lt_49_164098:
	.loc	31	77	0
	// Linear path: y' = out_range * (t / in_range) + P0[+16]
	rcp.approx.ftz.f32 	%f223, %f202;
	mul.ftz.f32 	%f224, %f214, %f223;
	fma.rn.ftz.f32 	%f200, %f205, %f224, %f203;
$Lt_49_163842:
	.loc	22	267	0
	// u = K[+12]*%f7 + K[+16]*%f6 + K[+20]*%f5  (%f239)
	// v = K[+24]*%f7 + K[+28]*%f6 + K[+32]*%f5  (%f237)
	// s, c = sin, cos of P0[+28] (approximate forms).
	ld.const.f32 	%f225, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f226, %f225, %f6;
	ld.const.f32 	%f227, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f228, %f227, %f6;
	ld.param.f32 	%f229, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+28];
	sin.approx.ftz.f32 	%f230, %f229;
	cos.approx.ftz.f32 	%f231, %f229;
	ld.const.f32 	%f232, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f233, %f232, %f7, %f226;
	ld.const.f32 	%f234, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f235, %f234, %f7, %f228;
	ld.const.f32 	%f236, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f237, %f236, %f5, %f233;
	ld.const.f32 	%f238, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f239, %f238, %f5, %f235;
	// Rotate (u, v): u' = u*c - v*s (%f243), v' = u*s + v*c (%f244).
	mul.ftz.f32 	%f240, %f230, %f237;
	mul.ftz.f32 	%f241, %f231, %f237;
	mul.ftz.f32 	%f242, %f239, %f231;
	sub.ftz.f32 	%f243, %f242, %f240;
	fma.rn.ftz.f32 	%f244, %f239, %f230, %f241;
	// Blend toward (P0[+32], P0[+36]) by factor P0[+40], then scale both
	// components by P0[+24].
	ld.param.f32 	%f245, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+32];
	sub.ftz.f32 	%f246, %f245, %f243;
	ld.param.f32 	%f247, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+36];
	sub.ftz.f32 	%f248, %f247, %f244;
	ld.param.f32 	%f249, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+40];
	fma.rn.ftz.f32 	%f250, %f249, %f246, %f243;
	fma.rn.ftz.f32 	%f251, %f249, %f248, %f244;
	ld.param.f32 	%f252, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+24];
	mul.ftz.f32 	%f253, %f250, %f252;
	mul.ftz.f32 	%f254, %f251, %f252;
	// Inverse matrix applied to (y', u'', v''):
	//   %f6 <- M[+12..+20], %f7 <- M[+0..+8], %f5 <- M[+24..+32].
	ld.const.f32 	%f255, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f256, %f255, %f253;
	ld.const.f32 	%f257, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f258, %f257, %f200, %f256;
	ld.const.f32 	%f259, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f260, %f259, %f254, %f258;
	.loc	22	268	0
	ld.const.f32 	%f261, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f262, %f261, %f253;
	ld.const.f32 	%f263, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f264, %f263, %f200, %f262;
	ld.const.f32 	%f265, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f266, %f265, %f254, %f264;
	.loc	31	92	0
	ld.const.f32 	%f267, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f268, %f267, %f253;
	ld.const.f32 	%f269, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f270, %f269, %f200, %f268;
	ld.const.f32 	%f271, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f271, %f254, %f270;
	mov.f32 	%f6, %f260;
	mov.f32 	%f7, %f266;
	bra.uni 	$Lt_49_10754;
$Lt_49_13058:
	// Filter-0 case: gated level remap of the weighted sum y only; the
	// result is broadcast through three k601YPbPr_To_RGB32f entries.
	// "P0[+N]" = f32 at paraminFilter0+N, K = kRGB32f_To_601YPbPr.
	//
	// Gate: the body runs only when
	//   (P0[+44] >= 0 and float(%r5)  <   float(%r14) * P0[+44])  or
	//   (P0[+44] <  0 and float(%r11) < -(float(%r13) * P0[+44])).
	// NOTE(review): meaning of %r5/%r11/%r13/%r14 is set outside this
	// section -- confirm.
	.loc	38	83	0
	ld.param.f32 	%f272, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+44];
	mov.f32 	%f273, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p44, %f272, %f273;
	@!%p44 bra 	$L_49_151554;
	cvt.rn.f32.s32 	%f274, %r5;
	cvt.rn.f32.s32 	%f275, %r14;
	mul.ftz.f32 	%f276, %f275, %f272;
	setp.lt.ftz.f32 	%p45, %f274, %f276;
	@%p45 bra 	$L_49_151298;
$L_49_151554:
	mov.f32 	%f277, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p46, %f272, %f277;
	@!%p46 bra 	$Lt_49_10754;
	cvt.rn.f32.s32 	%f278, %r11;
	cvt.rn.f32.s32 	%f279, %r13;
	mul.ftz.f32 	%f280, %f279, %f272;
	neg.ftz.f32 	%f281, %f280;
	setp.lt.ftz.f32 	%p47, %f278, %f281;
	@!%p47 bra 	$Lt_49_10754;
$L_49_151298:
	.loc	31	110	0
	// in_range  = P0[+4]  - P0[+8]   (%f284)
	// out_range = P0[+12] - P0[+16]  (%f287)
	// y = K[+0]*%f7 + K[+4]*%f6 + K[+8]*%f5 (%f35); t = y - P0[+8] (%f290).
	ld.const.f32 	%f282, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f282, %f6;
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+8];
	ld.param.f32 	%f283, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+4];
	sub.ftz.f32 	%f284, %f283, %f45;
	ld.param.f32 	%f285, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+16];
	ld.param.f32 	%f286, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+12];
	sub.ftz.f32 	%f287, %f286, %f285;
	ld.const.f32 	%f288, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f288, %f7, %f31;
	ld.const.f32 	%f289, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f289, %f5, %f33;
	sub.ftz.f32 	%f290, %f35, %f45;
	// P0[+20] is the exponent; exactly 1 selects the linear path.
	ld.param.f32 	%f291, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+20];
	mov.f32 	%f292, 0f3f800000;   	// 1
	setp.neu.ftz.f32 	%p48, %f291, %f292;
	@!%p48 bra 	$Lt_49_165122;
	.loc	42	523	0
	// Power path: t is floored at 0 before the divide, then
	// y' = out_range * 2^(P0[+20] * log2(t / in_range)) + P0[+16].
	mov.f32 	%f293, 0f00000000;   	// 0
	max.ftz.f32 	%f294, %f290, %f293;
	div.approx.ftz.f32 	%f295, %f294, %f284;
	lg2.approx.ftz.f32 	%f296, %f295;
	mul.ftz.f32 	%f297, %f291, %f296;
	ex2.approx.ftz.f32 	%f298, %f297;
	.loc	31	120	0
	fma.rn.ftz.f32 	%f299, %f287, %f298, %f285;
	bra.uni 	$Lt_49_164866;
$Lt_49_165122:
	.loc	31	129	0
	// Linear path (t is not floored here):
	// y' = out_range * (t / in_range) + P0[+16].
	div.approx.ftz.f32 	%f300, %f290, %f284;
	fma.rn.ftz.f32 	%f299, %f287, %f300, %f285;
$Lt_49_164866:
	.loc	31	135	0
	// Output: each channel = y' * k601YPbPr_To_RGB32f[+24 / +12 / +0].
	ld.const.f32 	%f301, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f301, %f299;
	ld.const.f32 	%f302, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f302, %f299;
	ld.const.f32 	%f303, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f303, %f299;
	bra.uni 	$Lt_49_10754;
$Lt_49_13314:
	// Filter-0 case: gated constant fill. When the gate passes, %f5/%f6/%f7
	// are overwritten with k601YPbPr_To_RGB32f[+24], [+12], [+0].
	// "P0[+44]" = f32 at paraminFilter0+44.
	//
	// Gate: the fill happens only when
	//   (P0[+44] >= 0 and float(%r5)  <   float(%r14) * P0[+44])  or
	//   (P0[+44] <  0 and float(%r11) < -(float(%r13) * P0[+44])).
	.loc	38	86	0
	ld.param.f32 	%f304, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+44];
	mov.f32 	%f305, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p49, %f304, %f305;
	@!%p49 bra 	$L_49_152322;
	cvt.rn.f32.s32 	%f306, %r5;
	cvt.rn.f32.s32 	%f307, %r14;
	mul.ftz.f32 	%f308, %f307, %f304;
	setp.lt.ftz.f32 	%p50, %f306, %f308;
	@%p50 bra 	$L_49_152066;
$L_49_152322:
	// Non-negative P0[+44] that failed the first test exits here as well.
	mov.f32 	%f309, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p51, %f304, %f309;
	@!%p51 bra 	$Lt_49_10754;
	cvt.rn.f32.s32 	%f310, %r11;
	cvt.rn.f32.s32 	%f311, %r13;
	mul.ftz.f32 	%f312, %f311, %f304;
	neg.ftz.f32 	%f313, %f312;
	setp.lt.ftz.f32 	%p52, %f310, %f313;
	@!%p52 bra 	$Lt_49_10754;
$L_49_152066:
	.loc	31	160	0
	// Load the three constants straight into the channel registers.
	ld.const.f32 	%f5, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f6, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f7, [k601YPbPr_To_RGB32f+0];
	bra.uni 	$Lt_49_10754;
$Lt_49_13570:
	// Filter-0 case: sign-preserving power curve on four channels.
	// With g = the f32 at paraminFilter0+4 (kept in %f18), each of
	// %f5, %f6, %f7, %f8 is mapped as
	//   x >= 0 :  2^(g * log2(x))
	//   else   : -2^(g * log2(-x))
	// using the approximate lg2/ex2. The results go to %f5, %f6, %f7 and
	// %f9 (the %f8 result lands in %f9; %f8 itself is not written).
	.loc	32	42	0
	ld.param.f32 	%f314, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+4];
	mov.f32 	%f315, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p53, %f5, %f315;
	@!%p53 bra 	$Lt_49_165634;
	.loc	22	292	0
	// %f5, non-negative branch. %f18 is set on both sides of this test so
	// that the later channels can reuse it.
	mov.f32 	%f18, %f314;
	lg2.approx.ftz.f32 	%f316, %f5;
	mul.ftz.f32 	%f317, %f18, %f316;
	ex2.approx.ftz.f32 	%f318, %f317;
	bra.uni 	$Lt_49_165378;
$Lt_49_165634:
	// %f5, negative branch: curve applied to -x, then negated.
	mov.f32 	%f18, %f314;
	neg.ftz.f32 	%f319, %f5;
	lg2.approx.ftz.f32 	%f320, %f319;
	mul.ftz.f32 	%f321, %f18, %f320;
	ex2.approx.ftz.f32 	%f322, %f321;
	neg.ftz.f32 	%f318, %f322;
$Lt_49_165378:
	// Same treatment for %f6 -> %f326.
	mov.f32 	%f323, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p54, %f6, %f323;
	@!%p54 bra 	$Lt_49_166146;
	.loc	22	293	0
	lg2.approx.ftz.f32 	%f324, %f6;
	mul.ftz.f32 	%f325, %f18, %f324;
	ex2.approx.ftz.f32 	%f326, %f325;
	bra.uni 	$Lt_49_165890;
$Lt_49_166146:
	neg.ftz.f32 	%f327, %f6;
	lg2.approx.ftz.f32 	%f328, %f327;
	mul.ftz.f32 	%f329, %f18, %f328;
	ex2.approx.ftz.f32 	%f330, %f329;
	neg.ftz.f32 	%f326, %f330;
$Lt_49_165890:
	// Same treatment for %f7 -> %f334.
	mov.f32 	%f331, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p55, %f7, %f331;
	@!%p55 bra 	$Lt_49_166658;
	.loc	22	294	0
	lg2.approx.ftz.f32 	%f332, %f7;
	mul.ftz.f32 	%f333, %f18, %f332;
	ex2.approx.ftz.f32 	%f334, %f333;
	bra.uni 	$Lt_49_166402;
$Lt_49_166658:
	neg.ftz.f32 	%f335, %f7;
	lg2.approx.ftz.f32 	%f336, %f335;
	mul.ftz.f32 	%f337, %f18, %f336;
	ex2.approx.ftz.f32 	%f338, %f337;
	neg.ftz.f32 	%f334, %f338;
$Lt_49_166402:
	// Same treatment for %f8 -> %f342.
	mov.f32 	%f339, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p56, %f8, %f339;
	@!%p56 bra 	$Lt_49_167170;
	.loc	22	295	0
	lg2.approx.ftz.f32 	%f340, %f8;
	mul.ftz.f32 	%f341, %f18, %f340;
	ex2.approx.ftz.f32 	%f342, %f341;
	bra.uni 	$Lt_49_166914;
$Lt_49_167170:
	neg.ftz.f32 	%f343, %f8;
	lg2.approx.ftz.f32 	%f344, %f343;
	mul.ftz.f32 	%f345, %f18, %f344;
	ex2.approx.ftz.f32 	%f346, %f345;
	neg.ftz.f32 	%f342, %f346;
$Lt_49_166914:
	.loc	32	42	0
	// Commit all four results at once.
	mov.f32 	%f5, %f318;
	mov.f32 	%f6, %f326;
	mov.f32 	%f7, %f334;
	mov.f32 	%f9, %f342;
	.loc	38	91	0
	bra.uni 	$Lt_49_10754;
$Lt_49_13826:
	// Filter-0 case: %r6 = %r14 - %r5 - 1.
	// NOTE(review): reads like mirroring the index %r5 within an extent
	// %r14 (a flip along that axis) -- confirm the roles of %r5/%r6/%r14,
	// which are assigned outside this section.
	.loc	33	41	0
	sub.s32 	%r125, %r14, %r5;
	sub.s32 	%r6, %r125, 1;
	.loc	38	94	0
	bra.uni 	$Lt_49_10754;
$Lt_49_14082:
	// Filter-0 case: add hash-based pseudo-random noise to %f5/%f6/%f7.
	// "P0[+N]" = f32 at paraminFilter0+N.
	//   amplitude a = P0[+4] (%f18); half amplitude %f348 = a * 0.5
	//   seed word   = trunc-to-u32 of P0[+16]
	//   P0[+8] == 1 : three independent noise values, one per channel
	//   otherwise   : one noise value shared by all three channels
	//   P0[+12] == 1: afterwards clamp %f5/%f6/%f7 to [0, 1] and set
	//                 %f9 = clamp(%f8, 0, 1)
	// Each noise value is a * (r / 32767) - a/2 with r in 0..32767.
	// NOTE(review): the mixing rounds are presumably the inlined Mix3 /
	// Rand2D helpers declared at the top of the file -- TODO confirm.
	.loc	38	96	0
	ld.param.f32 	%f18, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+4];
	mov.f32 	%f347, 0f3f000000;   	// 0.5
	mul.ftz.f32 	%f348, %f18, %f347;
	ld.param.f32 	%f349, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+16];
	ld.param.f32 	%f350, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+8];
	mov.f32 	%f351, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p57, %f350, %f351;
	@!%p57 bra 	$Lt_49_167682;
	.loc	20	143	0
	// Per-channel path, first value: mix seeded with
	// (3 * %r5, %r11, trunc(P0[+16])). %r129/%r130/%r131 are reused by the
	// two following mixes, which start from %r131 + 1 and %r131 + 2.
	mul.lo.s32 	%r126, %r5, 3;
	cvt.rzi.ftz.u32.f32 	%r127, %f349;
	sub.u32 	%r128, %r126, %r11;
	shr.u32 	%r129, %r127, 13;
	sub.u32 	%r130, %r11, %r127;
	sub.u32 	%r131, %r128, %r127;
	xor.b32 	%r132, %r131, %r129;
	shl.b32 	%r133, %r132, 8;
	sub.u32 	%r134, %r130, %r132;
	sub.u32 	%r135, %r127, %r132;
	xor.b32 	%r136, %r133, %r134;
	shr.u32 	%r137, %r136, 13;
	sub.u32 	%r138, %r135, %r136;
	sub.u32 	%r139, %r132, %r136;
	xor.b32 	%r140, %r137, %r138;
	shr.u32 	%r141, %r140, 12;
	sub.u32 	%r142, %r139, %r140;
	xor.b32 	%r143, %r141, %r142;
	shl.b32 	%r144, %r143, 16;
	sub.u32 	%r145, %r136, %r140;
	sub.u32 	%r146, %r145, %r143;
	xor.b32 	%r147, %r144, %r146;
	.loc	20	144	0
	sub.u32 	%r148, %r140, %r143;
	sub.u32 	%r149, %r148, %r147;
	shr.u32 	%r150, %r147, 5;
	xor.b32 	%r151, %r149, %r150;
	.loc	20	145	0
	sub.u32 	%r152, %r143, %r147;
	sub.u32 	%r153, %r152, %r151;
	shr.u32 	%r154, %r151, 3;
	xor.b32 	%r155, %r153, %r154;
	.loc	20	146	0
	sub.u32 	%r156, %r147, %r151;
	sub.u32 	%r157, %r156, %r155;
	shl.b32 	%r158, %r155, 10;
	xor.b32 	%r159, %r157, %r158;
	.loc	20	147	0
	sub.u32 	%r160, %r151, %r155;
	sub.u32 	%r161, %r160, %r159;
	shr.u32 	%r162, %r159, 15;
	xor.b32 	%r163, %r161, %r162;
	.loc	34	48	0
	// 15-bit value from bits 16..23 of two affine functions of the mixed
	// word: (hi8 << 7) ^ lo8. Noise for %f7: %f356 = a * r/32767 - a/2.
	mul.lo.u32 	%r164, %r163, 1103515245;
	add.u32 	%r165, %r164, 12345;
	shr.u32 	%r166, %r165, 16;
	and.b32 	%r167, %r166, 255;
	shl.b32 	%r168, %r167, 7;
	mul.lo.u32 	%r169, %r163, -1029531031;
	sub.u32 	%r170, %r169, 740551042;
	shr.u32 	%r171, %r170, 16;
	and.b32 	%r172, %r171, 255;
	xor.b32 	%r173, %r168, %r172;
	cvt.rn.f32.s32 	%f352, %r173;
	mov.f32 	%f353, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f354, %f352, %f353;
	mul.ftz.f32 	%f355, %f18, %f354;
	sub.ftz.f32 	%f356, %f355, %f348;
	.loc	20	143	0
	// Second value: same mix with the first word advanced by 1.
	add.u32 	%r174, %r131, 1;
	xor.b32 	%r175, %r174, %r129;
	shl.b32 	%r176, %r175, 8;
	sub.u32 	%r177, %r130, %r175;
	sub.u32 	%r178, %r127, %r175;
	xor.b32 	%r179, %r176, %r177;
	shr.u32 	%r180, %r179, 13;
	sub.u32 	%r181, %r178, %r179;
	sub.u32 	%r182, %r175, %r179;
	xor.b32 	%r183, %r180, %r181;
	shr.u32 	%r184, %r183, 12;
	sub.u32 	%r185, %r182, %r183;
	xor.b32 	%r186, %r184, %r185;
	sub.u32 	%r187, %r179, %r183;
	sub.u32 	%r188, %r187, %r186;
	shl.b32 	%r189, %r186, 16;
	xor.b32 	%r190, %r188, %r189;
	.loc	20	144	0
	sub.u32 	%r191, %r183, %r186;
	sub.u32 	%r192, %r191, %r190;
	shr.u32 	%r193, %r190, 5;
	xor.b32 	%r194, %r192, %r193;
	.loc	20	145	0
	sub.u32 	%r195, %r186, %r190;
	sub.u32 	%r196, %r195, %r194;
	shr.u32 	%r197, %r194, 3;
	xor.b32 	%r198, %r196, %r197;
	.loc	20	146	0
	sub.u32 	%r199, %r190, %r194;
	sub.u32 	%r200, %r199, %r198;
	shl.b32 	%r201, %r198, 10;
	xor.b32 	%r202, %r200, %r201;
	.loc	20	147	0
	sub.u32 	%r203, %r194, %r198;
	sub.u32 	%r204, %r203, %r202;
	shr.u32 	%r205, %r202, 15;
	xor.b32 	%r206, %r204, %r205;
	.loc	34	49	0
	// Noise for %f6: %f361.
	mul.lo.u32 	%r207, %r206, 1103515245;
	add.u32 	%r208, %r207, 12345;
	shr.u32 	%r209, %r208, 16;
	and.b32 	%r210, %r209, 255;
	shl.b32 	%r211, %r210, 7;
	mul.lo.u32 	%r212, %r206, -1029531031;
	sub.u32 	%r213, %r212, 740551042;
	shr.u32 	%r214, %r213, 16;
	and.b32 	%r215, %r214, 255;
	xor.b32 	%r216, %r211, %r215;
	cvt.rn.f32.s32 	%f357, %r216;
	mov.f32 	%f358, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f359, %f357, %f358;
	mul.ftz.f32 	%f360, %f18, %f359;
	sub.ftz.f32 	%f361, %f360, %f348;
	.loc	20	143	0
	// Third value: same mix with the first word advanced by 2.
	add.u32 	%r217, %r131, 2;
	xor.b32 	%r218, %r217, %r129;
	shl.b32 	%r219, %r218, 8;
	sub.u32 	%r220, %r130, %r218;
	sub.u32 	%r221, %r127, %r218;
	xor.b32 	%r222, %r219, %r220;
	shr.u32 	%r223, %r222, 13;
	sub.u32 	%r224, %r221, %r222;
	sub.u32 	%r225, %r218, %r222;
	xor.b32 	%r226, %r223, %r224;
	shr.u32 	%r227, %r226, 12;
	sub.u32 	%r228, %r225, %r226;
	xor.b32 	%r229, %r227, %r228;
	sub.u32 	%r230, %r222, %r226;
	sub.u32 	%r231, %r230, %r229;
	shl.b32 	%r232, %r229, 16;
	xor.b32 	%r233, %r231, %r232;
	.loc	20	144	0
	sub.u32 	%r234, %r226, %r229;
	sub.u32 	%r235, %r234, %r233;
	shr.u32 	%r236, %r233, 5;
	xor.b32 	%r237, %r235, %r236;
	.loc	20	145	0
	sub.u32 	%r238, %r229, %r233;
	sub.u32 	%r239, %r238, %r237;
	shr.u32 	%r240, %r237, 3;
	xor.b32 	%r241, %r239, %r240;
	.loc	20	146	0
	sub.u32 	%r242, %r233, %r237;
	sub.u32 	%r243, %r242, %r241;
	shl.b32 	%r244, %r241, 10;
	xor.b32 	%r245, %r243, %r244;
	.loc	20	147	0
	sub.u32 	%r246, %r237, %r241;
	sub.u32 	%r247, %r246, %r245;
	shr.u32 	%r248, %r245, 15;
	xor.b32 	%r249, %r247, %r248;
	.loc	34	50	0
	// Noise for %f5: %f366.
	mul.lo.u32 	%r250, %r249, 1103515245;
	add.u32 	%r251, %r250, 12345;
	shr.u32 	%r252, %r251, 16;
	and.b32 	%r253, %r252, 255;
	shl.b32 	%r254, %r253, 7;
	mul.lo.u32 	%r255, %r249, -1029531031;
	sub.u32 	%r256, %r255, 740551042;
	shr.u32 	%r257, %r256, 16;
	and.b32 	%r258, %r257, 255;
	xor.b32 	%r259, %r254, %r258;
	cvt.rn.f32.s32 	%f362, %r259;
	mov.f32 	%f363, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f364, %f362, %f363;
	mul.ftz.f32 	%f365, %f18, %f364;
	sub.ftz.f32 	%f366, %f365, %f348;
	bra.uni 	$Lt_49_167426;
$Lt_49_167682:
	.loc	20	143	0
	// Shared path: a single mix seeded with (%r5, %r11, trunc(P0[+16]));
	// the resulting noise value is copied to all three channel slots.
	sub.u32 	%r81, %r5, %r11;
	cvt.rzi.ftz.u32.f32 	%r260, %f349;
	shr.u32 	%r261, %r260, 13;
	sub.u32 	%r262, %r81, %r260;
	sub.u32 	%r263, %r11, %r260;
	xor.b32 	%r264, %r262, %r261;
	shl.b32 	%r265, %r264, 8;
	sub.u32 	%r266, %r263, %r264;
	sub.u32 	%r267, %r260, %r264;
	xor.b32 	%r268, %r265, %r266;
	shr.u32 	%r269, %r268, 13;
	sub.u32 	%r270, %r267, %r268;
	sub.u32 	%r271, %r264, %r268;
	xor.b32 	%r272, %r269, %r270;
	shr.u32 	%r273, %r272, 12;
	sub.u32 	%r274, %r271, %r272;
	xor.b32 	%r275, %r273, %r274;
	shl.b32 	%r276, %r275, 16;
	sub.u32 	%r277, %r268, %r272;
	sub.u32 	%r278, %r277, %r275;
	xor.b32 	%r279, %r276, %r278;
	.loc	20	144	0
	sub.u32 	%r280, %r272, %r275;
	sub.u32 	%r281, %r280, %r279;
	shr.u32 	%r282, %r279, 5;
	xor.b32 	%r283, %r281, %r282;
	.loc	20	145	0
	sub.u32 	%r284, %r275, %r279;
	sub.u32 	%r285, %r284, %r283;
	shr.u32 	%r286, %r283, 3;
	xor.b32 	%r287, %r285, %r286;
	.loc	20	146	0
	sub.u32 	%r288, %r279, %r283;
	sub.u32 	%r289, %r288, %r287;
	shl.b32 	%r290, %r287, 10;
	xor.b32 	%r291, %r289, %r290;
	.loc	20	147	0
	sub.u32 	%r292, %r283, %r287;
	sub.u32 	%r293, %r292, %r291;
	shr.u32 	%r294, %r291, 15;
	xor.b32 	%r295, %r293, %r294;
	.loc	34	54	0
	mul.lo.u32 	%r296, %r295, 1103515245;
	mul.lo.u32 	%r297, %r295, -1029531031;
	add.u32 	%r298, %r296, 12345;
	sub.u32 	%r299, %r297, 740551042;
	shr.u32 	%r300, %r298, 16;
	shr.u32 	%r301, %r299, 16;
	and.b32 	%r302, %r300, 255;
	and.b32 	%r303, %r301, 255;
	shl.b32 	%r304, %r302, 7;
	xor.b32 	%r305, %r304, %r303;
	cvt.rn.f32.s32 	%f367, %r305;
	mov.f32 	%f368, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f369, %f367, %f368;
	mul.ftz.f32 	%f370, %f18, %f369;
	sub.ftz.f32 	%f371, %f370, %f348;
	mov.f32 	%f366, %f371;
	mov.f32 	%f361, %f371;
	mov.f32 	%f356, %f371;
$Lt_49_167426:
	.loc	34	57	0
	// Apply the noise: %f7 += %f356, %f6 += %f361, %f5 += %f366.
	add.ftz.f32 	%f7, %f356, %f7;
	.loc	34	58	0
	add.ftz.f32 	%f6, %f361, %f6;
	.loc	34	59	0
	add.ftz.f32 	%f5, %f5, %f366;
	// Optional clamp, enabled only when P0[+12] == 1.
	ld.param.f32 	%f372, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+12];
	mov.f32 	%f373, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p58, %f372, %f373;
	@!%p58 bra 	$Lt_49_10754;
	.loc	34	63	0
	mov.f32 	%f374, 0f00000000;   	// 0
	max.ftz.f32 	%f375, %f5, %f374;
	mov.f32 	%f376, 0f3f800000;   	// 1
	min.ftz.f32 	%f5, %f375, %f376;
	mov.f32 	%f377, 0f00000000;   	// 0
	max.ftz.f32 	%f378, %f6, %f377;
	mov.f32 	%f379, 0f3f800000;   	// 1
	min.ftz.f32 	%f6, %f378, %f379;
	mov.f32 	%f380, 0f00000000;   	// 0
	max.ftz.f32 	%f381, %f7, %f380;
	mov.f32 	%f382, 0f3f800000;   	// 1
	min.ftz.f32 	%f7, %f381, %f382;
	// The fourth channel is clamped from %f8 into %f9.
	mov.f32 	%f383, 0f00000000;   	// 0
	max.ftz.f32 	%f384, %f8, %f383;
	mov.f32 	%f385, 0f3f800000;   	// 1
	min.ftz.f32 	%f9, %f384, %f385;
	bra.uni 	$Lt_49_10754;
$Lt_49_14338:
	// Filter-0 case: gated round trip through the two constant matrices
	// with an affine change of the first component and a 2x2 mix of the
	// other two. "P0[+N]" = f32 at paraminFilter0+N,
	// K = kRGB32f_To_601YPbPr, M = k601YPbPr_To_RGB32f.
	//
	// Gate: runs only when float(%r5) < float(%r14) * P0[+20].
	.loc	38	99	0
	cvt.rn.f32.s32 	%f386, %r5;
	cvt.rn.f32.s32 	%f387, %r14;
	ld.param.f32 	%f388, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+20];
	mul.ftz.f32 	%f389, %f387, %f388;
	setp.lt.ftz.f32 	%p59, %f386, %f389;
	@!%p59 bra 	$Lt_49_10754;
	.loc	22	267	0
	// Forward matrix on (%f7, %f6, %f5), three rows interleaved:
	//   y = K[+0..+8]   . (%f7, %f6, %f5)  -> %f35
	//   u = K[+12..+20] . (%f7, %f6, %f5)  -> %f404
	//   v = K[+24..+32] . (%f7, %f6, %f5)  -> %f402
	ld.const.f32 	%f390, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f390, %f6;
	ld.const.f32 	%f391, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f392, %f391, %f6;
	ld.const.f32 	%f393, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f394, %f393, %f6;
	ld.const.f32 	%f395, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f395, %f7, %f31;
	ld.const.f32 	%f396, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f397, %f396, %f7, %f392;
	ld.const.f32 	%f398, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f399, %f398, %f7, %f394;
	ld.const.f32 	%f400, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f400, %f5, %f33;
	ld.const.f32 	%f401, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f402, %f401, %f5, %f397;
	ld.const.f32 	%f403, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f404, %f403, %f5, %f399;
	// y' = y * P0[+8] + P0[+4]
	ld.param.f32 	%f405, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+4];
	ld.param.f32 	%f406, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+8];
	fma.rn.ftz.f32 	%f407, %f35, %f406, %f405;
	// u' = u*P0[+12] - v*P0[+16]   (%f413)
	// v' = u*P0[+16] + v*P0[+12]   (%f414)
	// NOTE(review): rotation-shaped; P0[+12]/P0[+16] are presumably
	// host-precomputed cos/sin terms (possibly pre-scaled) -- confirm.
	ld.param.f32 	%f408, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+16];
	mul.ftz.f32 	%f409, %f402, %f408;
	ld.param.f32 	%f410, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+12];
	mul.ftz.f32 	%f411, %f402, %f410;
	mul.ftz.f32 	%f412, %f404, %f410;
	sub.ftz.f32 	%f413, %f412, %f409;
	fma.rn.ftz.f32 	%f414, %f404, %f408, %f411;
	// Inverse matrix on (y', u', v'):
	//   %f6 <- M[+12..+20], %f7 <- M[+0..+8], %f5 <- M[+24..+32].
	ld.const.f32 	%f415, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f416, %f415, %f413;
	ld.const.f32 	%f417, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f418, %f417, %f407, %f416;
	ld.const.f32 	%f419, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f420, %f419, %f414, %f418;
	.loc	22	268	0
	ld.const.f32 	%f421, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f422, %f421, %f413;
	ld.const.f32 	%f423, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f424, %f423, %f407, %f422;
	ld.const.f32 	%f425, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f426, %f425, %f414, %f424;
	.loc	35	56	0
	ld.const.f32 	%f427, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f428, %f427, %f413;
	ld.const.f32 	%f429, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f430, %f429, %f407, %f428;
	ld.const.f32 	%f431, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f431, %f414, %f430;
	mov.f32 	%f6, %f420;
	mov.f32 	%f7, %f426;
	bra.uni 	$Lt_49_10754;
$Lt_49_14594:
	// Filter-0 case: two-endpoint tint driven by the weighted sum y.
	// "P0[+N]" = f32 at paraminFilter0+N, K = kRGB32f_To_601YPbPr.
	//   y      = K[+0]*%f7 + K[+4]*%f6 + K[+8]*%f5            (%f35)
	//   target = A + y * (B - A) per channel, with endpoint pairs
	//            %f5: A = P0[+4],  B = P0[+16]
	//            %f6: A = P0[+8],  B = P0[+20]
	//            %f7: A = P0[+12], B = P0[+24]
	//   channel = channel + P0[+28] * (target - channel)
	.loc	36	46	0
	ld.const.f32 	%f432, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f432, %f6;
	ld.const.f32 	%f433, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f433, %f7, %f31;
	ld.const.f32 	%f434, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f434, %f5, %f33;
	// Target for %f6 -> %f437.
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+8];
	ld.param.f32 	%f435, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+20];
	sub.ftz.f32 	%f436, %f435, %f45;
	fma.rn.ftz.f32 	%f437, %f35, %f436, %f45;
	.loc	36	47	0
	// Target for %f7 -> %f441.
	ld.param.f32 	%f438, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+12];
	ld.param.f32 	%f439, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+24];
	sub.ftz.f32 	%f440, %f439, %f438;
	fma.rn.ftz.f32 	%f441, %f35, %f440, %f438;
	.loc	36	49	0
	// Blend amount P0[+28] (%f442); target for %f5 -> %f445, then blend.
	ld.param.f32 	%f442, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+28];
	ld.param.f32 	%f18, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+4];
	ld.param.f32 	%f443, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter0+16];
	sub.ftz.f32 	%f444, %f443, %f18;
	fma.rn.ftz.f32 	%f445, %f35, %f444, %f18;
	sub.ftz.f32 	%f446, %f445, %f5;
	fma.rn.ftz.f32 	%f5, %f442, %f446, %f5;
	.loc	36	50	0
	sub.ftz.f32 	%f447, %f437, %f6;
	fma.rn.ftz.f32 	%f6, %f442, %f447, %f6;
	.loc	36	51	0
	sub.ftz.f32 	%f448, %f441, %f7;
	fma.rn.ftz.f32 	%f7, %f442, %f448, %f7;
	.loc	38	103	0
	bra.uni 	$Lt_49_10754;
$Lt_49_14850:
	// Filter-0 case: %r12 = %r13 - %r11 - 1, then falls straight through
	// into the label that follows (no branch needed).
	// NOTE(review): reads like mirroring the index %r11 within an extent
	// %r13 -- confirm the roles of %r11/%r12/%r13 (assigned outside this
	// section).
	.loc	37	41	0
	sub.s32 	%r306, %r13, %r11;
	sub.s32 	%r12, %r306, 1;
$Lt_49_10754:
	// Join point of every filter-0 case in this section, and dispatch for
	// the next stage: read the u32 selector at paraminFilter1+0 (%r307) and
	// run a linear compare-and-branch chain over the values 0..16. Any
	// other selector value goes to $Lt_49_5634.
	.loc	38	54	0
	ld.param.u32 	%r307, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+0];
	mov.u32 	%r308, 0;
	setp.eq.s32 	%p60, %r307, %r308;
	@%p60 bra 	$Lt_49_5378;
	mov.u32 	%r309, 1;
	setp.eq.s32 	%p61, %r307, %r309;
	@%p61 bra 	$Lt_49_5890;
	mov.u32 	%r310, 2;
	setp.eq.s32 	%p62, %r307, %r310;
	@%p62 bra 	$Lt_49_6146;
	mov.u32 	%r311, 3;
	setp.eq.s32 	%p63, %r307, %r311;
	@%p63 bra 	$Lt_49_6402;
	mov.u32 	%r312, 4;
	setp.eq.s32 	%p64, %r307, %r312;
	@%p64 bra 	$Lt_49_6658;
	mov.u32 	%r313, 5;
	setp.eq.s32 	%p65, %r307, %r313;
	@%p65 bra 	$Lt_49_6914;
	mov.u32 	%r314, 6;
	setp.eq.s32 	%p66, %r307, %r314;
	@%p66 bra 	$Lt_49_7170;
	mov.u32 	%r315, 7;
	setp.eq.s32 	%p67, %r307, %r315;
	@%p67 bra 	$Lt_49_7426;
	mov.u32 	%r316, 8;
	setp.eq.s32 	%p68, %r307, %r316;
	@%p68 bra 	$Lt_49_7682;
	mov.u32 	%r317, 9;
	setp.eq.s32 	%p69, %r307, %r317;
	@%p69 bra 	$Lt_49_7938;
	mov.u32 	%r318, 10;
	setp.eq.s32 	%p70, %r307, %r318;
	@%p70 bra 	$Lt_49_8194;
	mov.u32 	%r319, 11;
	setp.eq.s32 	%p71, %r307, %r319;
	@%p71 bra 	$Lt_49_8450;
	mov.u32 	%r320, 12;
	setp.eq.s32 	%p72, %r307, %r320;
	@%p72 bra 	$Lt_49_8706;
	mov.u32 	%r321, 13;
	setp.eq.s32 	%p73, %r307, %r321;
	@%p73 bra 	$Lt_49_8962;
	mov.u32 	%r322, 14;
	setp.eq.s32 	%p74, %r307, %r322;
	@%p74 bra 	$Lt_49_9218;
	mov.u32 	%r323, 15;
	setp.eq.s32 	%p75, %r307, %r323;
	@%p75 bra 	$Lt_49_9474;
	mov.u32 	%r324, 16;
	setp.eq.s32 	%p76, %r307, %r324;
	@%p76 bra 	$Lt_49_9730;
	// Selector outside 0..16: skip this stage.
	bra.uni 	$Lt_49_5634;
$Lt_49_5378:
	// Filter-1 case 0: adjust %f9 using a level and three flags.
	// "P1[+N]" = f32 at paraminFilter1+N. With
	//   lvl = P1[+4], F8 = (P1[+8] == 1), F12 = (P1[+12] == 1),
	//   F16 = (P1[+16] == 1), a = saturate(%f9):
	//   F16            : v = F8 ? lvl : lvl * a;  v = F12 ? 1 - v : v;
	//                    %f9 = v and %f5 = %f6 = %f7 = v
	//   !F16 and F8    : %f9 = F12 ? 1 - lvl : lvl
	//   !F16 and !F8   : %f9 = (F12 ? 1 - a : a) * lvl
	// NOTE(review): %f9 is presumably the output alpha and F12 an "invert"
	// switch -- confirm against the host-side parameter layout.
	.loc	21	42	0
	// cvt with .sat clamps %f9 to [0, 1] in place.
	cvt.ftz.sat.f32.f32 	%f9, %f9;
	ld.param.f32 	%f449, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+8];
	mov.f32 	%f450, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p77, %f449, %f450;
	ld.param.f32 	%f451, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+12];
	mov.f32 	%f452, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p78, %f451, %f452;
	ld.param.f32 	%f453, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+4];
	ld.param.f32 	%f454, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+16];
	mov.f32 	%f455, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p79, %f454, %f455;
	@!%p79 bra 	$Lt_49_169218;
	.loc	21	45	0
	// F16 set: compute v and broadcast it to all four channels.
	mov.f32 	%f456, %f453;
	mul.ftz.f32 	%f457, %f456, %f9;
	selp.f32 	%f458, %f456, %f457, %p77;
	.loc	21	53	0
	mov.f32 	%f459, 0f3f800000;   	// 1
	sub.ftz.f32 	%f460, %f459, %f458;
	selp.f32 	%f9, %f460, %f458, %p78;
	.loc	21	57	0
	mov.f32 	%f5, %f9;
	mov.f32 	%f6, %f9;
	mov.f32 	%f7, %f9;
	bra.uni 	$Lt_49_5634;
$Lt_49_169218:
	@!%p77 bra 	$Lt_49_169730;
	.loc	21	61	0
	// F16 clear, F8 set: %f9 becomes lvl (or 1 - lvl), ignoring its old value.
	mov.f32 	%f456, %f453;
	mov.f32 	%f461, 0f3f800000;   	// 1
	sub.ftz.f32 	%f462, %f461, %f456;
	selp.f32 	%f9, %f462, %f456, %p78;
	bra.uni 	$Lt_49_5634;
$Lt_49_169730:
	.loc	21	69	0
	// F16 and F8 clear: optionally invert a, then scale by lvl.
	mov.f32 	%f463, 0f3f800000;   	// 1
	sub.ftz.f32 	%f464, %f463, %f9;
	selp.f32 	%f465, %f464, %f9, %p78;
	.loc	21	73	0
	mul.ftz.f32 	%f9, %f465, %f453;
	bra.uni 	$Lt_49_5634;
// Handler for selector 1. Computes a single weighted sum
//   %f471 = M[+0]*%f7 + M[+4]*%f6 + M[+8]*%f5      (M = kRGB32f_To_601YPbPr)
// and replaces the three channels with that sum scaled by three entries of
// k601YPbPr_To_RGB32f: %f7 <- K[+0], %f6 <- K[+12], %f5 <- K[+24].
// NOTE(review): judging by the constant names this is a luma-only
// (greyscale) conversion with %f7/%f6/%f5 = R/G/B -- confirm the matrix
// layout, which is declared outside this section.
$Lt_49_5890:
	.loc	22	267	0
	ld.const.f32 	%f466, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f466, %f6;
	ld.const.f32 	%f468, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f468, %f7, %f467;
	ld.const.f32 	%f470, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f470, %f5, %f469;
	ld.const.f32 	%f472, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f473, %f472, %f471;
	.loc	22	268	0
	ld.const.f32 	%f474, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f475, %f474, %f471;
	.loc	23	44	0
	ld.const.f32 	%f476, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f476, %f471;
	mov.f32 	%f6, %f473;
	mov.f32 	%f7, %f475;
	.loc	38	61	0
	bra.uni 	$Lt_49_5634;
// Handler for selector 2. Parameter words: +4 = mode, +8 = gain,
// +12 = pivot, +20 = base value, +24 = alternate value.
//   mode != 0 (or NaN, since the test is .neu):
//       channel = gain * (channel - pivot) + base
//   mode == 0:
//       channel = (channel > pivot) ? alternate : base
// Applied independently to %f7, %f6 and %f5; %f9 is not touched.
$Lt_49_6146:
	.loc	38	63	0
	ld.param.f32 	%f477, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+20];
	ld.param.f32 	%f478, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+12];
	ld.param.f32 	%f479, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+4];
	mov.f32 	%f480, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p80, %f479, %f480;
	@!%p80 bra 	$Lt_49_170242;
	.loc	24	44	0
	// Linear path: scale the distance from the pivot and add the base value.
	ld.param.f32 	%f481, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+8];
	sub.ftz.f32 	%f482, %f7, %f478;
	fma.rn.ftz.f32 	%f7, %f481, %f482, %f477;
	.loc	24	45	0
	sub.ftz.f32 	%f483, %f6, %f478;
	fma.rn.ftz.f32 	%f6, %f481, %f483, %f477;
	.loc	24	46	0
	sub.ftz.f32 	%f484, %f5, %f478;
	fma.rn.ftz.f32 	%f5, %f481, %f484, %f477;
	bra.uni 	$Lt_49_5634;
$Lt_49_170242:
	.loc	24	50	0
	// Two-level path: each channel becomes one of two parameter values
	// depending on whether it exceeds the pivot.
	ld.param.f32 	%f485, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+24];
	setp.gt.ftz.f32 	%p81, %f7, %f478;
	selp.f32 	%f7, %f485, %f477, %p81;
	.loc	24	51	0
	setp.gt.ftz.f32 	%p82, %f6, %f478;
	selp.f32 	%f6, %f485, %f477, %p82;
	.loc	24	52	0
	setp.gt.ftz.f32 	%p83, %f5, %f478;
	selp.f32 	%f5, %f485, %f477, %p83;
	bra.uni 	$Lt_49_5634;
// Handler for selector 3. Per-channel multiply by three parameter words:
//   %f5 *= param[+4];  %f6 *= param[+8];  %f7 *= param[+12]
// %f9 is not touched.
$Lt_49_6402:
	.loc	25	47	0
	ld.param.f32 	%f486, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+4];
	mul.ftz.f32 	%f5, %f486, %f5;
	ld.param.f32 	%f487, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+8];
	mul.ftz.f32 	%f6, %f487, %f6;
	ld.param.f32 	%f488, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+12];
	mul.ftz.f32 	%f7, %f488, %f7;
	.loc	38	67	0
	bra.uni 	$Lt_49_5634;
// Handler for selector 4.
// Step 1: compute a distance %f521 = max of four absolute differences:
//     |clamp01(%f5) - param[+12]|, |clamp01(%f6) - param[+16]|,
//     |clamp01(%f7) - param[+20]|, |clamp01(%f9) - %f9|
//   (the fourth term compares %f9 with its own clamped value, not with a
//   parameter).
// Step 2: %p85 = (param[+8] >= distance), %p84 = (param[+4] == 0).
//   The channels are rewritten only when exactly one of the two predicates
//   is true (xor); otherwise the pixel is left unchanged.
// Step 3: the rewrite is the same weighted-sum conversion used by the
//   selector-1 handler (kRGB32f_To_601YPbPr row at +0/+4/+8, then scaled by
//   k601YPbPr_To_RGB32f entries +0/+12/+24).
// NOTE(review): reads like "desaturate pixels matching / not matching a key
// colour within a tolerance" -- confirm against the kernel source.
$Lt_49_6658:
	.loc	26	48	0
	ld.param.f32 	%f489, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+4];
	mov.f32 	%f490, 0f00000000;   	// 0
	setp.eq.ftz.f32 	%p84, %f489, %f490;
	ld.param.f32 	%f491, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+8];
	mov.f32 	%f492, 0f00000000;   	// 0
	max.ftz.f32 	%f493, %f5, %f492;
	mov.f32 	%f494, 0f3f800000;   	// 1
	min.ftz.f32 	%f495, %f493, %f494;
	ld.param.f32 	%f496, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+12];
	sub.ftz.f32 	%f497, %f495, %f496;
	abs.ftz.f32 	%f498, %f497;
	mov.f32 	%f499, 0f00000000;   	// 0
	max.ftz.f32 	%f500, %f6, %f499;
	mov.f32 	%f501, 0f3f800000;   	// 1
	min.ftz.f32 	%f502, %f500, %f501;
	ld.param.f32 	%f503, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+16];
	sub.ftz.f32 	%f504, %f502, %f503;
	abs.ftz.f32 	%f505, %f504;
	mov.f32 	%f506, 0f00000000;   	// 0
	max.ftz.f32 	%f507, %f7, %f506;
	mov.f32 	%f508, 0f3f800000;   	// 1
	min.ftz.f32 	%f509, %f507, %f508;
	ld.param.f32 	%f510, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+20];
	sub.ftz.f32 	%f511, %f509, %f510;
	abs.ftz.f32 	%f512, %f511;
	mov.f32 	%f513, 0f00000000;   	// 0
	max.ftz.f32 	%f514, %f9, %f513;
	mov.f32 	%f515, 0f3f800000;   	// 1
	min.ftz.f32 	%f516, %f514, %f515;
	sub.ftz.f32 	%f517, %f516, %f9;
	abs.ftz.f32 	%f518, %f517;
	// Largest of the four absolute differences.
	max.ftz.f32 	%f519, %f512, %f518;
	max.ftz.f32 	%f520, %f505, %f519;
	max.ftz.f32 	%f521, %f498, %f520;
	setp.ge.ftz.f32 	%p85, %f491, %f521;
	xor.pred 	%p86, %p84, %p85;
	@!%p86 bra 	$Lt_49_5634;
	.loc	22	267	0
	ld.const.f32 	%f522, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f522, %f6;
	ld.const.f32 	%f523, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f523, %f7, %f467;
	ld.const.f32 	%f524, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f524, %f5, %f469;
	ld.const.f32 	%f525, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f526, %f525, %f471;
	.loc	22	268	0
	ld.const.f32 	%f527, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f528, %f527, %f471;
	.loc	23	44	0
	ld.const.f32 	%f529, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f529, %f471;
	mov.f32 	%f6, %f526;
	mov.f32 	%f7, %f528;
	bra.uni 	$Lt_49_5634;
// Handler for selector 5.
// Computes the same four-term distance as the selector-4 handler
// (clamped %f5/%f6/%f7 against param[+12]/[+16]/[+20], plus
// |clamp01(%f9) - %f9|) and leaves the pixel unchanged unless
// param[+8] >= distance.
// When the test passes, the channels are replaced by the three parameter
// words +24/+28/+32 (-> %f5/%f6/%f7). If param[+4] == 0 those replacement
// values are first multiplied by the weighted sum
//   %f471 = M[+0]*%f7 + M[+4]*%f6 + M[+8]*%f5   (M = kRGB32f_To_601YPbPr).
// NOTE(review): reads like a "replace key colour" filter with an optional
// brightness-preserving mode -- confirm against the kernel source.
$Lt_49_6914:
	.loc	27	48	0
	ld.param.f32 	%f530, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+8];
	mov.f32 	%f531, 0f00000000;   	// 0
	max.ftz.f32 	%f532, %f5, %f531;
	mov.f32 	%f533, 0f3f800000;   	// 1
	min.ftz.f32 	%f534, %f532, %f533;
	ld.param.f32 	%f535, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+12];
	sub.ftz.f32 	%f536, %f534, %f535;
	abs.ftz.f32 	%f537, %f536;
	mov.f32 	%f538, 0f00000000;   	// 0
	max.ftz.f32 	%f539, %f6, %f538;
	mov.f32 	%f540, 0f3f800000;   	// 1
	min.ftz.f32 	%f541, %f539, %f540;
	ld.param.f32 	%f542, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+16];
	sub.ftz.f32 	%f543, %f541, %f542;
	abs.ftz.f32 	%f544, %f543;
	mov.f32 	%f545, 0f00000000;   	// 0
	max.ftz.f32 	%f546, %f7, %f545;
	mov.f32 	%f547, 0f3f800000;   	// 1
	min.ftz.f32 	%f548, %f546, %f547;
	ld.param.f32 	%f549, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+20];
	sub.ftz.f32 	%f550, %f548, %f549;
	abs.ftz.f32 	%f551, %f550;
	mov.f32 	%f552, 0f00000000;   	// 0
	max.ftz.f32 	%f553, %f9, %f552;
	mov.f32 	%f554, 0f3f800000;   	// 1
	min.ftz.f32 	%f555, %f553, %f554;
	sub.ftz.f32 	%f556, %f555, %f9;
	abs.ftz.f32 	%f557, %f556;
	// Largest of the four absolute differences.
	max.ftz.f32 	%f558, %f551, %f557;
	max.ftz.f32 	%f559, %f544, %f558;
	max.ftz.f32 	%f560, %f537, %f559;
	setp.ge.ftz.f32 	%p87, %f530, %f560;
	@!%p87 bra 	$Lt_49_5634;
	.loc	27	51	0
	// Default replacement values: the raw parameter words.
	ld.param.f32 	%f561, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+24];
	mov.f32 	%f126, %f561;
	ld.param.f32 	%f562, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+28];
	mov.f32 	%f128, %f562;
	ld.param.f32 	%f563, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+32];
	mov.f32 	%f130, %f563;
	ld.param.f32 	%f564, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+4];
	mov.f32 	%f565, 0f00000000;   	// 0
	setp.eq.ftz.f32 	%p88, %f564, %f565;
	@!%p88 bra 	$Lt_49_171522;
	.loc	27	60	0
	// param[+4] == 0: modulate the replacement values by the weighted sum.
	ld.const.f32 	%f566, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f566, %f6;
	ld.const.f32 	%f567, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f567, %f7, %f467;
	ld.const.f32 	%f568, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f568, %f5, %f469;
	mul.ftz.f32 	%f126, %f471, %f561;
	.loc	27	61	0
	mul.ftz.f32 	%f128, %f471, %f562;
	.loc	27	62	0
	mul.ftz.f32 	%f130, %f471, %f563;
$Lt_49_171522:
	.loc	27	65	0
	mov.f32 	%f5, %f126;
	mov.f32 	%f6, %f128;
	mov.f32 	%f7, %f130;
	bra.uni 	$Lt_49_5634;
// Handler for selector 6. Only %f9 is modified.
// With w = param[+4], four values are formed:
//     w - %r6,  w - %r12,  w - (%r13 - %r12),  w - (%r14 - %r6)
// each truncated toward zero to an integer; m = the largest of the four.
// If m <= 0 nothing changes; otherwise %f9 *= (w - m) / w (approximate
// division).
// NOTE(review): looks like an edge feather of width w, with %r12/%r6 the
// pixel x/y and %r13/%r14 the image width/height -- confirm against the
// kernel source; those registers are assigned outside this section.
$Lt_49_7170:
	.loc	28	47	0
	sub.s32 	%r325, %r13, %r12;
	sub.s32 	%r326, %r14, %r6;
	cvt.rn.f32.s32 	%f569, %r6;
	cvt.rn.f32.s32 	%f570, %r12;
	cvt.rn.f32.s32 	%f138, %r325;
	cvt.rn.f32.s32 	%f139, %r326;
	ld.param.f32 	%f456, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+4];
	sub.ftz.f32 	%f571, %f456, %f569;
	sub.ftz.f32 	%f572, %f456, %f570;
	sub.ftz.f32 	%f573, %f456, %f138;
	sub.ftz.f32 	%f574, %f456, %f139;
	cvt.rzi.ftz.s32.f32 	%r327, %f571;
	cvt.rzi.ftz.s32.f32 	%r328, %f572;
	cvt.rzi.ftz.s32.f32 	%r329, %f573;
	cvt.rzi.ftz.s32.f32 	%r330, %f574;
	max.s32 	%r331, %r327, %r328;
	max.s32 	%r332, %r330, %r331;
	max.s32 	%r333, %r329, %r332;
	mov.u32 	%r334, 0;
	setp.le.s32 	%p89, %r333, %r334;
	@%p89 bra 	$Lt_49_5634;
	.loc	28	51	0
	cvt.rn.f32.s32 	%f575, %r333;
	sub.ftz.f32 	%f576, %f456, %f575;
	div.approx.ftz.f32 	%f577, %f576, %f456;
	mul.ftz.f32 	%f9, %f9, %f577;
	bra.uni 	$Lt_49_5634;
// Handler for selector 7. Builds a value %f164 in place of the pixel from
// the weighted sum
//   y = %f471 = M[+0]*%f7 + M[+4]*%f6 + M[+8]*%f5  (M = kRGB32f_To_601YPbPr)
// and a band described by parameters lo = param[+8], hi = param[+12],
// softness = param[+16]:
//   s          = softness * (hi - lo)                       (%f583)
//   outerLo    = clamp01(lo - s)   (%f594)   innerLo = clamp01(lo + s) (%f599)
//   innerHi    = clamp01(hi - s)   (%f604)   outerHi = clamp01(hi + s) (%f592)
//   y <  outerLo or y >= outerHi        -> 0
//   innerLo <= y <  innerHi             -> 1
//   y <  innerLo                        -> (y - outerLo) / (2*s)
//   otherwise                           -> (outerHi - y) / (2*s)
// If param[+4] != 0 the result is inverted (1 - value). The three channels
// are then set to the result scaled by k601YPbPr_To_RGB32f entries
// +24 (%f5), +12 (%f6) and +0 (%f7). %f9 is not touched.
// NOTE(review): reads like a soft luma-key matte -- confirm against the
// kernel source.
$Lt_49_7426:
	.loc	30	50	0
	ld.const.f32 	%f578, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f578, %f6;
	ld.param.f32 	%f579, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+12];
	ld.param.f32 	%f481, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+8];
	sub.ftz.f32 	%f580, %f579, %f481;
	ld.const.f32 	%f581, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f581, %f7, %f467;
	ld.param.f32 	%f582, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+16];
	mul.ftz.f32 	%f583, %f582, %f580;
	ld.const.f32 	%f584, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f584, %f5, %f469;
	add.ftz.f32 	%f585, %f583, %f579;
	sub.ftz.f32 	%f586, %f481, %f583;
	mov.f32 	%f587, 0f00000000;   	// 0
	max.ftz.f32 	%f588, %f585, %f587;
	mov.f32 	%f589, 0f00000000;   	// 0
	max.ftz.f32 	%f590, %f586, %f589;
	mov.f32 	%f591, 0f3f800000;   	// 1
	min.ftz.f32 	%f592, %f588, %f591;
	mov.f32 	%f593, 0f3f800000;   	// 1
	min.ftz.f32 	%f594, %f590, %f593;
	// Outside the outer band (below outerLo or at/above outerHi) -> 0.
	set.gt.ftz.u32.f32 	%r335, %f594, %f471;
	neg.s32 	%r336, %r335;
	set.le.ftz.u32.f32 	%r337, %f592, %f471;
	neg.s32 	%r338, %r337;
	or.b32 	%r339, %r336, %r338;
	mov.u32 	%r340, 0;
	setp.eq.s32 	%p90, %r339, %r340;
	@%p90 bra 	$Lt_49_172802;
	mov.f32 	%f164, 0f00000000;   	// 0
	bra.uni 	$Lt_49_173570;
$Lt_49_172802:
	// Inside the inner band (innerLo <= y < innerHi) -> 1.
	add.ftz.f32 	%f595, %f583, %f481;
	mov.f32 	%f596, 0f00000000;   	// 0
	max.ftz.f32 	%f597, %f595, %f596;
	mov.f32 	%f598, 0f3f800000;   	// 1
	min.ftz.f32 	%f599, %f597, %f598;
	set.le.ftz.u32.f32 	%r341, %f599, %f471;
	neg.s32 	%r342, %r341;
	sub.ftz.f32 	%f600, %f579, %f583;
	mov.f32 	%f601, 0f00000000;   	// 0
	max.ftz.f32 	%f602, %f600, %f601;
	mov.f32 	%f603, 0f3f800000;   	// 1
	min.ftz.f32 	%f604, %f602, %f603;
	set.lt.ftz.u32.f32 	%r343, %f471, %f604;
	neg.s32 	%r344, %r343;
	and.b32 	%r345, %r342, %r344;
	mov.u32 	%r346, 0;
	setp.eq.s32 	%p91, %r345, %r346;
	@%p91 bra 	$Lt_49_173314;
	mov.f32 	%f164, 0f3f800000;   	// 1
	bra.uni 	$Lt_49_173570;
$Lt_49_173314:
	// Transition zones: linear ramp over a width of 2*s on either side.
	add.ftz.f32 	%f605, %f583, %f583;
	setp.gt.ftz.f32 	%p92, %f599, %f471;
	@!%p92 bra 	$Lt_49_173826;
	.loc	30	62	0
	sub.ftz.f32 	%f606, %f471, %f594;
	div.approx.ftz.f32 	%f164, %f606, %f605;
	bra.uni 	$Lt_49_173570;
$Lt_49_173826:
	.loc	30	66	0
	sub.ftz.f32 	%f607, %f592, %f471;
	div.approx.ftz.f32 	%f164, %f607, %f605;
$Lt_49_173570:
$Lt_49_173058:
$Lt_49_172546:
	.loc	30	69	0
	// Optional inversion when param[+4] is non-zero (or NaN: .neu test).
	mov.f32 	%f608, 0f3f800000;   	// 1
	sub.ftz.f32 	%f609, %f608, %f164;
	ld.param.f32 	%f610, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+4];
	mov.f32 	%f611, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p93, %f610, %f611;
	selp.f32 	%f164, %f609, %f164, %p93;
	.loc	30	77	0
	ld.const.f32 	%f612, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f612, %f164;
	ld.const.f32 	%f613, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f613, %f164;
	ld.const.f32 	%f614, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f614, %f164;
	.loc	38	79	0
	bra.uni 	$Lt_49_5634;
// Handler for selector 8.
//
// Gate (param[+44] = g): the body runs only when
//     g >= 0 and float(%r6)  <  float(%r14) * g,   or
//     g <  0 and float(%r12) < -(float(%r13) * g);
//   otherwise the pixel is left unchanged.
//   NOTE(review): looks like a split-screen limit expressed as a fraction of
//   the image extent -- confirm; %r6/%r12/%r13/%r14 are set outside this view.
//
// Body:
//   1. y = M[+0]*%f7 + M[+4]*%f6 + M[+8]*%f5        (M = kRGB32f_To_601YPbPr)
//   2. inRange = param[+4] - param[+8]; outRange = param[+12] - param[+16].
//      Unless both ranges equal 1, a pseudo-random offset in [-1, 1] derived
//      from an integer hash of %r5 and %r11 is scaled by 0.00254902 and
//      added to y.
//   3. t = y - param[+8]; with e = param[+20]:
//        e != 1 and t <  0 : y' = param[+16]
//        e != 1 and t >= 0 : y' = outRange * (t / inRange)^e + param[+16]
//        e == 1            : y' = outRange * (t / inRange)   + param[+16]
//      (the power is evaluated as ex2(e * lg2(x)), approximate variants).
//   4. The two remaining rows of M (+12..+20 and +24..+32) are applied to
//      the original channels, the resulting pair is rotated by the angle
//      param[+28], blended toward (param[+32], param[+36]) by the weight
//      param[+40], and scaled by param[+24].
//   5. (y', pair) is multiplied by the 3x3 k601YPbPr_To_RGB32f matrix:
//      row +0 -> %f7, row +12 -> %f6, row +24 -> %f5.
// %f9 is not touched.
$Lt_49_7682:
	.loc	38	80	0
	ld.param.f32 	%f615, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+44];
	mov.f32 	%f616, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p94, %f615, %f616;
	@!%p94 bra 	$L_49_153090;
	cvt.rn.f32.s32 	%f617, %r6;
	cvt.rn.f32.s32 	%f618, %r14;
	mul.ftz.f32 	%f619, %f618, %f615;
	setp.lt.ftz.f32 	%p95, %f617, %f619;
	@%p95 bra 	$L_49_152834;
$L_49_153090:
	mov.f32 	%f620, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p96, %f615, %f620;
	@!%p96 bra 	$Lt_49_5634;
	cvt.rn.f32.s32 	%f621, %r12;
	cvt.rn.f32.s32 	%f622, %r13;
	mul.ftz.f32 	%f623, %f622, %f615;
	neg.ftz.f32 	%f624, %f623;
	setp.lt.ftz.f32 	%p97, %f621, %f624;
	@!%p97 bra 	$Lt_49_5634;
$L_49_152834:
	.loc	31	47	0
	ld.const.f32 	%f625, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f626, %f625, %f6;
	ld.const.f32 	%f627, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f628, %f627, %f7, %f626;
	ld.const.f32 	%f629, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f629, %f5, %f628;
	mov.f32 	%f200, %f471;
	ld.param.f32 	%f481, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+8];
	ld.param.f32 	%f630, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+4];
	sub.ftz.f32 	%f631, %f630, %f481;
	ld.param.f32 	%f632, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+16];
	ld.param.f32 	%f633, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+12];
	sub.ftz.f32 	%f634, %f633, %f632;
	// Skip the random offset only when both ranges are exactly 1.
	mov.f32 	%f635, 0f3f800000;   	// 1
	set.neu.ftz.u32.f32 	%r347, %f634, %f635;
	neg.s32 	%r348, %r347;
	mov.f32 	%f636, 0f3f800000;   	// 1
	set.neu.ftz.u32.f32 	%r349, %f631, %f636;
	neg.s32 	%r350, %r349;
	or.b32 	%r351, %r348, %r350;
	mov.u32 	%r352, 0;
	setp.eq.s32 	%p98, %r351, %r352;
	@%p98 bra 	$Lt_49_174082;
	.loc	20	143	0
	// Integer hash: repeated subtract / shift / xor rounds over %r5 and %r11,
	// ending in %r114.
	mov.s32 	%r353, 1;
	sub.s32 	%r79, %r353, %r5;
	shr.u32 	%r80, %r11, 13;
	sub.u32 	%r81, %r5, %r11;
	sub.u32 	%r82, %r79, %r11;
	xor.b32 	%r83, %r82, %r80;
	shl.b32 	%r84, %r83, 8;
	sub.u32 	%r85, %r81, %r83;
	sub.u32 	%r86, %r11, %r83;
	xor.b32 	%r87, %r85, %r84;
	shr.u32 	%r88, %r87, 13;
	sub.u32 	%r89, %r86, %r87;
	sub.u32 	%r90, %r83, %r87;
	xor.b32 	%r91, %r89, %r88;
	shr.u32 	%r92, %r91, 12;
	sub.u32 	%r93, %r90, %r91;
	xor.b32 	%r94, %r93, %r92;
	sub.u32 	%r354, %r87, %r91;
	sub.u32 	%r96, %r354, %r94;
	shl.b32 	%r355, %r94, 16;
	xor.b32 	%r98, %r96, %r355;
	.loc	20	144	0
	sub.u32 	%r99, %r91, %r94;
	sub.u32 	%r100, %r99, %r98;
	shr.u32 	%r356, %r98, 5;
	xor.b32 	%r102, %r100, %r356;
	.loc	20	145	0
	sub.u32 	%r103, %r94, %r98;
	sub.u32 	%r104, %r103, %r102;
	shr.u32 	%r357, %r102, 3;
	xor.b32 	%r106, %r104, %r357;
	.loc	20	146	0
	sub.u32 	%r107, %r98, %r102;
	sub.u32 	%r108, %r107, %r106;
	shl.b32 	%r358, %r106, 10;
	xor.b32 	%r110, %r108, %r358;
	.loc	20	147	0
	sub.u32 	%r111, %r102, %r106;
	sub.u32 	%r112, %r111, %r110;
	shr.u32 	%r359, %r110, 15;
	xor.b32 	%r114, %r112, %r359;
	.loc	31	57	0
	// Two 8-bit fields are taken from bits 16..23 of two affine transforms of
	// the hash and combined into a value in 0..32767; dividing by 16383.5 and
	// subtracting 1 maps it to [-1, 1].
	// NOTE(review): the second multiplier/addend pair presumably corresponds
	// to applying the x*1103515245+12345 step twice -- confirm.
	mov.f32 	%f637, 0f3b270d73;   	// 0.00254902
	mul.lo.u32 	%r360, %r114, 1103515245;
	add.u32 	%r361, %r360, 12345;
	shr.u32 	%r362, %r361, 16;
	and.b32 	%r363, %r362, 255;
	shl.b32 	%r364, %r363, 7;
	mul.lo.u32 	%r365, %r114, -1029531031;
	sub.u32 	%r366, %r365, 740551042;
	shr.u32 	%r367, %r366, 16;
	and.b32 	%r368, %r367, 255;
	xor.b32 	%r369, %r364, %r368;
	cvt.rn.f32.s32 	%f638, %r369;
	mov.f32 	%f639, 0f467ffe00;   	// 16383.5
	div.approx.ftz.f32 	%f640, %f638, %f639;
	mov.f32 	%f641, 0fbf800000;   	// -1
	add.ftz.f32 	%f642, %f640, %f641;
	fma.rn.ftz.f32 	%f200, %f637, %f642, %f471;
$Lt_49_174082:
	sub.ftz.f32 	%f643, %f200, %f481;
	ld.param.f32 	%f644, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+20];
	mov.f32 	%f645, 0f3f800000;   	// 1
	setp.neu.ftz.f32 	%p99, %f644, %f645;
	@!%p99 bra 	$Lt_49_174850;
	mov.f32 	%f646, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p100, %f643, %f646;
	@!%p100 bra 	$Lt_49_175362;
	.loc	31	66	0
	// Below the input floor: output the lower output bound directly.
	mov.f32 	%f200, %f632;
	bra.uni 	$Lt_49_174594;
$Lt_49_175362:
	.loc	31	70	0
	// Power curve: (t / inRange)^e via ex2(e * lg2(x)).
	rcp.approx.ftz.f32 	%f647, %f631;
	mul.ftz.f32 	%f648, %f643, %f647;
	lg2.approx.ftz.f32 	%f649, %f648;
	mul.ftz.f32 	%f650, %f644, %f649;
	ex2.approx.ftz.f32 	%f651, %f650;
	fma.rn.ftz.f32 	%f200, %f634, %f651, %f632;
	bra.uni 	$Lt_49_174594;
$Lt_49_174850:
	.loc	31	77	0
	// Exponent is exactly 1: plain linear remap.
	rcp.approx.ftz.f32 	%f652, %f631;
	mul.ftz.f32 	%f653, %f643, %f652;
	fma.rn.ftz.f32 	%f200, %f634, %f653, %f632;
$Lt_49_174594:
	.loc	22	267	0
	// Second and third rows of kRGB32f_To_601YPbPr applied to the original
	// channels: %f668 (row +12) and %f666 (row +24).
	ld.const.f32 	%f654, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f655, %f654, %f6;
	ld.const.f32 	%f656, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f657, %f656, %f6;
	ld.param.f32 	%f658, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+28];
	sin.approx.ftz.f32 	%f659, %f658;
	cos.approx.ftz.f32 	%f660, %f658;
	ld.const.f32 	%f661, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f662, %f661, %f7, %f655;
	ld.const.f32 	%f663, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f664, %f663, %f7, %f657;
	ld.const.f32 	%f665, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f666, %f665, %f5, %f662;
	ld.const.f32 	%f667, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f668, %f667, %f5, %f664;
	// 2-D rotation of (%f668, %f666) by the angle in param[+28].
	mul.ftz.f32 	%f669, %f659, %f666;
	mul.ftz.f32 	%f670, %f660, %f666;
	mul.ftz.f32 	%f671, %f668, %f660;
	sub.ftz.f32 	%f672, %f671, %f669;
	fma.rn.ftz.f32 	%f673, %f668, %f659, %f670;
	// Blend the rotated pair toward (param[+32], param[+36]) by param[+40],
	// then scale both components by param[+24].
	ld.param.f32 	%f674, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+32];
	sub.ftz.f32 	%f675, %f674, %f672;
	ld.param.f32 	%f676, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+36];
	sub.ftz.f32 	%f677, %f676, %f673;
	ld.param.f32 	%f678, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+40];
	fma.rn.ftz.f32 	%f679, %f678, %f675, %f672;
	fma.rn.ftz.f32 	%f680, %f678, %f677, %f673;
	ld.param.f32 	%f681, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+24];
	mul.ftz.f32 	%f682, %f679, %f681;
	mul.ftz.f32 	%f683, %f680, %f681;
	// Full 3x3 multiply by k601YPbPr_To_RGB32f.
	ld.const.f32 	%f684, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f685, %f684, %f682;
	ld.const.f32 	%f686, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f687, %f686, %f200, %f685;
	ld.const.f32 	%f688, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f260, %f688, %f683, %f687;
	.loc	22	268	0
	ld.const.f32 	%f689, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f690, %f689, %f682;
	ld.const.f32 	%f691, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f692, %f691, %f200, %f690;
	ld.const.f32 	%f693, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f266, %f693, %f683, %f692;
	.loc	31	92	0
	ld.const.f32 	%f694, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f695, %f694, %f682;
	ld.const.f32 	%f696, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f697, %f696, %f200, %f695;
	ld.const.f32 	%f698, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f698, %f683, %f697;
	mov.f32 	%f6, %f260;
	mov.f32 	%f7, %f266;
	bra.uni 	$Lt_49_5634;
// Handler for selector 9.
//
// Gate (param[+44] = g), identical in form to the selector-8 handler: the
// body runs only when
//     g >= 0 and float(%r6)  <  float(%r14) * g,   or
//     g <  0 and float(%r12) < -(float(%r13) * g).
//
// Body:
//   y = M[+0]*%f7 + M[+4]*%f6 + M[+8]*%f5          (M = kRGB32f_To_601YPbPr)
//   inRange  = param[+4]  - param[+8]
//   outRange = param[+12] - param[+16]
//   t = y - param[+8];  e = param[+20]
//     e != 1 : v = outRange * (max(t, 0) / inRange)^e + param[+16]
//     e == 1 : v = outRange * (t / inRange)           + param[+16]
//   %f5 = K[+24]*v, %f6 = K[+12]*v, %f7 = K[+0]*v  (K = k601YPbPr_To_RGB32f)
// Unlike the selector-8 handler no random offset is added and only the
// first weighted sum is used. %f9 is not touched.
$Lt_49_7938:
	.loc	38	83	0
	ld.param.f32 	%f699, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+44];
	mov.f32 	%f700, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p101, %f699, %f700;
	@!%p101 bra 	$L_49_153858;
	cvt.rn.f32.s32 	%f701, %r6;
	cvt.rn.f32.s32 	%f702, %r14;
	mul.ftz.f32 	%f703, %f702, %f699;
	setp.lt.ftz.f32 	%p102, %f701, %f703;
	@%p102 bra 	$L_49_153602;
$L_49_153858:
	mov.f32 	%f704, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p103, %f699, %f704;
	@!%p103 bra 	$Lt_49_5634;
	cvt.rn.f32.s32 	%f705, %r12;
	cvt.rn.f32.s32 	%f706, %r13;
	mul.ftz.f32 	%f707, %f706, %f699;
	neg.ftz.f32 	%f708, %f707;
	setp.lt.ftz.f32 	%p104, %f705, %f708;
	@!%p104 bra 	$Lt_49_5634;
$L_49_153602:
	.loc	31	110	0
	ld.const.f32 	%f709, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f709, %f6;
	ld.param.f32 	%f481, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+8];
	ld.param.f32 	%f710, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+4];
	sub.ftz.f32 	%f711, %f710, %f481;
	ld.param.f32 	%f712, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+16];
	ld.param.f32 	%f713, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+12];
	sub.ftz.f32 	%f714, %f713, %f712;
	ld.const.f32 	%f715, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f715, %f7, %f467;
	ld.const.f32 	%f716, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f716, %f5, %f469;
	sub.ftz.f32 	%f717, %f471, %f481;
	ld.param.f32 	%f718, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+20];
	mov.f32 	%f719, 0f3f800000;   	// 1
	setp.neu.ftz.f32 	%p105, %f718, %f719;
	@!%p105 bra 	$Lt_49_175874;
	.loc	42	523	0
	// Power curve; the base is clamped at 0 before the division so lg2 never
	// sees a negative numerator.
	mov.f32 	%f720, 0f00000000;   	// 0
	max.ftz.f32 	%f721, %f717, %f720;
	div.approx.ftz.f32 	%f722, %f721, %f711;
	lg2.approx.ftz.f32 	%f723, %f722;
	mul.ftz.f32 	%f724, %f718, %f723;
	ex2.approx.ftz.f32 	%f298, %f724;
	.loc	31	120	0
	fma.rn.ftz.f32 	%f299, %f714, %f298, %f712;
	bra.uni 	$Lt_49_175618;
$Lt_49_175874:
	.loc	31	129	0
	// Exponent is exactly 1: plain linear remap (no clamp on t here).
	div.approx.ftz.f32 	%f725, %f717, %f711;
	fma.rn.ftz.f32 	%f299, %f714, %f725, %f712;
$Lt_49_175618:
	.loc	31	135	0
	ld.const.f32 	%f726, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f726, %f299;
	ld.const.f32 	%f727, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f727, %f299;
	ld.const.f32 	%f728, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f728, %f299;
	bra.uni 	$Lt_49_5634;
// Handler for selector 10.
// Uses the same gate on param[+44] as the selector-8 and selector-9
// handlers (g >= 0 and %r6 < %r14*g, or g < 0 and %r12 < -(%r13*g)).
// When the gate passes, the three channels are overwritten with constant
// entries of k601YPbPr_To_RGB32f: %f5 <- [+24], %f6 <- [+12], %f7 <- [+0].
// The incoming channel values are not read. %f9 is not touched.
$Lt_49_8194:
	.loc	38	86	0
	ld.param.f32 	%f729, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+44];
	mov.f32 	%f730, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p106, %f729, %f730;
	@!%p106 bra 	$L_49_154626;
	cvt.rn.f32.s32 	%f731, %r6;
	cvt.rn.f32.s32 	%f732, %r14;
	mul.ftz.f32 	%f733, %f732, %f729;
	setp.lt.ftz.f32 	%p107, %f731, %f733;
	@%p107 bra 	$L_49_154370;
$L_49_154626:
	mov.f32 	%f734, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p108, %f729, %f734;
	@!%p108 bra 	$Lt_49_5634;
	cvt.rn.f32.s32 	%f735, %r12;
	cvt.rn.f32.s32 	%f736, %r13;
	mul.ftz.f32 	%f737, %f736, %f729;
	neg.ftz.f32 	%f738, %f737;
	setp.lt.ftz.f32 	%p109, %f735, %f738;
	@!%p109 bra 	$Lt_49_5634;
$L_49_154370:
	.loc	31	160	0
	ld.const.f32 	%f5, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f6, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f7, [k601YPbPr_To_RGB32f+0];
	bra.uni 	$Lt_49_5634;
// Handler for selector 11. Sign-preserving power applied to all four
// values %f5, %f6, %f7 and %f9 with exponent e = param[+4]:
//     v >= 0      : v' =  ex2(e * lg2( v))      i.e.  v^e
//     otherwise   : v' = -ex2(e * lg2(-v))      i.e. -(-v)^e
// The "otherwise" path is also taken when the compare is unordered (NaN),
// because the branch is on the negation of setp.ge.
// All four results are computed into temporaries first and written back
// together at $Lt_49_177666.
$Lt_49_8450:
	.loc	32	42	0
	ld.param.f32 	%f739, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+4];
	mov.f32 	%f740, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p110, %f5, %f740;
	@!%p110 bra 	$Lt_49_176386;
	.loc	22	292	0
	mov.f32 	%f456, %f739;
	lg2.approx.ftz.f32 	%f741, %f5;
	mul.ftz.f32 	%f742, %f456, %f741;
	ex2.approx.ftz.f32 	%f743, %f742;
	bra.uni 	$Lt_49_176130;
$Lt_49_176386:
	mov.f32 	%f456, %f739;
	neg.ftz.f32 	%f744, %f5;
	lg2.approx.ftz.f32 	%f745, %f744;
	mul.ftz.f32 	%f746, %f456, %f745;
	ex2.approx.ftz.f32 	%f747, %f746;
	neg.ftz.f32 	%f743, %f747;
$Lt_49_176130:
	mov.f32 	%f748, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p111, %f6, %f748;
	@!%p111 bra 	$Lt_49_176898;
	.loc	22	293	0
	lg2.approx.ftz.f32 	%f749, %f6;
	mul.ftz.f32 	%f750, %f456, %f749;
	ex2.approx.ftz.f32 	%f751, %f750;
	bra.uni 	$Lt_49_176642;
$Lt_49_176898:
	neg.ftz.f32 	%f752, %f6;
	lg2.approx.ftz.f32 	%f753, %f752;
	mul.ftz.f32 	%f754, %f456, %f753;
	ex2.approx.ftz.f32 	%f755, %f754;
	neg.ftz.f32 	%f751, %f755;
$Lt_49_176642:
	mov.f32 	%f756, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p112, %f7, %f756;
	@!%p112 bra 	$Lt_49_177410;
	.loc	22	294	0
	lg2.approx.ftz.f32 	%f757, %f7;
	mul.ftz.f32 	%f758, %f456, %f757;
	ex2.approx.ftz.f32 	%f759, %f758;
	bra.uni 	$Lt_49_177154;
$Lt_49_177410:
	neg.ftz.f32 	%f760, %f7;
	lg2.approx.ftz.f32 	%f761, %f760;
	mul.ftz.f32 	%f762, %f456, %f761;
	ex2.approx.ftz.f32 	%f763, %f762;
	neg.ftz.f32 	%f759, %f763;
$Lt_49_177154:
	mov.f32 	%f764, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p113, %f9, %f764;
	@!%p113 bra 	$Lt_49_177922;
	.loc	22	295	0
	lg2.approx.ftz.f32 	%f765, %f9;
	mul.ftz.f32 	%f766, %f456, %f765;
	ex2.approx.ftz.f32 	%f767, %f766;
	bra.uni 	$Lt_49_177666;
$Lt_49_177922:
	neg.ftz.f32 	%f768, %f9;
	lg2.approx.ftz.f32 	%f769, %f768;
	mul.ftz.f32 	%f770, %f456, %f769;
	ex2.approx.ftz.f32 	%f771, %f770;
	neg.ftz.f32 	%f767, %f771;
$Lt_49_177666:
	.loc	32	42	0
	// Commit the four results.
	mov.f32 	%f5, %f743;
	mov.f32 	%f6, %f751;
	mov.f32 	%f7, %f759;
	mov.f32 	%f9, %f767;
	.loc	38	91	0
	bra.uni 	$Lt_49_5634;
// Handler for selector 12. Modifies no pixel value; it only rewrites the
// integer register %r6 as %r14 - %r6 - 1.
// NOTE(review): looks like a mirrored index (extent - index - 1) along the
// axis indexed by %r6 -- confirm against the kernel source.
$Lt_49_8706:
	.loc	33	41	0
	sub.s32 	%r370, %r14, %r6;
	sub.s32 	%r6, %r370, 1;
	.loc	38	94	0
	bra.uni 	$Lt_49_5634;
// Handler for selector 13. Adds a pseudo-random offset to %f7, %f6, %f5.
//
// Parameters: a = param[+4] (amplitude), param[+8] (per-channel flag,
// tested == 1), param[+12] (clamp flag, tested == 1), param[+16] (converted
// to an unsigned integer by truncation and mixed into the hash).
//
// Each offset has the form  a * (r / 32767) - a/2  where r is a value in
// 0..32767 assembled from two 8-bit fields of an integer hash, so the
// offset lies in [-a/2, a/2].
//   flag(+8) == 1 : three separate hashes are evaluated; their first mixing
//                   input is (%r6*3 - %r12 - seed) + 0, + 1 and + 2, giving
//                   independent offsets %f356, %f361, %f366.
//   otherwise     : one hash seeded from (%r6 - %r12 - seed) is evaluated
//                   and the same offset is used for all three channels.
// Then %f7 += %f356, %f6 += %f361, %f5 += %f366, and if flag(+12) == 1 all
// of %f5, %f6, %f7 and %f9 are clamped to [0, 1].
// NOTE(review): the mixing rounds are the same shift/subtract/xor sequence
// used in the selector-8 handler; %r6/%r12 are presumably the pixel
// coordinates -- confirm against the kernel source.
$Lt_49_8962:
	.loc	38	96	0
	ld.param.f32 	%f456, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+4];
	mov.f32 	%f772, 0f3f000000;   	// 0.5
	mul.ftz.f32 	%f773, %f456, %f772;
	ld.param.f32 	%f774, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+16];
	ld.param.f32 	%f775, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+8];
	mov.f32 	%f776, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p114, %f775, %f776;
	@!%p114 bra 	$Lt_49_178434;
	.loc	20	143	0
	// --- Per-channel path, first hash (offset for %f7) ---
	mul.lo.s32 	%r126, %r6, 3;
	cvt.rzi.ftz.u32.f32 	%r371, %f774;
	sub.u32 	%r128, %r126, %r12;
	shr.u32 	%r372, %r371, 13;
	sub.u32 	%r373, %r12, %r371;
	sub.u32 	%r374, %r128, %r371;
	xor.b32 	%r375, %r374, %r372;
	shl.b32 	%r376, %r375, 8;
	sub.u32 	%r377, %r373, %r375;
	sub.u32 	%r378, %r371, %r375;
	xor.b32 	%r379, %r376, %r377;
	shr.u32 	%r380, %r379, 13;
	sub.u32 	%r381, %r378, %r379;
	sub.u32 	%r382, %r375, %r379;
	xor.b32 	%r383, %r380, %r381;
	shr.u32 	%r384, %r383, 12;
	sub.u32 	%r385, %r382, %r383;
	xor.b32 	%r386, %r384, %r385;
	shl.b32 	%r387, %r386, 16;
	sub.u32 	%r388, %r379, %r383;
	sub.u32 	%r389, %r388, %r386;
	xor.b32 	%r390, %r387, %r389;
	.loc	20	144	0
	sub.u32 	%r391, %r383, %r386;
	sub.u32 	%r392, %r391, %r390;
	shr.u32 	%r393, %r390, 5;
	xor.b32 	%r394, %r392, %r393;
	.loc	20	145	0
	sub.u32 	%r395, %r386, %r390;
	sub.u32 	%r396, %r395, %r394;
	shr.u32 	%r397, %r394, 3;
	xor.b32 	%r155, %r396, %r397;
	.loc	20	146	0
	sub.u32 	%r398, %r390, %r394;
	sub.u32 	%r399, %r398, %r155;
	shl.b32 	%r400, %r155, 10;
	xor.b32 	%r159, %r399, %r400;
	.loc	20	147	0
	sub.u32 	%r401, %r394, %r155;
	sub.u32 	%r402, %r401, %r159;
	shr.u32 	%r403, %r159, 15;
	xor.b32 	%r163, %r402, %r403;
	.loc	34	48	0
	// Hash -> value in 0..32767 -> offset a*(r/32767) - a/2.
	mul.lo.u32 	%r404, %r163, 1103515245;
	add.u32 	%r405, %r404, 12345;
	shr.u32 	%r406, %r405, 16;
	and.b32 	%r407, %r406, 255;
	shl.b32 	%r408, %r407, 7;
	mul.lo.u32 	%r409, %r163, -1029531031;
	sub.u32 	%r410, %r409, 740551042;
	shr.u32 	%r411, %r410, 16;
	and.b32 	%r412, %r411, 255;
	xor.b32 	%r413, %r408, %r412;
	cvt.rn.f32.s32 	%f777, %r413;
	mov.f32 	%f778, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f779, %f777, %f778;
	mul.ftz.f32 	%f780, %f456, %f779;
	sub.ftz.f32 	%f356, %f780, %f773;
	.loc	20	143	0
	// --- Second hash (offset for %f6): first mixing input is %r374 + 1 ---
	add.u32 	%r414, %r374, 1;
	xor.b32 	%r415, %r414, %r372;
	shl.b32 	%r416, %r415, 8;
	sub.u32 	%r417, %r373, %r415;
	sub.u32 	%r418, %r371, %r415;
	xor.b32 	%r419, %r416, %r417;
	shr.u32 	%r420, %r419, 13;
	sub.u32 	%r421, %r418, %r419;
	sub.u32 	%r422, %r415, %r419;
	xor.b32 	%r423, %r420, %r421;
	shr.u32 	%r424, %r423, 12;
	sub.u32 	%r425, %r422, %r423;
	xor.b32 	%r426, %r424, %r425;
	sub.u32 	%r427, %r419, %r423;
	sub.u32 	%r428, %r427, %r426;
	shl.b32 	%r429, %r426, 16;
	xor.b32 	%r430, %r428, %r429;
	.loc	20	144	0
	sub.u32 	%r431, %r423, %r426;
	sub.u32 	%r432, %r431, %r430;
	shr.u32 	%r433, %r430, 5;
	xor.b32 	%r434, %r432, %r433;
	.loc	20	145	0
	sub.u32 	%r435, %r426, %r430;
	sub.u32 	%r436, %r435, %r434;
	shr.u32 	%r437, %r434, 3;
	xor.b32 	%r198, %r436, %r437;
	.loc	20	146	0
	sub.u32 	%r438, %r430, %r434;
	sub.u32 	%r439, %r438, %r198;
	shl.b32 	%r440, %r198, 10;
	xor.b32 	%r202, %r439, %r440;
	.loc	20	147	0
	sub.u32 	%r441, %r434, %r198;
	sub.u32 	%r442, %r441, %r202;
	shr.u32 	%r443, %r202, 15;
	xor.b32 	%r206, %r442, %r443;
	.loc	34	49	0
	mul.lo.u32 	%r444, %r206, 1103515245;
	add.u32 	%r445, %r444, 12345;
	shr.u32 	%r446, %r445, 16;
	and.b32 	%r447, %r446, 255;
	shl.b32 	%r448, %r447, 7;
	mul.lo.u32 	%r449, %r206, -1029531031;
	sub.u32 	%r450, %r449, 740551042;
	shr.u32 	%r451, %r450, 16;
	and.b32 	%r452, %r451, 255;
	xor.b32 	%r453, %r448, %r452;
	cvt.rn.f32.s32 	%f781, %r453;
	mov.f32 	%f782, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f783, %f781, %f782;
	mul.ftz.f32 	%f784, %f456, %f783;
	sub.ftz.f32 	%f361, %f784, %f773;
	.loc	20	143	0
	// --- Third hash (offset for %f5): first mixing input is %r374 + 2 ---
	add.u32 	%r454, %r374, 2;
	xor.b32 	%r455, %r454, %r372;
	shl.b32 	%r456, %r455, 8;
	sub.u32 	%r457, %r373, %r455;
	sub.u32 	%r458, %r371, %r455;
	xor.b32 	%r459, %r456, %r457;
	shr.u32 	%r460, %r459, 13;
	sub.u32 	%r461, %r458, %r459;
	sub.u32 	%r462, %r455, %r459;
	xor.b32 	%r463, %r460, %r461;
	shr.u32 	%r464, %r463, 12;
	sub.u32 	%r465, %r462, %r463;
	xor.b32 	%r466, %r464, %r465;
	sub.u32 	%r467, %r459, %r463;
	sub.u32 	%r468, %r467, %r466;
	shl.b32 	%r469, %r466, 16;
	xor.b32 	%r470, %r468, %r469;
	.loc	20	144	0
	sub.u32 	%r471, %r463, %r466;
	sub.u32 	%r472, %r471, %r470;
	shr.u32 	%r473, %r470, 5;
	xor.b32 	%r474, %r472, %r473;
	.loc	20	145	0
	sub.u32 	%r475, %r466, %r470;
	sub.u32 	%r476, %r475, %r474;
	shr.u32 	%r477, %r474, 3;
	xor.b32 	%r241, %r476, %r477;
	.loc	20	146	0
	sub.u32 	%r478, %r470, %r474;
	sub.u32 	%r479, %r478, %r241;
	shl.b32 	%r480, %r241, 10;
	xor.b32 	%r245, %r479, %r480;
	.loc	20	147	0
	sub.u32 	%r481, %r474, %r241;
	sub.u32 	%r482, %r481, %r245;
	shr.u32 	%r483, %r245, 15;
	xor.b32 	%r249, %r482, %r483;
	.loc	34	50	0
	mul.lo.u32 	%r484, %r249, 1103515245;
	add.u32 	%r485, %r484, 12345;
	shr.u32 	%r486, %r485, 16;
	and.b32 	%r487, %r486, 255;
	shl.b32 	%r488, %r487, 7;
	mul.lo.u32 	%r489, %r249, -1029531031;
	sub.u32 	%r490, %r489, 740551042;
	shr.u32 	%r491, %r490, 16;
	and.b32 	%r492, %r491, 255;
	xor.b32 	%r493, %r488, %r492;
	cvt.rn.f32.s32 	%f785, %r493;
	mov.f32 	%f786, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f787, %f785, %f786;
	mul.ftz.f32 	%f788, %f456, %f787;
	sub.ftz.f32 	%f366, %f788, %f773;
	bra.uni 	$Lt_49_178178;
$Lt_49_178434:
	.loc	20	143	0
	// --- Shared path: a single hash, same offset for all three channels ---
	sub.u32 	%r494, %r6, %r12;
	cvt.rzi.ftz.u32.f32 	%r495, %f774;
	shr.u32 	%r496, %r495, 13;
	sub.u32 	%r497, %r494, %r495;
	sub.u32 	%r498, %r12, %r495;
	xor.b32 	%r499, %r497, %r496;
	shl.b32 	%r500, %r499, 8;
	sub.u32 	%r501, %r498, %r499;
	sub.u32 	%r502, %r495, %r499;
	xor.b32 	%r503, %r500, %r501;
	shr.u32 	%r504, %r503, 13;
	sub.u32 	%r505, %r502, %r503;
	sub.u32 	%r506, %r499, %r503;
	xor.b32 	%r507, %r504, %r505;
	shr.u32 	%r508, %r507, 12;
	sub.u32 	%r509, %r506, %r507;
	xor.b32 	%r510, %r508, %r509;
	shl.b32 	%r511, %r510, 16;
	sub.u32 	%r512, %r503, %r507;
	sub.u32 	%r513, %r512, %r510;
	xor.b32 	%r514, %r511, %r513;
	.loc	20	144	0
	sub.u32 	%r515, %r507, %r510;
	sub.u32 	%r516, %r515, %r514;
	shr.u32 	%r517, %r514, 5;
	xor.b32 	%r518, %r516, %r517;
	.loc	20	145	0
	sub.u32 	%r519, %r510, %r514;
	sub.u32 	%r520, %r519, %r518;
	shr.u32 	%r521, %r518, 3;
	xor.b32 	%r287, %r520, %r521;
	.loc	20	146	0
	sub.u32 	%r522, %r514, %r518;
	sub.u32 	%r523, %r522, %r287;
	shl.b32 	%r524, %r287, 10;
	xor.b32 	%r291, %r523, %r524;
	.loc	20	147	0
	sub.u32 	%r525, %r518, %r287;
	sub.u32 	%r526, %r525, %r291;
	shr.u32 	%r527, %r291, 15;
	xor.b32 	%r295, %r526, %r527;
	.loc	34	54	0
	mul.lo.u32 	%r296, %r295, 1103515245;
	mul.lo.u32 	%r297, %r295, -1029531031;
	add.u32 	%r298, %r296, 12345;
	sub.u32 	%r299, %r297, 740551042;
	shr.u32 	%r300, %r298, 16;
	shr.u32 	%r301, %r299, 16;
	and.b32 	%r302, %r300, 255;
	and.b32 	%r303, %r301, 255;
	shl.b32 	%r304, %r302, 7;
	xor.b32 	%r305, %r304, %r303;
	cvt.rn.f32.s32 	%f367, %r305;
	mov.f32 	%f789, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f369, %f367, %f789;
	mul.ftz.f32 	%f790, %f456, %f369;
	sub.ftz.f32 	%f791, %f790, %f773;
	mov.f32 	%f366, %f791;
	mov.f32 	%f361, %f791;
	mov.f32 	%f356, %f791;
$Lt_49_178178:
	.loc	34	57	0
	// Apply the offsets.
	add.ftz.f32 	%f7, %f356, %f7;
	.loc	34	58	0
	add.ftz.f32 	%f6, %f361, %f6;
	.loc	34	59	0
	add.ftz.f32 	%f5, %f5, %f366;
	ld.param.f32 	%f792, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+12];
	mov.f32 	%f793, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p115, %f792, %f793;
	@!%p115 bra 	$Lt_49_5634;
	.loc	34	63	0
	// Optional clamp of all four values to [0, 1].
	mov.f32 	%f794, 0f00000000;   	// 0
	max.ftz.f32 	%f795, %f5, %f794;
	mov.f32 	%f796, 0f3f800000;   	// 1
	min.ftz.f32 	%f5, %f795, %f796;
	mov.f32 	%f797, 0f00000000;   	// 0
	max.ftz.f32 	%f798, %f6, %f797;
	mov.f32 	%f799, 0f3f800000;   	// 1
	min.ftz.f32 	%f6, %f798, %f799;
	mov.f32 	%f800, 0f00000000;   	// 0
	max.ftz.f32 	%f801, %f7, %f800;
	mov.f32 	%f802, 0f3f800000;   	// 1
	min.ftz.f32 	%f7, %f801, %f802;
	mov.f32 	%f803, 0f00000000;   	// 0
	max.ftz.f32 	%f804, %f9, %f803;
	mov.f32 	%f805, 0f3f800000;   	// 1
	min.ftz.f32 	%f9, %f804, %f805;
	bra.uni 	$Lt_49_5634;
// First-filter case: transform (%f7, %f6, %f5) with the 3x3 constant matrix
// kRGB32f_To_601YPbPr, adjust the three results with paraminFilter1 values,
// and transform back with k601YPbPr_To_RGB32f. Applied only when
// float(%r6) < float(%r14) * paraminFilter1+20; otherwise the values pass
// through unchanged.
// NOTE(review): %r6 / %r14 look like a coordinate and its extent — confirm
// against the kernel prologue, which is outside this view.
$Lt_49_9218:
	.loc	38	99	0
	cvt.rn.f32.s32 	%f806, %r6;
	cvt.rn.f32.s32 	%f807, %r14;
	ld.param.f32 	%f808, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+20];
	mul.ftz.f32 	%f809, %f807, %f808;
	setp.lt.ftz.f32 	%p116, %f806, %f809;
	@!%p116 bra 	$Lt_49_5634;
	.loc	22	267	0
	// Three interleaved dot products with (%f7, %f6, %f5):
	//   %f471 = matrix offsets  0/ 4/ 8
	//   %f824 = matrix offsets 12/16/20
	//   %f822 = matrix offsets 24/28/32
	ld.const.f32 	%f810, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f810, %f6;
	ld.const.f32 	%f811, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f812, %f811, %f6;
	ld.const.f32 	%f813, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f814, %f813, %f6;
	ld.const.f32 	%f815, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f815, %f7, %f467;
	ld.const.f32 	%f816, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f817, %f816, %f7, %f812;
	ld.const.f32 	%f818, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f819, %f818, %f7, %f814;
	ld.const.f32 	%f820, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f820, %f5, %f469;
	ld.const.f32 	%f821, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f822, %f821, %f5, %f817;
	ld.const.f32 	%f823, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f824, %f823, %f5, %f819;
	// First component: %f827 = %f471 * param[+8] + param[+4].
	ld.param.f32 	%f825, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+4];
	ld.param.f32 	%f826, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+8];
	fma.rn.ftz.f32 	%f827, %f471, %f826, %f825;
	// Other two components are mixed by a 2x2 matrix built from param[+12] (a)
	// and param[+16] (b):
	//   %f833 = %f824*a - %f822*b
	//   %f834 = %f824*b + %f822*a
	ld.param.f32 	%f828, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+16];
	mul.ftz.f32 	%f829, %f822, %f828;
	ld.param.f32 	%f830, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+12];
	mul.ftz.f32 	%f831, %f822, %f830;
	mul.ftz.f32 	%f832, %f824, %f830;
	sub.ftz.f32 	%f833, %f832, %f829;
	fma.rn.ftz.f32 	%f834, %f824, %f828, %f831;
	// Back-transform of (%f827, %f833, %f834); offsets 12/16/20 give new %f6.
	ld.const.f32 	%f835, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f836, %f835, %f833;
	ld.const.f32 	%f837, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f838, %f837, %f827, %f836;
	ld.const.f32 	%f839, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f420, %f839, %f834, %f838;
	.loc	22	268	0
	// Offsets 0/4/8 give new %f7.
	ld.const.f32 	%f840, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f841, %f840, %f833;
	ld.const.f32 	%f842, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f843, %f842, %f827, %f841;
	ld.const.f32 	%f844, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f426, %f844, %f834, %f843;
	.loc	35	56	0
	// Offsets 24/28/32 give new %f5.
	ld.const.f32 	%f845, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f846, %f845, %f833;
	ld.const.f32 	%f847, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f848, %f847, %f827, %f846;
	ld.const.f32 	%f849, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f849, %f834, %f848;
	mov.f32 	%f6, %f420;
	mov.f32 	%f7, %f426;
	bra.uni 	$Lt_49_5634;
// First-filter case: compute the weighted sum %f471 of (%f7, %f6, %f5) using
// matrix offsets 0/4/8 of kRGB32f_To_601YPbPr, use it to interpolate between
// two paraminFilter1 triples, then blend each working value toward its
// interpolated target by the factor at paraminFilter1+28.
$Lt_49_9474:
	.loc	36	46	0
	ld.const.f32 	%f850, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f850, %f6;
	ld.const.f32 	%f851, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f851, %f7, %f467;
	ld.const.f32 	%f852, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f852, %f5, %f469;
	// %f437 = param[+8] + %f471 * (param[+20] - param[+8])   (target for %f6)
	ld.param.f32 	%f481, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+8];
	ld.param.f32 	%f853, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+20];
	sub.ftz.f32 	%f854, %f853, %f481;
	fma.rn.ftz.f32 	%f437, %f471, %f854, %f481;
	.loc	36	47	0
	// %f441 = param[+12] + %f471 * (param[+24] - param[+12]) (target for %f7)
	ld.param.f32 	%f855, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+12];
	ld.param.f32 	%f856, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+24];
	sub.ftz.f32 	%f857, %f856, %f855;
	fma.rn.ftz.f32 	%f441, %f471, %f857, %f855;
	.loc	36	49	0
	// %f861 = param[+4] + %f471 * (param[+16] - param[+4])   (target for %f5)
	// Blend: v = v + param[+28] * (target - v).
	ld.param.f32 	%f858, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+28];
	ld.param.f32 	%f456, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+4];
	ld.param.f32 	%f859, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter1+16];
	sub.ftz.f32 	%f860, %f859, %f456;
	fma.rn.ftz.f32 	%f861, %f471, %f860, %f456;
	sub.ftz.f32 	%f862, %f861, %f5;
	fma.rn.ftz.f32 	%f5, %f858, %f862, %f5;
	.loc	36	50	0
	sub.ftz.f32 	%f863, %f437, %f6;
	fma.rn.ftz.f32 	%f6, %f858, %f863, %f6;
	.loc	36	51	0
	sub.ftz.f32 	%f864, %f441, %f7;
	fma.rn.ftz.f32 	%f7, %f858, %f864, %f7;
	.loc	38	103	0
	bra.uni 	$Lt_49_5634;
// First-filter case: mirror %r12 within %r13, i.e. %r12 = %r13 - %r12 - 1,
// then fall through to the second-filter dispatch.
// NOTE(review): %r12 / %r13 look like a coordinate and its extent — confirm.
$Lt_49_9730:
	.loc	37	41	0
	sub.s32 	%r528, %r13, %r12;
	sub.s32 	%r12, %r528, 1;
// Dispatch for the second filter: load the 32-bit selector at
// paraminFilter2+0 and compare it against 0..16 in ascending order, branching
// to the matching case label. Any other selector value skips straight to
// $Lt_49_514 without modifying anything.
$Lt_49_5634:
	.loc	38	54	0
	ld.param.u32 	%r529, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+0];
	mov.u32 	%r530, 0;
	setp.eq.s32 	%p117, %r529, %r530;
	@%p117 bra 	$Lt_49_258;
	mov.u32 	%r531, 1;
	setp.eq.s32 	%p118, %r529, %r531;
	@%p118 bra 	$Lt_49_770;
	mov.u32 	%r532, 2;
	setp.eq.s32 	%p119, %r529, %r532;
	@%p119 bra 	$Lt_49_1026;
	mov.u32 	%r533, 3;
	setp.eq.s32 	%p120, %r529, %r533;
	@%p120 bra 	$Lt_49_1282;
	mov.u32 	%r534, 4;
	setp.eq.s32 	%p121, %r529, %r534;
	@%p121 bra 	$Lt_49_1538;
	mov.u32 	%r535, 5;
	setp.eq.s32 	%p122, %r529, %r535;
	@%p122 bra 	$Lt_49_1794;
	mov.u32 	%r536, 6;
	setp.eq.s32 	%p123, %r529, %r536;
	@%p123 bra 	$Lt_49_2050;
	mov.u32 	%r537, 7;
	setp.eq.s32 	%p124, %r529, %r537;
	@%p124 bra 	$Lt_49_2306;
	mov.u32 	%r538, 8;
	setp.eq.s32 	%p125, %r529, %r538;
	@%p125 bra 	$Lt_49_2562;
	mov.u32 	%r539, 9;
	setp.eq.s32 	%p126, %r529, %r539;
	@%p126 bra 	$Lt_49_2818;
	mov.u32 	%r540, 10;
	setp.eq.s32 	%p127, %r529, %r540;
	@%p127 bra 	$Lt_49_3074;
	mov.u32 	%r541, 11;
	setp.eq.s32 	%p128, %r529, %r541;
	@%p128 bra 	$Lt_49_3330;
	mov.u32 	%r542, 12;
	setp.eq.s32 	%p129, %r529, %r542;
	@%p129 bra 	$Lt_49_3586;
	mov.u32 	%r543, 13;
	setp.eq.s32 	%p130, %r529, %r543;
	@%p130 bra 	$Lt_49_3842;
	mov.u32 	%r544, 14;
	setp.eq.s32 	%p131, %r529, %r544;
	@%p131 bra 	$Lt_49_4098;
	mov.u32 	%r545, 15;
	setp.eq.s32 	%p132, %r529, %r545;
	@%p132 bra 	$Lt_49_4354;
	mov.u32 	%r546, 16;
	setp.eq.s32 	%p133, %r529, %r546;
	@%p133 bra 	$Lt_49_4610;
	// No selector matched.
	bra.uni 	$Lt_49_514;
// Selector 0: rewrite %f9 from the value v = paraminFilter2+4 under three
// flags, each "set" when its float parameter equals 1.0:
//   %p134 <- paraminFilter2+8, %p135 <- paraminFilter2+12,
//   %p136 <- paraminFilter2+16.
// NOTE(review): %f9 is presumably the alpha channel — confirm.
$Lt_49_258:
	.loc	21	42	0
	// Saturate %f9 to [0, 1] first.
	cvt.ftz.sat.f32.f32 	%f9, %f9;
	ld.param.f32 	%f865, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+8];
	mov.f32 	%f866, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p134, %f865, %f866;
	ld.param.f32 	%f867, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+12];
	mov.f32 	%f868, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p135, %f867, %f868;
	ld.param.f32 	%f869, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+4];
	ld.param.f32 	%f870, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+16];
	mov.f32 	%f871, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p136, %f870, %f871;
	@!%p136 bra 	$Lt_49_179970;
	.loc	21	45	0
	// %p136 set: t = %p134 ? v : v * %f9; %f9 = %p135 ? 1 - t : t;
	// the result is also copied into %f5, %f6 and %f7.
	mov.f32 	%f872, %f869;
	mul.ftz.f32 	%f873, %f872, %f9;
	selp.f32 	%f874, %f872, %f873, %p134;
	.loc	21	53	0
	mov.f32 	%f875, 0f3f800000;   	// 1
	sub.ftz.f32 	%f876, %f875, %f874;
	selp.f32 	%f9, %f876, %f874, %p135;
	.loc	21	57	0
	mov.f32 	%f5, %f9;
	mov.f32 	%f6, %f9;
	mov.f32 	%f7, %f9;
	bra.uni 	$Lt_49_514;
$Lt_49_179970:
	@!%p134 bra 	$Lt_49_180482;
	.loc	21	61	0
	// %p136 clear, %p134 set: %f9 = %p135 ? 1 - v : v (old %f9 is discarded).
	mov.f32 	%f872, %f869;
	mov.f32 	%f877, 0f3f800000;   	// 1
	sub.ftz.f32 	%f878, %f877, %f872;
	selp.f32 	%f9, %f878, %f872, %p135;
	bra.uni 	$Lt_49_514;
$Lt_49_180482:
	.loc	21	69	0
	// Both clear: %f9 = (%p135 ? 1 - %f9 : %f9) * v.
	mov.f32 	%f879, 0f3f800000;   	// 1
	sub.ftz.f32 	%f880, %f879, %f9;
	selp.f32 	%f881, %f880, %f9, %p135;
	.loc	21	73	0
	mul.ftz.f32 	%f9, %f881, %f869;
	bra.uni 	$Lt_49_514;
// Selector 1: collapse (%f7, %f6, %f5) to the single weighted sum %f887
// (kRGB32f_To_601YPbPr offsets 0/4/8), then rebuild the three values by
// scaling %f887 with k601YPbPr_To_RGB32f offsets 0, 12 and 24.
// NOTE(review): this reads as a luma-only (grayscale) conversion, judging by
// the constant names — confirm.
$Lt_49_770:
	.loc	22	267	0
	ld.const.f32 	%f882, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f883, %f882, %f6;
	ld.const.f32 	%f884, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f885, %f884, %f7, %f883;
	ld.const.f32 	%f886, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f887, %f886, %f5, %f885;
	ld.const.f32 	%f888, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f889, %f888, %f887;
	.loc	22	268	0
	ld.const.f32 	%f890, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f891, %f890, %f887;
	.loc	23	44	0
	ld.const.f32 	%f892, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f892, %f887;
	mov.f32 	%f6, %f889;
	mov.f32 	%f7, %f891;
	.loc	38	61	0
	bra.uni 	$Lt_49_514;
// Selector 2: per-value remap of %f7, %f6 and %f5 around the pivot
// p = paraminFilter2+12, with base b = paraminFilter2+20.
//   paraminFilter2+4 != 0 : v = paraminFilter2+8 * (v - p) + b
//   paraminFilter2+4 == 0 : v = (v > p) ? paraminFilter2+24 : b
// setp.neu is also true for NaN, so a NaN at +4 takes the first form.
$Lt_49_1026:
	.loc	38	63	0
	ld.param.f32 	%f893, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+20];
	ld.param.f32 	%f894, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+12];
	ld.param.f32 	%f895, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+4];
	mov.f32 	%f896, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p137, %f895, %f896;
	@!%p137 bra 	$Lt_49_180994;
	.loc	24	44	0
	// Linear form.
	ld.param.f32 	%f897, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+8];
	sub.ftz.f32 	%f898, %f7, %f894;
	fma.rn.ftz.f32 	%f7, %f897, %f898, %f893;
	.loc	24	45	0
	sub.ftz.f32 	%f899, %f6, %f894;
	fma.rn.ftz.f32 	%f6, %f897, %f899, %f893;
	.loc	24	46	0
	sub.ftz.f32 	%f900, %f5, %f894;
	fma.rn.ftz.f32 	%f5, %f897, %f900, %f893;
	bra.uni 	$Lt_49_514;
$Lt_49_180994:
	.loc	24	50	0
	// Two-level form: pick one of two output values per comparison.
	ld.param.f32 	%f901, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+24];
	setp.gt.ftz.f32 	%p138, %f7, %f894;
	selp.f32 	%f7, %f901, %f893, %p138;
	.loc	24	51	0
	setp.gt.ftz.f32 	%p139, %f6, %f894;
	selp.f32 	%f6, %f901, %f893, %p139;
	.loc	24	52	0
	setp.gt.ftz.f32 	%p140, %f5, %f894;
	selp.f32 	%f5, %f901, %f893, %p140;
	bra.uni 	$Lt_49_514;
// Selector 3: independent per-value gain.
//   %f5 *= paraminFilter2+4, %f6 *= paraminFilter2+8, %f7 *= paraminFilter2+12
$Lt_49_1282:
	.loc	25	47	0
	ld.param.f32 	%f902, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+4];
	mul.ftz.f32 	%f5, %f902, %f5;
	ld.param.f32 	%f903, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+8];
	mul.ftz.f32 	%f6, %f903, %f6;
	ld.param.f32 	%f904, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+12];
	mul.ftz.f32 	%f7, %f904, %f7;
	.loc	38	67	0
	bra.uni 	$Lt_49_514;
// Selector 4: measure how far the clamped working values are from a reference
// triple, and conditionally collapse (%f7, %f6, %f5) to a single weighted sum.
//   dist = max(|clamp01(%f5) - param[+12]|, |clamp01(%f6) - param[+16]|,
//              |clamp01(%f7) - param[+20]|, |clamp01(%f9) - %f9|)
// The collapse runs when (param[+4] == 0) XOR (param[+8] >= dist); otherwise
// the values are left unchanged.
$Lt_49_1538:
	.loc	26	48	0
	ld.param.f32 	%f905, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+4];
	mov.f32 	%f906, 0f00000000;   	// 0
	setp.eq.ftz.f32 	%p141, %f905, %f906;
	ld.param.f32 	%f907, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+8];
	// |clamp01(%f5) - param[+12]|
	mov.f32 	%f908, 0f00000000;   	// 0
	max.ftz.f32 	%f909, %f5, %f908;
	mov.f32 	%f910, 0f3f800000;   	// 1
	min.ftz.f32 	%f911, %f909, %f910;
	ld.param.f32 	%f912, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+12];
	sub.ftz.f32 	%f913, %f911, %f912;
	abs.ftz.f32 	%f914, %f913;
	// |clamp01(%f6) - param[+16]|
	mov.f32 	%f915, 0f00000000;   	// 0
	max.ftz.f32 	%f916, %f6, %f915;
	mov.f32 	%f917, 0f3f800000;   	// 1
	min.ftz.f32 	%f918, %f916, %f917;
	ld.param.f32 	%f919, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+16];
	sub.ftz.f32 	%f920, %f918, %f919;
	abs.ftz.f32 	%f921, %f920;
	// |clamp01(%f7) - param[+20]|
	mov.f32 	%f922, 0f00000000;   	// 0
	max.ftz.f32 	%f923, %f7, %f922;
	mov.f32 	%f924, 0f3f800000;   	// 1
	min.ftz.f32 	%f925, %f923, %f924;
	ld.param.f32 	%f926, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+20];
	sub.ftz.f32 	%f927, %f925, %f926;
	abs.ftz.f32 	%f928, %f927;
	// |clamp01(%f9) - %f9|: non-zero only when %f9 lies outside [0, 1].
	mov.f32 	%f929, 0f00000000;   	// 0
	max.ftz.f32 	%f930, %f9, %f929;
	mov.f32 	%f931, 0f3f800000;   	// 1
	min.ftz.f32 	%f932, %f930, %f931;
	sub.ftz.f32 	%f933, %f932, %f9;
	abs.ftz.f32 	%f934, %f933;
	// Largest of the four distances, compared against param[+8].
	max.ftz.f32 	%f935, %f928, %f934;
	max.ftz.f32 	%f936, %f921, %f935;
	max.ftz.f32 	%f937, %f914, %f936;
	setp.ge.ftz.f32 	%p142, %f907, %f937;
	xor.pred 	%p143, %p141, %p142;
	@!%p143 bra 	$Lt_49_514;
	.loc	22	267	0
	// Collapse: %f887 = weighted sum (kRGB32f_To_601YPbPr offsets 0/4/8), then
	// each value = %f887 scaled by k601YPbPr_To_RGB32f offset 12 / 0 / 24.
	ld.const.f32 	%f938, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f883, %f938, %f6;
	ld.const.f32 	%f939, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f885, %f939, %f7, %f883;
	ld.const.f32 	%f940, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f887, %f940, %f5, %f885;
	ld.const.f32 	%f941, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f942, %f941, %f887;
	.loc	22	268	0
	ld.const.f32 	%f943, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f944, %f943, %f887;
	.loc	23	44	0
	ld.const.f32 	%f945, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f945, %f887;
	mov.f32 	%f6, %f942;
	mov.f32 	%f7, %f944;
	bra.uni 	$Lt_49_514;
// Selector 5: when the clamped working values lie within param[+8] of the
// reference triple (param[+12], param[+16], param[+20]), replace
// (%f5, %f6, %f7) with the triple (param[+24], param[+28], param[+32]).
// If param[+4] == 0 the replacement triple is first scaled by the weighted
// sum of the current values. Otherwise nothing changes.
$Lt_49_1794:
	.loc	27	48	0
	ld.param.f32 	%f946, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+8];
	// |clamp01(%f5) - param[+12]|
	mov.f32 	%f947, 0f00000000;   	// 0
	max.ftz.f32 	%f948, %f5, %f947;
	mov.f32 	%f949, 0f3f800000;   	// 1
	min.ftz.f32 	%f950, %f948, %f949;
	ld.param.f32 	%f951, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+12];
	sub.ftz.f32 	%f952, %f950, %f951;
	abs.ftz.f32 	%f953, %f952;
	// |clamp01(%f6) - param[+16]|
	mov.f32 	%f954, 0f00000000;   	// 0
	max.ftz.f32 	%f955, %f6, %f954;
	mov.f32 	%f956, 0f3f800000;   	// 1
	min.ftz.f32 	%f957, %f955, %f956;
	ld.param.f32 	%f958, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+16];
	sub.ftz.f32 	%f959, %f957, %f958;
	abs.ftz.f32 	%f960, %f959;
	// |clamp01(%f7) - param[+20]|
	mov.f32 	%f961, 0f00000000;   	// 0
	max.ftz.f32 	%f962, %f7, %f961;
	mov.f32 	%f963, 0f3f800000;   	// 1
	min.ftz.f32 	%f964, %f962, %f963;
	ld.param.f32 	%f965, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+20];
	sub.ftz.f32 	%f966, %f964, %f965;
	abs.ftz.f32 	%f967, %f966;
	// |clamp01(%f9) - %f9|: non-zero only when %f9 lies outside [0, 1].
	mov.f32 	%f968, 0f00000000;   	// 0
	max.ftz.f32 	%f969, %f9, %f968;
	mov.f32 	%f970, 0f3f800000;   	// 1
	min.ftz.f32 	%f971, %f969, %f970;
	sub.ftz.f32 	%f972, %f971, %f9;
	abs.ftz.f32 	%f973, %f972;
	// Skip unless param[+8] >= the largest of the four distances.
	max.ftz.f32 	%f974, %f967, %f973;
	max.ftz.f32 	%f975, %f960, %f974;
	max.ftz.f32 	%f976, %f953, %f975;
	setp.ge.ftz.f32 	%p144, %f946, %f976;
	@!%p144 bra 	$Lt_49_514;
	.loc	27	51	0
	// Default replacement: the raw triple at param[+24..+32].
	ld.param.f32 	%f977, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+24];
	mov.f32 	%f126, %f977;
	ld.param.f32 	%f978, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+28];
	mov.f32 	%f128, %f978;
	ld.param.f32 	%f979, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+32];
	mov.f32 	%f130, %f979;
	ld.param.f32 	%f980, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+4];
	mov.f32 	%f981, 0f00000000;   	// 0
	setp.eq.ftz.f32 	%p145, %f980, %f981;
	@!%p145 bra 	$Lt_49_182274;
	.loc	27	60	0
	// param[+4] == 0: scale the triple by %f887, the weighted sum of
	// (%f7, %f6, %f5) with kRGB32f_To_601YPbPr offsets 0/4/8.
	ld.const.f32 	%f982, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f883, %f982, %f6;
	ld.const.f32 	%f983, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f885, %f983, %f7, %f883;
	ld.const.f32 	%f984, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f887, %f984, %f5, %f885;
	mul.ftz.f32 	%f126, %f887, %f977;
	.loc	27	61	0
	mul.ftz.f32 	%f128, %f887, %f978;
	.loc	27	62	0
	mul.ftz.f32 	%f130, %f887, %f979;
$Lt_49_182274:
	.loc	27	65	0
	mov.f32 	%f5, %f126;
	mov.f32 	%f6, %f128;
	mov.f32 	%f7, %f130;
	bra.uni 	$Lt_49_514;
// Selector 6: attenuate %f9 according to how close the position is to any of
// four edges, with w = paraminFilter2+4.
//   d = max over e in {%r6, %r12, %r13-%r12, %r14-%r6} of trunc(w - float(e))
//   if d > 0: %f9 *= (w - float(d)) / w
// NOTE(review): %r6/%r12 look like x/y and %r14/%r13 like width/height, which
// would make this an edge feather of width w — confirm against the prologue.
$Lt_49_2050:
	.loc	28	47	0
	sub.s32 	%r547, %r13, %r12;
	sub.s32 	%r548, %r14, %r6;
	cvt.rn.f32.s32 	%f985, %r6;
	cvt.rn.f32.s32 	%f986, %r12;
	cvt.rn.f32.s32 	%f138, %r547;
	cvt.rn.f32.s32 	%f139, %r548;
	ld.param.f32 	%f872, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+4];
	sub.ftz.f32 	%f987, %f872, %f985;
	sub.ftz.f32 	%f988, %f872, %f986;
	sub.ftz.f32 	%f989, %f872, %f138;
	sub.ftz.f32 	%f990, %f872, %f139;
	// Round each difference toward zero to a signed integer.
	cvt.rzi.ftz.s32.f32 	%r549, %f987;
	cvt.rzi.ftz.s32.f32 	%r550, %f988;
	cvt.rzi.ftz.s32.f32 	%r551, %f989;
	cvt.rzi.ftz.s32.f32 	%r552, %f990;
	max.s32 	%r553, %r549, %r550;
	max.s32 	%r554, %r552, %r553;
	max.s32 	%r555, %r551, %r554;
	// d <= 0: leave %f9 untouched.
	mov.u32 	%r556, 0;
	setp.le.s32 	%p146, %r555, %r556;
	@%p146 bra 	$Lt_49_514;
	.loc	28	51	0
	cvt.rn.f32.s32 	%f991, %r555;
	sub.ftz.f32 	%f992, %f872, %f991;
	div.approx.ftz.f32 	%f993, %f992, %f872;
	mul.ftz.f32 	%f9, %f9, %f993;
	bra.uni 	$Lt_49_514;
// Selector 7: map the weighted sum L (%f887) of (%f7, %f6, %f5) to a mask
// value m (%f164) in a band between lo = param[+8] and hi = param[+12], with
// soft edges of half-width s = param[+16] * (hi - lo):
//   L <  clamp01(lo - s)  or  L >= clamp01(hi + s)  -> m = 0
//   clamp01(lo + s) <= L < clamp01(hi - s)          -> m = 1
//   L <  clamp01(lo + s)   -> m = (L - clamp01(lo - s)) / (2 s)
//   otherwise              -> m = (clamp01(hi + s) - L) / (2 s)
// If param[+4] != 0 the mask is inverted (m = 1 - m). The three working values
// are then set to m scaled by k601YPbPr_To_RGB32f offsets 24 / 12 / 0.
$Lt_49_2306:
	.loc	30	50	0
	// L and s are computed interleaved.
	ld.const.f32 	%f994, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f883, %f994, %f6;
	ld.param.f32 	%f995, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+12];
	ld.param.f32 	%f897, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+8];
	sub.ftz.f32 	%f996, %f995, %f897;
	ld.const.f32 	%f997, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f885, %f997, %f7, %f883;
	ld.param.f32 	%f998, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+16];
	mul.ftz.f32 	%f999, %f998, %f996;
	ld.const.f32 	%f1000, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f887, %f1000, %f5, %f885;
	// %f1008 = clamp01(hi + s), %f1010 = clamp01(lo - s): outer band limits.
	add.ftz.f32 	%f1001, %f999, %f995;
	sub.ftz.f32 	%f1002, %f897, %f999;
	mov.f32 	%f1003, 0f00000000;  	// 0
	max.ftz.f32 	%f1004, %f1001, %f1003;
	mov.f32 	%f1005, 0f00000000;  	// 0
	max.ftz.f32 	%f1006, %f1002, %f1005;
	mov.f32 	%f1007, 0f3f800000;  	// 1
	min.ftz.f32 	%f1008, %f1004, %f1007;
	mov.f32 	%f1009, 0f3f800000;  	// 1
	min.ftz.f32 	%f1010, %f1006, %f1009;
	// Outside the outer band (either test true) -> m = 0.
	set.gt.ftz.u32.f32 	%r557, %f1010, %f887;
	neg.s32 	%r558, %r557;
	set.le.ftz.u32.f32 	%r559, %f1008, %f887;
	neg.s32 	%r560, %r559;
	or.b32 	%r561, %r558, %r560;
	mov.u32 	%r562, 0;
	setp.eq.s32 	%p147, %r561, %r562;
	@%p147 bra 	$Lt_49_183554;
	mov.f32 	%f164, 0f00000000;   	// 0
	bra.uni 	$Lt_49_184322;
$Lt_49_183554:
	// %f1015 = clamp01(lo + s), %f1020 = clamp01(hi - s): inner band limits.
	// Inside the inner band (both tests true) -> m = 1.
	add.ftz.f32 	%f1011, %f999, %f897;
	mov.f32 	%f1012, 0f00000000;  	// 0
	max.ftz.f32 	%f1013, %f1011, %f1012;
	mov.f32 	%f1014, 0f3f800000;  	// 1
	min.ftz.f32 	%f1015, %f1013, %f1014;
	set.le.ftz.u32.f32 	%r563, %f1015, %f887;
	neg.s32 	%r564, %r563;
	sub.ftz.f32 	%f1016, %f995, %f999;
	mov.f32 	%f1017, 0f00000000;  	// 0
	max.ftz.f32 	%f1018, %f1016, %f1017;
	mov.f32 	%f1019, 0f3f800000;  	// 1
	min.ftz.f32 	%f1020, %f1018, %f1019;
	set.lt.ftz.u32.f32 	%r565, %f887, %f1020;
	neg.s32 	%r566, %r565;
	and.b32 	%r567, %r564, %r566;
	mov.u32 	%r568, 0;
	setp.eq.s32 	%p148, %r567, %r568;
	@%p148 bra 	$Lt_49_184066;
	mov.f32 	%f164, 0f3f800000;   	// 1
	bra.uni 	$Lt_49_184322;
$Lt_49_184066:
	// In a soft edge: linear ramp over width 2s (approximate division).
	add.ftz.f32 	%f1021, %f999, %f999;
	setp.gt.ftz.f32 	%p149, %f1015, %f887;
	@!%p149 bra 	$Lt_49_184578;
	.loc	30	62	0
	// Lower edge: ramp up from the lower outer limit.
	sub.ftz.f32 	%f1022, %f887, %f1010;
	div.approx.ftz.f32 	%f164, %f1022, %f1021;
	bra.uni 	$Lt_49_184322;
$Lt_49_184578:
	.loc	30	66	0
	// Upper edge: ramp down toward the upper outer limit.
	sub.ftz.f32 	%f1023, %f1008, %f887;
	div.approx.ftz.f32 	%f164, %f1023, %f1021;
$Lt_49_184322:
$Lt_49_183810:
$Lt_49_183298:
	.loc	30	69	0
	// Optional inversion, selected by param[+4] != 0.
	mov.f32 	%f1024, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1025, %f1024, %f164;
	ld.param.f32 	%f1026, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+4];
	mov.f32 	%f1027, 0f00000000;  	// 0
	setp.neu.ftz.f32 	%p150, %f1026, %f1027;
	selp.f32 	%f164, %f1025, %f164, %p150;
	.loc	30	77	0
	// Write the mask into the three working values.
	ld.const.f32 	%f1028, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f1028, %f164;
	ld.const.f32 	%f1029, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f1029, %f164;
	ld.const.f32 	%f1030, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f1030, %f164;
	.loc	38	79	0
	bra.uni 	$Lt_49_514;
// Selector 8: region-gated adjustment in the space produced by
// kRGB32f_To_601YPbPr.
// Gate, with g = param[+44]:
//   g >= 0 and float(%r6)  <  float(%r14) * g     -> apply
//   g <  0 and float(%r12) < -(float(%r13) * g)   -> apply
//   otherwise                                     -> leave values unchanged
// When applied:
//   1. first component Y = weighted sum of (%f7, %f6, %f5); a small
//      pseudo-random offset is added unless both input and output ranges
//      equal 1;
//   2. Y is remapped from the input range [param[+8], param[+4]] to the output
//      range [param[+16], param[+12]], with optional exponent param[+20];
//   3. the other two components are rotated by the angle param[+28], blended
//      toward (param[+32], param[+36]) by param[+40], and scaled by
//      param[+24];
//   4. the result is transformed back with k601YPbPr_To_RGB32f.
// NOTE(review): %r6/%r14 and %r12/%r13 look like coordinate/extent pairs, so
// the gate is presumably a split-screen preview — confirm.
$Lt_49_2562:
	.loc	38	80	0
	ld.param.f32 	%f1031, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+44];
	mov.f32 	%f1032, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p151, %f1031, %f1032;
	@!%p151 bra 	$L_49_155394;
	cvt.rn.f32.s32 	%f1033, %r6;
	cvt.rn.f32.s32 	%f1034, %r14;
	mul.ftz.f32 	%f1035, %f1034, %f1031;
	setp.lt.ftz.f32 	%p152, %f1033, %f1035;
	@%p152 bra 	$L_49_155138;
$L_49_155394:
	mov.f32 	%f1036, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p153, %f1031, %f1036;
	@!%p153 bra 	$Lt_49_514;
	cvt.rn.f32.s32 	%f1037, %r12;
	cvt.rn.f32.s32 	%f1038, %r13;
	mul.ftz.f32 	%f1039, %f1038, %f1031;
	neg.ftz.f32 	%f1040, %f1039;
	setp.lt.ftz.f32 	%p154, %f1037, %f1040;
	@!%p154 bra 	$Lt_49_514;
$L_49_155138:
	.loc	31	47	0
	// Y (%f887, copied to %f200) = weighted sum with matrix offsets 0/4/8.
	ld.const.f32 	%f1041, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1042, %f1041, %f6;
	ld.const.f32 	%f1043, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1044, %f1043, %f7, %f1042;
	ld.const.f32 	%f1045, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f887, %f1045, %f5, %f1044;
	mov.f32 	%f200, %f887;
	// %f1047 = input range  = param[+4]  - param[+8]
	// %f1050 = output range = param[+12] - param[+16]
	ld.param.f32 	%f897, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+8];
	ld.param.f32 	%f1046, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+4];
	sub.ftz.f32 	%f1047, %f1046, %f897;
	ld.param.f32 	%f1048, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+16];
	ld.param.f32 	%f1049, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+12];
	sub.ftz.f32 	%f1050, %f1049, %f1048;
	// Skip the pseudo-random offset only when both ranges are exactly 1.
	mov.f32 	%f1051, 0f3f800000;  	// 1
	set.neu.ftz.u32.f32 	%r569, %f1050, %f1051;
	neg.s32 	%r570, %r569;
	mov.f32 	%f1052, 0f3f800000;  	// 1
	set.neu.ftz.u32.f32 	%r571, %f1047, %f1052;
	neg.s32 	%r572, %r571;
	or.b32 	%r573, %r570, %r572;
	mov.u32 	%r574, 0;
	setp.eq.s32 	%p155, %r573, %r574;
	@%p155 bra 	$Lt_49_184834;
	.loc	20	143	0
	// Integer hash: rounds of subtract / shift / xor over %r5, %r11 and
	// (1 - %r5), ending in %r114.
	// NOTE(review): the inputs are set before this view; presumably derived
	// from the position and/or a seed — confirm.
	mov.s32 	%r575, 1;
	sub.s32 	%r79, %r575, %r5;
	shr.u32 	%r80, %r11, 13;
	sub.u32 	%r81, %r5, %r11;
	sub.u32 	%r82, %r79, %r11;
	xor.b32 	%r83, %r82, %r80;
	shl.b32 	%r84, %r83, 8;
	sub.u32 	%r85, %r81, %r83;
	sub.u32 	%r86, %r11, %r83;
	xor.b32 	%r87, %r85, %r84;
	shr.u32 	%r88, %r87, 13;
	sub.u32 	%r89, %r86, %r87;
	sub.u32 	%r90, %r83, %r87;
	xor.b32 	%r91, %r89, %r88;
	shr.u32 	%r92, %r91, 12;
	sub.u32 	%r93, %r90, %r91;
	xor.b32 	%r94, %r93, %r92;
	sub.u32 	%r576, %r87, %r91;
	sub.u32 	%r96, %r576, %r94;
	shl.b32 	%r577, %r94, 16;
	xor.b32 	%r98, %r96, %r577;
	.loc	20	144	0
	sub.u32 	%r99, %r91, %r94;
	sub.u32 	%r100, %r99, %r98;
	shr.u32 	%r578, %r98, 5;
	xor.b32 	%r102, %r100, %r578;
	.loc	20	145	0
	sub.u32 	%r103, %r94, %r98;
	sub.u32 	%r104, %r103, %r102;
	shr.u32 	%r579, %r102, 3;
	xor.b32 	%r106, %r104, %r579;
	.loc	20	146	0
	sub.u32 	%r107, %r98, %r102;
	sub.u32 	%r108, %r107, %r106;
	shl.b32 	%r580, %r106, 10;
	xor.b32 	%r110, %r108, %r580;
	.loc	20	147	0
	sub.u32 	%r111, %r102, %r106;
	sub.u32 	%r112, %r111, %r110;
	shr.u32 	%r581, %r110, 15;
	xor.b32 	%r114, %r112, %r581;
	.loc	31	57	0
	// Turn the hash into a 15-bit value: two multiply-add steps on %r114,
	// bits 16..23 of each, combined as (a << 7) ^ b. Dividing by 16383.5 and
	// adding -1 gives roughly [-1, 1]; this is scaled by 0.00254902 and
	// added to Y.
	mov.f32 	%f1053, 0f3b270d73;  	// 0.00254902
	mul.lo.u32 	%r582, %r114, 1103515245;
	add.u32 	%r583, %r582, 12345;
	shr.u32 	%r584, %r583, 16;
	and.b32 	%r585, %r584, 255;
	shl.b32 	%r586, %r585, 7;
	mul.lo.u32 	%r587, %r114, -1029531031;
	sub.u32 	%r588, %r587, 740551042;
	shr.u32 	%r589, %r588, 16;
	and.b32 	%r590, %r589, 255;
	xor.b32 	%r591, %r586, %r590;
	cvt.rn.f32.s32 	%f1054, %r591;
	mov.f32 	%f1055, 0f467ffe00;  	// 16383.5
	div.approx.ftz.f32 	%f1056, %f1054, %f1055;
	mov.f32 	%f1057, 0fbf800000;  	// -1
	add.ftz.f32 	%f1058, %f1056, %f1057;
	fma.rn.ftz.f32 	%f200, %f1053, %f1058, %f887;
$Lt_49_184834:
	// Remap Y. With t = (Y - param[+8]) / input range and e = param[+20]:
	//   e != 1, Y - param[+8] <  0 : Y' = param[+16]
	//   e != 1, otherwise          : Y' = param[+16] + output range * t^e
	//   e == 1                     : Y' = param[+16] + output range * t
	// t^e is evaluated as ex2(e * lg2(t)) with approximate instructions.
	sub.ftz.f32 	%f1059, %f200, %f897;
	ld.param.f32 	%f1060, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+20];
	mov.f32 	%f1061, 0f3f800000;  	// 1
	setp.neu.ftz.f32 	%p156, %f1060, %f1061;
	@!%p156 bra 	$Lt_49_185602;
	mov.f32 	%f1062, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p157, %f1059, %f1062;
	@!%p157 bra 	$Lt_49_186114;
	.loc	31	66	0
	mov.f32 	%f200, %f1048;
	bra.uni 	$Lt_49_185346;
$Lt_49_186114:
	.loc	31	70	0
	rcp.approx.ftz.f32 	%f1063, %f1047;
	mul.ftz.f32 	%f1064, %f1059, %f1063;
	lg2.approx.ftz.f32 	%f1065, %f1064;
	mul.ftz.f32 	%f1066, %f1060, %f1065;
	ex2.approx.ftz.f32 	%f1067, %f1066;
	fma.rn.ftz.f32 	%f200, %f1050, %f1067, %f1048;
	bra.uni 	$Lt_49_185346;
$Lt_49_185602:
	.loc	31	77	0
	rcp.approx.ftz.f32 	%f1068, %f1047;
	mul.ftz.f32 	%f1069, %f1059, %f1068;
	fma.rn.ftz.f32 	%f200, %f1050, %f1069, %f1048;
$Lt_49_185346:
	.loc	22	267	0
	// Second and third components of the forward transform:
	//   %f1084 = offsets 12/16/20, %f1082 = offsets 24/28/32.
	// They are rotated by the angle a = param[+28]:
	//   %f1088 = %f1084*cos(a) - %f1082*sin(a)
	//   %f1089 = %f1084*sin(a) + %f1082*cos(a)
	ld.param.f32 	%f1070, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+28];
	sin.approx.ftz.f32 	%f1071, %f1070;
	ld.const.f32 	%f1072, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f1073, %f1072, %f6;
	ld.const.f32 	%f1074, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f1075, %f1074, %f6;
	cos.approx.ftz.f32 	%f1076, %f1070;
	ld.const.f32 	%f1077, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f1078, %f1077, %f7, %f1073;
	ld.const.f32 	%f1079, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f1080, %f1079, %f7, %f1075;
	ld.const.f32 	%f1081, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f1082, %f1081, %f5, %f1078;
	ld.const.f32 	%f1083, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f1084, %f1083, %f5, %f1080;
	mul.ftz.f32 	%f1085, %f1071, %f1082;
	mul.ftz.f32 	%f1086, %f1076, %f1082;
	mul.ftz.f32 	%f1087, %f1084, %f1076;
	sub.ftz.f32 	%f1088, %f1087, %f1085;
	fma.rn.ftz.f32 	%f1089, %f1084, %f1071, %f1086;
	// Blend toward (param[+32], param[+36]) by param[+40], then scale both by
	// param[+24].
	ld.param.f32 	%f1090, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+32];
	sub.ftz.f32 	%f1091, %f1090, %f1088;
	ld.param.f32 	%f1092, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+36];
	sub.ftz.f32 	%f1093, %f1092, %f1089;
	ld.param.f32 	%f1094, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+40];
	fma.rn.ftz.f32 	%f1095, %f1094, %f1091, %f1088;
	fma.rn.ftz.f32 	%f1096, %f1094, %f1093, %f1089;
	ld.param.f32 	%f1097, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+24];
	mul.ftz.f32 	%f1098, %f1095, %f1097;
	mul.ftz.f32 	%f1099, %f1096, %f1097;
	// Back-transform of (%f200, %f1098, %f1099); offsets 12/16/20 -> new %f6.
	ld.const.f32 	%f1100, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f1101, %f1100, %f1098;
	ld.const.f32 	%f1102, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f1103, %f1102, %f200, %f1101;
	ld.const.f32 	%f1104, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f260, %f1104, %f1099, %f1103;
	.loc	22	268	0
	// Offsets 0/4/8 -> new %f7.
	ld.const.f32 	%f1105, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f1106, %f1105, %f1098;
	ld.const.f32 	%f1107, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f1108, %f1107, %f200, %f1106;
	ld.const.f32 	%f1109, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f266, %f1109, %f1099, %f1108;
	.loc	31	92	0
	// Offsets 24/28/32 -> new %f5.
	ld.const.f32 	%f1110, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f1111, %f1110, %f1098;
	ld.const.f32 	%f1112, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f1113, %f1112, %f200, %f1111;
	ld.const.f32 	%f1114, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f1114, %f1099, %f1113;
	mov.f32 	%f6, %f260;
	mov.f32 	%f7, %f266;
	bra.uni 	$Lt_49_514;
// Selector 9: region-gated remap of the weighted sum only.
// Gate, with g = param[+44]:
//   g >= 0 and float(%r6)  <  float(%r14) * g     -> apply
//   g <  0 and float(%r12) < -(float(%r13) * g)   -> apply
//   otherwise                                     -> leave values unchanged
// When applied, the weighted sum Y of (%f7, %f6, %f5) is remapped from the
// input range [param[+8], param[+4]] to the output range
// [param[+16], param[+12]], with optional exponent param[+20]. The three
// working values are then set to the result scaled by k601YPbPr_To_RGB32f
// offsets 24 / 12 / 0.
$Lt_49_2818:
	.loc	38	83	0
	ld.param.f32 	%f1115, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+44];
	mov.f32 	%f1116, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p158, %f1115, %f1116;
	@!%p158 bra 	$L_49_156162;
	cvt.rn.f32.s32 	%f1117, %r6;
	cvt.rn.f32.s32 	%f1118, %r14;
	mul.ftz.f32 	%f1119, %f1118, %f1115;
	setp.lt.ftz.f32 	%p159, %f1117, %f1119;
	@%p159 bra 	$L_49_155906;
$L_49_156162:
	mov.f32 	%f1120, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p160, %f1115, %f1120;
	@!%p160 bra 	$Lt_49_514;
	cvt.rn.f32.s32 	%f1121, %r12;
	cvt.rn.f32.s32 	%f1122, %r13;
	mul.ftz.f32 	%f1123, %f1122, %f1115;
	neg.ftz.f32 	%f1124, %f1123;
	setp.lt.ftz.f32 	%p161, %f1121, %f1124;
	@!%p161 bra 	$Lt_49_514;
$L_49_155906:
	.loc	31	110	0
	// %f1127 = input range  = param[+4]  - param[+8]
	// %f1130 = output range = param[+12] - param[+16]
	// %f887  = Y (weighted sum, matrix offsets 0/4/8)
	// %f1133 = Y - param[+8]
	ld.const.f32 	%f1125, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f883, %f1125, %f6;
	ld.param.f32 	%f897, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+8];
	ld.param.f32 	%f1126, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+4];
	sub.ftz.f32 	%f1127, %f1126, %f897;
	ld.param.f32 	%f1128, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+16];
	ld.param.f32 	%f1129, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+12];
	sub.ftz.f32 	%f1130, %f1129, %f1128;
	ld.const.f32 	%f1131, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f885, %f1131, %f7, %f883;
	ld.const.f32 	%f1132, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f887, %f1132, %f5, %f885;
	sub.ftz.f32 	%f1133, %f887, %f897;
	ld.param.f32 	%f1134, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+20];
	mov.f32 	%f1135, 0f3f800000;  	// 1
	setp.neu.ftz.f32 	%p162, %f1134, %f1135;
	@!%p162 bra 	$Lt_49_186626;
	.loc	42	523	0
	// Exponent != 1: t = max(Y - param[+8], 0) / input range, and
	// t^e is evaluated as ex2(e * lg2(t)) with approximate instructions.
	mov.f32 	%f1136, 0f00000000;  	// 0
	max.ftz.f32 	%f1137, %f1133, %f1136;
	div.approx.ftz.f32 	%f1138, %f1137, %f1127;
	lg2.approx.ftz.f32 	%f1139, %f1138;
	mul.ftz.f32 	%f1140, %f1134, %f1139;
	ex2.approx.ftz.f32 	%f298, %f1140;
	.loc	31	120	0
	fma.rn.ftz.f32 	%f299, %f1130, %f298, %f1128;
	bra.uni 	$Lt_49_186370;
$Lt_49_186626:
	.loc	31	129	0
	// Exponent == 1: plain linear remap (no clamp at zero on this path).
	div.approx.ftz.f32 	%f1141, %f1133, %f1127;
	fma.rn.ftz.f32 	%f299, %f1130, %f1141, %f1128;
$Lt_49_186370:
	.loc	31	135	0
	ld.const.f32 	%f1142, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f1142, %f299;
	ld.const.f32 	%f1143, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f1143, %f299;
	ld.const.f32 	%f1144, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f1144, %f299;
	bra.uni 	$Lt_49_514;
// Selector 10: region-gated constant fill.
// Gate, with g = param[+44]:
//   g >= 0 and float(%r6)  <  float(%r14) * g     -> apply
//   g <  0 and float(%r12) < -(float(%r13) * g)   -> apply
//   otherwise                                     -> leave values unchanged
// When applied, %f5 / %f6 / %f7 are overwritten with the constants at
// k601YPbPr_To_RGB32f offsets 24 / 12 / 0.
$Lt_49_3074:
	.loc	38	86	0
	ld.param.f32 	%f1145, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+44];
	mov.f32 	%f1146, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p163, %f1145, %f1146;
	@!%p163 bra 	$L_49_156930;
	cvt.rn.f32.s32 	%f1147, %r6;
	cvt.rn.f32.s32 	%f1148, %r14;
	mul.ftz.f32 	%f1149, %f1148, %f1145;
	setp.lt.ftz.f32 	%p164, %f1147, %f1149;
	@%p164 bra 	$L_49_156674;
$L_49_156930:
	mov.f32 	%f1150, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p165, %f1145, %f1150;
	@!%p165 bra 	$Lt_49_514;
	cvt.rn.f32.s32 	%f1151, %r12;
	cvt.rn.f32.s32 	%f1152, %r13;
	mul.ftz.f32 	%f1153, %f1152, %f1145;
	neg.ftz.f32 	%f1154, %f1153;
	setp.lt.ftz.f32 	%p166, %f1151, %f1154;
	@!%p166 bra 	$Lt_49_514;
$L_49_156674:
	.loc	31	160	0
	ld.const.f32 	%f5, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f6, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f7, [k601YPbPr_To_RGB32f+0];
	bra.uni 	$Lt_49_514;
// Selector 11: sign-preserving power applied to %f5, %f6, %f7 and %f9 with
// exponent e = paraminFilter2+4 (kept in %f872):
//   v >= 0 : v' =  ex2(e * lg2( v))
//   v <  0 : v' = -ex2(e * lg2(-v))
// i.e. sign(v) * |v|^e, using the approximate lg2/ex2 instructions. Results
// are staged in temporaries and written back together at the end.
$Lt_49_3330:
	.loc	32	42	0
	ld.param.f32 	%f1155, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+4];
	// --- %f5 -> %f1159 ---
	mov.f32 	%f1156, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p167, %f5, %f1156;
	@!%p167 bra 	$Lt_49_187138;
	.loc	22	292	0
	mov.f32 	%f872, %f1155;
	lg2.approx.ftz.f32 	%f1157, %f5;
	mul.ftz.f32 	%f1158, %f872, %f1157;
	ex2.approx.ftz.f32 	%f1159, %f1158;
	bra.uni 	$Lt_49_186882;
$Lt_49_187138:
	mov.f32 	%f872, %f1155;
	neg.ftz.f32 	%f1160, %f5;
	lg2.approx.ftz.f32 	%f1161, %f1160;
	mul.ftz.f32 	%f1162, %f872, %f1161;
	ex2.approx.ftz.f32 	%f1163, %f1162;
	neg.ftz.f32 	%f1159, %f1163;
$Lt_49_186882:
	// --- %f6 -> %f1167 ---
	mov.f32 	%f1164, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p168, %f6, %f1164;
	@!%p168 bra 	$Lt_49_187650;
	.loc	22	293	0
	lg2.approx.ftz.f32 	%f1165, %f6;
	mul.ftz.f32 	%f1166, %f872, %f1165;
	ex2.approx.ftz.f32 	%f1167, %f1166;
	bra.uni 	$Lt_49_187394;
$Lt_49_187650:
	neg.ftz.f32 	%f1168, %f6;
	lg2.approx.ftz.f32 	%f1169, %f1168;
	mul.ftz.f32 	%f1170, %f872, %f1169;
	ex2.approx.ftz.f32 	%f1171, %f1170;
	neg.ftz.f32 	%f1167, %f1171;
$Lt_49_187394:
	// --- %f7 -> %f1175 ---
	mov.f32 	%f1172, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p169, %f7, %f1172;
	@!%p169 bra 	$Lt_49_188162;
	.loc	22	294	0
	lg2.approx.ftz.f32 	%f1173, %f7;
	mul.ftz.f32 	%f1174, %f872, %f1173;
	ex2.approx.ftz.f32 	%f1175, %f1174;
	bra.uni 	$Lt_49_187906;
$Lt_49_188162:
	neg.ftz.f32 	%f1176, %f7;
	lg2.approx.ftz.f32 	%f1177, %f1176;
	mul.ftz.f32 	%f1178, %f872, %f1177;
	ex2.approx.ftz.f32 	%f1179, %f1178;
	neg.ftz.f32 	%f1175, %f1179;
$Lt_49_187906:
	// --- %f9 -> %f1183 ---
	mov.f32 	%f1180, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p170, %f9, %f1180;
	@!%p170 bra 	$Lt_49_188674;
	.loc	22	295	0
	lg2.approx.ftz.f32 	%f1181, %f9;
	mul.ftz.f32 	%f1182, %f872, %f1181;
	ex2.approx.ftz.f32 	%f1183, %f1182;
	bra.uni 	$Lt_49_188418;
$Lt_49_188674:
	neg.ftz.f32 	%f1184, %f9;
	lg2.approx.ftz.f32 	%f1185, %f1184;
	mul.ftz.f32 	%f1186, %f872, %f1185;
	ex2.approx.ftz.f32 	%f1187, %f1186;
	neg.ftz.f32 	%f1183, %f1187;
$Lt_49_188418:
	.loc	32	42	0
	// Commit all four results.
	mov.f32 	%f5, %f1159;
	mov.f32 	%f6, %f1167;
	mov.f32 	%f7, %f1175;
	mov.f32 	%f9, %f1183;
	.loc	38	91	0
	bra.uni 	$Lt_49_514;
// Selector 12: mirror %r6 within %r14, i.e. %r6 = %r14 - %r6 - 1.
// NOTE(review): %r6 / %r14 look like a coordinate and its extent — confirm.
$Lt_49_3586:
	.loc	33	41	0
	sub.s32 	%r592, %r14, %r6;
	sub.s32 	%r6, %r592, 1;
	.loc	38	94	0
	bra.uni 	$Lt_49_514;
$Lt_49_3842:
	.loc	38	96	0
	ld.param.f32 	%f872, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+4];
	mov.f32 	%f1188, 0f3f000000;  	// 0.5
	mul.ftz.f32 	%f1189, %f872, %f1188;
	ld.param.f32 	%f1190, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+16];
	ld.param.f32 	%f1191, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+8];
	mov.f32 	%f1192, 0f3f800000;  	// 1
	setp.eq.ftz.f32 	%p171, %f1191, %f1192;
	@!%p171 bra 	$Lt_49_189186;
	.loc	20	143	0
	cvt.rzi.ftz.u32.f32 	%r593, %f1190;
	mul.lo.s32 	%r126, %r6, 3;
	shr.u32 	%r594, %r593, 13;
	sub.u32 	%r595, %r12, %r593;
	sub.u32 	%r128, %r126, %r12;
	sub.u32 	%r596, %r128, %r593;
	xor.b32 	%r597, %r596, %r594;
	shl.b32 	%r598, %r597, 8;
	sub.u32 	%r599, %r595, %r597;
	sub.u32 	%r600, %r593, %r597;
	xor.b32 	%r601, %r598, %r599;
	shr.u32 	%r602, %r601, 13;
	sub.u32 	%r603, %r600, %r601;
	sub.u32 	%r604, %r597, %r601;
	xor.b32 	%r605, %r602, %r603;
	shr.u32 	%r606, %r605, 12;
	sub.u32 	%r607, %r604, %r605;
	xor.b32 	%r608, %r606, %r607;
	shl.b32 	%r609, %r608, 16;
	sub.u32 	%r610, %r601, %r605;
	sub.u32 	%r611, %r610, %r608;
	xor.b32 	%r612, %r609, %r611;
	.loc	20	144	0
	sub.u32 	%r613, %r605, %r608;
	sub.u32 	%r614, %r613, %r612;
	shr.u32 	%r615, %r612, 5;
	xor.b32 	%r616, %r614, %r615;
	.loc	20	145	0
	sub.u32 	%r617, %r608, %r612;
	sub.u32 	%r618, %r617, %r616;
	shr.u32 	%r619, %r616, 3;
	xor.b32 	%r155, %r618, %r619;
	.loc	20	146	0
	sub.u32 	%r620, %r612, %r616;
	sub.u32 	%r621, %r620, %r155;
	shl.b32 	%r622, %r155, 10;
	xor.b32 	%r159, %r621, %r622;
	.loc	20	147	0
	sub.u32 	%r623, %r616, %r155;
	sub.u32 	%r624, %r623, %r159;
	shr.u32 	%r625, %r159, 15;
	xor.b32 	%r163, %r624, %r625;
	.loc	34	48	0
	mul.lo.u32 	%r626, %r163, 1103515245;
	add.u32 	%r627, %r626, 12345;
	shr.u32 	%r628, %r627, 16;
	and.b32 	%r629, %r628, 255;
	shl.b32 	%r630, %r629, 7;
	mul.lo.u32 	%r631, %r163, -1029531031;
	sub.u32 	%r632, %r631, 740551042;
	shr.u32 	%r633, %r632, 16;
	and.b32 	%r634, %r633, 255;
	xor.b32 	%r635, %r630, %r634;
	cvt.rn.f32.s32 	%f1193, %r635;
	mov.f32 	%f1194, 0f46fffe00;  	// 32767
	div.approx.ftz.f32 	%f1195, %f1193, %f1194;
	mul.ftz.f32 	%f1196, %f872, %f1195;
	sub.ftz.f32 	%f356, %f1196, %f1189;
	.loc	20	143	0
	add.u32 	%r636, %r596, 1;
	xor.b32 	%r637, %r636, %r594;
	shl.b32 	%r638, %r637, 8;
	sub.u32 	%r639, %r595, %r637;
	sub.u32 	%r640, %r593, %r637;
	xor.b32 	%r641, %r638, %r639;
	shr.u32 	%r642, %r641, 13;
	sub.u32 	%r643, %r640, %r641;
	sub.u32 	%r644, %r637, %r641;
	xor.b32 	%r645, %r642, %r643;
	shr.u32 	%r646, %r645, 12;
	sub.u32 	%r647, %r644, %r645;
	xor.b32 	%r648, %r646, %r647;
	sub.u32 	%r649, %r641, %r645;
	sub.u32 	%r650, %r649, %r648;
	shl.b32 	%r651, %r648, 16;
	xor.b32 	%r652, %r650, %r651;
	.loc	20	144	0
	sub.u32 	%r653, %r645, %r648;
	sub.u32 	%r654, %r653, %r652;
	shr.u32 	%r655, %r652, 5;
	xor.b32 	%r656, %r654, %r655;
	.loc	20	145	0
	sub.u32 	%r657, %r648, %r652;
	sub.u32 	%r658, %r657, %r656;
	shr.u32 	%r659, %r656, 3;
	xor.b32 	%r198, %r658, %r659;
	.loc	20	146	0
	sub.u32 	%r660, %r652, %r656;
	sub.u32 	%r661, %r660, %r198;
	shl.b32 	%r662, %r198, 10;
	xor.b32 	%r202, %r661, %r662;
	.loc	20	147	0
	sub.u32 	%r663, %r656, %r198;
	sub.u32 	%r664, %r663, %r202;
	shr.u32 	%r665, %r202, 15;
	xor.b32 	%r206, %r664, %r665;
	.loc	34	49	0
	mul.lo.u32 	%r666, %r206, 1103515245;
	add.u32 	%r667, %r666, 12345;
	shr.u32 	%r668, %r667, 16;
	and.b32 	%r669, %r668, 255;
	shl.b32 	%r670, %r669, 7;
	mul.lo.u32 	%r671, %r206, -1029531031;
	sub.u32 	%r672, %r671, 740551042;
	shr.u32 	%r673, %r672, 16;
	and.b32 	%r674, %r673, 255;
	xor.b32 	%r675, %r670, %r674;
	cvt.rn.f32.s32 	%f1197, %r675;
	mov.f32 	%f1198, 0f46fffe00;  	// 32767
	div.approx.ftz.f32 	%f1199, %f1197, %f1198;
	mul.ftz.f32 	%f1200, %f872, %f1199;
	sub.ftz.f32 	%f361, %f1200, %f1189;
	.loc	20	143	0
	add.u32 	%r676, %r596, 2;
	xor.b32 	%r677, %r676, %r594;
	shl.b32 	%r678, %r677, 8;
	sub.u32 	%r679, %r595, %r677;
	sub.u32 	%r680, %r593, %r677;
	xor.b32 	%r681, %r678, %r679;
	shr.u32 	%r682, %r681, 13;
	sub.u32 	%r683, %r680, %r681;
	sub.u32 	%r684, %r677, %r681;
	xor.b32 	%r685, %r682, %r683;
	shr.u32 	%r686, %r685, 12;
	sub.u32 	%r687, %r684, %r685;
	xor.b32 	%r688, %r686, %r687;
	sub.u32 	%r689, %r681, %r685;
	sub.u32 	%r690, %r689, %r688;
	shl.b32 	%r691, %r688, 16;
	xor.b32 	%r692, %r690, %r691;
	.loc	20	144	0
	sub.u32 	%r693, %r685, %r688;
	sub.u32 	%r694, %r693, %r692;
	shr.u32 	%r695, %r692, 5;
	xor.b32 	%r696, %r694, %r695;
	.loc	20	145	0
	sub.u32 	%r697, %r688, %r692;
	sub.u32 	%r698, %r697, %r696;
	shr.u32 	%r699, %r696, 3;
	xor.b32 	%r241, %r698, %r699;
	.loc	20	146	0
	sub.u32 	%r700, %r692, %r696;
	sub.u32 	%r701, %r700, %r241;
	shl.b32 	%r702, %r241, 10;
	xor.b32 	%r245, %r701, %r702;
	.loc	20	147	0
	sub.u32 	%r703, %r696, %r241;
	sub.u32 	%r704, %r703, %r245;
	shr.u32 	%r705, %r245, 15;
	xor.b32 	%r249, %r704, %r705;
	.loc	34	50	0
	mul.lo.u32 	%r706, %r249, 1103515245;
	add.u32 	%r707, %r706, 12345;
	shr.u32 	%r708, %r707, 16;
	and.b32 	%r709, %r708, 255;
	shl.b32 	%r710, %r709, 7;
	mul.lo.u32 	%r711, %r249, -1029531031;
	sub.u32 	%r712, %r711, 740551042;
	shr.u32 	%r713, %r712, 16;
	and.b32 	%r714, %r713, 255;
	xor.b32 	%r715, %r710, %r714;
	cvt.rn.f32.s32 	%f1201, %r715;
	mov.f32 	%f1202, 0f46fffe00;  	// 32767
	div.approx.ftz.f32 	%f1203, %f1201, %f1202;
	mul.ftz.f32 	%f1204, %f872, %f1203;
	sub.ftz.f32 	%f366, %f1204, %f1189;
	bra.uni 	$Lt_49_188930;
$Lt_49_189186:
	.loc	20	143	0
	cvt.rzi.ftz.u32.f32 	%r716, %f1190;
	sub.u32 	%r717, %r6, %r12;
	shr.u32 	%r718, %r716, 13;
	sub.u32 	%r719, %r12, %r716;
	sub.u32 	%r720, %r717, %r716;
	xor.b32 	%r721, %r720, %r718;
	shl.b32 	%r722, %r721, 8;
	sub.u32 	%r723, %r719, %r721;
	sub.u32 	%r724, %r716, %r721;
	xor.b32 	%r725, %r722, %r723;
	shr.u32 	%r726, %r725, 13;
	sub.u32 	%r727, %r724, %r725;
	sub.u32 	%r728, %r721, %r725;
	xor.b32 	%r729, %r726, %r727;
	shr.u32 	%r730, %r729, 12;
	sub.u32 	%r731, %r728, %r729;
	xor.b32 	%r732, %r730, %r731;
	shl.b32 	%r733, %r732, 16;
	sub.u32 	%r734, %r725, %r729;
	sub.u32 	%r735, %r734, %r732;
	xor.b32 	%r736, %r733, %r735;
	.loc	20	144	0
	sub.u32 	%r737, %r729, %r732;
	sub.u32 	%r738, %r737, %r736;
	shr.u32 	%r739, %r736, 5;
	xor.b32 	%r740, %r738, %r739;
	.loc	20	145	0
	sub.u32 	%r741, %r732, %r736;
	sub.u32 	%r742, %r741, %r740;
	shr.u32 	%r743, %r740, 3;
	xor.b32 	%r287, %r742, %r743;
	.loc	20	146	0
	sub.u32 	%r744, %r736, %r740;
	sub.u32 	%r745, %r744, %r287;
	shl.b32 	%r746, %r287, 10;
	xor.b32 	%r291, %r745, %r746;
	.loc	20	147	0
	sub.u32 	%r747, %r740, %r287;
	sub.u32 	%r748, %r747, %r291;
	shr.u32 	%r749, %r291, 15;
	xor.b32 	%r295, %r748, %r749;
	.loc	34	54	0
	mul.lo.u32 	%r296, %r295, 1103515245;
	mul.lo.u32 	%r297, %r295, -1029531031;
	add.u32 	%r298, %r296, 12345;
	sub.u32 	%r299, %r297, 740551042;
	shr.u32 	%r300, %r298, 16;
	shr.u32 	%r301, %r299, 16;
	and.b32 	%r302, %r300, 255;
	and.b32 	%r303, %r301, 255;
	shl.b32 	%r304, %r302, 7;
	xor.b32 	%r305, %r304, %r303;
	cvt.rn.f32.s32 	%f367, %r305;
	mov.f32 	%f1205, 0f46fffe00;  	// 32767
	div.approx.ftz.f32 	%f369, %f367, %f1205;
	mul.ftz.f32 	%f1206, %f872, %f369;
	sub.ftz.f32 	%f1207, %f1206, %f1189;
	mov.f32 	%f366, %f1207;
	mov.f32 	%f361, %f1207;
	mov.f32 	%f356, %f1207;
$Lt_49_188930:
	.loc	34	57	0
	add.ftz.f32 	%f7, %f356, %f7;
	.loc	34	58	0
	add.ftz.f32 	%f6, %f361, %f6;
	.loc	34	59	0
	add.ftz.f32 	%f5, %f5, %f366;
	ld.param.f32 	%f1208, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+12];
	mov.f32 	%f1209, 0f3f800000;  	// 1
	setp.eq.ftz.f32 	%p172, %f1208, %f1209;
	@!%p172 bra 	$Lt_49_514;
	.loc	34	63	0
	mov.f32 	%f1210, 0f00000000;  	// 0
	max.ftz.f32 	%f1211, %f5, %f1210;
	mov.f32 	%f1212, 0f3f800000;  	// 1
	min.ftz.f32 	%f5, %f1211, %f1212;
	mov.f32 	%f1213, 0f00000000;  	// 0
	max.ftz.f32 	%f1214, %f6, %f1213;
	mov.f32 	%f1215, 0f3f800000;  	// 1
	min.ftz.f32 	%f6, %f1214, %f1215;
	mov.f32 	%f1216, 0f00000000;  	// 0
	max.ftz.f32 	%f1217, %f7, %f1216;
	mov.f32 	%f1218, 0f3f800000;  	// 1
	min.ftz.f32 	%f7, %f1217, %f1218;
	mov.f32 	%f1219, 0f00000000;  	// 0
	max.ftz.f32 	%f1220, %f9, %f1219;
	mov.f32 	%f1221, 0f3f800000;  	// 1
	min.ftz.f32 	%f9, %f1220, %f1221;
	bra.uni 	$Lt_49_514;
$Lt_49_4098:
	.loc	38	99	0
	cvt.rn.f32.s32 	%f1222, %r6;
	cvt.rn.f32.s32 	%f1223, %r14;
	ld.param.f32 	%f1224, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+20];
	mul.ftz.f32 	%f1225, %f1223, %f1224;
	setp.lt.ftz.f32 	%p173, %f1222, %f1225;
	@!%p173 bra 	$Lt_49_514;
	.loc	22	267	0
	ld.const.f32 	%f1226, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f883, %f1226, %f6;
	ld.const.f32 	%f1227, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f1228, %f1227, %f6;
	ld.const.f32 	%f1229, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f1230, %f1229, %f6;
	ld.const.f32 	%f1231, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f885, %f1231, %f7, %f883;
	ld.const.f32 	%f1232, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f1233, %f1232, %f7, %f1228;
	ld.const.f32 	%f1234, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f1235, %f1234, %f7, %f1230;
	ld.const.f32 	%f1236, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f887, %f1236, %f5, %f885;
	ld.const.f32 	%f1237, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f1238, %f1237, %f5, %f1233;
	ld.const.f32 	%f1239, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f1240, %f1239, %f5, %f1235;
	ld.param.f32 	%f1241, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+4];
	ld.param.f32 	%f1242, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+8];
	fma.rn.ftz.f32 	%f1243, %f887, %f1242, %f1241;
	ld.param.f32 	%f1244, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+16];
	mul.ftz.f32 	%f1245, %f1238, %f1244;
	ld.param.f32 	%f1246, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+12];
	mul.ftz.f32 	%f1247, %f1238, %f1246;
	mul.ftz.f32 	%f1248, %f1240, %f1246;
	sub.ftz.f32 	%f1249, %f1248, %f1245;
	fma.rn.ftz.f32 	%f1250, %f1240, %f1244, %f1247;
	ld.const.f32 	%f1251, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f1252, %f1251, %f1249;
	ld.const.f32 	%f1253, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f1254, %f1253, %f1243, %f1252;
	ld.const.f32 	%f1255, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f420, %f1255, %f1250, %f1254;
	.loc	22	268	0
	ld.const.f32 	%f1256, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f1257, %f1256, %f1249;
	ld.const.f32 	%f1258, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f1259, %f1258, %f1243, %f1257;
	ld.const.f32 	%f1260, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f426, %f1260, %f1250, %f1259;
	.loc	35	56	0
	ld.const.f32 	%f1261, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f1262, %f1261, %f1249;
	ld.const.f32 	%f1263, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f1264, %f1263, %f1243, %f1262;
	ld.const.f32 	%f1265, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f1265, %f1250, %f1264;
	mov.f32 	%f6, %f420;
	mov.f32 	%f7, %f426;
	bra.uni 	$Lt_49_514;
$Lt_49_4354:
	.loc	36	46	0
	ld.const.f32 	%f1266, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f883, %f1266, %f6;
	ld.const.f32 	%f1267, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f885, %f1267, %f7, %f883;
	ld.const.f32 	%f1268, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f887, %f1268, %f5, %f885;
	ld.param.f32 	%f897, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+8];
	ld.param.f32 	%f1269, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+20];
	sub.ftz.f32 	%f1270, %f1269, %f897;
	fma.rn.ftz.f32 	%f437, %f887, %f1270, %f897;
	.loc	36	47	0
	ld.param.f32 	%f1271, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+12];
	ld.param.f32 	%f1272, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+24];
	sub.ftz.f32 	%f1273, %f1272, %f1271;
	fma.rn.ftz.f32 	%f441, %f887, %f1273, %f1271;
	.loc	36	49	0
	ld.param.f32 	%f1274, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+28];
	ld.param.f32 	%f872, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+4];
	ld.param.f32 	%f1275, [__cudaparm_PointwiseFilterHostKernel3___val_paraminFilter2+16];
	sub.ftz.f32 	%f1276, %f1275, %f872;
	fma.rn.ftz.f32 	%f1277, %f887, %f1276, %f872;
	sub.ftz.f32 	%f1278, %f1277, %f5;
	fma.rn.ftz.f32 	%f5, %f1274, %f1278, %f5;
	.loc	36	50	0
	sub.ftz.f32 	%f1279, %f437, %f6;
	fma.rn.ftz.f32 	%f6, %f1274, %f1279, %f6;
	.loc	36	51	0
	sub.ftz.f32 	%f1280, %f441, %f7;
	fma.rn.ftz.f32 	%f7, %f1274, %f1280, %f7;
	.loc	38	103	0
	bra.uni 	$Lt_49_514;
$Lt_49_4610:
	.loc	37	41	0
	sub.s32 	%r750, %r13, %r12;
	sub.s32 	%r12, %r750, 1;
$Lt_49_514:
	.loc	38	181	0
	ld.param.s32 	%r751, [__cudaparm_PointwiseFilterHostKernel3_inDestPitch];
	mul.lo.s32 	%r752, %r751, %r12;
	add.s32 	%r753, %r6, %r752;
	cvt.s64.s32 	%rd8, %r753;
	ld.param.u64 	%rd9, [__cudaparm_PointwiseFilterHostKernel3_inDestImage];
	@!%p2 bra 	$Lt_49_190722;
	.loc	20	126	0
	mul.lo.u64 	%rd10, %rd8, 8;
	add.u64 	%rd11, %rd9, %rd10;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f5;
	mov.b32		%r754, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f6;
	mov.b32		%r755, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f7;
	mov.b32		%r756, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r757, %b1; }
	st.global.v4.u16 	[%rd11+0], {%r754,%r755,%r756,%r757};
	.loc	38	183	0
	bra.uni 	$Lt_49_190466;
$Lt_49_190722:
	.loc	20	126	0
	mul.lo.u64 	%rd12, %rd8, 16;
	add.u64 	%rd13, %rd9, %rd12;
	st.global.v4.f32 	[%rd13+0], {%f5,%f6,%f7,%f9};
$Lt_49_190466:
$Lt_49_157186:
	.loc	38	185	0
	exit;
$LDWend_PointwiseFilterHostKernel3:
	} // PointwiseFilterHostKernel3

	.entry PointwiseFilterHostKernel4 (
		.param .u64 __cudaparm_PointwiseFilterHostKernel4_inSrcImage,
		.param .s32 __cudaparm_PointwiseFilterHostKernel4_inSrcPitch,
		.param .u64 __cudaparm_PointwiseFilterHostKernel4_inDestImage,
		.param .s32 __cudaparm_PointwiseFilterHostKernel4_inDestPitch,
		.param .u32 __cudaparm_PointwiseFilterHostKernel4_inDeviceFormat,
		.param .s32 __cudaparm_PointwiseFilterHostKernel4_inWidth,
		.param .s32 __cudaparm_PointwiseFilterHostKernel4_inHeight,
		.param .align 4 .b8 __cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0[48],
		.param .align 4 .b8 __cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1[48],
		.param .align 4 .b8 __cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2[48],
		.param .align 4 .b8 __cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3[48])
	{
	.reg .u32 %r<979>;
	.reg .u64 %rd<15>;
	.reg .f32 %f<1698>;
	.reg .pred %p<232>;
	.loc	38	197	0
$LDWbegin_PointwiseFilterHostKernel4:
	.loc	38	199	0
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	mov.u32 	%r4, %tid.x;
	add.u32 	%r5, %r3, %r4;
	mov.s32 	%r6, %r5;
	.loc	38	200	0
	cvt.s32.u32 	%r7, %ctaid.y;
	cvt.s32.u32 	%r8, %ntid.y;
	mul.lo.s32 	%r9, %r7, %r8;
	mov.u32 	%r10, %tid.y;
	add.u32 	%r11, %r9, %r10;
	mov.s32 	%r12, %r11;
	ld.param.s32 	%r13, [__cudaparm_PointwiseFilterHostKernel4_inHeight];
	ld.param.s32 	%r14, [__cudaparm_PointwiseFilterHostKernel4_inWidth];
	set.gt.u32.s32 	%r15, %r13, %r11;
	neg.s32 	%r16, %r15;
	set.gt.u32.s32 	%r17, %r14, %r5;
	neg.s32 	%r18, %r17;
	and.b32 	%r19, %r16, %r18;
	mov.u32 	%r20, 0;
	setp.eq.s32 	%p1, %r19, %r20;
	@%p1 bra 	$Lt_50_252674;
	ld.param.s32 	%r21, [__cudaparm_PointwiseFilterHostKernel4_inDeviceFormat];
	mov.s32 	%r22, 0;
	setp.eq.s32 	%p2, %r21, %r22;
	ld.param.u64 	%rd1, [__cudaparm_PointwiseFilterHostKernel4_inSrcImage];
	ld.param.s32 	%r23, [__cudaparm_PointwiseFilterHostKernel4_inSrcPitch];
	@!%p2 bra 	$Lt_50_209410;
	.loc	20	115	0
	mul.lo.s32 	%r24, %r23, %r11;
	add.s32 	%r25, %r5, %r24;
	cvt.s64.s32 	%rd2, %r25;
	mul.wide.s32 	%rd3, %r25, 8;
	add.u64 	%rd4, %rd1, %rd3;
	ld.global.v4.u16 	{%r26,%r27,%r28,%r29}, [%rd4+0];
	.loc	38	204	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r28;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r29;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_50_209154;
$Lt_50_209410:
	mul.lo.s32 	%r30, %r23, %r11;
	add.s32 	%r31, %r5, %r30;
	cvt.s64.s32 	%rd5, %r31;
	mul.wide.s32 	%rd6, %r31, 16;
	add.u64 	%rd7, %rd1, %rd6;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd7+0];
$Lt_50_209154:
	mov.f32 	%f5, %f1;
	mov.f32 	%f6, %f2;
	mov.f32 	%f7, %f3;
	mov.f32 	%f8, %f4;
	mov.f32 	%f9, %f8;
	.loc	38	54	0
	ld.param.u32 	%r32, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+0];
	mov.u32 	%r33, 0;
	setp.eq.s32 	%p3, %r32, %r33;
	@%p3 bra 	$Lt_50_15618;
	mov.u32 	%r34, 1;
	setp.eq.s32 	%p4, %r32, %r34;
	@%p4 bra 	$Lt_50_16130;
	mov.u32 	%r35, 2;
	setp.eq.s32 	%p5, %r32, %r35;
	@%p5 bra 	$Lt_50_16386;
	mov.u32 	%r36, 3;
	setp.eq.s32 	%p6, %r32, %r36;
	@%p6 bra 	$Lt_50_16642;
	mov.u32 	%r37, 4;
	setp.eq.s32 	%p7, %r32, %r37;
	@%p7 bra 	$Lt_50_16898;
	mov.u32 	%r38, 5;
	setp.eq.s32 	%p8, %r32, %r38;
	@%p8 bra 	$Lt_50_17154;
	mov.u32 	%r39, 6;
	setp.eq.s32 	%p9, %r32, %r39;
	@%p9 bra 	$Lt_50_17410;
	mov.u32 	%r40, 7;
	setp.eq.s32 	%p10, %r32, %r40;
	@%p10 bra 	$Lt_50_17666;
	mov.u32 	%r41, 8;
	setp.eq.s32 	%p11, %r32, %r41;
	@%p11 bra 	$Lt_50_17922;
	mov.u32 	%r42, 9;
	setp.eq.s32 	%p12, %r32, %r42;
	@%p12 bra 	$Lt_50_18178;
	mov.u32 	%r43, 10;
	setp.eq.s32 	%p13, %r32, %r43;
	@%p13 bra 	$Lt_50_18434;
	mov.u32 	%r44, 11;
	setp.eq.s32 	%p14, %r32, %r44;
	@%p14 bra 	$Lt_50_18690;
	mov.u32 	%r45, 12;
	setp.eq.s32 	%p15, %r32, %r45;
	@%p15 bra 	$Lt_50_18946;
	mov.u32 	%r46, 13;
	setp.eq.s32 	%p16, %r32, %r46;
	@%p16 bra 	$Lt_50_19202;
	mov.u32 	%r47, 14;
	setp.eq.s32 	%p17, %r32, %r47;
	@%p17 bra 	$Lt_50_19458;
	mov.u32 	%r48, 15;
	setp.eq.s32 	%p18, %r32, %r48;
	@%p18 bra 	$Lt_50_19714;
	mov.u32 	%r49, 16;
	setp.eq.s32 	%p19, %r32, %r49;
	@%p19 bra 	$Lt_50_19970;
	bra.uni 	$Lt_50_15874;
$Lt_50_15618:
	.loc	38	57	0
	ld.param.f32 	%f10, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+8];
	mov.f32 	%f11, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p20, %f10, %f11;
	ld.param.f32 	%f12, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+12];
	mov.f32 	%f13, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p21, %f12, %f13;
	ld.param.f32 	%f14, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+4];
	ld.param.f32 	%f15, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+16];
	mov.f32 	%f16, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p22, %f15, %f16;
	@!%p22 bra 	$Lt_50_209922;
	.loc	21	53	0
	cvt.ftz.sat.f32.f32 	%f17, %f8;
	mov.f32 	%f18, %f14;
	mul.ftz.f32 	%f19, %f17, %f18;
	selp.f32 	%f20, %f18, %f19, %p20;
	mov.f32 	%f21, 0f3f800000;    	// 1
	sub.ftz.f32 	%f22, %f21, %f20;
	selp.f32 	%f23, %f22, %f20, %p21;
	mov.f32 	%f9, %f23;
	.loc	21	57	0
	mov.f32 	%f5, %f23;
	mov.f32 	%f6, %f23;
	mov.f32 	%f7, %f23;
	bra.uni 	$Lt_50_15874;
$Lt_50_209922:
	@!%p20 bra 	$Lt_50_210434;
	.loc	21	61	0
	mov.f32 	%f18, %f14;
	mov.f32 	%f24, 0f3f800000;    	// 1
	sub.ftz.f32 	%f25, %f24, %f18;
	selp.f32 	%f9, %f25, %f18, %p21;
	bra.uni 	$Lt_50_15874;
$Lt_50_210434:
	.loc	21	73	0
	cvt.ftz.sat.f32.f32 	%f26, %f8;
	mov.f32 	%f27, 0f3f800000;    	// 1
	sub.ftz.f32 	%f28, %f27, %f26;
	selp.f32 	%f29, %f28, %f26, %p21;
	mul.ftz.f32 	%f9, %f29, %f14;
	bra.uni 	$Lt_50_15874;
$Lt_50_16130:
	.loc	22	267	0
	ld.const.f32 	%f30, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f30, %f6;
	ld.const.f32 	%f32, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f32, %f7, %f31;
	ld.const.f32 	%f34, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f34, %f5, %f33;
	ld.const.f32 	%f36, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f37, %f36, %f35;
	.loc	22	268	0
	ld.const.f32 	%f38, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f39, %f38, %f35;
	.loc	23	44	0
	ld.const.f32 	%f40, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f40, %f35;
	mov.f32 	%f6, %f37;
	mov.f32 	%f7, %f39;
	mov.f32 	%f9, %f8;
	.loc	38	61	0
	bra.uni 	$Lt_50_15874;
$Lt_50_16386:
	.loc	38	63	0
	ld.param.f32 	%f41, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+20];
	ld.param.f32 	%f42, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+12];
	ld.param.f32 	%f43, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+4];
	mov.f32 	%f44, 0f00000000;    	// 0
	setp.neu.ftz.f32 	%p23, %f43, %f44;
	@!%p23 bra 	$Lt_50_210946;
	.loc	24	44	0
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+8];
	sub.ftz.f32 	%f46, %f7, %f42;
	fma.rn.ftz.f32 	%f7, %f45, %f46, %f41;
	.loc	24	45	0
	sub.ftz.f32 	%f47, %f6, %f42;
	fma.rn.ftz.f32 	%f6, %f45, %f47, %f41;
	.loc	24	46	0
	sub.ftz.f32 	%f48, %f5, %f42;
	fma.rn.ftz.f32 	%f5, %f45, %f48, %f41;
	bra.uni 	$Lt_50_15874;
$Lt_50_210946:
	.loc	24	50	0
	ld.param.f32 	%f49, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+24];
	setp.gt.ftz.f32 	%p24, %f7, %f42;
	selp.f32 	%f7, %f49, %f41, %p24;
	.loc	24	51	0
	setp.gt.ftz.f32 	%p25, %f6, %f42;
	selp.f32 	%f6, %f49, %f41, %p25;
	.loc	24	52	0
	setp.gt.ftz.f32 	%p26, %f5, %f42;
	selp.f32 	%f5, %f49, %f41, %p26;
	bra.uni 	$Lt_50_15874;
$Lt_50_16642:
	.loc	25	47	0
	ld.param.f32 	%f50, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+4];
	mul.ftz.f32 	%f5, %f50, %f5;
	ld.param.f32 	%f51, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+8];
	mul.ftz.f32 	%f6, %f51, %f6;
	ld.param.f32 	%f52, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+12];
	mul.ftz.f32 	%f7, %f52, %f7;
	.loc	38	67	0
	bra.uni 	$Lt_50_15874;
$Lt_50_16898:
	.loc	26	48	0
	ld.param.f32 	%f53, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+4];
	mov.f32 	%f54, 0f00000000;    	// 0
	setp.eq.ftz.f32 	%p27, %f53, %f54;
	ld.param.f32 	%f55, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+8];
	mov.f32 	%f56, 0f00000000;    	// 0
	max.ftz.f32 	%f57, %f5, %f56;
	mov.f32 	%f58, 0f3f800000;    	// 1
	min.ftz.f32 	%f59, %f57, %f58;
	ld.param.f32 	%f60, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+12];
	sub.ftz.f32 	%f61, %f59, %f60;
	abs.ftz.f32 	%f62, %f61;
	mov.f32 	%f63, 0f00000000;    	// 0
	max.ftz.f32 	%f64, %f6, %f63;
	mov.f32 	%f65, 0f3f800000;    	// 1
	min.ftz.f32 	%f66, %f64, %f65;
	ld.param.f32 	%f67, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+16];
	sub.ftz.f32 	%f68, %f66, %f67;
	abs.ftz.f32 	%f69, %f68;
	mov.f32 	%f70, 0f00000000;    	// 0
	max.ftz.f32 	%f71, %f8, %f70;
	mov.f32 	%f72, 0f3f800000;    	// 1
	min.ftz.f32 	%f73, %f71, %f72;
	sub.ftz.f32 	%f74, %f73, %f8;
	abs.ftz.f32 	%f75, %f74;
	mov.f32 	%f76, 0f00000000;    	// 0
	max.ftz.f32 	%f77, %f7, %f76;
	mov.f32 	%f78, 0f3f800000;    	// 1
	min.ftz.f32 	%f79, %f77, %f78;
	ld.param.f32 	%f80, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+20];
	sub.ftz.f32 	%f81, %f79, %f80;
	abs.ftz.f32 	%f82, %f81;
	max.ftz.f32 	%f83, %f75, %f82;
	max.ftz.f32 	%f84, %f69, %f83;
	max.ftz.f32 	%f85, %f62, %f84;
	setp.ge.ftz.f32 	%p28, %f55, %f85;
	xor.pred 	%p29, %p27, %p28;
	@!%p29 bra 	$Lt_50_15874;
	.loc	22	267	0
	ld.const.f32 	%f86, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f86, %f6;
	ld.const.f32 	%f87, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f87, %f7, %f31;
	ld.const.f32 	%f88, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f88, %f5, %f33;
	ld.const.f32 	%f89, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f90, %f89, %f35;
	.loc	22	268	0
	ld.const.f32 	%f91, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f92, %f91, %f35;
	.loc	23	44	0
	ld.const.f32 	%f93, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f93, %f35;
	mov.f32 	%f6, %f90;
	mov.f32 	%f7, %f92;
	bra.uni 	$Lt_50_15874;
$Lt_50_17154:
	.loc	27	48	0
	ld.param.f32 	%f94, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+8];
	mov.f32 	%f95, 0f00000000;    	// 0
	max.ftz.f32 	%f96, %f5, %f95;
	mov.f32 	%f97, 0f3f800000;    	// 1
	min.ftz.f32 	%f98, %f96, %f97;
	ld.param.f32 	%f99, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+12];
	sub.ftz.f32 	%f100, %f98, %f99;
	abs.ftz.f32 	%f101, %f100;
	mov.f32 	%f102, 0f00000000;   	// 0
	max.ftz.f32 	%f103, %f6, %f102;
	mov.f32 	%f104, 0f3f800000;   	// 1
	min.ftz.f32 	%f105, %f103, %f104;
	ld.param.f32 	%f106, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+16];
	sub.ftz.f32 	%f107, %f105, %f106;
	abs.ftz.f32 	%f108, %f107;
	mov.f32 	%f109, 0f00000000;   	// 0
	max.ftz.f32 	%f110, %f8, %f109;
	mov.f32 	%f111, 0f3f800000;   	// 1
	min.ftz.f32 	%f112, %f110, %f111;
	sub.ftz.f32 	%f113, %f112, %f8;
	abs.ftz.f32 	%f114, %f113;
	mov.f32 	%f115, 0f00000000;   	// 0
	max.ftz.f32 	%f116, %f7, %f115;
	mov.f32 	%f117, 0f3f800000;   	// 1
	min.ftz.f32 	%f118, %f116, %f117;
	ld.param.f32 	%f119, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+20];
	sub.ftz.f32 	%f120, %f118, %f119;
	abs.ftz.f32 	%f121, %f120;
	max.ftz.f32 	%f122, %f114, %f121;
	max.ftz.f32 	%f123, %f108, %f122;
	max.ftz.f32 	%f124, %f101, %f123;
	setp.ge.ftz.f32 	%p30, %f94, %f124;
	@!%p30 bra 	$Lt_50_15874;
	.loc	27	51	0
	ld.param.f32 	%f125, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+24];
	mov.f32 	%f126, %f125;
	ld.param.f32 	%f127, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+28];
	mov.f32 	%f128, %f127;
	ld.param.f32 	%f129, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+32];
	mov.f32 	%f130, %f129;
	ld.param.f32 	%f131, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+4];
	mov.f32 	%f132, 0f00000000;   	// 0
	setp.eq.ftz.f32 	%p31, %f131, %f132;
	@!%p31 bra 	$Lt_50_212226;
	.loc	27	60	0
	ld.const.f32 	%f133, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f133, %f6;
	ld.const.f32 	%f134, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f134, %f7, %f31;
	ld.const.f32 	%f135, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f135, %f5, %f33;
	mul.ftz.f32 	%f126, %f35, %f125;
	.loc	27	61	0
	mul.ftz.f32 	%f128, %f35, %f127;
	.loc	27	62	0
	mul.ftz.f32 	%f130, %f35, %f129;
$Lt_50_212226:
	.loc	27	65	0
	mov.f32 	%f5, %f126;
	mov.f32 	%f6, %f128;
	mov.f32 	%f7, %f130;
	bra.uni 	$Lt_50_15874;
$Lt_50_17410:
	.loc	28	47	0
	sub.s32 	%r50, %r13, %r11;
	sub.s32 	%r51, %r14, %r5;
	cvt.rn.f32.s32 	%f136, %r5;
	cvt.rn.f32.s32 	%f137, %r11;
	cvt.rn.f32.s32 	%f138, %r50;
	cvt.rn.f32.s32 	%f139, %r51;
	ld.param.f32 	%f18, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+4];
	sub.ftz.f32 	%f140, %f18, %f136;
	sub.ftz.f32 	%f141, %f18, %f137;
	sub.ftz.f32 	%f142, %f18, %f138;
	sub.ftz.f32 	%f143, %f18, %f139;
	cvt.rzi.ftz.s32.f32 	%r52, %f140;
	cvt.rzi.ftz.s32.f32 	%r53, %f141;
	cvt.rzi.ftz.s32.f32 	%r54, %f142;
	cvt.rzi.ftz.s32.f32 	%r55, %f143;
	max.s32 	%r56, %r52, %r53;
	max.s32 	%r57, %r55, %r56;
	max.s32 	%r58, %r54, %r57;
	mov.u32 	%r59, 0;
	setp.le.s32 	%p32, %r58, %r59;
	@%p32 bra 	$Lt_50_15874;
	.loc	28	51	0
	cvt.rn.f32.s32 	%f144, %r58;
	sub.ftz.f32 	%f145, %f18, %f144;
	div.approx.ftz.f32 	%f146, %f145, %f18;
	mul.ftz.f32 	%f9, %f8, %f146;
	bra.uni 	$Lt_50_15874;
$Lt_50_17666:
	.loc	30	50	0
	ld.const.f32 	%f147, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f147, %f6;
	ld.param.f32 	%f148, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+12];
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+8];
	sub.ftz.f32 	%f149, %f148, %f45;
	ld.const.f32 	%f150, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f150, %f7, %f31;
	ld.param.f32 	%f151, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+16];
	mul.ftz.f32 	%f152, %f151, %f149;
	ld.const.f32 	%f153, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f153, %f5, %f33;
	add.ftz.f32 	%f154, %f152, %f148;
	sub.ftz.f32 	%f155, %f45, %f152;
	mov.f32 	%f156, 0f00000000;   	// 0
	max.ftz.f32 	%f157, %f154, %f156;
	mov.f32 	%f158, 0f00000000;   	// 0
	max.ftz.f32 	%f159, %f155, %f158;
	mov.f32 	%f160, 0f3f800000;   	// 1
	min.ftz.f32 	%f161, %f157, %f160;
	mov.f32 	%f162, 0f3f800000;   	// 1
	min.ftz.f32 	%f163, %f159, %f162;
	set.gt.ftz.u32.f32 	%r60, %f163, %f35;
	neg.s32 	%r61, %r60;
	set.le.ftz.u32.f32 	%r62, %f161, %f35;
	neg.s32 	%r63, %r62;
	or.b32 	%r64, %r61, %r63;
	mov.u32 	%r65, 0;
	setp.eq.s32 	%p33, %r64, %r65;
	@%p33 bra 	$Lt_50_213506;
	mov.f32 	%f164, 0f00000000;   	// 0
	bra.uni 	$Lt_50_214274;
$Lt_50_213506:
	add.ftz.f32 	%f165, %f152, %f45;
	mov.f32 	%f166, 0f00000000;   	// 0
	max.ftz.f32 	%f167, %f165, %f166;
	mov.f32 	%f168, 0f3f800000;   	// 1
	min.ftz.f32 	%f169, %f167, %f168;
	set.le.ftz.u32.f32 	%r66, %f169, %f35;
	neg.s32 	%r67, %r66;
	sub.ftz.f32 	%f170, %f148, %f152;
	mov.f32 	%f171, 0f00000000;   	// 0
	max.ftz.f32 	%f172, %f170, %f171;
	mov.f32 	%f173, 0f3f800000;   	// 1
	min.ftz.f32 	%f174, %f172, %f173;
	set.lt.ftz.u32.f32 	%r68, %f35, %f174;
	neg.s32 	%r69, %r68;
	and.b32 	%r70, %r67, %r69;
	mov.u32 	%r71, 0;
	setp.eq.s32 	%p34, %r70, %r71;
	@%p34 bra 	$Lt_50_214018;
	mov.f32 	%f164, 0f3f800000;   	// 1
	bra.uni 	$Lt_50_214274;
$Lt_50_214018:
	add.ftz.f32 	%f175, %f152, %f152;
	setp.gt.ftz.f32 	%p35, %f169, %f35;
	@!%p35 bra 	$Lt_50_214530;
	.loc	30	62	0
	sub.ftz.f32 	%f176, %f35, %f163;
	div.approx.ftz.f32 	%f164, %f176, %f175;
	bra.uni 	$Lt_50_214274;
$Lt_50_214530:
	.loc	30	66	0
	sub.ftz.f32 	%f177, %f161, %f35;
	div.approx.ftz.f32 	%f164, %f177, %f175;
$Lt_50_214274:
$Lt_50_213762:
$Lt_50_213250:
	.loc	30	69	0
	mov.f32 	%f178, 0f3f800000;   	// 1
	sub.ftz.f32 	%f179, %f178, %f164;
	ld.param.f32 	%f180, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+4];
	mov.f32 	%f181, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p36, %f180, %f181;
	selp.f32 	%f164, %f179, %f164, %p36;
	.loc	30	77	0
	ld.const.f32 	%f182, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f182, %f164;
	ld.const.f32 	%f183, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f183, %f164;
	ld.const.f32 	%f184, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f184, %f164;
	.loc	38	79	0
	bra.uni 	$Lt_50_15874;
$Lt_50_17922:
	.loc	38	80	0
	ld.param.f32 	%f185, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+44];
	mov.f32 	%f186, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p37, %f185, %f186;
	@!%p37 bra 	$L_50_199938;
	cvt.rn.f32.s32 	%f187, %r5;
	cvt.rn.f32.s32 	%f188, %r14;
	mul.ftz.f32 	%f189, %f188, %f185;
	setp.lt.ftz.f32 	%p38, %f187, %f189;
	@%p38 bra 	$L_50_199682;
$L_50_199938:
	mov.f32 	%f190, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p39, %f185, %f190;
	@!%p39 bra 	$Lt_50_15874;
	cvt.rn.f32.s32 	%f191, %r11;
	cvt.rn.f32.s32 	%f192, %r13;
	mul.ftz.f32 	%f193, %f192, %f185;
	neg.ftz.f32 	%f194, %f193;
	setp.lt.ftz.f32 	%p40, %f191, %f194;
	@!%p40 bra 	$Lt_50_15874;
$L_50_199682:
	.loc	31	47	0
	ld.const.f32 	%f195, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f196, %f195, %f6;
	ld.const.f32 	%f197, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f198, %f197, %f7, %f196;
	ld.const.f32 	%f199, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f199, %f5, %f198;
	mov.f32 	%f200, %f35;
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+8];
	ld.param.f32 	%f201, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+4];
	sub.ftz.f32 	%f202, %f201, %f45;
	ld.param.f32 	%f203, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+16];
	ld.param.f32 	%f204, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+12];
	sub.ftz.f32 	%f205, %f204, %f203;
	mov.f32 	%f206, 0f3f800000;   	// 1
	set.neu.ftz.u32.f32 	%r72, %f205, %f206;
	neg.s32 	%r73, %r72;
	mov.f32 	%f207, 0f3f800000;   	// 1
	set.neu.ftz.u32.f32 	%r74, %f202, %f207;
	neg.s32 	%r75, %r74;
	or.b32 	%r76, %r73, %r75;
	mov.u32 	%r77, 0;
	setp.eq.s32 	%p41, %r76, %r77;
	@%p41 bra 	$Lt_50_214786;
	.loc	20	143	0
	mov.s32 	%r78, 1;
	sub.s32 	%r79, %r78, %r5;
	shr.u32 	%r80, %r11, 13;
	sub.u32 	%r81, %r5, %r11;
	sub.u32 	%r82, %r79, %r11;
	xor.b32 	%r83, %r82, %r80;
	shl.b32 	%r84, %r83, 8;
	sub.u32 	%r85, %r81, %r83;
	sub.u32 	%r86, %r11, %r83;
	xor.b32 	%r87, %r85, %r84;
	shr.u32 	%r88, %r87, 13;
	sub.u32 	%r89, %r86, %r87;
	sub.u32 	%r90, %r83, %r87;
	xor.b32 	%r91, %r89, %r88;
	shr.u32 	%r92, %r91, 12;
	sub.u32 	%r93, %r90, %r91;
	xor.b32 	%r94, %r93, %r92;
	sub.u32 	%r95, %r87, %r91;
	sub.u32 	%r96, %r95, %r94;
	shl.b32 	%r97, %r94, 16;
	xor.b32 	%r98, %r96, %r97;
	.loc	20	144	0
	sub.u32 	%r99, %r91, %r94;
	sub.u32 	%r100, %r99, %r98;
	shr.u32 	%r101, %r98, 5;
	xor.b32 	%r102, %r100, %r101;
	.loc	20	145	0
	sub.u32 	%r103, %r94, %r98;
	sub.u32 	%r104, %r103, %r102;
	shr.u32 	%r105, %r102, 3;
	xor.b32 	%r106, %r104, %r105;
	.loc	20	146	0
	sub.u32 	%r107, %r98, %r102;
	sub.u32 	%r108, %r107, %r106;
	shl.b32 	%r109, %r106, 10;
	xor.b32 	%r110, %r108, %r109;
	.loc	20	147	0
	sub.u32 	%r111, %r102, %r106;
	sub.u32 	%r112, %r111, %r110;
	shr.u32 	%r113, %r110, 15;
	xor.b32 	%r114, %r112, %r113;
	.loc	31	57	0
	mov.f32 	%f208, 0f3b270d73;   	// 0.00254902
	mul.lo.u32 	%r115, %r114, 1103515245;
	add.u32 	%r116, %r115, 12345;
	shr.u32 	%r117, %r116, 16;
	and.b32 	%r118, %r117, 255;
	shl.b32 	%r119, %r118, 7;
	mul.lo.u32 	%r120, %r114, -1029531031;
	sub.u32 	%r121, %r120, 740551042;
	shr.u32 	%r122, %r121, 16;
	and.b32 	%r123, %r122, 255;
	xor.b32 	%r124, %r119, %r123;
	cvt.rn.f32.s32 	%f209, %r124;
	mov.f32 	%f210, 0f467ffe00;   	// 16383.5
	div.approx.ftz.f32 	%f211, %f209, %f210;
	mov.f32 	%f212, 0fbf800000;   	// -1
	add.ftz.f32 	%f213, %f211, %f212;
	fma.rn.ftz.f32 	%f200, %f208, %f213, %f35;
$Lt_50_214786:
	sub.ftz.f32 	%f214, %f200, %f45;
	ld.param.f32 	%f215, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+20];
	mov.f32 	%f216, 0f3f800000;   	// 1
	setp.neu.ftz.f32 	%p42, %f215, %f216;
	@!%p42 bra 	$Lt_50_215554;
	mov.f32 	%f217, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p43, %f214, %f217;
	@!%p43 bra 	$Lt_50_216066;
	.loc	31	66	0
	mov.f32 	%f200, %f203;
	bra.uni 	$Lt_50_215298;
$Lt_50_216066:
	.loc	31	70	0
	rcp.approx.ftz.f32 	%f218, %f202;
	mul.ftz.f32 	%f219, %f214, %f218;
	lg2.approx.ftz.f32 	%f220, %f219;
	mul.ftz.f32 	%f221, %f215, %f220;
	ex2.approx.ftz.f32 	%f222, %f221;
	fma.rn.ftz.f32 	%f200, %f205, %f222, %f203;
	bra.uni 	$Lt_50_215298;
$Lt_50_215554:
	.loc	31	77	0
	rcp.approx.ftz.f32 	%f223, %f202;
	mul.ftz.f32 	%f224, %f214, %f223;
	fma.rn.ftz.f32 	%f200, %f205, %f224, %f203;
$Lt_50_215298:
	.loc	22	267	0
	ld.const.f32 	%f225, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f226, %f225, %f6;
	ld.const.f32 	%f227, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f228, %f227, %f6;
	ld.param.f32 	%f229, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+28];
	sin.approx.ftz.f32 	%f230, %f229;
	cos.approx.ftz.f32 	%f231, %f229;
	ld.const.f32 	%f232, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f233, %f232, %f7, %f226;
	ld.const.f32 	%f234, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f235, %f234, %f7, %f228;
	ld.const.f32 	%f236, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f237, %f236, %f5, %f233;
	ld.const.f32 	%f238, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f239, %f238, %f5, %f235;
	mul.ftz.f32 	%f240, %f230, %f237;
	mul.ftz.f32 	%f241, %f231, %f237;
	mul.ftz.f32 	%f242, %f239, %f231;
	sub.ftz.f32 	%f243, %f242, %f240;
	fma.rn.ftz.f32 	%f244, %f239, %f230, %f241;
	ld.param.f32 	%f245, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+32];
	sub.ftz.f32 	%f246, %f245, %f243;
	ld.param.f32 	%f247, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+36];
	sub.ftz.f32 	%f248, %f247, %f244;
	ld.param.f32 	%f249, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+40];
	fma.rn.ftz.f32 	%f250, %f249, %f246, %f243;
	fma.rn.ftz.f32 	%f251, %f249, %f248, %f244;
	ld.param.f32 	%f252, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+24];
	mul.ftz.f32 	%f253, %f250, %f252;
	mul.ftz.f32 	%f254, %f251, %f252;
	ld.const.f32 	%f255, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f256, %f255, %f253;
	ld.const.f32 	%f257, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f258, %f257, %f200, %f256;
	ld.const.f32 	%f259, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f260, %f259, %f254, %f258;
	.loc	22	268	0
	ld.const.f32 	%f261, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f262, %f261, %f253;
	ld.const.f32 	%f263, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f264, %f263, %f200, %f262;
	ld.const.f32 	%f265, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f266, %f265, %f254, %f264;
	.loc	31	92	0
	ld.const.f32 	%f267, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f268, %f267, %f253;
	ld.const.f32 	%f269, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f270, %f269, %f200, %f268;
	ld.const.f32 	%f271, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f271, %f254, %f270;
	mov.f32 	%f6, %f260;
	mov.f32 	%f7, %f266;
	bra.uni 	$Lt_50_15874;
// Filter-0 case (.loc 38 83): gated luma remap with an optional power curve.
// P below means __cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0.
// Gate, with g = P+44:
//   g >= 0 : apply when float(%r5)  <  float(%r14) * g
//   g <  0 : apply when float(%r11) < -(float(%r13) * g)
//   otherwise jump to $Lt_50_15874 with the pixel unchanged.
// NOTE(review): %r5/%r14 and %r11/%r13 look like a coordinate and its extent on
// each axis; they are produced outside this chunk -- confirm.
$Lt_50_18178:
	.loc	38	83	0
	ld.param.f32 	%f272, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+44];
	mov.f32 	%f273, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p44, %f272, %f273;
	@!%p44 bra 	$L_50_200706;
	cvt.rn.f32.s32 	%f274, %r5;
	cvt.rn.f32.s32 	%f275, %r14;
	mul.ftz.f32 	%f276, %f275, %f272;
	setp.lt.ftz.f32 	%p45, %f274, %f276;
	@%p45 bra 	$L_50_200450;
$L_50_200706:
	// Second half of the gate: only a negative g can still enable the filter.
	mov.f32 	%f277, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p46, %f272, %f277;
	@!%p46 bra 	$Lt_50_15874;
	cvt.rn.f32.s32 	%f278, %r11;
	cvt.rn.f32.s32 	%f279, %r13;
	mul.ftz.f32 	%f280, %f279, %f272;
	neg.ftz.f32 	%f281, %f280;
	setp.lt.ftz.f32 	%p47, %f278, %f281;
	@!%p47 bra 	$Lt_50_15874;
$L_50_200450:
	.loc	31	110	0
	// %f35 = M[0]*%f7 + M[4]*%f6 + M[8]*%f5 with M = kRGB32f_To_601YPbPr
	// (byte offsets). Presumably the luma row of an RGB->YPbPr matrix -- confirm.
	// %f284 = P+4 - P+8   (input span),  %f45  = P+8  (input base)
	// %f287 = P+12 - P+16 (output span), %f285 = P+16 (output base)
	ld.const.f32 	%f282, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f282, %f6;
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+8];
	ld.param.f32 	%f283, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+4];
	sub.ftz.f32 	%f284, %f283, %f45;
	ld.param.f32 	%f285, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+16];
	ld.param.f32 	%f286, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+12];
	sub.ftz.f32 	%f287, %f286, %f285;
	ld.const.f32 	%f288, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f288, %f7, %f31;
	ld.const.f32 	%f289, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f289, %f5, %f33;
	sub.ftz.f32 	%f290, %f35, %f45;
	// Exponent e = P+20. setp.neu is true for "not equal OR unordered", so the
	// linear path below is taken only when e compares exactly equal to 1.
	ld.param.f32 	%f291, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+20];
	mov.f32 	%f292, 0f3f800000;   	// 1
	setp.neu.ftz.f32 	%p48, %f291, %f292;
	@!%p48 bra 	$Lt_50_216578;
	.loc	42	523	0
	// Power path: t = max(%f290, 0) / %f284; %f298 = 2^(e * log2(t)), i.e. t^e
	// built from the approximate lg2/ex2 instructions.
	mov.f32 	%f293, 0f00000000;   	// 0
	max.ftz.f32 	%f294, %f290, %f293;
	div.approx.ftz.f32 	%f295, %f294, %f284;
	lg2.approx.ftz.f32 	%f296, %f295;
	mul.ftz.f32 	%f297, %f291, %f296;
	ex2.approx.ftz.f32 	%f298, %f297;
	.loc	31	120	0
	fma.rn.ftz.f32 	%f299, %f287, %f298, %f285;
	bra.uni 	$Lt_50_216322;
$Lt_50_216578:
	.loc	31	129	0
	// Linear path: no clamp at zero here, unlike the power path.
	div.approx.ftz.f32 	%f300, %f290, %f284;
	fma.rn.ftz.f32 	%f299, %f287, %f300, %f285;
$Lt_50_216322:
	.loc	31	135	0
	// Write the remapped value back to all three channels, each scaled by the
	// first entry of a k601YPbPr_To_RGB32f row (+24 -> %f5, +12 -> %f6, +0 -> %f7).
	ld.const.f32 	%f301, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f301, %f299;
	ld.const.f32 	%f302, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f302, %f299;
	ld.const.f32 	%f303, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f303, %f299;
	bra.uni 	$Lt_50_15874;
// Filter-0 case (.loc 38 86): gated constant fill.
// Uses the same two-sided gate on paraminFilter0+44 (g) as the preceding case:
//   g >= 0 : apply when float(%r5)  <  float(%r14) * g
//   g <  0 : apply when float(%r11) < -(float(%r13) * g)
// When the gate passes, %f5/%f6/%f7 are overwritten with the k601YPbPr_To_RGB32f
// entries at byte offsets +24/+12/+0; otherwise the pixel is left unchanged.
$Lt_50_18434:
	.loc	38	86	0
	ld.param.f32 	%f304, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+44];
	mov.f32 	%f305, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p49, %f304, %f305;
	@!%p49 bra 	$L_50_201474;
	cvt.rn.f32.s32 	%f306, %r5;
	cvt.rn.f32.s32 	%f307, %r14;
	mul.ftz.f32 	%f308, %f307, %f304;
	setp.lt.ftz.f32 	%p50, %f306, %f308;
	@%p50 bra 	$L_50_201218;
$L_50_201474:
	// Second half of the gate: only a negative g can still enable the fill.
	mov.f32 	%f309, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p51, %f304, %f309;
	@!%p51 bra 	$Lt_50_15874;
	cvt.rn.f32.s32 	%f310, %r11;
	cvt.rn.f32.s32 	%f311, %r13;
	mul.ftz.f32 	%f312, %f311, %f304;
	neg.ftz.f32 	%f313, %f312;
	setp.lt.ftz.f32 	%p52, %f310, %f313;
	@!%p52 bra 	$Lt_50_15874;
$L_50_201218:
	.loc	31	160	0
	ld.const.f32 	%f5, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f6, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f7, [k601YPbPr_To_RGB32f+0];
	bra.uni 	$Lt_50_15874;
// Filter-0 case (.loc 32 42): sign-preserving power on four channels.
// With e = paraminFilter0+4 (kept in %f18), each channel x becomes
//   x >= 0 :  2^(e * log2(x))
//   else   : -2^(e * log2(-x))
// using the approximate lg2/ex2 instructions. Because the test is setp.ge, an
// unordered (NaN) input fails it and takes the negated branch.
// Inputs are %f5, %f6, %f7 and %f8; results go to %f5, %f6, %f7 and %f9
// (the fourth channel is read from %f8 but written to %f9).
$Lt_50_18690:
	.loc	32	42	0
	ld.param.f32 	%f314, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+4];
	mov.f32 	%f315, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p53, %f5, %f315;
	@!%p53 bra 	$Lt_50_217090;
	.loc	22	292	0
	// %f5, non-negative branch -> %f318
	mov.f32 	%f18, %f314;
	lg2.approx.ftz.f32 	%f316, %f5;
	mul.ftz.f32 	%f317, %f18, %f316;
	ex2.approx.ftz.f32 	%f318, %f317;
	bra.uni 	$Lt_50_216834;
$Lt_50_217090:
	// %f5, negative branch: power of the magnitude, then restore the sign.
	mov.f32 	%f18, %f314;
	neg.ftz.f32 	%f319, %f5;
	lg2.approx.ftz.f32 	%f320, %f319;
	mul.ftz.f32 	%f321, %f18, %f320;
	ex2.approx.ftz.f32 	%f322, %f321;
	neg.ftz.f32 	%f318, %f322;
$Lt_50_216834:
	mov.f32 	%f323, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p54, %f6, %f323;
	@!%p54 bra 	$Lt_50_217602;
	.loc	22	293	0
	// %f6 -> %f326
	lg2.approx.ftz.f32 	%f324, %f6;
	mul.ftz.f32 	%f325, %f18, %f324;
	ex2.approx.ftz.f32 	%f326, %f325;
	bra.uni 	$Lt_50_217346;
$Lt_50_217602:
	neg.ftz.f32 	%f327, %f6;
	lg2.approx.ftz.f32 	%f328, %f327;
	mul.ftz.f32 	%f329, %f18, %f328;
	ex2.approx.ftz.f32 	%f330, %f329;
	neg.ftz.f32 	%f326, %f330;
$Lt_50_217346:
	mov.f32 	%f331, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p55, %f7, %f331;
	@!%p55 bra 	$Lt_50_218114;
	.loc	22	294	0
	// %f7 -> %f334
	lg2.approx.ftz.f32 	%f332, %f7;
	mul.ftz.f32 	%f333, %f18, %f332;
	ex2.approx.ftz.f32 	%f334, %f333;
	bra.uni 	$Lt_50_217858;
$Lt_50_218114:
	neg.ftz.f32 	%f335, %f7;
	lg2.approx.ftz.f32 	%f336, %f335;
	mul.ftz.f32 	%f337, %f18, %f336;
	ex2.approx.ftz.f32 	%f338, %f337;
	neg.ftz.f32 	%f334, %f338;
$Lt_50_217858:
	mov.f32 	%f339, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p56, %f8, %f339;
	@!%p56 bra 	$Lt_50_218626;
	.loc	22	295	0
	// %f8 -> %f342
	lg2.approx.ftz.f32 	%f340, %f8;
	mul.ftz.f32 	%f341, %f18, %f340;
	ex2.approx.ftz.f32 	%f342, %f341;
	bra.uni 	$Lt_50_218370;
$Lt_50_218626:
	neg.ftz.f32 	%f343, %f8;
	lg2.approx.ftz.f32 	%f344, %f343;
	mul.ftz.f32 	%f345, %f18, %f344;
	ex2.approx.ftz.f32 	%f346, %f345;
	neg.ftz.f32 	%f342, %f346;
$Lt_50_218370:
	.loc	32	42	0
	// Commit all four results only after every input has been read.
	mov.f32 	%f5, %f318;
	mov.f32 	%f6, %f326;
	mov.f32 	%f7, %f334;
	mov.f32 	%f9, %f342;
	.loc	38	91	0
	bra.uni 	$Lt_50_15874;
// Filter-0 case (.loc 33 41): %r6 = %r14 - %r5 - 1.
// The pixel channels are not touched; only the integer %r6 is replaced.
// NOTE(review): reads like mirroring a coordinate (%r5) inside an extent
// (%r14); both registers are set outside this chunk -- confirm.
$Lt_50_18946:
	.loc	33	41	0
	sub.s32 	%r125, %r14, %r5;
	sub.s32 	%r6, %r125, 1;
	.loc	38	94	0
	bra.uni 	$Lt_50_15874;
// Filter-0 case (.loc 38 96): add hashed pseudo-random noise to %f7/%f6/%f5.
// P below means __cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0.
//   %f18  = P+4   noise amplitude A
//   %f348 = A/2   subtracted from every sample
//   %f349 = P+16  seed, truncated to u32 (cvt.rzi)
//   P+8  == 1 -> three separate hash samples, one per channel
//   P+8  != 1 -> one hash sample shared by all three channels
//   P+12 == 1 -> clamp the results to [0,1] afterwards
// Every sample follows the same recipe:
//   1. integer mixing rounds of sub / shift / xor (.loc 20 143..147);
//   2. two values derived from the mixed word h:
//        h*1103515245 + 12345   and   h*(-1029531031) - 740551042
//      Modulo 2^32 the second pair equals 1103515245^2 and 12345*(1103515245+1),
//      i.e. it is the second iterate of the same linear congruential step;
//   3. bits 16..23 of each are combined as (first << 7) ^ second;
//   4. the result is converted to float, divided by 32767, scaled by A, and
//      A/2 is subtracted.
$Lt_50_19202:
	.loc	38	96	0
	ld.param.f32 	%f18, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+4];
	mov.f32 	%f347, 0f3f000000;   	// 0.5
	mul.ftz.f32 	%f348, %f18, %f347;
	ld.param.f32 	%f349, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+16];
	ld.param.f32 	%f350, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+8];
	mov.f32 	%f351, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p57, %f350, %f351;
	@!%p57 bra 	$Lt_50_219138;
	.loc	20	143	0
	// Per-channel path, sample 0. The starting word is
	// %r131 = %r5*3 - %r11 - seed; samples 1 and 2 reuse it with +1 and +2.
	// %r129 (seed >> 13) and %r130 (%r11 - seed) are shared by all three samples.
	mul.lo.s32 	%r126, %r5, 3;
	cvt.rzi.ftz.u32.f32 	%r127, %f349;
	sub.u32 	%r128, %r126, %r11;
	shr.u32 	%r129, %r127, 13;
	sub.u32 	%r130, %r11, %r127;
	sub.u32 	%r131, %r128, %r127;
	xor.b32 	%r132, %r131, %r129;
	shl.b32 	%r133, %r132, 8;
	sub.u32 	%r134, %r130, %r132;
	sub.u32 	%r135, %r127, %r132;
	xor.b32 	%r136, %r133, %r134;
	shr.u32 	%r137, %r136, 13;
	sub.u32 	%r138, %r135, %r136;
	sub.u32 	%r139, %r132, %r136;
	xor.b32 	%r140, %r137, %r138;
	shr.u32 	%r141, %r140, 12;
	sub.u32 	%r142, %r139, %r140;
	xor.b32 	%r143, %r141, %r142;
	shl.b32 	%r144, %r143, 16;
	sub.u32 	%r145, %r136, %r140;
	sub.u32 	%r146, %r145, %r143;
	xor.b32 	%r147, %r144, %r146;
	.loc	20	144	0
	sub.u32 	%r148, %r140, %r143;
	sub.u32 	%r149, %r148, %r147;
	shr.u32 	%r150, %r147, 5;
	xor.b32 	%r151, %r149, %r150;
	.loc	20	145	0
	sub.u32 	%r152, %r143, %r147;
	sub.u32 	%r153, %r152, %r151;
	shr.u32 	%r154, %r151, 3;
	xor.b32 	%r155, %r153, %r154;
	.loc	20	146	0
	sub.u32 	%r156, %r147, %r151;
	sub.u32 	%r157, %r156, %r155;
	shl.b32 	%r158, %r155, 10;
	xor.b32 	%r159, %r157, %r158;
	.loc	20	147	0
	sub.u32 	%r160, %r151, %r155;
	sub.u32 	%r161, %r160, %r159;
	shr.u32 	%r162, %r159, 15;
	xor.b32 	%r163, %r161, %r162;
	.loc	34	48	0
	// Sample 0 -> %f356 (later added to %f7).
	mul.lo.u32 	%r164, %r163, 1103515245;
	add.u32 	%r165, %r164, 12345;
	shr.u32 	%r166, %r165, 16;
	and.b32 	%r167, %r166, 255;
	shl.b32 	%r168, %r167, 7;
	mul.lo.u32 	%r169, %r163, -1029531031;
	sub.u32 	%r170, %r169, 740551042;
	shr.u32 	%r171, %r170, 16;
	and.b32 	%r172, %r171, 255;
	xor.b32 	%r173, %r168, %r172;
	cvt.rn.f32.s32 	%f352, %r173;
	mov.f32 	%f353, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f354, %f352, %f353;
	mul.ftz.f32 	%f355, %f18, %f354;
	sub.ftz.f32 	%f356, %f355, %f348;
	.loc	20	143	0
	// Sample 1: same mixing, starting word %r131 + 1.
	add.u32 	%r174, %r131, 1;
	xor.b32 	%r175, %r174, %r129;
	shl.b32 	%r176, %r175, 8;
	sub.u32 	%r177, %r130, %r175;
	sub.u32 	%r178, %r127, %r175;
	xor.b32 	%r179, %r176, %r177;
	shr.u32 	%r180, %r179, 13;
	sub.u32 	%r181, %r178, %r179;
	sub.u32 	%r182, %r175, %r179;
	xor.b32 	%r183, %r180, %r181;
	shr.u32 	%r184, %r183, 12;
	sub.u32 	%r185, %r182, %r183;
	xor.b32 	%r186, %r184, %r185;
	sub.u32 	%r187, %r179, %r183;
	sub.u32 	%r188, %r187, %r186;
	shl.b32 	%r189, %r186, 16;
	xor.b32 	%r190, %r188, %r189;
	.loc	20	144	0
	sub.u32 	%r191, %r183, %r186;
	sub.u32 	%r192, %r191, %r190;
	shr.u32 	%r193, %r190, 5;
	xor.b32 	%r194, %r192, %r193;
	.loc	20	145	0
	sub.u32 	%r195, %r186, %r190;
	sub.u32 	%r196, %r195, %r194;
	shr.u32 	%r197, %r194, 3;
	xor.b32 	%r198, %r196, %r197;
	.loc	20	146	0
	sub.u32 	%r199, %r190, %r194;
	sub.u32 	%r200, %r199, %r198;
	shl.b32 	%r201, %r198, 10;
	xor.b32 	%r202, %r200, %r201;
	.loc	20	147	0
	sub.u32 	%r203, %r194, %r198;
	sub.u32 	%r204, %r203, %r202;
	shr.u32 	%r205, %r202, 15;
	xor.b32 	%r206, %r204, %r205;
	.loc	34	49	0
	// Sample 1 -> %f361 (later added to %f6).
	mul.lo.u32 	%r207, %r206, 1103515245;
	add.u32 	%r208, %r207, 12345;
	shr.u32 	%r209, %r208, 16;
	and.b32 	%r210, %r209, 255;
	shl.b32 	%r211, %r210, 7;
	mul.lo.u32 	%r212, %r206, -1029531031;
	sub.u32 	%r213, %r212, 740551042;
	shr.u32 	%r214, %r213, 16;
	and.b32 	%r215, %r214, 255;
	xor.b32 	%r216, %r211, %r215;
	cvt.rn.f32.s32 	%f357, %r216;
	mov.f32 	%f358, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f359, %f357, %f358;
	mul.ftz.f32 	%f360, %f18, %f359;
	sub.ftz.f32 	%f361, %f360, %f348;
	.loc	20	143	0
	// Sample 2: same mixing, starting word %r131 + 2.
	add.u32 	%r217, %r131, 2;
	xor.b32 	%r218, %r217, %r129;
	shl.b32 	%r219, %r218, 8;
	sub.u32 	%r220, %r130, %r218;
	sub.u32 	%r221, %r127, %r218;
	xor.b32 	%r222, %r219, %r220;
	shr.u32 	%r223, %r222, 13;
	sub.u32 	%r224, %r221, %r222;
	sub.u32 	%r225, %r218, %r222;
	xor.b32 	%r226, %r223, %r224;
	shr.u32 	%r227, %r226, 12;
	sub.u32 	%r228, %r225, %r226;
	xor.b32 	%r229, %r227, %r228;
	sub.u32 	%r230, %r222, %r226;
	sub.u32 	%r231, %r230, %r229;
	shl.b32 	%r232, %r229, 16;
	xor.b32 	%r233, %r231, %r232;
	.loc	20	144	0
	sub.u32 	%r234, %r226, %r229;
	sub.u32 	%r235, %r234, %r233;
	shr.u32 	%r236, %r233, 5;
	xor.b32 	%r237, %r235, %r236;
	.loc	20	145	0
	sub.u32 	%r238, %r229, %r233;
	sub.u32 	%r239, %r238, %r237;
	shr.u32 	%r240, %r237, 3;
	xor.b32 	%r241, %r239, %r240;
	.loc	20	146	0
	sub.u32 	%r242, %r233, %r237;
	sub.u32 	%r243, %r242, %r241;
	shl.b32 	%r244, %r241, 10;
	xor.b32 	%r245, %r243, %r244;
	.loc	20	147	0
	sub.u32 	%r246, %r237, %r241;
	sub.u32 	%r247, %r246, %r245;
	shr.u32 	%r248, %r245, 15;
	xor.b32 	%r249, %r247, %r248;
	.loc	34	50	0
	// Sample 2 -> %f366 (later added to %f5).
	mul.lo.u32 	%r250, %r249, 1103515245;
	add.u32 	%r251, %r250, 12345;
	shr.u32 	%r252, %r251, 16;
	and.b32 	%r253, %r252, 255;
	shl.b32 	%r254, %r253, 7;
	mul.lo.u32 	%r255, %r249, -1029531031;
	sub.u32 	%r256, %r255, 740551042;
	shr.u32 	%r257, %r256, 16;
	and.b32 	%r258, %r257, 255;
	xor.b32 	%r259, %r254, %r258;
	cvt.rn.f32.s32 	%f362, %r259;
	mov.f32 	%f363, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f364, %f362, %f363;
	mul.ftz.f32 	%f365, %f18, %f364;
	sub.ftz.f32 	%f366, %f365, %f348;
	bra.uni 	$Lt_50_218882;
$Lt_50_219138:
	.loc	20	143	0
	// Shared path: a single sample whose starting word is %r5 - %r11 - seed
	// (no *3 factor here); the result is copied to all three channel offsets.
	sub.u32 	%r81, %r5, %r11;
	cvt.rzi.ftz.u32.f32 	%r260, %f349;
	shr.u32 	%r261, %r260, 13;
	sub.u32 	%r262, %r81, %r260;
	sub.u32 	%r263, %r11, %r260;
	xor.b32 	%r264, %r262, %r261;
	shl.b32 	%r265, %r264, 8;
	sub.u32 	%r266, %r263, %r264;
	sub.u32 	%r267, %r260, %r264;
	xor.b32 	%r268, %r265, %r266;
	shr.u32 	%r269, %r268, 13;
	sub.u32 	%r270, %r267, %r268;
	sub.u32 	%r271, %r264, %r268;
	xor.b32 	%r272, %r269, %r270;
	shr.u32 	%r273, %r272, 12;
	sub.u32 	%r274, %r271, %r272;
	xor.b32 	%r275, %r273, %r274;
	shl.b32 	%r276, %r275, 16;
	sub.u32 	%r277, %r268, %r272;
	sub.u32 	%r278, %r277, %r275;
	xor.b32 	%r279, %r276, %r278;
	.loc	20	144	0
	sub.u32 	%r280, %r272, %r275;
	sub.u32 	%r281, %r280, %r279;
	shr.u32 	%r282, %r279, 5;
	xor.b32 	%r283, %r281, %r282;
	.loc	20	145	0
	sub.u32 	%r284, %r275, %r279;
	sub.u32 	%r285, %r284, %r283;
	shr.u32 	%r286, %r283, 3;
	xor.b32 	%r287, %r285, %r286;
	.loc	20	146	0
	sub.u32 	%r288, %r279, %r283;
	sub.u32 	%r289, %r288, %r287;
	shl.b32 	%r290, %r287, 10;
	xor.b32 	%r291, %r289, %r290;
	.loc	20	147	0
	sub.u32 	%r292, %r283, %r287;
	sub.u32 	%r293, %r292, %r291;
	shr.u32 	%r294, %r291, 15;
	xor.b32 	%r295, %r293, %r294;
	.loc	34	54	0
	mul.lo.u32 	%r296, %r295, 1103515245;
	mul.lo.u32 	%r297, %r295, -1029531031;
	add.u32 	%r298, %r296, 12345;
	sub.u32 	%r299, %r297, 740551042;
	shr.u32 	%r300, %r298, 16;
	shr.u32 	%r301, %r299, 16;
	and.b32 	%r302, %r300, 255;
	and.b32 	%r303, %r301, 255;
	shl.b32 	%r304, %r302, 7;
	xor.b32 	%r305, %r304, %r303;
	cvt.rn.f32.s32 	%f367, %r305;
	mov.f32 	%f368, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f369, %f367, %f368;
	mul.ftz.f32 	%f370, %f18, %f369;
	sub.ftz.f32 	%f371, %f370, %f348;
	mov.f32 	%f366, %f371;
	mov.f32 	%f361, %f371;
	mov.f32 	%f356, %f371;
$Lt_50_218882:
	.loc	34	57	0
	// Join point: add the per-channel offsets.
	add.ftz.f32 	%f7, %f356, %f7;
	.loc	34	58	0
	add.ftz.f32 	%f6, %f361, %f6;
	.loc	34	59	0
	add.ftz.f32 	%f5, %f5, %f366;
	ld.param.f32 	%f372, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+12];
	mov.f32 	%f373, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p58, %f372, %f373;
	@!%p58 bra 	$Lt_50_15874;
	.loc	34	63	0
	// Optional clamp to [0,1]. The fourth channel is read from %f8 and the
	// clamped value is written to %f9.
	mov.f32 	%f374, 0f00000000;   	// 0
	max.ftz.f32 	%f375, %f5, %f374;
	mov.f32 	%f376, 0f3f800000;   	// 1
	min.ftz.f32 	%f5, %f375, %f376;
	mov.f32 	%f377, 0f00000000;   	// 0
	max.ftz.f32 	%f378, %f6, %f377;
	mov.f32 	%f379, 0f3f800000;   	// 1
	min.ftz.f32 	%f6, %f378, %f379;
	mov.f32 	%f380, 0f00000000;   	// 0
	max.ftz.f32 	%f381, %f7, %f380;
	mov.f32 	%f382, 0f3f800000;   	// 1
	min.ftz.f32 	%f7, %f381, %f382;
	mov.f32 	%f383, 0f00000000;   	// 0
	max.ftz.f32 	%f384, %f8, %f383;
	mov.f32 	%f385, 0f3f800000;   	// 1
	min.ftz.f32 	%f9, %f384, %f385;
	bra.uni 	$Lt_50_15874;
// Filter-0 case (.loc 38 99): gated scale/offset of the first matrix component
// plus a 2x2 transform of the other two.
// P below means __cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0.
// Gate: apply only when float(%r5) < float(%r14) * P+20.
// Forward transform with M = kRGB32f_To_601YPbPr (byte offsets, inputs %f7,%f6,%f5):
//   %f35  = M[0]*%f7  + M[4]*%f6  + M[8]*%f5
//   %f404 = M[12]*%f7 + M[16]*%f6 + M[20]*%f5
//   %f402 = M[24]*%f7 + M[28]*%f6 + M[32]*%f5
// Adjustment:
//   %f407 = %f35 * P+8 + P+4
//   %f413 = %f404 * P+12 - %f402 * P+16
//   %f414 = %f404 * P+16 + %f402 * P+12
// NOTE(review): the last two lines form a plane rotation if P+12 / P+16 hold a
// cosine / sine pair -- confirm against the host code that fills the parameters.
$Lt_50_19458:
	.loc	38	99	0
	cvt.rn.f32.s32 	%f386, %r5;
	cvt.rn.f32.s32 	%f387, %r14;
	ld.param.f32 	%f388, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+20];
	mul.ftz.f32 	%f389, %f387, %f388;
	setp.lt.ftz.f32 	%p59, %f386, %f389;
	@!%p59 bra 	$Lt_50_15874;
	.loc	22	267	0
	ld.const.f32 	%f390, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f390, %f6;
	ld.const.f32 	%f391, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f392, %f391, %f6;
	ld.const.f32 	%f393, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f394, %f393, %f6;
	ld.const.f32 	%f395, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f395, %f7, %f31;
	ld.const.f32 	%f396, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f397, %f396, %f7, %f392;
	ld.const.f32 	%f398, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f399, %f398, %f7, %f394;
	ld.const.f32 	%f400, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f400, %f5, %f33;
	ld.const.f32 	%f401, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f402, %f401, %f5, %f397;
	ld.const.f32 	%f403, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f404, %f403, %f5, %f399;
	ld.param.f32 	%f405, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+4];
	ld.param.f32 	%f406, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+8];
	fma.rn.ftz.f32 	%f407, %f35, %f406, %f405;
	ld.param.f32 	%f408, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+16];
	mul.ftz.f32 	%f409, %f402, %f408;
	ld.param.f32 	%f410, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+12];
	mul.ftz.f32 	%f411, %f402, %f410;
	mul.ftz.f32 	%f412, %f404, %f410;
	sub.ftz.f32 	%f413, %f412, %f409;
	fma.rn.ftz.f32 	%f414, %f404, %f408, %f411;
	// Inverse transform with K = k601YPbPr_To_RGB32f applied to
	// (%f407, %f413, %f414): row at +12 -> %f6, row at +0 -> %f7, row at +24 -> %f5.
	ld.const.f32 	%f415, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f416, %f415, %f413;
	ld.const.f32 	%f417, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f418, %f417, %f407, %f416;
	ld.const.f32 	%f419, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f420, %f419, %f414, %f418;
	.loc	22	268	0
	ld.const.f32 	%f421, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f422, %f421, %f413;
	ld.const.f32 	%f423, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f424, %f423, %f407, %f422;
	ld.const.f32 	%f425, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f426, %f425, %f414, %f424;
	.loc	35	56	0
	ld.const.f32 	%f427, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f428, %f427, %f413;
	ld.const.f32 	%f429, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f430, %f429, %f407, %f428;
	ld.const.f32 	%f431, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f431, %f414, %f430;
	mov.f32 	%f6, %f420;
	mov.f32 	%f7, %f426;
	bra.uni 	$Lt_50_15874;
// Filter-0 case (.loc 36 46): luma-driven two-colour blend.
// P below means __cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0.
//   Y = M[0]*%f7 + M[4]*%f6 + M[8]*%f5        (M = kRGB32f_To_601YPbPr, -> %f35)
// One interpolated target per channel, going from a first triple at
// P+4/P+8/P+12 to a second triple at P+16/P+20/P+24 as Y goes 0 -> 1:
//   target(%f5) = P+4  + Y*(P+16 - P+4)    (%f445)
//   target(%f6) = P+8  + Y*(P+20 - P+8)    (%f437)
//   target(%f7) = P+12 + Y*(P+24 - P+12)   (%f441)
// Each channel then moves toward its target by the blend amount P+28:
//   c = c + P+28 * (target - c)
$Lt_50_19714:
	.loc	36	46	0
	ld.const.f32 	%f432, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f31, %f432, %f6;
	ld.const.f32 	%f433, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f33, %f433, %f7, %f31;
	ld.const.f32 	%f434, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f35, %f434, %f5, %f33;
	ld.param.f32 	%f45, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+8];
	ld.param.f32 	%f435, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+20];
	sub.ftz.f32 	%f436, %f435, %f45;
	fma.rn.ftz.f32 	%f437, %f35, %f436, %f45;
	.loc	36	47	0
	ld.param.f32 	%f438, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+12];
	ld.param.f32 	%f439, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+24];
	sub.ftz.f32 	%f440, %f439, %f438;
	fma.rn.ftz.f32 	%f441, %f35, %f440, %f438;
	.loc	36	49	0
	ld.param.f32 	%f442, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+28];
	ld.param.f32 	%f18, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+4];
	ld.param.f32 	%f443, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter0+16];
	sub.ftz.f32 	%f444, %f443, %f18;
	fma.rn.ftz.f32 	%f445, %f35, %f444, %f18;
	sub.ftz.f32 	%f446, %f445, %f5;
	fma.rn.ftz.f32 	%f5, %f442, %f446, %f5;
	.loc	36	50	0
	sub.ftz.f32 	%f447, %f437, %f6;
	fma.rn.ftz.f32 	%f6, %f442, %f447, %f6;
	.loc	36	51	0
	sub.ftz.f32 	%f448, %f441, %f7;
	fma.rn.ftz.f32 	%f7, %f442, %f448, %f7;
	.loc	38	103	0
	bra.uni 	$Lt_50_15874;
// Filter-0 case (.loc 37 41): %r12 = %r13 - %r11 - 1.
// The pixel channels are not touched. There is no branch at the end: control
// falls through into the label that follows.
// NOTE(review): reads like mirroring a coordinate (%r11) inside an extent
// (%r13); both registers are set outside this chunk -- confirm.
$Lt_50_19970:
	.loc	37	41	0
	sub.s32 	%r306, %r13, %r11;
	sub.s32 	%r12, %r306, 1;
// Filter-1 dispatch (.loc 38 54). Every filter-0 case in this kernel region
// ends by jumping (or falling through) here.
// The u32 at __val_paraminFilter1+0 is the selector. It is compared against
// 0..16 in order with a linear compare-and-branch chain, one target per value:
//   0 -> $Lt_50_10498    1 -> $Lt_50_11010    2 -> $Lt_50_11266
//   3 -> $Lt_50_11522    4 -> $Lt_50_11778    5 -> $Lt_50_12034
//   6 -> $Lt_50_12290    7 -> $Lt_50_12546    8 -> $Lt_50_12802
//   9 -> $Lt_50_13058   10 -> $Lt_50_13314   11 -> $Lt_50_13570
//  12 -> $Lt_50_13826   13 -> $Lt_50_14082   14 -> $Lt_50_14338
//  15 -> $Lt_50_14594   16 -> $Lt_50_14850
// Any other value skips straight to $Lt_50_10754, so no filter-1 case runs.
$Lt_50_15874:
	.loc	38	54	0
	ld.param.u32 	%r307, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+0];
	mov.u32 	%r308, 0;
	setp.eq.s32 	%p60, %r307, %r308;
	@%p60 bra 	$Lt_50_10498;
	mov.u32 	%r309, 1;
	setp.eq.s32 	%p61, %r307, %r309;
	@%p61 bra 	$Lt_50_11010;
	mov.u32 	%r310, 2;
	setp.eq.s32 	%p62, %r307, %r310;
	@%p62 bra 	$Lt_50_11266;
	mov.u32 	%r311, 3;
	setp.eq.s32 	%p63, %r307, %r311;
	@%p63 bra 	$Lt_50_11522;
	mov.u32 	%r312, 4;
	setp.eq.s32 	%p64, %r307, %r312;
	@%p64 bra 	$Lt_50_11778;
	mov.u32 	%r313, 5;
	setp.eq.s32 	%p65, %r307, %r313;
	@%p65 bra 	$Lt_50_12034;
	mov.u32 	%r314, 6;
	setp.eq.s32 	%p66, %r307, %r314;
	@%p66 bra 	$Lt_50_12290;
	mov.u32 	%r315, 7;
	setp.eq.s32 	%p67, %r307, %r315;
	@%p67 bra 	$Lt_50_12546;
	mov.u32 	%r316, 8;
	setp.eq.s32 	%p68, %r307, %r316;
	@%p68 bra 	$Lt_50_12802;
	mov.u32 	%r317, 9;
	setp.eq.s32 	%p69, %r307, %r317;
	@%p69 bra 	$Lt_50_13058;
	mov.u32 	%r318, 10;
	setp.eq.s32 	%p70, %r307, %r318;
	@%p70 bra 	$Lt_50_13314;
	mov.u32 	%r319, 11;
	setp.eq.s32 	%p71, %r307, %r319;
	@%p71 bra 	$Lt_50_13570;
	mov.u32 	%r320, 12;
	setp.eq.s32 	%p72, %r307, %r320;
	@%p72 bra 	$Lt_50_13826;
	mov.u32 	%r321, 13;
	setp.eq.s32 	%p73, %r307, %r321;
	@%p73 bra 	$Lt_50_14082;
	mov.u32 	%r322, 14;
	setp.eq.s32 	%p74, %r307, %r322;
	@%p74 bra 	$Lt_50_14338;
	mov.u32 	%r323, 15;
	setp.eq.s32 	%p75, %r307, %r323;
	@%p75 bra 	$Lt_50_14594;
	mov.u32 	%r324, 16;
	setp.eq.s32 	%p76, %r307, %r324;
	@%p76 bra 	$Lt_50_14850;
	bra.uni 	$Lt_50_10754;
// Filter-1 case 0 (.loc 21 42): fourth-channel (%f9) manipulation.
// Q below means __cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1.
// %f9 is first saturated to [0,1] (cvt.sat). Three flags and one value follow:
//   %p77 = (Q+8  == 1)   use Q+4 alone instead of Q+4 * %f9
//   %p78 = (Q+12 == 1)   invert (1 - x)
//   %p79 = (Q+16 == 1)   also copy the result into %f5/%f6/%f7
//   %f453 = Q+4          the value / multiplier
// Resulting behaviour:
//   %p79          : v = %p77 ? Q+4 : Q+4*%f9;  %f9 = %p78 ? 1-v : v;
//                   %f5 = %f6 = %f7 = %f9
//   !%p79,  %p77  : %f9 = %p78 ? 1-Q+4 : Q+4
//   !%p79, !%p77  : %f9 = (%p78 ? 1-%f9 : %f9) * Q+4
// Note that the inversion is applied after the multiply in the first form and
// before it in the last form.
$Lt_50_10498:
	.loc	21	42	0
	cvt.ftz.sat.f32.f32 	%f9, %f9;
	ld.param.f32 	%f449, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+8];
	mov.f32 	%f450, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p77, %f449, %f450;
	ld.param.f32 	%f451, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+12];
	mov.f32 	%f452, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p78, %f451, %f452;
	ld.param.f32 	%f453, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+4];
	ld.param.f32 	%f454, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+16];
	mov.f32 	%f455, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p79, %f454, %f455;
	@!%p79 bra 	$Lt_50_220674;
	.loc	21	45	0
	mov.f32 	%f456, %f453;
	mul.ftz.f32 	%f457, %f456, %f9;
	selp.f32 	%f458, %f456, %f457, %p77;
	.loc	21	53	0
	mov.f32 	%f459, 0f3f800000;   	// 1
	sub.ftz.f32 	%f460, %f459, %f458;
	selp.f32 	%f9, %f460, %f458, %p78;
	.loc	21	57	0
	mov.f32 	%f5, %f9;
	mov.f32 	%f6, %f9;
	mov.f32 	%f7, %f9;
	bra.uni 	$Lt_50_10754;
$Lt_50_220674:
	@!%p77 bra 	$Lt_50_221186;
	.loc	21	61	0
	mov.f32 	%f456, %f453;
	mov.f32 	%f461, 0f3f800000;   	// 1
	sub.ftz.f32 	%f462, %f461, %f456;
	selp.f32 	%f9, %f462, %f456, %p78;
	bra.uni 	$Lt_50_10754;
$Lt_50_221186:
	.loc	21	69	0
	mov.f32 	%f463, 0f3f800000;   	// 1
	sub.ftz.f32 	%f464, %f463, %f9;
	selp.f32 	%f465, %f464, %f9, %p78;
	.loc	21	73	0
	mul.ftz.f32 	%f9, %f465, %f453;
	bra.uni 	$Lt_50_10754;
// Filter-1 case 1 (.loc 22 267): collapse the pixel to a single value.
//   Y = M[0]*%f7 + M[4]*%f6 + M[8]*%f5        (M = kRGB32f_To_601YPbPr, -> %f471)
// then each channel becomes Y times the first entry of a k601YPbPr_To_RGB32f
// row: +24 -> %f5, +12 -> %f6, +0 -> %f7. %f9 is left untouched.
$Lt_50_11010:
	.loc	22	267	0
	ld.const.f32 	%f466, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f466, %f6;
	ld.const.f32 	%f468, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f468, %f7, %f467;
	ld.const.f32 	%f470, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f470, %f5, %f469;
	ld.const.f32 	%f472, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f473, %f472, %f471;
	.loc	22	268	0
	ld.const.f32 	%f474, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f475, %f474, %f471;
	.loc	23	44	0
	ld.const.f32 	%f476, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f476, %f471;
	mov.f32 	%f6, %f473;
	mov.f32 	%f7, %f475;
	.loc	38	61	0
	bra.uni 	$Lt_50_10754;
// Filter-1 case 2 (.loc 38 63): per-channel affine remap or hard threshold.
// Q below means __cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1.
//   Q+4 != 0 (setp.neu, so also when unordered):
//       c = Q+8 * (c - Q+12) + Q+20          for c in %f7, %f6, %f5
//   Q+4 == 0:
//       c = (c > Q+12) ? Q+24 : Q+20         for c in %f7, %f6, %f5
// %f9 is left untouched.
$Lt_50_11266:
	.loc	38	63	0
	ld.param.f32 	%f477, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+20];
	ld.param.f32 	%f478, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+12];
	ld.param.f32 	%f479, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+4];
	mov.f32 	%f480, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p80, %f479, %f480;
	@!%p80 bra 	$Lt_50_221698;
	.loc	24	44	0
	ld.param.f32 	%f481, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+8];
	sub.ftz.f32 	%f482, %f7, %f478;
	fma.rn.ftz.f32 	%f7, %f481, %f482, %f477;
	.loc	24	45	0
	sub.ftz.f32 	%f483, %f6, %f478;
	fma.rn.ftz.f32 	%f6, %f481, %f483, %f477;
	.loc	24	46	0
	sub.ftz.f32 	%f484, %f5, %f478;
	fma.rn.ftz.f32 	%f5, %f481, %f484, %f477;
	bra.uni 	$Lt_50_10754;
$Lt_50_221698:
	.loc	24	50	0
	// Threshold form: selp picks Q+24 when the comparison is true, else Q+20.
	ld.param.f32 	%f485, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+24];
	setp.gt.ftz.f32 	%p81, %f7, %f478;
	selp.f32 	%f7, %f485, %f477, %p81;
	.loc	24	51	0
	setp.gt.ftz.f32 	%p82, %f6, %f478;
	selp.f32 	%f6, %f485, %f477, %p82;
	.loc	24	52	0
	setp.gt.ftz.f32 	%p83, %f5, %f478;
	selp.f32 	%f5, %f485, %f477, %p83;
	bra.uni 	$Lt_50_10754;
// Filter-1 case 3 (.loc 25 47): independent per-channel gain.
//   %f5 *= paraminFilter1+4,  %f6 *= paraminFilter1+8,  %f7 *= paraminFilter1+12
// %f9 is left untouched.
$Lt_50_11522:
	.loc	25	47	0
	ld.param.f32 	%f486, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+4];
	mul.ftz.f32 	%f5, %f486, %f5;
	ld.param.f32 	%f487, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+8];
	mul.ftz.f32 	%f6, %f487, %f6;
	ld.param.f32 	%f488, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+12];
	mul.ftz.f32 	%f7, %f488, %f7;
	.loc	38	67	0
	bra.uni 	$Lt_50_10754;
// Filter-1 case 4 (.loc 26 48): colour-distance test that selects which pixels
// are collapsed to a single luma-derived value.
// Q below means __cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1.
// Distance d is the maximum of four absolute differences:
//   |clamp01(%f5) - Q+12|, |clamp01(%f6) - Q+16|, |clamp01(%f7) - Q+20|,
//   |clamp01(%f9) - %f9|   (how far %f9 lies outside [0,1])
//   %p84 = (Q+4 == 0)      mode flag
//   %p85 = (Q+8 >= d)      pixel is within tolerance Q+8 of the reference triple
// The pixel is converted only when %p84 XOR %p85 is true, i.e. the flag decides
// whether matching or non-matching pixels are the ones converted.
$Lt_50_11778:
	.loc	26	48	0
	ld.param.f32 	%f489, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+4];
	mov.f32 	%f490, 0f00000000;   	// 0
	setp.eq.ftz.f32 	%p84, %f489, %f490;
	ld.param.f32 	%f491, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+8];
	mov.f32 	%f492, 0f00000000;   	// 0
	max.ftz.f32 	%f493, %f5, %f492;
	mov.f32 	%f494, 0f3f800000;   	// 1
	min.ftz.f32 	%f495, %f493, %f494;
	ld.param.f32 	%f496, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+12];
	sub.ftz.f32 	%f497, %f495, %f496;
	abs.ftz.f32 	%f498, %f497;
	mov.f32 	%f499, 0f00000000;   	// 0
	max.ftz.f32 	%f500, %f6, %f499;
	mov.f32 	%f501, 0f3f800000;   	// 1
	min.ftz.f32 	%f502, %f500, %f501;
	ld.param.f32 	%f503, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+16];
	sub.ftz.f32 	%f504, %f502, %f503;
	abs.ftz.f32 	%f505, %f504;
	mov.f32 	%f506, 0f00000000;   	// 0
	max.ftz.f32 	%f507, %f7, %f506;
	mov.f32 	%f508, 0f3f800000;   	// 1
	min.ftz.f32 	%f509, %f507, %f508;
	ld.param.f32 	%f510, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+20];
	sub.ftz.f32 	%f511, %f509, %f510;
	abs.ftz.f32 	%f512, %f511;
	mov.f32 	%f513, 0f00000000;   	// 0
	max.ftz.f32 	%f514, %f9, %f513;
	mov.f32 	%f515, 0f3f800000;   	// 1
	min.ftz.f32 	%f516, %f514, %f515;
	sub.ftz.f32 	%f517, %f516, %f9;
	abs.ftz.f32 	%f518, %f517;
	max.ftz.f32 	%f519, %f512, %f518;
	max.ftz.f32 	%f520, %f505, %f519;
	max.ftz.f32 	%f521, %f498, %f520;
	setp.ge.ftz.f32 	%p85, %f491, %f521;
	xor.pred 	%p86, %p84, %p85;
	@!%p86 bra 	$Lt_50_10754;
	.loc	22	267	0
	// Selected: Y = M[0]*%f7 + M[4]*%f6 + M[8]*%f5 (M = kRGB32f_To_601YPbPr),
	// then each channel = Y * first entry of a k601YPbPr_To_RGB32f row
	// (+24 -> %f5, +12 -> %f6, +0 -> %f7).
	ld.const.f32 	%f522, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f522, %f6;
	ld.const.f32 	%f523, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f523, %f7, %f467;
	ld.const.f32 	%f524, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f524, %f5, %f469;
	ld.const.f32 	%f525, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f526, %f525, %f471;
	.loc	22	268	0
	ld.const.f32 	%f527, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f528, %f527, %f471;
	.loc	23	44	0
	ld.const.f32 	%f529, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f529, %f471;
	mov.f32 	%f6, %f526;
	mov.f32 	%f7, %f528;
	bra.uni 	$Lt_50_10754;
// Filter-1 case 5 (.loc 27 48): replace pixels that are close to a reference.
// Q below means __cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1.
// Distance d is the maximum of four absolute differences:
//   |clamp01(%f5) - Q+12|, |clamp01(%f6) - Q+16|, |clamp01(%f7) - Q+20|,
//   |clamp01(%f9) - %f9|   (how far %f9 lies outside [0,1])
// When Q+8 >= d the pixel is replaced by the triple (Q+24, Q+28, Q+32), written
// to (%f5, %f6, %f7). If additionally Q+4 == 0, the triple is first scaled by
//   Y = M[0]*%f7 + M[4]*%f6 + M[8]*%f5        (M = kRGB32f_To_601YPbPr)
// computed from the original pixel. Otherwise the pixel is left unchanged.
$Lt_50_12034:
	.loc	27	48	0
	ld.param.f32 	%f530, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+8];
	mov.f32 	%f531, 0f00000000;   	// 0
	max.ftz.f32 	%f532, %f5, %f531;
	mov.f32 	%f533, 0f3f800000;   	// 1
	min.ftz.f32 	%f534, %f532, %f533;
	ld.param.f32 	%f535, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+12];
	sub.ftz.f32 	%f536, %f534, %f535;
	abs.ftz.f32 	%f537, %f536;
	mov.f32 	%f538, 0f00000000;   	// 0
	max.ftz.f32 	%f539, %f6, %f538;
	mov.f32 	%f540, 0f3f800000;   	// 1
	min.ftz.f32 	%f541, %f539, %f540;
	ld.param.f32 	%f542, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+16];
	sub.ftz.f32 	%f543, %f541, %f542;
	abs.ftz.f32 	%f544, %f543;
	mov.f32 	%f545, 0f00000000;   	// 0
	max.ftz.f32 	%f546, %f7, %f545;
	mov.f32 	%f547, 0f3f800000;   	// 1
	min.ftz.f32 	%f548, %f546, %f547;
	ld.param.f32 	%f549, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+20];
	sub.ftz.f32 	%f550, %f548, %f549;
	abs.ftz.f32 	%f551, %f550;
	mov.f32 	%f552, 0f00000000;   	// 0
	max.ftz.f32 	%f553, %f9, %f552;
	mov.f32 	%f554, 0f3f800000;   	// 1
	min.ftz.f32 	%f555, %f553, %f554;
	sub.ftz.f32 	%f556, %f555, %f9;
	abs.ftz.f32 	%f557, %f556;
	max.ftz.f32 	%f558, %f551, %f557;
	max.ftz.f32 	%f559, %f544, %f558;
	max.ftz.f32 	%f560, %f537, %f559;
	setp.ge.ftz.f32 	%p87, %f530, %f560;
	@!%p87 bra 	$Lt_50_10754;
	.loc	27	51	0
	// Default replacement: the raw triple. It is overwritten below when Q+4 == 0.
	ld.param.f32 	%f561, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+24];
	mov.f32 	%f126, %f561;
	ld.param.f32 	%f562, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+28];
	mov.f32 	%f128, %f562;
	ld.param.f32 	%f563, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+32];
	mov.f32 	%f130, %f563;
	ld.param.f32 	%f564, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+4];
	mov.f32 	%f565, 0f00000000;   	// 0
	setp.eq.ftz.f32 	%p88, %f564, %f565;
	@!%p88 bra 	$Lt_50_222978;
	.loc	27	60	0
	ld.const.f32 	%f566, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f566, %f6;
	ld.const.f32 	%f567, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f567, %f7, %f467;
	ld.const.f32 	%f568, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f568, %f5, %f469;
	mul.ftz.f32 	%f126, %f471, %f561;
	.loc	27	61	0
	mul.ftz.f32 	%f128, %f471, %f562;
	.loc	27	62	0
	mul.ftz.f32 	%f130, %f471, %f563;
$Lt_50_222978:
	.loc	27	65	0
	mov.f32 	%f5, %f126;
	mov.f32 	%f6, %f128;
	mov.f32 	%f7, %f130;
	bra.uni 	$Lt_50_10754;
// Filter-1 case 6 (.loc 28 47): attenuate %f9 near the borders.
// With w = paraminFilter1+4 (%f456) the code forms four values
//   w - %r6,  w - %r12,  w - (%r13 - %r12),  w - (%r14 - %r6)
// truncates each toward zero to s32, and takes their maximum m (%r333).
//   m <= 0 : pixel unchanged
//   m >  0 : %f9 *= (w - m) / w          (approximate division)
// NOTE(review): this reads as "distance to the nearest of four edges inside a
// band of width w", with %r6/%r12 the coordinates and %r14/%r13 the extents;
// those registers are produced outside this case -- confirm.
$Lt_50_12290:
	.loc	28	47	0
	sub.s32 	%r325, %r13, %r12;
	sub.s32 	%r326, %r14, %r6;
	cvt.rn.f32.s32 	%f569, %r6;
	cvt.rn.f32.s32 	%f570, %r12;
	cvt.rn.f32.s32 	%f138, %r325;
	cvt.rn.f32.s32 	%f139, %r326;
	ld.param.f32 	%f456, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+4];
	sub.ftz.f32 	%f571, %f456, %f569;
	sub.ftz.f32 	%f572, %f456, %f570;
	sub.ftz.f32 	%f573, %f456, %f138;
	sub.ftz.f32 	%f574, %f456, %f139;
	cvt.rzi.ftz.s32.f32 	%r327, %f571;
	cvt.rzi.ftz.s32.f32 	%r328, %f572;
	cvt.rzi.ftz.s32.f32 	%r329, %f573;
	cvt.rzi.ftz.s32.f32 	%r330, %f574;
	max.s32 	%r331, %r327, %r328;
	max.s32 	%r332, %r330, %r331;
	max.s32 	%r333, %r329, %r332;
	mov.u32 	%r334, 0;
	setp.le.s32 	%p89, %r333, %r334;
	@%p89 bra 	$Lt_50_10754;
	.loc	28	51	0
	cvt.rn.f32.s32 	%f575, %r333;
	sub.ftz.f32 	%f576, %f456, %f575;
	div.approx.ftz.f32 	%f577, %f576, %f456;
	mul.ftz.f32 	%f9, %f9, %f577;
	bra.uni 	$Lt_50_10754;
// Filter-1 case 7 (.loc 30 50): luma band mask with soft edges.
// Q below means __cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1.
//   Y   = M[0]*%f7 + M[4]*%f6 + M[8]*%f5      (M = kRGB32f_To_601YPbPr, -> %f471)
//   lo  = Q+8, hi = Q+12, s = Q+16 * (hi - lo)                  (%f583)
//   outerLo = clamp01(lo - s) (%f594)   outerHi = clamp01(hi + s) (%f592)
//   innerLo = clamp01(lo + s) (%f599)   innerHi = clamp01(hi - s) (%f604)
// Mask m (%f164):
//   Y < outerLo or Y >= outerHi  -> 0
//   innerLo <= Y < innerHi       -> 1
//   Y < innerLo                  -> (Y - outerLo) / (2*s)
//   otherwise                    -> (outerHi - Y) / (2*s)
// If Q+4 != 0 (setp.neu, so also when unordered) the mask is inverted: m = 1 - m.
// Finally each channel = m * first entry of a k601YPbPr_To_RGB32f row
// (+24 -> %f5, +12 -> %f6, +0 -> %f7).
$Lt_50_12546:
	.loc	30	50	0
	ld.const.f32 	%f578, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f578, %f6;
	ld.param.f32 	%f579, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+12];
	ld.param.f32 	%f481, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+8];
	sub.ftz.f32 	%f580, %f579, %f481;
	ld.const.f32 	%f581, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f581, %f7, %f467;
	ld.param.f32 	%f582, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+16];
	mul.ftz.f32 	%f583, %f582, %f580;
	ld.const.f32 	%f584, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f584, %f5, %f469;
	add.ftz.f32 	%f585, %f583, %f579;
	sub.ftz.f32 	%f586, %f481, %f583;
	mov.f32 	%f587, 0f00000000;   	// 0
	max.ftz.f32 	%f588, %f585, %f587;
	mov.f32 	%f589, 0f00000000;   	// 0
	max.ftz.f32 	%f590, %f586, %f589;
	mov.f32 	%f591, 0f3f800000;   	// 1
	min.ftz.f32 	%f592, %f588, %f591;
	mov.f32 	%f593, 0f3f800000;   	// 1
	min.ftz.f32 	%f594, %f590, %f593;
	// set.*.u32 writes an all-ones mask when true; neg turns that into 1.
	// %r339 != 0 means Y is outside [outerLo, outerHi).
	set.gt.ftz.u32.f32 	%r335, %f594, %f471;
	neg.s32 	%r336, %r335;
	set.le.ftz.u32.f32 	%r337, %f592, %f471;
	neg.s32 	%r338, %r337;
	or.b32 	%r339, %r336, %r338;
	mov.u32 	%r340, 0;
	setp.eq.s32 	%p90, %r339, %r340;
	@%p90 bra 	$Lt_50_224258;
	mov.f32 	%f164, 0f00000000;   	// 0
	bra.uni 	$Lt_50_225026;
$Lt_50_224258:
	// Inside the outer band: test the inner band [innerLo, innerHi).
	add.ftz.f32 	%f595, %f583, %f481;
	mov.f32 	%f596, 0f00000000;   	// 0
	max.ftz.f32 	%f597, %f595, %f596;
	mov.f32 	%f598, 0f3f800000;   	// 1
	min.ftz.f32 	%f599, %f597, %f598;
	set.le.ftz.u32.f32 	%r341, %f599, %f471;
	neg.s32 	%r342, %r341;
	sub.ftz.f32 	%f600, %f579, %f583;
	mov.f32 	%f601, 0f00000000;   	// 0
	max.ftz.f32 	%f602, %f600, %f601;
	mov.f32 	%f603, 0f3f800000;   	// 1
	min.ftz.f32 	%f604, %f602, %f603;
	set.lt.ftz.u32.f32 	%r343, %f471, %f604;
	neg.s32 	%r344, %r343;
	and.b32 	%r345, %r342, %r344;
	mov.u32 	%r346, 0;
	setp.eq.s32 	%p91, %r345, %r346;
	@%p91 bra 	$Lt_50_224770;
	mov.f32 	%f164, 0f3f800000;   	// 1
	bra.uni 	$Lt_50_225026;
$Lt_50_224770:
	// Transition zones: linear ramp across a width of 2*s (%f605).
	add.ftz.f32 	%f605, %f583, %f583;
	setp.gt.ftz.f32 	%p92, %f599, %f471;
	@!%p92 bra 	$Lt_50_225282;
	.loc	30	62	0
	sub.ftz.f32 	%f606, %f471, %f594;
	div.approx.ftz.f32 	%f164, %f606, %f605;
	bra.uni 	$Lt_50_225026;
$Lt_50_225282:
	.loc	30	66	0
	sub.ftz.f32 	%f607, %f592, %f471;
	div.approx.ftz.f32 	%f164, %f607, %f605;
$Lt_50_225026:
$Lt_50_224514:
$Lt_50_224002:
	.loc	30	69	0
	// Optional inversion of the mask, then write it to the three channels.
	mov.f32 	%f608, 0f3f800000;   	// 1
	sub.ftz.f32 	%f609, %f608, %f164;
	ld.param.f32 	%f610, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+4];
	mov.f32 	%f611, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p93, %f610, %f611;
	selp.f32 	%f164, %f609, %f164, %p93;
	.loc	30	77	0
	ld.const.f32 	%f612, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f612, %f164;
	ld.const.f32 	%f613, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f613, %f164;
	ld.const.f32 	%f614, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f614, %f164;
	.loc	38	79	0
	bra.uni 	$Lt_50_10754;
// Case $Lt_50_12802, gate.  With t = paraminFilter1+44 the case body at
// $L_50_201986 runs only when
//     t >= 0  and  float(%r6)  <   float(%r14) * t,   or
//     t <  0  and  float(%r12) < -(float(%r13) * t);
// otherwise control goes straight to $Lt_50_10754 and the pixel is untouched.
// NOTE(review): %r6/%r14 and %r12/%r13 look like x/width and y/height (a
// split-screen style limit) - they are defined outside this excerpt; confirm.
$Lt_50_12802:
	.loc	38	80	0
	ld.param.f32 	%f615, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+44];
	mov.f32 	%f616, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p94, %f615, %f616;
	@!%p94 bra 	$L_50_202242;
	cvt.rn.f32.s32 	%f617, %r6;
	cvt.rn.f32.s32 	%f618, %r14;
	mul.ftz.f32 	%f619, %f618, %f615;
	setp.lt.ftz.f32 	%p95, %f617, %f619;
	@%p95 bra 	$L_50_201986;
$L_50_202242:
	// Reached when t < 0 (or unordered), and also when t >= 0 but the first
	// comparison failed; in the latter case the t < 0 test below skips the case.
	mov.f32 	%f620, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p96, %f615, %f620;
	@!%p96 bra 	$Lt_50_10754;
	cvt.rn.f32.s32 	%f621, %r12;
	cvt.rn.f32.s32 	%f622, %r13;
	mul.ftz.f32 	%f623, %f622, %f615;
	neg.ftz.f32 	%f624, %f623;
	setp.lt.ftz.f32 	%p97, %f621, %f624;
	@!%p97 bra 	$Lt_50_10754;
// $L_50_201986: compute y = K[+0]*%f7 + K[+4]*%f6 + K[+8]*%f5
// (K = kRGB32f_To_601YPbPr) into %f471 and %f200, load the remap parameters
//   %f481 = paraminFilter1+8,   %f631 = (paraminFilter1+4)  - %f481
//   %f632 = paraminFilter1+16,  %f634 = (paraminFilter1+12) - %f632
// and, unless both differences equal 1, add a small pseudo-random offset to
// %f200.  Execution then falls through to $Lt_50_225538.
$L_50_201986:
	.loc	31	47	0
	ld.const.f32 	%f625, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f626, %f625, %f6;
	ld.const.f32 	%f627, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f628, %f627, %f7, %f626;
	ld.const.f32 	%f629, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f629, %f5, %f628;
	mov.f32 	%f200, %f471;
	ld.param.f32 	%f481, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+8];
	ld.param.f32 	%f630, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+4];
	sub.ftz.f32 	%f631, %f630, %f481;
	ld.param.f32 	%f632, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+16];
	ld.param.f32 	%f633, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+12];
	sub.ftz.f32 	%f634, %f633, %f632;
	// %r351 == 0 only when both %f634 and %f631 compare equal to 1; in that
	// case the random offset is skipped.
	mov.f32 	%f635, 0f3f800000;   	// 1
	set.neu.ftz.u32.f32 	%r347, %f634, %f635;
	neg.s32 	%r348, %r347;
	mov.f32 	%f636, 0f3f800000;   	// 1
	set.neu.ftz.u32.f32 	%r349, %f631, %f636;
	neg.s32 	%r350, %r349;
	or.b32 	%r351, %r348, %r350;
	mov.u32 	%r352, 0;
	setp.eq.s32 	%p98, %r351, %r352;
	@%p98 bra 	$Lt_50_225538;
	.loc	20	143	0
	// Integer scramble of %r5 and %r11 (rounds of subtract / shift / xor);
	// the result is %r114.  %r5 and %r11 are defined outside this excerpt.
	// NOTE(review): presumably the inlined Mix3/Rand2D helpers declared at the
	// top of the file (source file 20, lines 143-147) - confirm.
	mov.s32 	%r353, 1;
	sub.s32 	%r79, %r353, %r5;
	shr.u32 	%r80, %r11, 13;
	sub.u32 	%r81, %r5, %r11;
	sub.u32 	%r82, %r79, %r11;
	xor.b32 	%r83, %r82, %r80;
	shl.b32 	%r84, %r83, 8;
	sub.u32 	%r85, %r81, %r83;
	sub.u32 	%r86, %r11, %r83;
	xor.b32 	%r87, %r85, %r84;
	shr.u32 	%r88, %r87, 13;
	sub.u32 	%r89, %r86, %r87;
	sub.u32 	%r90, %r83, %r87;
	xor.b32 	%r91, %r89, %r88;
	shr.u32 	%r92, %r91, 12;
	sub.u32 	%r93, %r90, %r91;
	xor.b32 	%r94, %r93, %r92;
	sub.u32 	%r354, %r87, %r91;
	sub.u32 	%r96, %r354, %r94;
	shl.b32 	%r355, %r94, 16;
	xor.b32 	%r98, %r96, %r355;
	.loc	20	144	0
	sub.u32 	%r99, %r91, %r94;
	sub.u32 	%r100, %r99, %r98;
	shr.u32 	%r356, %r98, 5;
	xor.b32 	%r102, %r100, %r356;
	.loc	20	145	0
	sub.u32 	%r103, %r94, %r98;
	sub.u32 	%r104, %r103, %r102;
	shr.u32 	%r357, %r102, 3;
	xor.b32 	%r106, %r104, %r357;
	.loc	20	146	0
	sub.u32 	%r107, %r98, %r102;
	sub.u32 	%r108, %r107, %r106;
	shl.b32 	%r358, %r106, 10;
	xor.b32 	%r110, %r108, %r358;
	.loc	20	147	0
	sub.u32 	%r111, %r102, %r106;
	sub.u32 	%r112, %r111, %r110;
	shr.u32 	%r359, %r110, 15;
	xor.b32 	%r114, %r112, %r359;
	.loc	31	57	0
	// Two linear-congruential values from %r114:
	//   a = %r114*1103515245 + 12345
	//   b = %r114*(-1029531031) - 740551042
	// (the second pair appears to be the first step applied twice, mod 2^32).
	// Bits 16..23 of each are combined as (a8 << 7) ^ b8, giving 0..32767.
	mov.f32 	%f637, 0f3b270d73;   	// 0.00254902
	mul.lo.u32 	%r360, %r114, 1103515245;
	add.u32 	%r361, %r360, 12345;
	shr.u32 	%r362, %r361, 16;
	and.b32 	%r363, %r362, 255;
	shl.b32 	%r364, %r363, 7;
	mul.lo.u32 	%r365, %r114, -1029531031;
	sub.u32 	%r366, %r365, 740551042;
	shr.u32 	%r367, %r366, 16;
	and.b32 	%r368, %r367, 255;
	xor.b32 	%r369, %r364, %r368;
	// Map 0..32767 to roughly [-1, 1] (divide by 16383.5, subtract 1), then
	// %f200 = y + 0.00254902 * that value.
	cvt.rn.f32.s32 	%f638, %r369;
	mov.f32 	%f639, 0f467ffe00;   	// 16383.5
	div.approx.ftz.f32 	%f640, %f638, %f639;
	mov.f32 	%f641, 0fbf800000;   	// -1
	add.ftz.f32 	%f642, %f640, %f641;
	fma.rn.ftz.f32 	%f200, %f637, %f642, %f471;
// $Lt_50_225538: remap %f200 from the input range to the output range.
// Registers set earlier in this case: %f481 = paraminFilter1+8 (input low),
// %f631 = input span, %f632 = paraminFilter1+16 (output low), %f634 = output span.
//   d = %f200 - %f481,  g = paraminFilter1+20
//   g == 1          : %f200 = %f632 + %f634 * (d / %f631)
//   g != 1, d <  0  : %f200 = %f632
//   g != 1, d >= 0  : %f200 = %f632 + %f634 * 2^(g * log2(d / %f631))
// NOTE(review): g presumably acts as a gamma exponent - confirm.
$Lt_50_225538:
	sub.ftz.f32 	%f643, %f200, %f481;
	ld.param.f32 	%f644, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+20];
	mov.f32 	%f645, 0f3f800000;   	// 1
	setp.neu.ftz.f32 	%p99, %f644, %f645;
	@!%p99 bra 	$Lt_50_226306;
	mov.f32 	%f646, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p100, %f643, %f646;
	@!%p100 bra 	$Lt_50_226818;
	.loc	31	66	0
	// d < 0: use the output low value directly (avoids log2 of a negative).
	mov.f32 	%f200, %f632;
	bra.uni 	$Lt_50_226050;
$Lt_50_226818:
	.loc	31	70	0
	// Power curve built from approximate rcp / lg2 / ex2.
	rcp.approx.ftz.f32 	%f647, %f631;
	mul.ftz.f32 	%f648, %f643, %f647;
	lg2.approx.ftz.f32 	%f649, %f648;
	mul.ftz.f32 	%f650, %f644, %f649;
	ex2.approx.ftz.f32 	%f651, %f650;
	fma.rn.ftz.f32 	%f200, %f634, %f651, %f632;
	bra.uni 	$Lt_50_226050;
$Lt_50_226306:
	.loc	31	77	0
	// Exponent equals 1: plain linear remap.
	rcp.approx.ftz.f32 	%f652, %f631;
	mul.ftz.f32 	%f653, %f643, %f652;
	fma.rn.ftz.f32 	%f200, %f634, %f653, %f632;
// $Lt_50_226050: process the two remaining rows of kRGB32f_To_601YPbPr and
// convert back through k601YPbPr_To_RGB32f.
//   u = K[+12]*%f7 + K[+16]*%f6 + K[+20]*%f5        (%f668)
//   v = K[+24]*%f7 + K[+28]*%f6 + K[+32]*%f5        (%f666)
//   rotate (u, v) by the angle a = paraminFilter1+28 (sin/cos approximations)
//   blend each towards paraminFilter1+32 / +36 by the factor paraminFilter1+40
//   scale both by paraminFilter1+24
// The outputs are the three rows of k601YPbPr_To_RGB32f applied to
// (%f200, u', v'), where %f200 is the first component computed earlier.
// NOTE(review): u/v are presumably Pb/Pr (hue rotation, tint, saturation) - confirm.
$Lt_50_226050:
	.loc	22	267	0
	ld.const.f32 	%f654, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f655, %f654, %f6;
	ld.const.f32 	%f656, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f657, %f656, %f6;
	ld.param.f32 	%f658, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+28];
	sin.approx.ftz.f32 	%f659, %f658;
	cos.approx.ftz.f32 	%f660, %f658;
	ld.const.f32 	%f661, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f662, %f661, %f7, %f655;
	ld.const.f32 	%f663, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f664, %f663, %f7, %f657;
	ld.const.f32 	%f665, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f666, %f665, %f5, %f662;
	ld.const.f32 	%f667, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f668, %f667, %f5, %f664;
	// Rotation: %f672 = u*cos - v*sin,  %f673 = u*sin + v*cos.
	mul.ftz.f32 	%f669, %f659, %f666;
	mul.ftz.f32 	%f670, %f660, %f666;
	mul.ftz.f32 	%f671, %f668, %f660;
	sub.ftz.f32 	%f672, %f671, %f669;
	fma.rn.ftz.f32 	%f673, %f668, %f659, %f670;
	// Blend: x + k*(target - x) with k = paraminFilter1+40.
	ld.param.f32 	%f674, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+32];
	sub.ftz.f32 	%f675, %f674, %f672;
	ld.param.f32 	%f676, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+36];
	sub.ftz.f32 	%f677, %f676, %f673;
	ld.param.f32 	%f678, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+40];
	fma.rn.ftz.f32 	%f679, %f678, %f675, %f672;
	fma.rn.ftz.f32 	%f680, %f678, %f677, %f673;
	// Uniform scale of both components by paraminFilter1+24.
	ld.param.f32 	%f681, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+24];
	mul.ftz.f32 	%f682, %f679, %f681;
	mul.ftz.f32 	%f683, %f680, %f681;
	// Row at +12..+20 of the inverse matrix -> %f260 (moved to %f6 below).
	ld.const.f32 	%f684, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f685, %f684, %f682;
	ld.const.f32 	%f686, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f687, %f686, %f200, %f685;
	ld.const.f32 	%f688, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f260, %f688, %f683, %f687;
	.loc	22	268	0
	// Row at +0..+8 -> %f266 (moved to %f7 below).
	ld.const.f32 	%f689, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f690, %f689, %f682;
	ld.const.f32 	%f691, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f692, %f691, %f200, %f690;
	ld.const.f32 	%f693, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f266, %f693, %f683, %f692;
	.loc	31	92	0
	// Row at +24..+32 -> %f5.
	ld.const.f32 	%f694, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f695, %f694, %f682;
	ld.const.f32 	%f696, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f697, %f696, %f200, %f695;
	ld.const.f32 	%f698, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f698, %f683, %f697;
	mov.f32 	%f6, %f260;
	mov.f32 	%f7, %f266;
	bra.uni 	$Lt_50_10754;
// Case $Lt_50_13058: gated range remap of the weighted colour sum, with the
// result replicated to all three colour registers.
// Gate (t = paraminFilter1+44): the body runs when
//     t >= 0 and float(%r6)  <   float(%r14) * t,  or
//     t <  0 and float(%r12) < -(float(%r13) * t);
// otherwise control jumps to $Lt_50_10754.
// Body: y = K[+0]*%f7 + K[+4]*%f6 + K[+8]*%f5 (K = kRGB32f_To_601YPbPr),
//   d = y - inLo, inSpan = (paraminFilter1+4) - inLo, inLo = paraminFilter1+8,
//   outLo = paraminFilter1+16, outSpan = (paraminFilter1+12) - outLo,
//   g = paraminFilter1+20
//   g != 1 : out = outLo + outSpan * 2^(g * log2(max(d, 0) / inSpan))
//   g == 1 : out = outLo + outSpan * (d / inSpan)
// NOTE(review): %r6/%r14/%r12/%r13 are defined outside this excerpt
// (presumably x/width/y/height) - confirm.
$Lt_50_13058:
	.loc	38	83	0
	ld.param.f32 	%f699, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+44];
	mov.f32 	%f700, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p101, %f699, %f700;
	@!%p101 bra 	$L_50_203010;
	cvt.rn.f32.s32 	%f701, %r6;
	cvt.rn.f32.s32 	%f702, %r14;
	mul.ftz.f32 	%f703, %f702, %f699;
	setp.lt.ftz.f32 	%p102, %f701, %f703;
	@%p102 bra 	$L_50_202754;
$L_50_203010:
	mov.f32 	%f704, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p103, %f699, %f704;
	@!%p103 bra 	$Lt_50_10754;
	cvt.rn.f32.s32 	%f705, %r12;
	cvt.rn.f32.s32 	%f706, %r13;
	mul.ftz.f32 	%f707, %f706, %f699;
	neg.ftz.f32 	%f708, %f707;
	setp.lt.ftz.f32 	%p104, %f705, %f708;
	@!%p104 bra 	$Lt_50_10754;
$L_50_202754:
	.loc	31	110	0
	ld.const.f32 	%f709, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f709, %f6;
	ld.param.f32 	%f481, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+8];
	ld.param.f32 	%f710, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+4];
	sub.ftz.f32 	%f711, %f710, %f481;
	ld.param.f32 	%f712, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+16];
	ld.param.f32 	%f713, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+12];
	sub.ftz.f32 	%f714, %f713, %f712;
	ld.const.f32 	%f715, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f715, %f7, %f467;
	ld.const.f32 	%f716, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f716, %f5, %f469;
	sub.ftz.f32 	%f717, %f471, %f481;
	ld.param.f32 	%f718, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+20];
	mov.f32 	%f719, 0f3f800000;   	// 1
	setp.neu.ftz.f32 	%p105, %f718, %f719;
	@!%p105 bra 	$Lt_50_227330;
	.loc	42	523	0
	// Power path: d is clamped at 0 before the division so lg2 never sees a
	// negative argument.
	mov.f32 	%f720, 0f00000000;   	// 0
	max.ftz.f32 	%f721, %f717, %f720;
	div.approx.ftz.f32 	%f722, %f721, %f711;
	lg2.approx.ftz.f32 	%f723, %f722;
	mul.ftz.f32 	%f724, %f718, %f723;
	ex2.approx.ftz.f32 	%f298, %f724;
	.loc	31	120	0
	fma.rn.ftz.f32 	%f299, %f714, %f298, %f712;
	bra.uni 	$Lt_50_227074;
$Lt_50_227330:
	.loc	31	129	0
	// Linear path (exponent equals 1); d is not clamped here.
	div.approx.ftz.f32 	%f725, %f717, %f711;
	fma.rn.ftz.f32 	%f299, %f714, %f725, %f712;
$Lt_50_227074:
	.loc	31	135	0
	// Replicate the result, scaled by k601YPbPr_To_RGB32f+24 / +12 / +0.
	ld.const.f32 	%f726, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f726, %f299;
	ld.const.f32 	%f727, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f727, %f299;
	ld.const.f32 	%f728, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f728, %f299;
	bra.uni 	$Lt_50_10754;
// Case $Lt_50_13314: gated constant fill.
// Gate (t = paraminFilter1+44): the body runs when
//     t >= 0 and float(%r6)  <   float(%r14) * t,  or
//     t <  0 and float(%r12) < -(float(%r13) * t);
// otherwise control jumps to $Lt_50_10754 with the pixel unchanged.
// Body: %f5/%f6/%f7 are overwritten with the constants at
// k601YPbPr_To_RGB32f+24 / +12 / +0 (the same result the sibling cases get
// when they multiply those constants by a value of 1).
$Lt_50_13314:
	.loc	38	86	0
	ld.param.f32 	%f729, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+44];
	mov.f32 	%f730, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p106, %f729, %f730;
	@!%p106 bra 	$L_50_203778;
	cvt.rn.f32.s32 	%f731, %r6;
	cvt.rn.f32.s32 	%f732, %r14;
	mul.ftz.f32 	%f733, %f732, %f729;
	setp.lt.ftz.f32 	%p107, %f731, %f733;
	@%p107 bra 	$L_50_203522;
$L_50_203778:
	mov.f32 	%f734, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p108, %f729, %f734;
	@!%p108 bra 	$Lt_50_10754;
	cvt.rn.f32.s32 	%f735, %r12;
	cvt.rn.f32.s32 	%f736, %r13;
	mul.ftz.f32 	%f737, %f736, %f729;
	neg.ftz.f32 	%f738, %f737;
	setp.lt.ftz.f32 	%p109, %f735, %f738;
	@!%p109 bra 	$Lt_50_10754;
$L_50_203522:
	.loc	31	160	0
	ld.const.f32 	%f5, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f6, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f7, [k601YPbPr_To_RGB32f+0];
	bra.uni 	$Lt_50_10754;
// Case $Lt_50_13570: sign-preserving power on %f5, %f6, %f7 and %f9.
// With e = paraminFilter1+4 each channel x becomes
//     x >= 0    :   2^(e * log2( x))
//     otherwise : -(2^(e * log2(-x)))
// using the approximate lg2/ex2 instructions.  The "otherwise" branch is also
// taken when the >= comparison is unordered (NaN input).
// Results are staged in %f743/%f751/%f759/%f767 and copied back at the end.
$Lt_50_13570:
	.loc	32	42	0
	ld.param.f32 	%f739, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+4];
	mov.f32 	%f740, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p110, %f5, %f740;
	@!%p110 bra 	$Lt_50_227842;
	.loc	22	292	0
	// %f456 holds the exponent e for the rest of the case (set on both paths).
	mov.f32 	%f456, %f739;
	lg2.approx.ftz.f32 	%f741, %f5;
	mul.ftz.f32 	%f742, %f456, %f741;
	ex2.approx.ftz.f32 	%f743, %f742;
	bra.uni 	$Lt_50_227586;
$Lt_50_227842:
	mov.f32 	%f456, %f739;
	neg.ftz.f32 	%f744, %f5;
	lg2.approx.ftz.f32 	%f745, %f744;
	mul.ftz.f32 	%f746, %f456, %f745;
	ex2.approx.ftz.f32 	%f747, %f746;
	neg.ftz.f32 	%f743, %f747;
$Lt_50_227586:
	// Same computation for %f6 -> %f751.
	mov.f32 	%f748, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p111, %f6, %f748;
	@!%p111 bra 	$Lt_50_228354;
	.loc	22	293	0
	lg2.approx.ftz.f32 	%f749, %f6;
	mul.ftz.f32 	%f750, %f456, %f749;
	ex2.approx.ftz.f32 	%f751, %f750;
	bra.uni 	$Lt_50_228098;
$Lt_50_228354:
	neg.ftz.f32 	%f752, %f6;
	lg2.approx.ftz.f32 	%f753, %f752;
	mul.ftz.f32 	%f754, %f456, %f753;
	ex2.approx.ftz.f32 	%f755, %f754;
	neg.ftz.f32 	%f751, %f755;
$Lt_50_228098:
	// Same computation for %f7 -> %f759.
	mov.f32 	%f756, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p112, %f7, %f756;
	@!%p112 bra 	$Lt_50_228866;
	.loc	22	294	0
	lg2.approx.ftz.f32 	%f757, %f7;
	mul.ftz.f32 	%f758, %f456, %f757;
	ex2.approx.ftz.f32 	%f759, %f758;
	bra.uni 	$Lt_50_228610;
$Lt_50_228866:
	neg.ftz.f32 	%f760, %f7;
	lg2.approx.ftz.f32 	%f761, %f760;
	mul.ftz.f32 	%f762, %f456, %f761;
	ex2.approx.ftz.f32 	%f763, %f762;
	neg.ftz.f32 	%f759, %f763;
$Lt_50_228610:
	// Same computation for %f9 -> %f767.
	mov.f32 	%f764, 0f00000000;   	// 0
	setp.ge.ftz.f32 	%p113, %f9, %f764;
	@!%p113 bra 	$Lt_50_229378;
	.loc	22	295	0
	lg2.approx.ftz.f32 	%f765, %f9;
	mul.ftz.f32 	%f766, %f456, %f765;
	ex2.approx.ftz.f32 	%f767, %f766;
	bra.uni 	$Lt_50_229122;
$Lt_50_229378:
	neg.ftz.f32 	%f768, %f9;
	lg2.approx.ftz.f32 	%f769, %f768;
	mul.ftz.f32 	%f770, %f456, %f769;
	ex2.approx.ftz.f32 	%f771, %f770;
	neg.ftz.f32 	%f767, %f771;
$Lt_50_229122:
	.loc	32	42	0
	// Commit the four results.
	mov.f32 	%f5, %f743;
	mov.f32 	%f6, %f751;
	mov.f32 	%f7, %f759;
	mov.f32 	%f9, %f767;
	.loc	38	91	0
	bra.uni 	$Lt_50_10754;
// Case $Lt_50_13826: mirror an index, %r6 = %r14 - %r6 - 1.
// NOTE(review): %r6/%r14 are defined outside this excerpt; presumably the x
// coordinate and the image width (horizontal flip) - confirm.
$Lt_50_13826:
	.loc	33	41	0
	sub.s32 	%r370, %r14, %r6;
	sub.s32 	%r6, %r370, 1;
	.loc	38	94	0
	bra.uni 	$Lt_50_10754;
// Case $Lt_50_14082: pseudo-random offsets for the three colour channels.
//   amp  = paraminFilter1+4   (%f456),   half = amp * 0.5   (%f773)
//   seed = paraminFilter1+16 truncated to u32
//   flag = paraminFilter1+8
// When flag == 1 (this block) three independent offsets are produced from the
// scrambled values of (3*%r6 - %r12 - seed) + 0, + 1 and + 2; otherwise the
// code at $Lt_50_229890 produces one shared offset.  Each offset is
//     amp * (v / 32767) - half,   v in 0..32767
// and lands in %f356, %f361, %f366, which $Lt_50_229634 adds to %f7/%f6/%f5.
// NOTE(review): %r6/%r12 are defined outside this excerpt (presumably pixel
// x/y); the scramble is presumably the inlined Mix3/Rand2D helpers - confirm.
$Lt_50_14082:
	.loc	38	96	0
	ld.param.f32 	%f456, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+4];
	mov.f32 	%f772, 0f3f000000;   	// 0.5
	mul.ftz.f32 	%f773, %f456, %f772;
	ld.param.f32 	%f774, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+16];
	ld.param.f32 	%f775, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+8];
	mov.f32 	%f776, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p114, %f775, %f776;
	@!%p114 bra 	$Lt_50_229890;
	.loc	20	143	0
	// First scramble.  %r371 = seed, %r372 = seed >> 13, %r373 = %r12 - seed and
	// %r374 = 3*%r6 - %r12 - seed are reused by the second and third scrambles.
	mul.lo.s32 	%r126, %r6, 3;
	cvt.rzi.ftz.u32.f32 	%r371, %f774;
	sub.u32 	%r128, %r126, %r12;
	shr.u32 	%r372, %r371, 13;
	sub.u32 	%r373, %r12, %r371;
	sub.u32 	%r374, %r128, %r371;
	xor.b32 	%r375, %r374, %r372;
	shl.b32 	%r376, %r375, 8;
	sub.u32 	%r377, %r373, %r375;
	sub.u32 	%r378, %r371, %r375;
	xor.b32 	%r379, %r376, %r377;
	shr.u32 	%r380, %r379, 13;
	sub.u32 	%r381, %r378, %r379;
	sub.u32 	%r382, %r375, %r379;
	xor.b32 	%r383, %r380, %r381;
	shr.u32 	%r384, %r383, 12;
	sub.u32 	%r385, %r382, %r383;
	xor.b32 	%r386, %r384, %r385;
	shl.b32 	%r387, %r386, 16;
	sub.u32 	%r388, %r379, %r383;
	sub.u32 	%r389, %r388, %r386;
	xor.b32 	%r390, %r387, %r389;
	.loc	20	144	0
	sub.u32 	%r391, %r383, %r386;
	sub.u32 	%r392, %r391, %r390;
	shr.u32 	%r393, %r390, 5;
	xor.b32 	%r394, %r392, %r393;
	.loc	20	145	0
	sub.u32 	%r395, %r386, %r390;
	sub.u32 	%r396, %r395, %r394;
	shr.u32 	%r397, %r394, 3;
	xor.b32 	%r155, %r396, %r397;
	.loc	20	146	0
	sub.u32 	%r398, %r390, %r394;
	sub.u32 	%r399, %r398, %r155;
	shl.b32 	%r400, %r155, 10;
	xor.b32 	%r159, %r399, %r400;
	.loc	20	147	0
	sub.u32 	%r401, %r394, %r155;
	sub.u32 	%r402, %r401, %r159;
	shr.u32 	%r403, %r159, 15;
	xor.b32 	%r163, %r402, %r403;
	.loc	34	48	0
	// Two linear-congruential values from %r163; bits 16..23 of each are
	// combined as (a8 << 7) ^ b8 to give v in 0..32767.
	// %f356 = amp * v / 32767 - half.
	mul.lo.u32 	%r404, %r163, 1103515245;
	add.u32 	%r405, %r404, 12345;
	shr.u32 	%r406, %r405, 16;
	and.b32 	%r407, %r406, 255;
	shl.b32 	%r408, %r407, 7;
	mul.lo.u32 	%r409, %r163, -1029531031;
	sub.u32 	%r410, %r409, 740551042;
	shr.u32 	%r411, %r410, 16;
	and.b32 	%r412, %r411, 255;
	xor.b32 	%r413, %r408, %r412;
	cvt.rn.f32.s32 	%f777, %r413;
	mov.f32 	%f778, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f779, %f777, %f778;
	mul.ftz.f32 	%f780, %f456, %f779;
	sub.ftz.f32 	%f356, %f780, %f773;
	.loc	20	143	0
	// Second scramble: same rounds, starting value %r374 + 1.
	add.u32 	%r414, %r374, 1;
	xor.b32 	%r415, %r414, %r372;
	shl.b32 	%r416, %r415, 8;
	sub.u32 	%r417, %r373, %r415;
	sub.u32 	%r418, %r371, %r415;
	xor.b32 	%r419, %r416, %r417;
	shr.u32 	%r420, %r419, 13;
	sub.u32 	%r421, %r418, %r419;
	sub.u32 	%r422, %r415, %r419;
	xor.b32 	%r423, %r420, %r421;
	shr.u32 	%r424, %r423, 12;
	sub.u32 	%r425, %r422, %r423;
	xor.b32 	%r426, %r424, %r425;
	sub.u32 	%r427, %r419, %r423;
	sub.u32 	%r428, %r427, %r426;
	shl.b32 	%r429, %r426, 16;
	xor.b32 	%r430, %r428, %r429;
	.loc	20	144	0
	sub.u32 	%r431, %r423, %r426;
	sub.u32 	%r432, %r431, %r430;
	shr.u32 	%r433, %r430, 5;
	xor.b32 	%r434, %r432, %r433;
	.loc	20	145	0
	sub.u32 	%r435, %r426, %r430;
	sub.u32 	%r436, %r435, %r434;
	shr.u32 	%r437, %r434, 3;
	xor.b32 	%r198, %r436, %r437;
	.loc	20	146	0
	sub.u32 	%r438, %r430, %r434;
	sub.u32 	%r439, %r438, %r198;
	shl.b32 	%r440, %r198, 10;
	xor.b32 	%r202, %r439, %r440;
	.loc	20	147	0
	sub.u32 	%r441, %r434, %r198;
	sub.u32 	%r442, %r441, %r202;
	shr.u32 	%r443, %r202, 15;
	xor.b32 	%r206, %r442, %r443;
	.loc	34	49	0
	// %f361 = amp * v / 32767 - half, v derived from %r206 as above.
	mul.lo.u32 	%r444, %r206, 1103515245;
	add.u32 	%r445, %r444, 12345;
	shr.u32 	%r446, %r445, 16;
	and.b32 	%r447, %r446, 255;
	shl.b32 	%r448, %r447, 7;
	mul.lo.u32 	%r449, %r206, -1029531031;
	sub.u32 	%r450, %r449, 740551042;
	shr.u32 	%r451, %r450, 16;
	and.b32 	%r452, %r451, 255;
	xor.b32 	%r453, %r448, %r452;
	cvt.rn.f32.s32 	%f781, %r453;
	mov.f32 	%f782, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f783, %f781, %f782;
	mul.ftz.f32 	%f784, %f456, %f783;
	sub.ftz.f32 	%f361, %f784, %f773;
	.loc	20	143	0
	// Third scramble: same rounds, starting value %r374 + 2.
	add.u32 	%r454, %r374, 2;
	xor.b32 	%r455, %r454, %r372;
	shl.b32 	%r456, %r455, 8;
	sub.u32 	%r457, %r373, %r455;
	sub.u32 	%r458, %r371, %r455;
	xor.b32 	%r459, %r456, %r457;
	shr.u32 	%r460, %r459, 13;
	sub.u32 	%r461, %r458, %r459;
	sub.u32 	%r462, %r455, %r459;
	xor.b32 	%r463, %r460, %r461;
	shr.u32 	%r464, %r463, 12;
	sub.u32 	%r465, %r462, %r463;
	xor.b32 	%r466, %r464, %r465;
	sub.u32 	%r467, %r459, %r463;
	sub.u32 	%r468, %r467, %r466;
	shl.b32 	%r469, %r466, 16;
	xor.b32 	%r470, %r468, %r469;
	.loc	20	144	0
	sub.u32 	%r471, %r463, %r466;
	sub.u32 	%r472, %r471, %r470;
	shr.u32 	%r473, %r470, 5;
	xor.b32 	%r474, %r472, %r473;
	.loc	20	145	0
	sub.u32 	%r475, %r466, %r470;
	sub.u32 	%r476, %r475, %r474;
	shr.u32 	%r477, %r474, 3;
	xor.b32 	%r241, %r476, %r477;
	.loc	20	146	0
	sub.u32 	%r478, %r470, %r474;
	sub.u32 	%r479, %r478, %r241;
	shl.b32 	%r480, %r241, 10;
	xor.b32 	%r245, %r479, %r480;
	.loc	20	147	0
	sub.u32 	%r481, %r474, %r241;
	sub.u32 	%r482, %r481, %r245;
	shr.u32 	%r483, %r245, 15;
	xor.b32 	%r249, %r482, %r483;
	.loc	34	50	0
	// %f366 = amp * v / 32767 - half, v derived from %r249 as above.
	mul.lo.u32 	%r484, %r249, 1103515245;
	add.u32 	%r485, %r484, 12345;
	shr.u32 	%r486, %r485, 16;
	and.b32 	%r487, %r486, 255;
	shl.b32 	%r488, %r487, 7;
	mul.lo.u32 	%r489, %r249, -1029531031;
	sub.u32 	%r490, %r489, 740551042;
	shr.u32 	%r491, %r490, 16;
	and.b32 	%r492, %r491, 255;
	xor.b32 	%r493, %r488, %r492;
	cvt.rn.f32.s32 	%f785, %r493;
	mov.f32 	%f786, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f787, %f785, %f786;
	mul.ftz.f32 	%f788, %f456, %f787;
	sub.ftz.f32 	%f366, %f788, %f773;
	bra.uni 	$Lt_50_229634;
// $Lt_50_229890: single-offset variant, reached when paraminFilter1+8 does not
// compare equal to 1.  One value is scrambled from (%r6 - %r12 - seed), where
// seed = %f774 (paraminFilter1+16) truncated to u32, and the resulting offset
//     %f456 * (v / 32767) - %f773,   v in 0..32767
// is copied to all three of %f356, %f361 and %f366 (%f456 = paraminFilter1+4
// and %f773 = 0.5 * %f456 were set at the start of the case).
// NOTE(review): %r6/%r12 are defined outside this excerpt - confirm meaning.
$Lt_50_229890:
	.loc	20	143	0
	sub.u32 	%r494, %r6, %r12;
	cvt.rzi.ftz.u32.f32 	%r495, %f774;
	shr.u32 	%r496, %r495, 13;
	sub.u32 	%r497, %r494, %r495;
	sub.u32 	%r498, %r12, %r495;
	xor.b32 	%r499, %r497, %r496;
	shl.b32 	%r500, %r499, 8;
	sub.u32 	%r501, %r498, %r499;
	sub.u32 	%r502, %r495, %r499;
	xor.b32 	%r503, %r500, %r501;
	shr.u32 	%r504, %r503, 13;
	sub.u32 	%r505, %r502, %r503;
	sub.u32 	%r506, %r499, %r503;
	xor.b32 	%r507, %r504, %r505;
	shr.u32 	%r508, %r507, 12;
	sub.u32 	%r509, %r506, %r507;
	xor.b32 	%r510, %r508, %r509;
	shl.b32 	%r511, %r510, 16;
	sub.u32 	%r512, %r503, %r507;
	sub.u32 	%r513, %r512, %r510;
	xor.b32 	%r514, %r511, %r513;
	.loc	20	144	0
	sub.u32 	%r515, %r507, %r510;
	sub.u32 	%r516, %r515, %r514;
	shr.u32 	%r517, %r514, 5;
	xor.b32 	%r518, %r516, %r517;
	.loc	20	145	0
	sub.u32 	%r519, %r510, %r514;
	sub.u32 	%r520, %r519, %r518;
	shr.u32 	%r521, %r518, 3;
	xor.b32 	%r287, %r520, %r521;
	.loc	20	146	0
	sub.u32 	%r522, %r514, %r518;
	sub.u32 	%r523, %r522, %r287;
	shl.b32 	%r524, %r287, 10;
	xor.b32 	%r291, %r523, %r524;
	.loc	20	147	0
	sub.u32 	%r525, %r518, %r287;
	sub.u32 	%r526, %r525, %r291;
	shr.u32 	%r527, %r291, 15;
	xor.b32 	%r295, %r526, %r527;
	.loc	34	54	0
	// Two linear-congruential values from %r295; bits 16..23 of each are
	// combined as (a8 << 7) ^ b8 to give v in 0..32767.
	mul.lo.u32 	%r296, %r295, 1103515245;
	mul.lo.u32 	%r297, %r295, -1029531031;
	add.u32 	%r298, %r296, 12345;
	sub.u32 	%r299, %r297, 740551042;
	shr.u32 	%r300, %r298, 16;
	shr.u32 	%r301, %r299, 16;
	and.b32 	%r302, %r300, 255;
	and.b32 	%r303, %r301, 255;
	shl.b32 	%r304, %r302, 7;
	xor.b32 	%r305, %r304, %r303;
	cvt.rn.f32.s32 	%f367, %r305;
	mov.f32 	%f789, 0f46fffe00;   	// 32767
	div.approx.ftz.f32 	%f369, %f367, %f789;
	mul.ftz.f32 	%f790, %f456, %f369;
	sub.ftz.f32 	%f791, %f790, %f773;
	// Same offset for all three channels.
	mov.f32 	%f366, %f791;
	mov.f32 	%f361, %f791;
	mov.f32 	%f356, %f791;
// $Lt_50_229634: add the offsets %f356/%f361/%f366 to %f7/%f6/%f5.
// If paraminFilter1+12 compares equal to 1, %f5, %f6, %f7 and %f9 are then
// clamped to [0, 1]; otherwise control goes to $Lt_50_10754 unclamped.
$Lt_50_229634:
	.loc	34	57	0
	add.ftz.f32 	%f7, %f356, %f7;
	.loc	34	58	0
	add.ftz.f32 	%f6, %f361, %f6;
	.loc	34	59	0
	add.ftz.f32 	%f5, %f5, %f366;
	ld.param.f32 	%f792, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+12];
	mov.f32 	%f793, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p115, %f792, %f793;
	@!%p115 bra 	$Lt_50_10754;
	.loc	34	63	0
	// Clamp each channel with max(x, 0) followed by min(x, 1).
	mov.f32 	%f794, 0f00000000;   	// 0
	max.ftz.f32 	%f795, %f5, %f794;
	mov.f32 	%f796, 0f3f800000;   	// 1
	min.ftz.f32 	%f5, %f795, %f796;
	mov.f32 	%f797, 0f00000000;   	// 0
	max.ftz.f32 	%f798, %f6, %f797;
	mov.f32 	%f799, 0f3f800000;   	// 1
	min.ftz.f32 	%f6, %f798, %f799;
	mov.f32 	%f800, 0f00000000;   	// 0
	max.ftz.f32 	%f801, %f7, %f800;
	mov.f32 	%f802, 0f3f800000;   	// 1
	min.ftz.f32 	%f7, %f801, %f802;
	mov.f32 	%f803, 0f00000000;   	// 0
	max.ftz.f32 	%f804, %f9, %f803;
	mov.f32 	%f805, 0f3f800000;   	// 1
	min.ftz.f32 	%f9, %f804, %f805;
	bra.uni 	$Lt_50_10754;
// Case $Lt_50_14338: gated affine transform in the 3x3-matrix colour space.
// Gate: runs only when float(%r6) < float(%r14) * (paraminFilter1+20);
// otherwise control jumps to $Lt_50_10754.
// Body (K = kRGB32f_To_601YPbPr, M = k601YPbPr_To_RGB32f, byte offsets):
//   y = K[+0]*%f7  + K[+4]*%f6  + K[+8]*%f5
//   u = K[+12]*%f7 + K[+16]*%f6 + K[+20]*%f5
//   v = K[+24]*%f7 + K[+28]*%f6 + K[+32]*%f5
//   y' = y * (paraminFilter1+8) + (paraminFilter1+4)
//   with c = paraminFilter1+12 and s = paraminFilter1+16:
//   u' = u*c - v*s,   v' = u*s + v*c
//   %f7 = M row +0 . (y',u',v');  %f6 = M row +12;  %f5 = M row +24
// NOTE(review): c/s are presumably pre-multiplied cos/sin terms (hue and
// saturation) and %r6/%r14 presumably x/width - confirm against the host code.
$Lt_50_14338:
	.loc	38	99	0
	cvt.rn.f32.s32 	%f806, %r6;
	cvt.rn.f32.s32 	%f807, %r14;
	ld.param.f32 	%f808, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+20];
	mul.ftz.f32 	%f809, %f807, %f808;
	setp.lt.ftz.f32 	%p116, %f806, %f809;
	@!%p116 bra 	$Lt_50_10754;
	.loc	22	267	0
	// Forward matrix: %f471 = y, %f824 = u, %f822 = v (interleaved by the compiler).
	ld.const.f32 	%f810, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f810, %f6;
	ld.const.f32 	%f811, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f812, %f811, %f6;
	ld.const.f32 	%f813, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f814, %f813, %f6;
	ld.const.f32 	%f815, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f815, %f7, %f467;
	ld.const.f32 	%f816, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f817, %f816, %f7, %f812;
	ld.const.f32 	%f818, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f819, %f818, %f7, %f814;
	ld.const.f32 	%f820, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f820, %f5, %f469;
	ld.const.f32 	%f821, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f822, %f821, %f5, %f817;
	ld.const.f32 	%f823, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f824, %f823, %f5, %f819;
	// %f827 = y', %f833 = u', %f834 = v'.
	ld.param.f32 	%f825, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+4];
	ld.param.f32 	%f826, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+8];
	fma.rn.ftz.f32 	%f827, %f471, %f826, %f825;
	ld.param.f32 	%f828, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+16];
	mul.ftz.f32 	%f829, %f822, %f828;
	ld.param.f32 	%f830, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+12];
	mul.ftz.f32 	%f831, %f822, %f830;
	mul.ftz.f32 	%f832, %f824, %f830;
	sub.ftz.f32 	%f833, %f832, %f829;
	fma.rn.ftz.f32 	%f834, %f824, %f828, %f831;
	// Inverse matrix, row at +12..+20 -> %f420 (moved to %f6 below).
	ld.const.f32 	%f835, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f836, %f835, %f833;
	ld.const.f32 	%f837, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f838, %f837, %f827, %f836;
	ld.const.f32 	%f839, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f420, %f839, %f834, %f838;
	.loc	22	268	0
	// Row at +0..+8 -> %f426 (moved to %f7 below).
	ld.const.f32 	%f840, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f841, %f840, %f833;
	ld.const.f32 	%f842, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f843, %f842, %f827, %f841;
	ld.const.f32 	%f844, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f426, %f844, %f834, %f843;
	.loc	35	56	0
	// Row at +24..+32 -> %f5.
	ld.const.f32 	%f845, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f846, %f845, %f833;
	ld.const.f32 	%f847, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f848, %f847, %f827, %f846;
	ld.const.f32 	%f849, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f849, %f834, %f848;
	mov.f32 	%f6, %f420;
	mov.f32 	%f7, %f426;
	bra.uni 	$Lt_50_10754;
// Case $Lt_50_14594: blend each colour register towards a two-point ramp
// indexed by the weighted colour sum.
//   y = K[+0]*%f7 + K[+4]*%f6 + K[+8]*%f5          (K = kRGB32f_To_601YPbPr)
//   ramp(a, b) = a + y * (b - a)
//   target for %f5 = ramp(paraminFilter1+4,  paraminFilter1+16)
//   target for %f6 = ramp(paraminFilter1+8,  paraminFilter1+20)
//   target for %f7 = ramp(paraminFilter1+12, paraminFilter1+24)
//   channel = channel + (paraminFilter1+28) * (target - channel)
// NOTE(review): presumably a two-colour tint with +28 as the blend amount - confirm.
$Lt_50_14594:
	.loc	36	46	0
	ld.const.f32 	%f850, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f467, %f850, %f6;
	ld.const.f32 	%f851, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f469, %f851, %f7, %f467;
	ld.const.f32 	%f852, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f471, %f852, %f5, %f469;
	// %f437 = target for %f6.
	ld.param.f32 	%f481, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+8];
	ld.param.f32 	%f853, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+20];
	sub.ftz.f32 	%f854, %f853, %f481;
	fma.rn.ftz.f32 	%f437, %f471, %f854, %f481;
	.loc	36	47	0
	// %f441 = target for %f7.
	ld.param.f32 	%f855, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+12];
	ld.param.f32 	%f856, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+24];
	sub.ftz.f32 	%f857, %f856, %f855;
	fma.rn.ftz.f32 	%f441, %f471, %f857, %f855;
	.loc	36	49	0
	// %f861 = target for %f5; %f858 = blend amount.
	ld.param.f32 	%f858, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+28];
	ld.param.f32 	%f456, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+4];
	ld.param.f32 	%f859, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter1+16];
	sub.ftz.f32 	%f860, %f859, %f456;
	fma.rn.ftz.f32 	%f861, %f471, %f860, %f456;
	sub.ftz.f32 	%f862, %f861, %f5;
	fma.rn.ftz.f32 	%f5, %f858, %f862, %f5;
	.loc	36	50	0
	sub.ftz.f32 	%f863, %f437, %f6;
	fma.rn.ftz.f32 	%f6, %f858, %f863, %f6;
	.loc	36	51	0
	sub.ftz.f32 	%f864, %f441, %f7;
	fma.rn.ftz.f32 	%f7, %f858, %f864, %f7;
	.loc	38	103	0
	bra.uni 	$Lt_50_10754;
// Case $Lt_50_14850: mirror an index, %r12 = %r13 - %r12 - 1.  There is no
// branch afterwards: execution falls through to the label that follows.
// NOTE(review): %r12/%r13 are defined outside this excerpt; presumably the y
// coordinate and the image height (vertical flip) - confirm.
$Lt_50_14850:
	.loc	37	41	0
	sub.s32 	%r528, %r13, %r12;
	sub.s32 	%r12, %r528, 1;
// $Lt_50_10754: join point of the paraminFilter1 cases and dispatcher for the
// second filter.  The 32-bit value at paraminFilter2+0 selects the handler
// through a chain of equality tests:
//    0 -> $Lt_50_5378    1 -> $Lt_50_5890    2 -> $Lt_50_6146    3 -> $Lt_50_6402
//    4 -> $Lt_50_6658    5 -> $Lt_50_6914    6 -> $Lt_50_7170    7 -> $Lt_50_7426
//    8 -> $Lt_50_7682    9 -> $Lt_50_7938   10 -> $Lt_50_8194   11 -> $Lt_50_8450
//   12 -> $Lt_50_8706   13 -> $Lt_50_8962   14 -> $Lt_50_9218   15 -> $Lt_50_9474
//   16 -> $Lt_50_9730
// Any other value skips straight to $Lt_50_5634.
$Lt_50_10754:
	.loc	38	54	0
	ld.param.u32 	%r529, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+0];
	mov.u32 	%r530, 0;
	setp.eq.s32 	%p117, %r529, %r530;
	@%p117 bra 	$Lt_50_5378;
	mov.u32 	%r531, 1;
	setp.eq.s32 	%p118, %r529, %r531;
	@%p118 bra 	$Lt_50_5890;
	mov.u32 	%r532, 2;
	setp.eq.s32 	%p119, %r529, %r532;
	@%p119 bra 	$Lt_50_6146;
	mov.u32 	%r533, 3;
	setp.eq.s32 	%p120, %r529, %r533;
	@%p120 bra 	$Lt_50_6402;
	mov.u32 	%r534, 4;
	setp.eq.s32 	%p121, %r529, %r534;
	@%p121 bra 	$Lt_50_6658;
	mov.u32 	%r535, 5;
	setp.eq.s32 	%p122, %r529, %r535;
	@%p122 bra 	$Lt_50_6914;
	mov.u32 	%r536, 6;
	setp.eq.s32 	%p123, %r529, %r536;
	@%p123 bra 	$Lt_50_7170;
	mov.u32 	%r537, 7;
	setp.eq.s32 	%p124, %r529, %r537;
	@%p124 bra 	$Lt_50_7426;
	mov.u32 	%r538, 8;
	setp.eq.s32 	%p125, %r529, %r538;
	@%p125 bra 	$Lt_50_7682;
	mov.u32 	%r539, 9;
	setp.eq.s32 	%p126, %r529, %r539;
	@%p126 bra 	$Lt_50_7938;
	mov.u32 	%r540, 10;
	setp.eq.s32 	%p127, %r529, %r540;
	@%p127 bra 	$Lt_50_8194;
	mov.u32 	%r541, 11;
	setp.eq.s32 	%p128, %r529, %r541;
	@%p128 bra 	$Lt_50_8450;
	mov.u32 	%r542, 12;
	setp.eq.s32 	%p129, %r529, %r542;
	@%p129 bra 	$Lt_50_8706;
	mov.u32 	%r543, 13;
	setp.eq.s32 	%p130, %r529, %r543;
	@%p130 bra 	$Lt_50_8962;
	mov.u32 	%r544, 14;
	setp.eq.s32 	%p131, %r529, %r544;
	@%p131 bra 	$Lt_50_9218;
	mov.u32 	%r545, 15;
	setp.eq.s32 	%p132, %r529, %r545;
	@%p132 bra 	$Lt_50_9474;
	mov.u32 	%r546, 16;
	setp.eq.s32 	%p133, %r529, %r546;
	@%p133 bra 	$Lt_50_9730;
	// No handler matched: leave the pixel as it is.
	bra.uni 	$Lt_50_5634;
// Case $Lt_50_5378 (paraminFilter2+0 == 0): rewrite %f9 from itself and a
// scalar v = paraminFilter2+4, controlled by three flags tested against 1.0:
//   A = (paraminFilter2+8  == 1)   use v instead of v * %f9
//   B = (paraminFilter2+12 == 1)   take 1 - x
//   C = (paraminFilter2+16 == 1)   also copy the result to %f5/%f6/%f7
// %f9 is first saturated to [0, 1].  Then:
//   C      : t = A ? v : v*%f9;  %f9 = B ? 1 - t : t;  %f5 = %f6 = %f7 = %f9
//   !C,  A : %f9 = B ? 1 - v : v
//   !C, !A : %f9 = (B ? 1 - %f9 : %f9) * v
// NOTE(review): %f9 is presumably the alpha channel - confirm.
$Lt_50_5378:
	.loc	21	42	0
	cvt.ftz.sat.f32.f32 	%f9, %f9;
	ld.param.f32 	%f865, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+8];
	mov.f32 	%f866, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p134, %f865, %f866;
	ld.param.f32 	%f867, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+12];
	mov.f32 	%f868, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p135, %f867, %f868;
	ld.param.f32 	%f869, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+4];
	ld.param.f32 	%f870, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+16];
	mov.f32 	%f871, 0f3f800000;   	// 1
	setp.eq.ftz.f32 	%p136, %f870, %f871;
	@!%p136 bra 	$Lt_50_231426;
	.loc	21	45	0
	// Flag C set.
	mov.f32 	%f872, %f869;
	mul.ftz.f32 	%f873, %f872, %f9;
	selp.f32 	%f874, %f872, %f873, %p134;
	.loc	21	53	0
	mov.f32 	%f875, 0f3f800000;   	// 1
	sub.ftz.f32 	%f876, %f875, %f874;
	selp.f32 	%f9, %f876, %f874, %p135;
	.loc	21	57	0
	mov.f32 	%f5, %f9;
	mov.f32 	%f6, %f9;
	mov.f32 	%f7, %f9;
	bra.uni 	$Lt_50_5634;
$Lt_50_231426:
	@!%p134 bra 	$Lt_50_231938;
	.loc	21	61	0
	// Flag C clear, flag A set: %f9 is replaced by v (or 1 - v).
	mov.f32 	%f872, %f869;
	mov.f32 	%f877, 0f3f800000;   	// 1
	sub.ftz.f32 	%f878, %f877, %f872;
	selp.f32 	%f9, %f878, %f872, %p135;
	bra.uni 	$Lt_50_5634;
$Lt_50_231938:
	.loc	21	69	0
	// Flags C and A clear: optionally invert %f9, then scale by v.
	mov.f32 	%f879, 0f3f800000;   	// 1
	sub.ftz.f32 	%f880, %f879, %f9;
	selp.f32 	%f881, %f880, %f9, %p135;
	.loc	21	73	0
	mul.ftz.f32 	%f9, %f881, %f869;
	bra.uni 	$Lt_50_5634;
// Case $Lt_50_5890 (paraminFilter2+0 == 1): replace the three colour
// registers by one weighted sum.
//   y   = K[+0]*%f7 + K[+4]*%f6 + K[+8]*%f5        (K = kRGB32f_To_601YPbPr)
//   %f7 = M[+0]*y,  %f6 = M[+12]*y,  %f5 = M[+24]*y  (M = k601YPbPr_To_RGB32f)
// NOTE(review): presumably a luma-only (black and white) conversion - confirm.
$Lt_50_5890:
	.loc	22	267	0
	ld.const.f32 	%f882, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f883, %f882, %f6;
	ld.const.f32 	%f884, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f885, %f884, %f7, %f883;
	ld.const.f32 	%f886, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f887, %f886, %f5, %f885;
	ld.const.f32 	%f888, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f889, %f888, %f887;
	.loc	22	268	0
	ld.const.f32 	%f890, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f891, %f890, %f887;
	.loc	23	44	0
	ld.const.f32 	%f892, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f892, %f887;
	mov.f32 	%f6, %f889;
	mov.f32 	%f7, %f891;
	.loc	38	61	0
	bra.uni 	$Lt_50_5634;
// Case $Lt_50_6146 (paraminFilter2+0 == 2): per-channel affine map or hard
// threshold on %f7, %f6, %f5, chosen by mode = paraminFilter2+4.
//   p = paraminFilter2+12,  lo = paraminFilter2+20
//   mode != 0 : x = (paraminFilter2+8) * (x - p) + lo
//   mode == 0 : x = (x > p) ? (paraminFilter2+24) : lo
$Lt_50_6146:
	.loc	38	63	0
	ld.param.f32 	%f893, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+20];
	ld.param.f32 	%f894, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+12];
	ld.param.f32 	%f895, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+4];
	mov.f32 	%f896, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p137, %f895, %f896;
	@!%p137 bra 	$Lt_50_232450;
	.loc	24	44	0
	// Affine path: gain * (x - p) + lo for each channel.
	ld.param.f32 	%f897, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+8];
	sub.ftz.f32 	%f898, %f7, %f894;
	fma.rn.ftz.f32 	%f7, %f897, %f898, %f893;
	.loc	24	45	0
	sub.ftz.f32 	%f899, %f6, %f894;
	fma.rn.ftz.f32 	%f6, %f897, %f899, %f893;
	.loc	24	46	0
	sub.ftz.f32 	%f900, %f5, %f894;
	fma.rn.ftz.f32 	%f5, %f897, %f900, %f893;
	bra.uni 	$Lt_50_5634;
$Lt_50_232450:
	.loc	24	50	0
	// Threshold path: each channel becomes one of two parameter values.
	ld.param.f32 	%f901, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+24];
	setp.gt.ftz.f32 	%p138, %f7, %f894;
	selp.f32 	%f7, %f901, %f893, %p138;
	.loc	24	51	0
	setp.gt.ftz.f32 	%p139, %f6, %f894;
	selp.f32 	%f6, %f901, %f893, %p139;
	.loc	24	52	0
	setp.gt.ftz.f32 	%p140, %f5, %f894;
	selp.f32 	%f5, %f901, %f893, %p140;
	bra.uni 	$Lt_50_5634;
// Case $Lt_50_6402 (paraminFilter2+0 == 3): independent per-channel gain.
//   %f5 *= paraminFilter2+4,  %f6 *= paraminFilter2+8,  %f7 *= paraminFilter2+12
$Lt_50_6402:
	.loc	25	47	0
	ld.param.f32 	%f902, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+4];
	mul.ftz.f32 	%f5, %f902, %f5;
	ld.param.f32 	%f903, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+8];
	mul.ftz.f32 	%f6, %f903, %f6;
	ld.param.f32 	%f904, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+12];
	mul.ftz.f32 	%f7, %f904, %f7;
	.loc	38	67	0
	bra.uni 	$Lt_50_5634;
// Case $Lt_50_6658 (paraminFilter2+0 == 4): conditional single-value
// conversion driven by the distance to a reference colour.
//   dist = max( |clamp01(%f5) - (paraminFilter2+12)|,
//               |clamp01(%f6) - (paraminFilter2+16)|,
//               |clamp01(%f7) - (paraminFilter2+20)|,
//               |clamp01(%f9) - %f9| )
//   near = (paraminFilter2+8 >= dist),   flag = (paraminFilter2+4 == 0)
// The pixel is converted only when exactly one of flag and near is true
// (xor); otherwise it is left unchanged.  The conversion writes
//   y = K[+0]*%f7 + K[+4]*%f6 + K[+8]*%f5 (K = kRGB32f_To_601YPbPr)
// scaled by k601YPbPr_To_RGB32f+0 / +12 / +24 into %f7 / %f6 / %f5.
// NOTE(review): the fourth term compares %f9 with its own clamped value, not
// with a parameter - confirm that this matches the intent of the source.
$Lt_50_6658:
	.loc	26	48	0
	ld.param.f32 	%f905, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+4];
	mov.f32 	%f906, 0f00000000;   	// 0
	setp.eq.ftz.f32 	%p141, %f905, %f906;
	ld.param.f32 	%f907, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+8];
	mov.f32 	%f908, 0f00000000;   	// 0
	max.ftz.f32 	%f909, %f5, %f908;
	mov.f32 	%f910, 0f3f800000;   	// 1
	min.ftz.f32 	%f911, %f909, %f910;
	ld.param.f32 	%f912, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+12];
	sub.ftz.f32 	%f913, %f911, %f912;
	abs.ftz.f32 	%f914, %f913;
	mov.f32 	%f915, 0f00000000;   	// 0
	max.ftz.f32 	%f916, %f6, %f915;
	mov.f32 	%f917, 0f3f800000;   	// 1
	min.ftz.f32 	%f918, %f916, %f917;
	ld.param.f32 	%f919, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+16];
	sub.ftz.f32 	%f920, %f918, %f919;
	abs.ftz.f32 	%f921, %f920;
	mov.f32 	%f922, 0f00000000;   	// 0
	max.ftz.f32 	%f923, %f7, %f922;
	mov.f32 	%f924, 0f3f800000;   	// 1
	min.ftz.f32 	%f925, %f923, %f924;
	ld.param.f32 	%f926, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+20];
	sub.ftz.f32 	%f927, %f925, %f926;
	abs.ftz.f32 	%f928, %f927;
	mov.f32 	%f929, 0f00000000;   	// 0
	max.ftz.f32 	%f930, %f9, %f929;
	mov.f32 	%f931, 0f3f800000;   	// 1
	min.ftz.f32 	%f932, %f930, %f931;
	sub.ftz.f32 	%f933, %f932, %f9;
	abs.ftz.f32 	%f934, %f933;
	// %f937 = largest of the four absolute differences.
	max.ftz.f32 	%f935, %f928, %f934;
	max.ftz.f32 	%f936, %f921, %f935;
	max.ftz.f32 	%f937, %f914, %f936;
	setp.ge.ftz.f32 	%p142, %f907, %f937;
	xor.pred 	%p143, %p141, %p142;
	@!%p143 bra 	$Lt_50_5634;
	.loc	22	267	0
	ld.const.f32 	%f938, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f883, %f938, %f6;
	ld.const.f32 	%f939, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f885, %f939, %f7, %f883;
	ld.const.f32 	%f940, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f887, %f940, %f5, %f885;
	ld.const.f32 	%f941, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f942, %f941, %f887;
	.loc	22	268	0
	ld.const.f32 	%f943, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f944, %f943, %f887;
	.loc	23	44	0
	ld.const.f32 	%f945, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f945, %f887;
	mov.f32 	%f6, %f942;
	mov.f32 	%f7, %f944;
	bra.uni 	$Lt_50_5634;
$Lt_50_6914:
	// Distance test followed by replacement with a parameter-supplied triple.
	//   1. %f976 = max over |clamp(%f5,0,1) - param[+12]|,
	//      |clamp(%f6,0,1) - param[+16]|, |clamp(%f7,0,1) - param[+20]| and
	//      |clamp(%f9,0,1) - %f9|.
	//   2. If param[+8] >= %f976 is false, nothing is changed.
	//   3. Otherwise the replacement is (param[+24], param[+28], param[+32]);
	//      when param[+4] == 0 each of those is first multiplied by %f887,
	//      the weighted sum of %f7/%f6/%f5 with kRGB32f_To_601YPbPr[+0,+4,+8].
	//   4. The result is written to %f5, %f6, %f7 in that order.
	.loc	27	48	0
	ld.param.f32 	%f946, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+8];
	mov.f32 	%f947, 0f00000000;   	// 0
	max.ftz.f32 	%f948, %f5, %f947;
	mov.f32 	%f949, 0f3f800000;   	// 1
	min.ftz.f32 	%f950, %f948, %f949;
	ld.param.f32 	%f951, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+12];
	sub.ftz.f32 	%f952, %f950, %f951;
	abs.ftz.f32 	%f953, %f952;
	mov.f32 	%f954, 0f00000000;   	// 0
	max.ftz.f32 	%f955, %f6, %f954;
	mov.f32 	%f956, 0f3f800000;   	// 1
	min.ftz.f32 	%f957, %f955, %f956;
	ld.param.f32 	%f958, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+16];
	sub.ftz.f32 	%f959, %f957, %f958;
	abs.ftz.f32 	%f960, %f959;
	mov.f32 	%f961, 0f00000000;   	// 0
	max.ftz.f32 	%f962, %f7, %f961;
	mov.f32 	%f963, 0f3f800000;   	// 1
	min.ftz.f32 	%f964, %f962, %f963;
	ld.param.f32 	%f965, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+20];
	sub.ftz.f32 	%f966, %f964, %f965;
	abs.ftz.f32 	%f967, %f966;
	mov.f32 	%f968, 0f00000000;   	// 0
	max.ftz.f32 	%f969, %f9, %f968;
	mov.f32 	%f970, 0f3f800000;   	// 1
	min.ftz.f32 	%f971, %f969, %f970;
	sub.ftz.f32 	%f972, %f971, %f9;
	abs.ftz.f32 	%f973, %f972;
	max.ftz.f32 	%f974, %f967, %f973;
	max.ftz.f32 	%f975, %f960, %f974;
	max.ftz.f32 	%f976, %f953, %f975;
	setp.ge.ftz.f32 	%p144, %f946, %f976;
	@!%p144 bra 	$Lt_50_5634;
	.loc	27	51	0
	// Default replacement: the raw parameter triple.
	ld.param.f32 	%f977, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+24];
	mov.f32 	%f126, %f977;
	ld.param.f32 	%f978, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+28];
	mov.f32 	%f128, %f978;
	ld.param.f32 	%f979, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+32];
	mov.f32 	%f130, %f979;
	ld.param.f32 	%f980, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+4];
	mov.f32 	%f981, 0f00000000;   	// 0
	setp.eq.ftz.f32 	%p145, %f980, %f981;
	@!%p145 bra 	$Lt_50_233730;
	.loc	27	60	0
	// param[+4] == 0: scale the triple by the weighted sum %f887.
	ld.const.f32 	%f982, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f883, %f982, %f6;
	ld.const.f32 	%f983, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f885, %f983, %f7, %f883;
	ld.const.f32 	%f984, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f887, %f984, %f5, %f885;
	mul.ftz.f32 	%f126, %f887, %f977;
	.loc	27	61	0
	mul.ftz.f32 	%f128, %f887, %f978;
	.loc	27	62	0
	mul.ftz.f32 	%f130, %f887, %f979;
$Lt_50_233730:
	.loc	27	65	0
	mov.f32 	%f5, %f126;
	mov.f32 	%f6, %f128;
	mov.f32 	%f7, %f130;
	bra.uni 	$Lt_50_5634;
$Lt_50_7170:
	// Scales %f9 according to four integer distances.
	//   d0 = %r6, d1 = %r12, d2 = %r13 - %r12, d3 = %r14 - %r6
	//   w  = param[+4]
	//   m  = max over i of trunc(w - float(d_i))   (round-toward-zero cvt)
	//   if m > 0:  %f9 *= (w - float(m)) / w       (approximate divide)
	// NOTE(review): %r6/%r12 look like pixel coordinates and %r14/%r13 like
	// the matching extents, which would make this an edge feather - confirm.
	.loc	28	47	0
	sub.s32 	%r547, %r13, %r12;
	sub.s32 	%r548, %r14, %r6;
	cvt.rn.f32.s32 	%f985, %r6;
	cvt.rn.f32.s32 	%f986, %r12;
	cvt.rn.f32.s32 	%f138, %r547;
	cvt.rn.f32.s32 	%f139, %r548;
	ld.param.f32 	%f872, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+4];
	sub.ftz.f32 	%f987, %f872, %f985;
	sub.ftz.f32 	%f988, %f872, %f986;
	sub.ftz.f32 	%f989, %f872, %f138;
	sub.ftz.f32 	%f990, %f872, %f139;
	cvt.rzi.ftz.s32.f32 	%r549, %f987;
	cvt.rzi.ftz.s32.f32 	%r550, %f988;
	cvt.rzi.ftz.s32.f32 	%r551, %f989;
	cvt.rzi.ftz.s32.f32 	%r552, %f990;
	max.s32 	%r553, %r549, %r550;
	max.s32 	%r554, %r552, %r553;
	max.s32 	%r555, %r551, %r554;
	mov.u32 	%r556, 0;
	setp.le.s32 	%p146, %r555, %r556;
	// m <= 0: %f9 is left unchanged.
	@%p146 bra 	$Lt_50_5634;
	.loc	28	51	0
	cvt.rn.f32.s32 	%f991, %r555;
	sub.ftz.f32 	%f992, %f872, %f991;
	div.approx.ftz.f32 	%f993, %f992, %f872;
	mul.ftz.f32 	%f9, %f9, %f993;
	bra.uni 	$Lt_50_5634;
$Lt_50_7426:
	// Builds a 0..1 weight %f164 from the weighted sum %f887 and writes it out
	// as a grey triple.
	//   Y    = %f887 = c[+0]*%f7 + c[+4]*%f6 + c[+8]*%f5 (kRGB32f_To_601YPbPr)
	//   lo   = param[+8]  (%f897),  hi = param[+12] (%f995)
	//   s    = param[+16] * (hi - lo)                       (%f999)
	//   outLo = clamp(lo - s, 0, 1) (%f1010), outHi = clamp(hi + s, 0, 1) (%f1008)
	//   inLo  = clamp(lo + s, 0, 1) (%f1015), inHi  = clamp(hi - s, 0, 1) (%f1020)
	//   weight = 0                    if Y < outLo or Y >= outHi
	//          = 1                    if inLo <= Y < inHi
	//          = (Y - outLo) / (2*s)  if Y < inLo
	//          = (outHi - Y) / (2*s)  otherwise
	//   If param[+4] != 0 the weight becomes 1 - weight.
	//   %f5/%f6/%f7 = weight * k601YPbPr_To_RGB32f[+24], [+12], [+0].
	.loc	30	50	0
	ld.const.f32 	%f994, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f883, %f994, %f6;
	ld.param.f32 	%f995, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+12];
	ld.param.f32 	%f897, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+8];
	sub.ftz.f32 	%f996, %f995, %f897;
	ld.const.f32 	%f997, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f885, %f997, %f7, %f883;
	ld.param.f32 	%f998, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+16];
	mul.ftz.f32 	%f999, %f998, %f996;
	ld.const.f32 	%f1000, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f887, %f1000, %f5, %f885;
	add.ftz.f32 	%f1001, %f999, %f995;
	sub.ftz.f32 	%f1002, %f897, %f999;
	mov.f32 	%f1003, 0f00000000;  	// 0
	max.ftz.f32 	%f1004, %f1001, %f1003;
	mov.f32 	%f1005, 0f00000000;  	// 0
	max.ftz.f32 	%f1006, %f1002, %f1005;
	mov.f32 	%f1007, 0f3f800000;  	// 1
	min.ftz.f32 	%f1008, %f1004, %f1007;
	mov.f32 	%f1009, 0f3f800000;  	// 1
	min.ftz.f32 	%f1010, %f1006, %f1009;
	// set.* yields all-ones/zero; neg.s32 turns that into 1/0 so the two
	// comparisons can be combined with a bitwise OR.
	set.gt.ftz.u32.f32 	%r557, %f1010, %f887;
	neg.s32 	%r558, %r557;
	set.le.ftz.u32.f32 	%r559, %f1008, %f887;
	neg.s32 	%r560, %r559;
	or.b32 	%r561, %r558, %r560;
	mov.u32 	%r562, 0;
	setp.eq.s32 	%p147, %r561, %r562;
	@%p147 bra 	$Lt_50_235010;
	// Outside the outer band.
	mov.f32 	%f164, 0f00000000;   	// 0
	bra.uni 	$Lt_50_235778;
$Lt_50_235010:
	add.ftz.f32 	%f1011, %f999, %f897;
	mov.f32 	%f1012, 0f00000000;  	// 0
	max.ftz.f32 	%f1013, %f1011, %f1012;
	mov.f32 	%f1014, 0f3f800000;  	// 1
	min.ftz.f32 	%f1015, %f1013, %f1014;
	set.le.ftz.u32.f32 	%r563, %f1015, %f887;
	neg.s32 	%r564, %r563;
	sub.ftz.f32 	%f1016, %f995, %f999;
	mov.f32 	%f1017, 0f00000000;  	// 0
	max.ftz.f32 	%f1018, %f1016, %f1017;
	mov.f32 	%f1019, 0f3f800000;  	// 1
	min.ftz.f32 	%f1020, %f1018, %f1019;
	set.lt.ftz.u32.f32 	%r565, %f887, %f1020;
	neg.s32 	%r566, %r565;
	and.b32 	%r567, %r564, %r566;
	mov.u32 	%r568, 0;
	setp.eq.s32 	%p148, %r567, %r568;
	@%p148 bra 	$Lt_50_235522;
	// Inside the inner band.
	mov.f32 	%f164, 0f3f800000;   	// 1
	bra.uni 	$Lt_50_235778;
$Lt_50_235522:
	// Transition zones: linear ramp over a width of 2*s.
	add.ftz.f32 	%f1021, %f999, %f999;
	setp.gt.ftz.f32 	%p149, %f1015, %f887;
	@!%p149 bra 	$Lt_50_236034;
	.loc	30	62	0
	sub.ftz.f32 	%f1022, %f887, %f1010;
	div.approx.ftz.f32 	%f164, %f1022, %f1021;
	bra.uni 	$Lt_50_235778;
$Lt_50_236034:
	.loc	30	66	0
	sub.ftz.f32 	%f1023, %f1008, %f887;
	div.approx.ftz.f32 	%f164, %f1023, %f1021;
$Lt_50_235778:
$Lt_50_235266:
$Lt_50_234754:
	.loc	30	69	0
	// Optional inversion: setp.neu is true when param[+4] != 0 or is NaN.
	mov.f32 	%f1024, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1025, %f1024, %f164;
	ld.param.f32 	%f1026, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+4];
	mov.f32 	%f1027, 0f00000000;  	// 0
	setp.neu.ftz.f32 	%p150, %f1026, %f1027;
	selp.f32 	%f164, %f1025, %f164, %p150;
	.loc	30	77	0
	ld.const.f32 	%f1028, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f1028, %f164;
	ld.const.f32 	%f1029, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f1029, %f164;
	ld.const.f32 	%f1030, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f1030, %f164;
	.loc	38	79	0
	bra.uni 	$Lt_50_5634;
$Lt_50_7682:
	// Gated transform of %f5/%f6/%f7 through kRGB32f_To_601YPbPr, a remap of
	// the first component, a rotation/blend/scale of the other two, and back
	// through k601YPbPr_To_RGB32f.
	//
	// Gate (g = param[+44]); the body runs when either
	//   g >= 0 and float(%r6)  <  float(%r14) * g,   or
	//   g <  0 and float(%r12) < -(float(%r13) * g).
	// Otherwise control goes straight to $Lt_50_5634.
	// NOTE(review): looks like a split-view gate on the pixel position -
	// confirm what %r6/%r12/%r13/%r14 hold.
	.loc	38	80	0
	ld.param.f32 	%f1031, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+44];
	mov.f32 	%f1032, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p151, %f1031, %f1032;
	@!%p151 bra 	$L_50_204546;
	cvt.rn.f32.s32 	%f1033, %r6;
	cvt.rn.f32.s32 	%f1034, %r14;
	mul.ftz.f32 	%f1035, %f1034, %f1031;
	setp.lt.ftz.f32 	%p152, %f1033, %f1035;
	@%p152 bra 	$L_50_204290;
$L_50_204546:
	mov.f32 	%f1036, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p153, %f1031, %f1036;
	@!%p153 bra 	$Lt_50_5634;
	cvt.rn.f32.s32 	%f1037, %r12;
	cvt.rn.f32.s32 	%f1038, %r13;
	mul.ftz.f32 	%f1039, %f1038, %f1031;
	neg.ftz.f32 	%f1040, %f1039;
	setp.lt.ftz.f32 	%p154, %f1037, %f1040;
	@!%p154 bra 	$Lt_50_5634;
$L_50_204290:
	.loc	31	47	0
	// Y = %f887 = c[+0]*%f7 + c[+4]*%f6 + c[+8]*%f5; %f200 starts as Y.
	ld.const.f32 	%f1041, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1042, %f1041, %f6;
	ld.const.f32 	%f1043, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1044, %f1043, %f7, %f1042;
	ld.const.f32 	%f1045, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f887, %f1045, %f5, %f1044;
	mov.f32 	%f200, %f887;
	// inRange = param[+4] - param[+8] (%f1047);
	// outRange = param[+12] - param[+16] (%f1050).
	ld.param.f32 	%f897, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+8];
	ld.param.f32 	%f1046, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+4];
	sub.ftz.f32 	%f1047, %f1046, %f897;
	ld.param.f32 	%f1048, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+16];
	ld.param.f32 	%f1049, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+12];
	sub.ftz.f32 	%f1050, %f1049, %f1048;
	// The pseudo-random offset below is skipped only when both ranges equal 1.
	mov.f32 	%f1051, 0f3f800000;  	// 1
	set.neu.ftz.u32.f32 	%r569, %f1050, %f1051;
	neg.s32 	%r570, %r569;
	mov.f32 	%f1052, 0f3f800000;  	// 1
	set.neu.ftz.u32.f32 	%r571, %f1047, %f1052;
	neg.s32 	%r572, %r571;
	or.b32 	%r573, %r570, %r572;
	mov.u32 	%r574, 0;
	setp.eq.s32 	%p155, %r573, %r574;
	@%p155 bra 	$Lt_50_236290;
	.loc	20	143	0
	// Integer mixing of %r5 and %r11 (subtract / xor / shift rounds); the
	// final mixed word is %r114.
	mov.s32 	%r575, 1;
	sub.s32 	%r79, %r575, %r5;
	shr.u32 	%r80, %r11, 13;
	sub.u32 	%r81, %r5, %r11;
	sub.u32 	%r82, %r79, %r11;
	xor.b32 	%r83, %r82, %r80;
	shl.b32 	%r84, %r83, 8;
	sub.u32 	%r85, %r81, %r83;
	sub.u32 	%r86, %r11, %r83;
	xor.b32 	%r87, %r85, %r84;
	shr.u32 	%r88, %r87, 13;
	sub.u32 	%r89, %r86, %r87;
	sub.u32 	%r90, %r83, %r87;
	xor.b32 	%r91, %r89, %r88;
	shr.u32 	%r92, %r91, 12;
	sub.u32 	%r93, %r90, %r91;
	xor.b32 	%r94, %r93, %r92;
	sub.u32 	%r576, %r87, %r91;
	sub.u32 	%r96, %r576, %r94;
	shl.b32 	%r577, %r94, 16;
	xor.b32 	%r98, %r96, %r577;
	.loc	20	144	0
	sub.u32 	%r99, %r91, %r94;
	sub.u32 	%r100, %r99, %r98;
	shr.u32 	%r578, %r98, 5;
	xor.b32 	%r102, %r100, %r578;
	.loc	20	145	0
	sub.u32 	%r103, %r94, %r98;
	sub.u32 	%r104, %r103, %r102;
	shr.u32 	%r579, %r102, 3;
	xor.b32 	%r106, %r104, %r579;
	.loc	20	146	0
	sub.u32 	%r107, %r98, %r102;
	sub.u32 	%r108, %r107, %r106;
	shl.b32 	%r580, %r106, 10;
	xor.b32 	%r110, %r108, %r580;
	.loc	20	147	0
	sub.u32 	%r111, %r102, %r106;
	sub.u32 	%r112, %r111, %r110;
	shr.u32 	%r581, %r110, 15;
	xor.b32 	%r114, %r112, %r581;
	.loc	31	57	0
	// Two values are derived from %r114: x1 = %r114*1103515245 + 12345 and
	// x2 = %r114*(-1029531031) - 740551042, which (mod 2^32) equals applying
	// the same step twice. Bits 16..23 of x1 shifted left by 7 are XORed with
	// bits 16..23 of x2, then mapped to about [-1, 1] via /16383.5 - 1.
	// %f200 = Y + 0.00254902 * that value.
	mov.f32 	%f1053, 0f3b270d73;  	// 0.00254902
	mul.lo.u32 	%r582, %r114, 1103515245;
	add.u32 	%r583, %r582, 12345;
	shr.u32 	%r584, %r583, 16;
	and.b32 	%r585, %r584, 255;
	shl.b32 	%r586, %r585, 7;
	mul.lo.u32 	%r587, %r114, -1029531031;
	sub.u32 	%r588, %r587, 740551042;
	shr.u32 	%r589, %r588, 16;
	and.b32 	%r590, %r589, 255;
	xor.b32 	%r591, %r586, %r590;
	cvt.rn.f32.s32 	%f1054, %r591;
	mov.f32 	%f1055, 0f467ffe00;  	// 16383.5
	div.approx.ftz.f32 	%f1056, %f1054, %f1055;
	mov.f32 	%f1057, 0fbf800000;  	// -1
	add.ftz.f32 	%f1058, %f1056, %f1057;
	fma.rn.ftz.f32 	%f200, %f1053, %f1058, %f887;
$Lt_50_236290:
	// Remap %f200 (t = (%f200 - param[+8]) / inRange, gamma = param[+20]):
	//   gamma != 1 and %f200 < param[+8]: %f200 = param[+16]
	//   gamma != 1 otherwise:             %f200 = outRange * t^gamma + param[+16]
	//   gamma == 1:                       %f200 = outRange * t + param[+16]
	// The power is evaluated as ex2(gamma * lg2(t)) with approximate ops.
	sub.ftz.f32 	%f1059, %f200, %f897;
	ld.param.f32 	%f1060, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+20];
	mov.f32 	%f1061, 0f3f800000;  	// 1
	setp.neu.ftz.f32 	%p156, %f1060, %f1061;
	@!%p156 bra 	$Lt_50_237058;
	mov.f32 	%f1062, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p157, %f1059, %f1062;
	@!%p157 bra 	$Lt_50_237570;
	.loc	31	66	0
	mov.f32 	%f200, %f1048;
	bra.uni 	$Lt_50_236802;
$Lt_50_237570:
	.loc	31	70	0
	rcp.approx.ftz.f32 	%f1063, %f1047;
	mul.ftz.f32 	%f1064, %f1059, %f1063;
	lg2.approx.ftz.f32 	%f1065, %f1064;
	mul.ftz.f32 	%f1066, %f1060, %f1065;
	ex2.approx.ftz.f32 	%f1067, %f1066;
	fma.rn.ftz.f32 	%f200, %f1050, %f1067, %f1048;
	bra.uni 	$Lt_50_236802;
$Lt_50_237058:
	.loc	31	77	0
	rcp.approx.ftz.f32 	%f1068, %f1047;
	mul.ftz.f32 	%f1069, %f1059, %f1068;
	fma.rn.ftz.f32 	%f200, %f1050, %f1069, %f1048;
$Lt_50_236802:
	.loc	22	267	0
	// Second and third components of the ORIGINAL %f7/%f6/%f5:
	//   u = %f1084 = c[+12]*%f7 + c[+16]*%f6 + c[+20]*%f5
	//   v = %f1082 = c[+24]*%f7 + c[+28]*%f6 + c[+32]*%f5
	ld.const.f32 	%f1070, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f1071, %f1070, %f6;
	ld.const.f32 	%f1072, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f1073, %f1072, %f6;
	ld.param.f32 	%f1074, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+28];
	sin.approx.ftz.f32 	%f1075, %f1074;
	cos.approx.ftz.f32 	%f1076, %f1074;
	ld.const.f32 	%f1077, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f1078, %f1077, %f7, %f1071;
	ld.const.f32 	%f1079, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f1080, %f1079, %f7, %f1073;
	ld.const.f32 	%f1081, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f1082, %f1081, %f5, %f1078;
	ld.const.f32 	%f1083, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f1084, %f1083, %f5, %f1080;
	// Rotate (u, v) by the angle param[+28]:
	//   u' = u*cos - v*sin (%f1088),  v' = u*sin + v*cos (%f1089)
	mul.ftz.f32 	%f1085, %f1075, %f1082;
	mul.ftz.f32 	%f1086, %f1076, %f1082;
	mul.ftz.f32 	%f1087, %f1084, %f1076;
	sub.ftz.f32 	%f1088, %f1087, %f1085;
	fma.rn.ftz.f32 	%f1089, %f1084, %f1075, %f1086;
	// Blend (u', v') toward (param[+32], param[+36]) by param[+40], then
	// scale both by param[+24].
	ld.param.f32 	%f1090, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+32];
	sub.ftz.f32 	%f1091, %f1090, %f1088;
	ld.param.f32 	%f1092, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+36];
	sub.ftz.f32 	%f1093, %f1092, %f1089;
	ld.param.f32 	%f1094, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+40];
	fma.rn.ftz.f32 	%f1095, %f1094, %f1091, %f1088;
	fma.rn.ftz.f32 	%f1096, %f1094, %f1093, %f1089;
	ld.param.f32 	%f1097, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+24];
	mul.ftz.f32 	%f1098, %f1095, %f1097;
	mul.ftz.f32 	%f1099, %f1096, %f1097;
	// Multiply (%f200, %f1098, %f1099) by k601YPbPr_To_RGB32f:
	// offsets +12..+20 give %f6, +0..+8 give %f7, +24..+32 give %f5.
	ld.const.f32 	%f1100, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f1101, %f1100, %f1098;
	ld.const.f32 	%f1102, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f1103, %f1102, %f200, %f1101;
	ld.const.f32 	%f1104, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f260, %f1104, %f1099, %f1103;
	.loc	22	268	0
	ld.const.f32 	%f1105, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f1106, %f1105, %f1098;
	ld.const.f32 	%f1107, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f1108, %f1107, %f200, %f1106;
	ld.const.f32 	%f1109, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f266, %f1109, %f1099, %f1108;
	.loc	31	92	0
	ld.const.f32 	%f1110, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f1111, %f1110, %f1098;
	ld.const.f32 	%f1112, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f1113, %f1112, %f200, %f1111;
	ld.const.f32 	%f1114, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f1114, %f1099, %f1113;
	mov.f32 	%f6, %f260;
	mov.f32 	%f7, %f266;
	bra.uni 	$Lt_50_5634;
$Lt_50_7938:
	// Gated remap of the weighted sum, written out as a grey triple.
	//
	// Gate (g = param[+44]); the body runs when either
	//   g >= 0 and float(%r6)  <  float(%r14) * g,   or
	//   g <  0 and float(%r12) < -(float(%r13) * g).
	// Otherwise control goes straight to $Lt_50_5634.
	.loc	38	83	0
	ld.param.f32 	%f1115, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+44];
	mov.f32 	%f1116, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p158, %f1115, %f1116;
	@!%p158 bra 	$L_50_205314;
	cvt.rn.f32.s32 	%f1117, %r6;
	cvt.rn.f32.s32 	%f1118, %r14;
	mul.ftz.f32 	%f1119, %f1118, %f1115;
	setp.lt.ftz.f32 	%p159, %f1117, %f1119;
	@%p159 bra 	$L_50_205058;
$L_50_205314:
	mov.f32 	%f1120, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p160, %f1115, %f1120;
	@!%p160 bra 	$Lt_50_5634;
	cvt.rn.f32.s32 	%f1121, %r12;
	cvt.rn.f32.s32 	%f1122, %r13;
	mul.ftz.f32 	%f1123, %f1122, %f1115;
	neg.ftz.f32 	%f1124, %f1123;
	setp.lt.ftz.f32 	%p161, %f1121, %f1124;
	@!%p161 bra 	$Lt_50_5634;
$L_50_205058:
	.loc	31	110	0
	// Y = %f887 = c[+0]*%f7 + c[+4]*%f6 + c[+8]*%f5 (kRGB32f_To_601YPbPr)
	// inRange  = param[+4]  - param[+8]  (%f1127)
	// outRange = param[+12] - param[+16] (%f1130)
	// d = Y - param[+8] (%f1133), gamma = param[+20]
	ld.const.f32 	%f1125, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f883, %f1125, %f6;
	ld.param.f32 	%f897, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+8];
	ld.param.f32 	%f1126, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+4];
	sub.ftz.f32 	%f1127, %f1126, %f897;
	ld.param.f32 	%f1128, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+16];
	ld.param.f32 	%f1129, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+12];
	sub.ftz.f32 	%f1130, %f1129, %f1128;
	ld.const.f32 	%f1131, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f885, %f1131, %f7, %f883;
	ld.const.f32 	%f1132, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f887, %f1132, %f5, %f885;
	sub.ftz.f32 	%f1133, %f887, %f897;
	ld.param.f32 	%f1134, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+20];
	mov.f32 	%f1135, 0f3f800000;  	// 1
	setp.neu.ftz.f32 	%p162, %f1134, %f1135;
	@!%p162 bra 	$Lt_50_238082;
	.loc	42	523	0
	// gamma != 1: %f299 = outRange * (max(d, 0) / inRange)^gamma + param[+16],
	// with the power evaluated as ex2(gamma * lg2(x)).
	mov.f32 	%f1136, 0f00000000;  	// 0
	max.ftz.f32 	%f1137, %f1133, %f1136;
	div.approx.ftz.f32 	%f1138, %f1137, %f1127;
	lg2.approx.ftz.f32 	%f1139, %f1138;
	mul.ftz.f32 	%f1140, %f1134, %f1139;
	ex2.approx.ftz.f32 	%f298, %f1140;
	.loc	31	120	0
	fma.rn.ftz.f32 	%f299, %f1130, %f298, %f1128;
	bra.uni 	$Lt_50_237826;
$Lt_50_238082:
	.loc	31	129	0
	// gamma == 1: %f299 = outRange * (d / inRange) + param[+16] (d is not
	// clamped on this path).
	div.approx.ftz.f32 	%f1141, %f1133, %f1127;
	fma.rn.ftz.f32 	%f299, %f1130, %f1141, %f1128;
$Lt_50_237826:
	.loc	31	135	0
	// %f5/%f6/%f7 = %f299 * k601YPbPr_To_RGB32f[+24], [+12], [+0].
	ld.const.f32 	%f1142, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f1142, %f299;
	ld.const.f32 	%f1143, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f1143, %f299;
	ld.const.f32 	%f1144, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f1144, %f299;
	bra.uni 	$Lt_50_5634;
$Lt_50_8194:
	// Gated constant fill.
	//
	// Gate (g = param[+44]); the body runs when either
	//   g >= 0 and float(%r6)  <  float(%r14) * g,   or
	//   g <  0 and float(%r12) < -(float(%r13) * g).
	// When it runs, %f5/%f6/%f7 are loaded directly from
	// k601YPbPr_To_RGB32f[+24], [+12], [+0]; otherwise they are unchanged.
	.loc	38	86	0
	ld.param.f32 	%f1145, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+44];
	mov.f32 	%f1146, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p163, %f1145, %f1146;
	@!%p163 bra 	$L_50_206082;
	cvt.rn.f32.s32 	%f1147, %r6;
	cvt.rn.f32.s32 	%f1148, %r14;
	mul.ftz.f32 	%f1149, %f1148, %f1145;
	setp.lt.ftz.f32 	%p164, %f1147, %f1149;
	@%p164 bra 	$L_50_205826;
$L_50_206082:
	mov.f32 	%f1150, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p165, %f1145, %f1150;
	@!%p165 bra 	$Lt_50_5634;
	cvt.rn.f32.s32 	%f1151, %r12;
	cvt.rn.f32.s32 	%f1152, %r13;
	mul.ftz.f32 	%f1153, %f1152, %f1145;
	neg.ftz.f32 	%f1154, %f1153;
	setp.lt.ftz.f32 	%p166, %f1151, %f1154;
	@!%p166 bra 	$Lt_50_5634;
$L_50_205826:
	.loc	31	160	0
	ld.const.f32 	%f5, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f6, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f7, [k601YPbPr_To_RGB32f+0];
	bra.uni 	$Lt_50_5634;
$Lt_50_8450:
	// Sign-preserving power applied to %f5, %f6, %f7 and %f9.
	// With p = param[+4] (copied into %f872), each register x becomes
	//   x >= 0 :  ex2(p * lg2(x))
	//   else   : -ex2(p * lg2(-x))
	// using the approximate lg2/ex2 instructions. The "else" path is also
	// taken when x is NaN, because setp.ge is false for unordered operands.
	.loc	32	42	0
	ld.param.f32 	%f1155, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+4];
	mov.f32 	%f1156, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p167, %f5, %f1156;
	@!%p167 bra 	$Lt_50_238594;
	.loc	22	292	0
	// %f5 -> %f1159
	mov.f32 	%f872, %f1155;
	lg2.approx.ftz.f32 	%f1157, %f5;
	mul.ftz.f32 	%f1158, %f872, %f1157;
	ex2.approx.ftz.f32 	%f1159, %f1158;
	bra.uni 	$Lt_50_238338;
$Lt_50_238594:
	mov.f32 	%f872, %f1155;
	neg.ftz.f32 	%f1160, %f5;
	lg2.approx.ftz.f32 	%f1161, %f1160;
	mul.ftz.f32 	%f1162, %f872, %f1161;
	ex2.approx.ftz.f32 	%f1163, %f1162;
	neg.ftz.f32 	%f1159, %f1163;
$Lt_50_238338:
	mov.f32 	%f1164, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p168, %f6, %f1164;
	@!%p168 bra 	$Lt_50_239106;
	.loc	22	293	0
	// %f6 -> %f1167
	lg2.approx.ftz.f32 	%f1165, %f6;
	mul.ftz.f32 	%f1166, %f872, %f1165;
	ex2.approx.ftz.f32 	%f1167, %f1166;
	bra.uni 	$Lt_50_238850;
$Lt_50_239106:
	neg.ftz.f32 	%f1168, %f6;
	lg2.approx.ftz.f32 	%f1169, %f1168;
	mul.ftz.f32 	%f1170, %f872, %f1169;
	ex2.approx.ftz.f32 	%f1171, %f1170;
	neg.ftz.f32 	%f1167, %f1171;
$Lt_50_238850:
	mov.f32 	%f1172, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p169, %f7, %f1172;
	@!%p169 bra 	$Lt_50_239618;
	.loc	22	294	0
	// %f7 -> %f1175
	lg2.approx.ftz.f32 	%f1173, %f7;
	mul.ftz.f32 	%f1174, %f872, %f1173;
	ex2.approx.ftz.f32 	%f1175, %f1174;
	bra.uni 	$Lt_50_239362;
$Lt_50_239618:
	neg.ftz.f32 	%f1176, %f7;
	lg2.approx.ftz.f32 	%f1177, %f1176;
	mul.ftz.f32 	%f1178, %f872, %f1177;
	ex2.approx.ftz.f32 	%f1179, %f1178;
	neg.ftz.f32 	%f1175, %f1179;
$Lt_50_239362:
	mov.f32 	%f1180, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p170, %f9, %f1180;
	@!%p170 bra 	$Lt_50_240130;
	.loc	22	295	0
	// %f9 -> %f1183
	lg2.approx.ftz.f32 	%f1181, %f9;
	mul.ftz.f32 	%f1182, %f872, %f1181;
	ex2.approx.ftz.f32 	%f1183, %f1182;
	bra.uni 	$Lt_50_239874;
$Lt_50_240130:
	neg.ftz.f32 	%f1184, %f9;
	lg2.approx.ftz.f32 	%f1185, %f1184;
	mul.ftz.f32 	%f1186, %f872, %f1185;
	ex2.approx.ftz.f32 	%f1187, %f1186;
	neg.ftz.f32 	%f1183, %f1187;
$Lt_50_239874:
	.loc	32	42	0
	// Commit all four results.
	mov.f32 	%f5, %f1159;
	mov.f32 	%f6, %f1167;
	mov.f32 	%f7, %f1175;
	mov.f32 	%f9, %f1183;
	.loc	38	91	0
	bra.uni 	$Lt_50_5634;
$Lt_50_8706:
	// Mirrors %r6 within %r14: %r6 = %r14 - %r6 - 1.
	// NOTE(review): presumably a horizontal or vertical flip of a pixel
	// coordinate - confirm which axis %r6/%r14 represent.
	.loc	33	41	0
	sub.s32 	%r592, %r14, %r6;
	sub.s32 	%r6, %r592, 1;
	.loc	38	94	0
	bra.uni 	$Lt_50_5634;
$Lt_50_8962:
	// Adds a pseudo-random offset to %f7, %f6 and %f5.
	//   amp  = param[+4] (%f872), half = amp * 0.5 (%f1189)
	//   seed = trunc-to-u32(param[+16])
	//   Each offset is amp * (r / 32767) - half, where r is a 15-bit value
	//   derived from an integer hash, so offsets span roughly [-half, +half].
	//   param[+8] == 1: three independent hashes, keyed on 3*%r6 - %r12 - seed
	//                   plus 0, 1 and 2, give separate offsets for %f7, %f6
	//                   and %f5.
	//   otherwise:      one hash keyed on %r6 - %r12 - seed gives a single
	//                   offset that is added to all three.
	//   If param[+12] == 1, %f5, %f6, %f7 and %f9 are then clamped to [0,1].
	// In every hash the two multiply constants are related: x*(-1029531031)
	// - 740551042 equals applying x*1103515245 + 12345 twice (mod 2^32).
	.loc	38	96	0
	ld.param.f32 	%f872, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+4];
	mov.f32 	%f1188, 0f3f000000;  	// 0.5
	mul.ftz.f32 	%f1189, %f872, %f1188;
	ld.param.f32 	%f1190, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+16];
	ld.param.f32 	%f1191, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+8];
	mov.f32 	%f1192, 0f3f800000;  	// 1
	setp.eq.ftz.f32 	%p171, %f1191, %f1192;
	@!%p171 bra 	$Lt_50_240642;
	.loc	20	143	0
	// --- Hash #1 (key = 3*%r6 - %r12 - seed) -> %f356 ---
	mul.lo.s32 	%r126, %r6, 3;
	cvt.rzi.ftz.u32.f32 	%r593, %f1190;
	sub.u32 	%r128, %r126, %r12;
	shr.u32 	%r594, %r593, 13;
	sub.u32 	%r595, %r12, %r593;
	sub.u32 	%r596, %r128, %r593;
	xor.b32 	%r597, %r596, %r594;
	shl.b32 	%r598, %r597, 8;
	sub.u32 	%r599, %r595, %r597;
	sub.u32 	%r600, %r593, %r597;
	xor.b32 	%r601, %r598, %r599;
	shr.u32 	%r602, %r601, 13;
	sub.u32 	%r603, %r600, %r601;
	sub.u32 	%r604, %r597, %r601;
	xor.b32 	%r605, %r602, %r603;
	shr.u32 	%r606, %r605, 12;
	sub.u32 	%r607, %r604, %r605;
	xor.b32 	%r608, %r606, %r607;
	shl.b32 	%r609, %r608, 16;
	sub.u32 	%r610, %r601, %r605;
	sub.u32 	%r611, %r610, %r608;
	xor.b32 	%r612, %r609, %r611;
	.loc	20	144	0
	sub.u32 	%r613, %r605, %r608;
	sub.u32 	%r614, %r613, %r612;
	shr.u32 	%r615, %r612, 5;
	xor.b32 	%r616, %r614, %r615;
	.loc	20	145	0
	sub.u32 	%r617, %r608, %r612;
	sub.u32 	%r618, %r617, %r616;
	shr.u32 	%r619, %r616, 3;
	xor.b32 	%r155, %r618, %r619;
	.loc	20	146	0
	sub.u32 	%r620, %r612, %r616;
	sub.u32 	%r621, %r620, %r155;
	shl.b32 	%r622, %r155, 10;
	xor.b32 	%r159, %r621, %r622;
	.loc	20	147	0
	sub.u32 	%r623, %r616, %r155;
	sub.u32 	%r624, %r623, %r159;
	shr.u32 	%r625, %r159, 15;
	xor.b32 	%r163, %r624, %r625;
	.loc	34	48	0
	// Mixed word %r163 -> 15-bit value -> offset %f356.
	mul.lo.u32 	%r626, %r163, 1103515245;
	add.u32 	%r627, %r626, 12345;
	shr.u32 	%r628, %r627, 16;
	and.b32 	%r629, %r628, 255;
	shl.b32 	%r630, %r629, 7;
	mul.lo.u32 	%r631, %r163, -1029531031;
	sub.u32 	%r632, %r631, 740551042;
	shr.u32 	%r633, %r632, 16;
	and.b32 	%r634, %r633, 255;
	xor.b32 	%r635, %r630, %r634;
	cvt.rn.f32.s32 	%f1193, %r635;
	mov.f32 	%f1194, 0f46fffe00;  	// 32767
	div.approx.ftz.f32 	%f1195, %f1193, %f1194;
	mul.ftz.f32 	%f1196, %f872, %f1195;
	sub.ftz.f32 	%f356, %f1196, %f1189;
	.loc	20	143	0
	// --- Hash #2 (key + 1) -> %f361 ---
	add.u32 	%r636, %r596, 1;
	xor.b32 	%r637, %r636, %r594;
	shl.b32 	%r638, %r637, 8;
	sub.u32 	%r639, %r595, %r637;
	sub.u32 	%r640, %r593, %r637;
	xor.b32 	%r641, %r638, %r639;
	shr.u32 	%r642, %r641, 13;
	sub.u32 	%r643, %r640, %r641;
	sub.u32 	%r644, %r637, %r641;
	xor.b32 	%r645, %r642, %r643;
	shr.u32 	%r646, %r645, 12;
	sub.u32 	%r647, %r644, %r645;
	xor.b32 	%r648, %r646, %r647;
	sub.u32 	%r649, %r641, %r645;
	sub.u32 	%r650, %r649, %r648;
	shl.b32 	%r651, %r648, 16;
	xor.b32 	%r652, %r650, %r651;
	.loc	20	144	0
	sub.u32 	%r653, %r645, %r648;
	sub.u32 	%r654, %r653, %r652;
	shr.u32 	%r655, %r652, 5;
	xor.b32 	%r656, %r654, %r655;
	.loc	20	145	0
	sub.u32 	%r657, %r648, %r652;
	sub.u32 	%r658, %r657, %r656;
	shr.u32 	%r659, %r656, 3;
	xor.b32 	%r198, %r658, %r659;
	.loc	20	146	0
	sub.u32 	%r660, %r652, %r656;
	sub.u32 	%r661, %r660, %r198;
	shl.b32 	%r662, %r198, 10;
	xor.b32 	%r202, %r661, %r662;
	.loc	20	147	0
	sub.u32 	%r663, %r656, %r198;
	sub.u32 	%r664, %r663, %r202;
	shr.u32 	%r665, %r202, 15;
	xor.b32 	%r206, %r664, %r665;
	.loc	34	49	0
	mul.lo.u32 	%r666, %r206, 1103515245;
	add.u32 	%r667, %r666, 12345;
	shr.u32 	%r668, %r667, 16;
	and.b32 	%r669, %r668, 255;
	shl.b32 	%r670, %r669, 7;
	mul.lo.u32 	%r671, %r206, -1029531031;
	sub.u32 	%r672, %r671, 740551042;
	shr.u32 	%r673, %r672, 16;
	and.b32 	%r674, %r673, 255;
	xor.b32 	%r675, %r670, %r674;
	cvt.rn.f32.s32 	%f1197, %r675;
	mov.f32 	%f1198, 0f46fffe00;  	// 32767
	div.approx.ftz.f32 	%f1199, %f1197, %f1198;
	mul.ftz.f32 	%f1200, %f872, %f1199;
	sub.ftz.f32 	%f361, %f1200, %f1189;
	.loc	20	143	0
	// --- Hash #3 (key + 2) -> %f366 ---
	add.u32 	%r676, %r596, 2;
	xor.b32 	%r677, %r676, %r594;
	shl.b32 	%r678, %r677, 8;
	sub.u32 	%r679, %r595, %r677;
	sub.u32 	%r680, %r593, %r677;
	xor.b32 	%r681, %r678, %r679;
	shr.u32 	%r682, %r681, 13;
	sub.u32 	%r683, %r680, %r681;
	sub.u32 	%r684, %r677, %r681;
	xor.b32 	%r685, %r682, %r683;
	shr.u32 	%r686, %r685, 12;
	sub.u32 	%r687, %r684, %r685;
	xor.b32 	%r688, %r686, %r687;
	sub.u32 	%r689, %r681, %r685;
	sub.u32 	%r690, %r689, %r688;
	shl.b32 	%r691, %r688, 16;
	xor.b32 	%r692, %r690, %r691;
	.loc	20	144	0
	sub.u32 	%r693, %r685, %r688;
	sub.u32 	%r694, %r693, %r692;
	shr.u32 	%r695, %r692, 5;
	xor.b32 	%r696, %r694, %r695;
	.loc	20	145	0
	sub.u32 	%r697, %r688, %r692;
	sub.u32 	%r698, %r697, %r696;
	shr.u32 	%r699, %r696, 3;
	xor.b32 	%r241, %r698, %r699;
	.loc	20	146	0
	sub.u32 	%r700, %r692, %r696;
	sub.u32 	%r701, %r700, %r241;
	shl.b32 	%r702, %r241, 10;
	xor.b32 	%r245, %r701, %r702;
	.loc	20	147	0
	sub.u32 	%r703, %r696, %r241;
	sub.u32 	%r704, %r703, %r245;
	shr.u32 	%r705, %r245, 15;
	xor.b32 	%r249, %r704, %r705;
	.loc	34	50	0
	mul.lo.u32 	%r706, %r249, 1103515245;
	add.u32 	%r707, %r706, 12345;
	shr.u32 	%r708, %r707, 16;
	and.b32 	%r709, %r708, 255;
	shl.b32 	%r710, %r709, 7;
	mul.lo.u32 	%r711, %r249, -1029531031;
	sub.u32 	%r712, %r711, 740551042;
	shr.u32 	%r713, %r712, 16;
	and.b32 	%r714, %r713, 255;
	xor.b32 	%r715, %r710, %r714;
	cvt.rn.f32.s32 	%f1201, %r715;
	mov.f32 	%f1202, 0f46fffe00;  	// 32767
	div.approx.ftz.f32 	%f1203, %f1201, %f1202;
	mul.ftz.f32 	%f1204, %f872, %f1203;
	sub.ftz.f32 	%f366, %f1204, %f1189;
	bra.uni 	$Lt_50_240386;
$Lt_50_240642:
	.loc	20	143	0
	// --- Single hash (key = %r6 - %r12 - seed), shared by all three ---
	sub.u32 	%r494, %r6, %r12;
	cvt.rzi.ftz.u32.f32 	%r716, %f1190;
	shr.u32 	%r717, %r716, 13;
	sub.u32 	%r718, %r494, %r716;
	sub.u32 	%r719, %r12, %r716;
	xor.b32 	%r720, %r718, %r717;
	shl.b32 	%r721, %r720, 8;
	sub.u32 	%r722, %r719, %r720;
	sub.u32 	%r723, %r716, %r720;
	xor.b32 	%r724, %r721, %r722;
	shr.u32 	%r725, %r724, 13;
	sub.u32 	%r726, %r723, %r724;
	sub.u32 	%r727, %r720, %r724;
	xor.b32 	%r728, %r725, %r726;
	shr.u32 	%r729, %r728, 12;
	sub.u32 	%r730, %r727, %r728;
	xor.b32 	%r731, %r729, %r730;
	shl.b32 	%r732, %r731, 16;
	sub.u32 	%r733, %r724, %r728;
	sub.u32 	%r734, %r733, %r731;
	xor.b32 	%r735, %r732, %r734;
	.loc	20	144	0
	sub.u32 	%r736, %r728, %r731;
	sub.u32 	%r737, %r736, %r735;
	shr.u32 	%r738, %r735, 5;
	xor.b32 	%r739, %r737, %r738;
	.loc	20	145	0
	sub.u32 	%r740, %r731, %r735;
	sub.u32 	%r741, %r740, %r739;
	shr.u32 	%r742, %r739, 3;
	xor.b32 	%r287, %r741, %r742;
	.loc	20	146	0
	sub.u32 	%r743, %r735, %r739;
	sub.u32 	%r744, %r743, %r287;
	shl.b32 	%r745, %r287, 10;
	xor.b32 	%r291, %r744, %r745;
	.loc	20	147	0
	sub.u32 	%r746, %r739, %r287;
	sub.u32 	%r747, %r746, %r291;
	shr.u32 	%r748, %r291, 15;
	xor.b32 	%r295, %r747, %r748;
	.loc	34	54	0
	mul.lo.u32 	%r296, %r295, 1103515245;
	mul.lo.u32 	%r297, %r295, -1029531031;
	add.u32 	%r298, %r296, 12345;
	sub.u32 	%r299, %r297, 740551042;
	shr.u32 	%r300, %r298, 16;
	shr.u32 	%r301, %r299, 16;
	and.b32 	%r302, %r300, 255;
	and.b32 	%r303, %r301, 255;
	shl.b32 	%r304, %r302, 7;
	xor.b32 	%r305, %r304, %r303;
	cvt.rn.f32.s32 	%f367, %r305;
	mov.f32 	%f1205, 0f46fffe00;  	// 32767
	div.approx.ftz.f32 	%f369, %f367, %f1205;
	mul.ftz.f32 	%f1206, %f872, %f369;
	sub.ftz.f32 	%f1207, %f1206, %f1189;
	mov.f32 	%f366, %f1207;
	mov.f32 	%f361, %f1207;
	mov.f32 	%f356, %f1207;
$Lt_50_240386:
	.loc	34	57	0
	// Apply the offsets.
	add.ftz.f32 	%f7, %f356, %f7;
	.loc	34	58	0
	add.ftz.f32 	%f6, %f361, %f6;
	.loc	34	59	0
	add.ftz.f32 	%f5, %f5, %f366;
	ld.param.f32 	%f1208, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+12];
	mov.f32 	%f1209, 0f3f800000;  	// 1
	setp.eq.ftz.f32 	%p172, %f1208, %f1209;
	@!%p172 bra 	$Lt_50_5634;
	.loc	34	63	0
	// param[+12] == 1: clamp all four registers to [0,1].
	mov.f32 	%f1210, 0f00000000;  	// 0
	max.ftz.f32 	%f1211, %f5, %f1210;
	mov.f32 	%f1212, 0f3f800000;  	// 1
	min.ftz.f32 	%f5, %f1211, %f1212;
	mov.f32 	%f1213, 0f00000000;  	// 0
	max.ftz.f32 	%f1214, %f6, %f1213;
	mov.f32 	%f1215, 0f3f800000;  	// 1
	min.ftz.f32 	%f6, %f1214, %f1215;
	mov.f32 	%f1216, 0f00000000;  	// 0
	max.ftz.f32 	%f1217, %f7, %f1216;
	mov.f32 	%f1218, 0f3f800000;  	// 1
	min.ftz.f32 	%f7, %f1217, %f1218;
	mov.f32 	%f1219, 0f00000000;  	// 0
	max.ftz.f32 	%f1220, %f9, %f1219;
	mov.f32 	%f1221, 0f3f800000;  	// 1
	min.ftz.f32 	%f9, %f1220, %f1221;
	bra.uni 	$Lt_50_5634;
$Lt_50_9218:
	// Gated affine adjustment in the kRGB32f_To_601YPbPr space.
	// Runs only when float(%r6) < float(%r14) * param[+20].
	//   (Y, u, v) = kRGB32f_To_601YPbPr * (%f7, %f6, %f5)
	//       Y = %f887  (offsets +0, +4, +8)
	//       u = %f1240 (offsets +12, +16, +20)
	//       v = %f1238 (offsets +24, +28, +32)
	//   Y' = Y * param[+8] + param[+4]                    (%f1243)
	//   u' = u * param[+12] - v * param[+16]              (%f1249)
	//   v' = u * param[+16] + v * param[+12]              (%f1250)
	//   (%f7, %f6, %f5) = k601YPbPr_To_RGB32f * (Y', u', v')
	// NOTE(review): param[+12]/param[+16] act as a 2x2 rotation-scale pair
	// (e.g. pre-multiplied cos/sin) - confirm against the host code.
	.loc	38	99	0
	cvt.rn.f32.s32 	%f1222, %r6;
	cvt.rn.f32.s32 	%f1223, %r14;
	ld.param.f32 	%f1224, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+20];
	mul.ftz.f32 	%f1225, %f1223, %f1224;
	setp.lt.ftz.f32 	%p173, %f1222, %f1225;
	@!%p173 bra 	$Lt_50_5634;
	.loc	22	267	0
	// Forward 3x3 multiply, interleaved across the three output rows.
	ld.const.f32 	%f1226, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f883, %f1226, %f6;
	ld.const.f32 	%f1227, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f1228, %f1227, %f6;
	ld.const.f32 	%f1229, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f1230, %f1229, %f6;
	ld.const.f32 	%f1231, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f885, %f1231, %f7, %f883;
	ld.const.f32 	%f1232, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f1233, %f1232, %f7, %f1228;
	ld.const.f32 	%f1234, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f1235, %f1234, %f7, %f1230;
	ld.const.f32 	%f1236, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f887, %f1236, %f5, %f885;
	ld.const.f32 	%f1237, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f1238, %f1237, %f5, %f1233;
	ld.const.f32 	%f1239, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f1240, %f1239, %f5, %f1235;
	// Adjust Y, then mix u and v.
	ld.param.f32 	%f1241, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+4];
	ld.param.f32 	%f1242, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+8];
	fma.rn.ftz.f32 	%f1243, %f887, %f1242, %f1241;
	ld.param.f32 	%f1244, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+16];
	mul.ftz.f32 	%f1245, %f1238, %f1244;
	ld.param.f32 	%f1246, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+12];
	mul.ftz.f32 	%f1247, %f1238, %f1246;
	mul.ftz.f32 	%f1248, %f1240, %f1246;
	sub.ftz.f32 	%f1249, %f1248, %f1245;
	fma.rn.ftz.f32 	%f1250, %f1240, %f1244, %f1247;
	// Inverse 3x3 multiply: offsets +12..+20 -> %f6, +0..+8 -> %f7,
	// +24..+32 -> %f5.
	ld.const.f32 	%f1251, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f1252, %f1251, %f1249;
	ld.const.f32 	%f1253, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f1254, %f1253, %f1243, %f1252;
	ld.const.f32 	%f1255, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f420, %f1255, %f1250, %f1254;
	.loc	22	268	0
	ld.const.f32 	%f1256, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f1257, %f1256, %f1249;
	ld.const.f32 	%f1258, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f1259, %f1258, %f1243, %f1257;
	ld.const.f32 	%f1260, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f426, %f1260, %f1250, %f1259;
	.loc	35	56	0
	ld.const.f32 	%f1261, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f1262, %f1261, %f1249;
	ld.const.f32 	%f1263, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f1264, %f1263, %f1243, %f1262;
	ld.const.f32 	%f1265, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f1265, %f1250, %f1264;
	mov.f32 	%f6, %f420;
	mov.f32 	%f7, %f426;
	bra.uni 	$Lt_50_5634;
$Lt_50_9474:
	// Blends %f5/%f6/%f7 toward a two-point ramp indexed by the weighted sum.
	//   Y = %f887 = c[+0]*%f7 + c[+4]*%f6 + c[+8]*%f5 (kRGB32f_To_601YPbPr)
	//   target5 = param[+4]  + Y * (param[+16] - param[+4])
	//   target6 = param[+8]  + Y * (param[+20] - param[+8])
	//   target7 = param[+12] + Y * (param[+24] - param[+12])
	//   amount  = param[+28]
	//   %fN = %fN + amount * (targetN - %fN)   for N = 5, 6, 7
	// All three targets are computed from the original inputs before any of
	// %f5/%f6/%f7 is overwritten.
	.loc	36	46	0
	ld.const.f32 	%f1266, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f883, %f1266, %f6;
	ld.const.f32 	%f1267, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f885, %f1267, %f7, %f883;
	ld.const.f32 	%f1268, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f887, %f1268, %f5, %f885;
	// target6 -> %f437
	ld.param.f32 	%f897, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+8];
	ld.param.f32 	%f1269, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+20];
	sub.ftz.f32 	%f1270, %f1269, %f897;
	fma.rn.ftz.f32 	%f437, %f887, %f1270, %f897;
	.loc	36	47	0
	// target7 -> %f441
	ld.param.f32 	%f1271, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+12];
	ld.param.f32 	%f1272, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+24];
	sub.ftz.f32 	%f1273, %f1272, %f1271;
	fma.rn.ftz.f32 	%f441, %f887, %f1273, %f1271;
	.loc	36	49	0
	// target5 -> %f1277, then blend %f5.
	ld.param.f32 	%f1274, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+28];
	ld.param.f32 	%f872, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+4];
	ld.param.f32 	%f1275, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter2+16];
	sub.ftz.f32 	%f1276, %f1275, %f872;
	fma.rn.ftz.f32 	%f1277, %f887, %f1276, %f872;
	sub.ftz.f32 	%f1278, %f1277, %f5;
	fma.rn.ftz.f32 	%f5, %f1274, %f1278, %f5;
	.loc	36	50	0
	sub.ftz.f32 	%f1279, %f437, %f6;
	fma.rn.ftz.f32 	%f6, %f1274, %f1279, %f6;
	.loc	36	51	0
	sub.ftz.f32 	%f1280, %f441, %f7;
	fma.rn.ftz.f32 	%f7, %f1274, %f1280, %f7;
	.loc	38	103	0
	bra.uni 	$Lt_50_5634;
$Lt_50_9730:
	// Mirrors %r12 within %r13: %r12 = %r13 - %r12 - 1.
	// There is no branch at the end; execution falls through into the label
	// that follows.
	// NOTE(review): presumably the flip on the other axis from the
	// %r14/%r6 mirror - confirm which axis %r12/%r13 represent.
	.loc	37	41	0
	sub.s32 	%r749, %r13, %r12;
	sub.s32 	%r12, %r749, 1;
$Lt_50_5634:
	// Dispatch on the 32-bit selector stored at offset 0 of paraminFilter3.
	// Values 0..16 each branch to their own label via a linear compare
	// chain; any other value goes straight to $Lt_50_514.
	.loc	38	54	0
	ld.param.u32 	%r750, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+0];
	mov.u32 	%r751, 0;
	setp.eq.s32 	%p174, %r750, %r751;
	@%p174 bra 	$Lt_50_258;
	mov.u32 	%r752, 1;
	setp.eq.s32 	%p175, %r750, %r752;
	@%p175 bra 	$Lt_50_770;
	mov.u32 	%r753, 2;
	setp.eq.s32 	%p176, %r750, %r753;
	@%p176 bra 	$Lt_50_1026;
	mov.u32 	%r754, 3;
	setp.eq.s32 	%p177, %r750, %r754;
	@%p177 bra 	$Lt_50_1282;
	mov.u32 	%r755, 4;
	setp.eq.s32 	%p178, %r750, %r755;
	@%p178 bra 	$Lt_50_1538;
	mov.u32 	%r756, 5;
	setp.eq.s32 	%p179, %r750, %r756;
	@%p179 bra 	$Lt_50_1794;
	mov.u32 	%r757, 6;
	setp.eq.s32 	%p180, %r750, %r757;
	@%p180 bra 	$Lt_50_2050;
	mov.u32 	%r758, 7;
	setp.eq.s32 	%p181, %r750, %r758;
	@%p181 bra 	$Lt_50_2306;
	mov.u32 	%r759, 8;
	setp.eq.s32 	%p182, %r750, %r759;
	@%p182 bra 	$Lt_50_2562;
	mov.u32 	%r760, 9;
	setp.eq.s32 	%p183, %r750, %r760;
	@%p183 bra 	$Lt_50_2818;
	mov.u32 	%r761, 10;
	setp.eq.s32 	%p184, %r750, %r761;
	@%p184 bra 	$Lt_50_3074;
	mov.u32 	%r762, 11;
	setp.eq.s32 	%p185, %r750, %r762;
	@%p185 bra 	$Lt_50_3330;
	mov.u32 	%r763, 12;
	setp.eq.s32 	%p186, %r750, %r763;
	@%p186 bra 	$Lt_50_3586;
	mov.u32 	%r764, 13;
	setp.eq.s32 	%p187, %r750, %r764;
	@%p187 bra 	$Lt_50_3842;
	mov.u32 	%r765, 14;
	setp.eq.s32 	%p188, %r750, %r765;
	@%p188 bra 	$Lt_50_4098;
	mov.u32 	%r766, 15;
	setp.eq.s32 	%p189, %r750, %r766;
	@%p189 bra 	$Lt_50_4354;
	mov.u32 	%r767, 16;
	setp.eq.s32 	%p190, %r750, %r767;
	@%p190 bra 	$Lt_50_4610;
	// Selector outside 0..16: nothing to apply.
	bra.uni 	$Lt_50_514;
$Lt_50_258:
	// paraminFilter3 selector == 0.
	// Saturates %f9 to [0,1], then combines it with p[+4] under three
	// flags that are each "parameter == 1.0":
	//   %p191 <- p[+8], %p192 <- p[+12], %p193 <- p[+16]
	// NOTE(review): %f9 is presumably the alpha channel -- confirm.
	.loc	21	42	0
	cvt.ftz.sat.f32.f32 	%f9, %f9;
	ld.param.f32 	%f1281, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+8];
	mov.f32 	%f1282, 0f3f800000;  	// 1
	setp.eq.ftz.f32 	%p191, %f1281, %f1282;
	ld.param.f32 	%f1283, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+12];
	mov.f32 	%f1284, 0f3f800000;  	// 1
	setp.eq.ftz.f32 	%p192, %f1283, %f1284;
	ld.param.f32 	%f1285, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+4];
	ld.param.f32 	%f1286, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+16];
	mov.f32 	%f1287, 0f3f800000;  	// 1
	setp.eq.ftz.f32 	%p193, %f1286, %f1287;
	@!%p193 bra 	$Lt_50_242178;
	// p[+16] == 1: v = %p191 ? p[+4] : p[+4]*%f9
	.loc	21	45	0
	mov.f32 	%f1288, %f1285;
	mul.ftz.f32 	%f1289, %f1288, %f9;
	selp.f32 	%f1290, %f1288, %f1289, %p191;
	// %f9 = %p192 ? (1 - v) : v
	.loc	21	53	0
	mov.f32 	%f1291, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1292, %f1291, %f1290;
	selp.f32 	%f9, %f1292, %f1290, %p192;
	// Only this path also copies the result into %f5, %f6 and %f7.
	.loc	21	57	0
	mov.f32 	%f5, %f9;
	mov.f32 	%f6, %f9;
	mov.f32 	%f7, %f9;
	bra.uni 	$Lt_50_514;
$Lt_50_242178:
	@!%p191 bra 	$Lt_50_242690;
	// p[+16] != 1 and p[+8] == 1: %f9 = %p192 ? (1 - p[+4]) : p[+4]
	.loc	21	61	0
	mov.f32 	%f1288, %f1285;
	mov.f32 	%f1293, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1294, %f1293, %f1288;
	selp.f32 	%f9, %f1294, %f1288, %p192;
	bra.uni 	$Lt_50_514;
$Lt_50_242690:
	// Neither p[+16] nor p[+8] is 1: %f9 = (%p192 ? 1 - %f9 : %f9) * p[+4]
	.loc	21	69	0
	mov.f32 	%f1295, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1296, %f1295, %f9;
	selp.f32 	%f1297, %f1296, %f9, %p192;
	.loc	21	73	0
	mul.ftz.f32 	%f9, %f1297, %f1285;
	bra.uni 	$Lt_50_514;
$Lt_50_770:
	// paraminFilter3 selector == 1.
	//   Y   = c[+4]*%f6 + c[+0]*%f7 + c[+8]*%f5   (c = kRGB32f_To_601YPbPr)
	//   %f7 = k[+0]*Y, %f6 = k[+12]*Y, %f5 = k[+24]*Y (k = k601YPbPr_To_RGB32f)
	// NOTE(review): given the table names this looks like a luma-only
	// (grayscale) conversion -- confirm the matrix layout.
	.loc	22	267	0
	ld.const.f32 	%f1298, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1299, %f1298, %f6;
	ld.const.f32 	%f1300, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1301, %f1300, %f7, %f1299;
	ld.const.f32 	%f1302, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1303, %f1302, %f5, %f1301;
	ld.const.f32 	%f1304, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f1305, %f1304, %f1303;
	.loc	22	268	0
	ld.const.f32 	%f1306, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f1307, %f1306, %f1303;
	.loc	23	44	0
	ld.const.f32 	%f1308, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f1308, %f1303;
	mov.f32 	%f6, %f1305;
	mov.f32 	%f7, %f1307;
	.loc	38	61	0
	bra.uni 	$Lt_50_514;
$Lt_50_1026:
	// paraminFilter3 selector == 2.
	// p[+4] selects between two per-channel mappings of %f5/%f6/%f7:
	//   p[+4] != 0 (or NaN): ch = p[+8]*(ch - p[+12]) + p[+20]
	//   p[+4] == 0         : ch = (ch > p[+12]) ? p[+24] : p[+20]
	.loc	38	63	0
	ld.param.f32 	%f1309, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+20];
	ld.param.f32 	%f1310, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+12];
	ld.param.f32 	%f1311, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+4];
	mov.f32 	%f1312, 0f00000000;  	// 0
	setp.neu.ftz.f32 	%p194, %f1311, %f1312;
	@!%p194 bra 	$Lt_50_243202;
	// Linear path: scale the offset from p[+12] by p[+8], then add p[+20].
	.loc	24	44	0
	ld.param.f32 	%f1313, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+8];
	sub.ftz.f32 	%f1314, %f7, %f1310;
	fma.rn.ftz.f32 	%f7, %f1313, %f1314, %f1309;
	.loc	24	45	0
	sub.ftz.f32 	%f1315, %f6, %f1310;
	fma.rn.ftz.f32 	%f6, %f1313, %f1315, %f1309;
	.loc	24	46	0
	sub.ftz.f32 	%f1316, %f5, %f1310;
	fma.rn.ftz.f32 	%f5, %f1313, %f1316, %f1309;
	bra.uni 	$Lt_50_514;
$Lt_50_243202:
	// Two-level path: compare each channel against p[+12].
	.loc	24	50	0
	ld.param.f32 	%f1317, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+24];
	setp.gt.ftz.f32 	%p195, %f7, %f1310;
	selp.f32 	%f7, %f1317, %f1309, %p195;
	.loc	24	51	0
	setp.gt.ftz.f32 	%p196, %f6, %f1310;
	selp.f32 	%f6, %f1317, %f1309, %p196;
	.loc	24	52	0
	setp.gt.ftz.f32 	%p197, %f5, %f1310;
	selp.f32 	%f5, %f1317, %f1309, %p197;
	bra.uni 	$Lt_50_514;
$Lt_50_1282:
	// paraminFilter3 selector == 3.
	// Independent per-channel gain:
	//   %f5 *= p[+4], %f6 *= p[+8], %f7 *= p[+12]
	.loc	25	47	0
	ld.param.f32 	%f1318, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+4];
	mul.ftz.f32 	%f5, %f1318, %f5;
	ld.param.f32 	%f1319, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+8];
	mul.ftz.f32 	%f6, %f1319, %f6;
	ld.param.f32 	%f1320, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+12];
	mul.ftz.f32 	%f7, %f1320, %f7;
	.loc	38	67	0
	bra.uni 	$Lt_50_514;
$Lt_50_1538:
	// paraminFilter3 selector == 4.
	// Distance test against a reference triple, then a conditional
	// replacement by the weighted-sum (Y) gray value.
	//   d5 = |clamp01(%f5) - p[+12]|   d6 = |clamp01(%f6) - p[+16]|
	//   d7 = |clamp01(%f7) - p[+20]|   d9 = |clamp01(%f9) - %f9|
	//   within = p[+8] >= max(d5, d6, d7, d9)
	//   The replacement runs when (p[+4] == 0) XOR within.
	.loc	26	48	0
	ld.param.f32 	%f1321, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+4];
	mov.f32 	%f1322, 0f00000000;  	// 0
	setp.eq.ftz.f32 	%p198, %f1321, %f1322;
	ld.param.f32 	%f1323, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+8];
	mov.f32 	%f1324, 0f00000000;  	// 0
	max.ftz.f32 	%f1325, %f5, %f1324;
	mov.f32 	%f1326, 0f3f800000;  	// 1
	min.ftz.f32 	%f1327, %f1325, %f1326;
	ld.param.f32 	%f1328, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+12];
	sub.ftz.f32 	%f1329, %f1327, %f1328;
	abs.ftz.f32 	%f1330, %f1329;
	mov.f32 	%f1331, 0f00000000;  	// 0
	max.ftz.f32 	%f1332, %f6, %f1331;
	mov.f32 	%f1333, 0f3f800000;  	// 1
	min.ftz.f32 	%f1334, %f1332, %f1333;
	ld.param.f32 	%f1335, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+16];
	sub.ftz.f32 	%f1336, %f1334, %f1335;
	abs.ftz.f32 	%f1337, %f1336;
	mov.f32 	%f1338, 0f00000000;  	// 0
	max.ftz.f32 	%f1339, %f7, %f1338;
	mov.f32 	%f1340, 0f3f800000;  	// 1
	min.ftz.f32 	%f1341, %f1339, %f1340;
	ld.param.f32 	%f1342, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+20];
	sub.ftz.f32 	%f1343, %f1341, %f1342;
	abs.ftz.f32 	%f1344, %f1343;
	// The fourth term measures how far %f9 lies outside [0,1].
	mov.f32 	%f1345, 0f00000000;  	// 0
	max.ftz.f32 	%f1346, %f9, %f1345;
	mov.f32 	%f1347, 0f3f800000;  	// 1
	min.ftz.f32 	%f1348, %f1346, %f1347;
	sub.ftz.f32 	%f1349, %f1348, %f9;
	abs.ftz.f32 	%f1350, %f1349;
	max.ftz.f32 	%f1351, %f1344, %f1350;
	max.ftz.f32 	%f1352, %f1337, %f1351;
	max.ftz.f32 	%f1353, %f1330, %f1352;
	setp.ge.ftz.f32 	%p199, %f1323, %f1353;
	xor.pred 	%p200, %p198, %p199;
	@!%p200 bra 	$Lt_50_514;
	// Y = c[+4]*%f6 + c[+0]*%f7 + c[+8]*%f5, then
	// %f6 = k[+12]*Y, %f7 = k[+0]*Y, %f5 = k[+24]*Y.
	.loc	22	267	0
	ld.const.f32 	%f1354, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1299, %f1354, %f6;
	ld.const.f32 	%f1355, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1301, %f1355, %f7, %f1299;
	ld.const.f32 	%f1356, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1303, %f1356, %f5, %f1301;
	ld.const.f32 	%f1357, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f1358, %f1357, %f1303;
	.loc	22	268	0
	ld.const.f32 	%f1359, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f1360, %f1359, %f1303;
	.loc	23	44	0
	ld.const.f32 	%f1361, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f1361, %f1303;
	mov.f32 	%f6, %f1358;
	mov.f32 	%f7, %f1360;
	bra.uni 	$Lt_50_514;
$Lt_50_1794:
	// paraminFilter3 selector == 5.
	// Distance test against a reference triple, then a conditional
	// replacement by a second triple.
	//   d5 = |clamp01(%f5) - p[+12]|   d6 = |clamp01(%f6) - p[+16]|
	//   d7 = |clamp01(%f7) - p[+20]|   d9 = |clamp01(%f9) - %f9|
	//   If p[+8] >= max(d5, d6, d7, d9), (%f5, %f6, %f7) are replaced by
	//   (p[+24], p[+28], p[+32]); when p[+4] == 0 each replacement value
	//   is first multiplied by Y = c[+4]*%f6 + c[+0]*%f7 + c[+8]*%f5.
	//   Otherwise the channels are left untouched.
	.loc	27	48	0
	ld.param.f32 	%f1362, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+8];
	mov.f32 	%f1363, 0f00000000;  	// 0
	max.ftz.f32 	%f1364, %f5, %f1363;
	mov.f32 	%f1365, 0f3f800000;  	// 1
	min.ftz.f32 	%f1366, %f1364, %f1365;
	ld.param.f32 	%f1367, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+12];
	sub.ftz.f32 	%f1368, %f1366, %f1367;
	abs.ftz.f32 	%f1369, %f1368;
	mov.f32 	%f1370, 0f00000000;  	// 0
	max.ftz.f32 	%f1371, %f6, %f1370;
	mov.f32 	%f1372, 0f3f800000;  	// 1
	min.ftz.f32 	%f1373, %f1371, %f1372;
	ld.param.f32 	%f1374, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+16];
	sub.ftz.f32 	%f1375, %f1373, %f1374;
	abs.ftz.f32 	%f1376, %f1375;
	mov.f32 	%f1377, 0f00000000;  	// 0
	max.ftz.f32 	%f1378, %f7, %f1377;
	mov.f32 	%f1379, 0f3f800000;  	// 1
	min.ftz.f32 	%f1380, %f1378, %f1379;
	ld.param.f32 	%f1381, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+20];
	sub.ftz.f32 	%f1382, %f1380, %f1381;
	abs.ftz.f32 	%f1383, %f1382;
	mov.f32 	%f1384, 0f00000000;  	// 0
	max.ftz.f32 	%f1385, %f9, %f1384;
	mov.f32 	%f1386, 0f3f800000;  	// 1
	min.ftz.f32 	%f1387, %f1385, %f1386;
	sub.ftz.f32 	%f1388, %f1387, %f9;
	abs.ftz.f32 	%f1389, %f1388;
	max.ftz.f32 	%f1390, %f1383, %f1389;
	max.ftz.f32 	%f1391, %f1376, %f1390;
	max.ftz.f32 	%f1392, %f1369, %f1391;
	setp.ge.ftz.f32 	%p201, %f1362, %f1392;
	@!%p201 bra 	$Lt_50_514;
	// Within tolerance: start from the unscaled replacement triple.
	.loc	27	51	0
	ld.param.f32 	%f1393, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+24];
	mov.f32 	%f126, %f1393;
	ld.param.f32 	%f1394, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+28];
	mov.f32 	%f128, %f1394;
	ld.param.f32 	%f1395, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+32];
	mov.f32 	%f130, %f1395;
	ld.param.f32 	%f1396, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+4];
	mov.f32 	%f1397, 0f00000000;  	// 0
	setp.eq.ftz.f32 	%p202, %f1396, %f1397;
	@!%p202 bra 	$Lt_50_244482;
	// p[+4] == 0: scale the replacement triple by Y.
	.loc	27	60	0
	ld.const.f32 	%f1398, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1299, %f1398, %f6;
	ld.const.f32 	%f1399, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1301, %f1399, %f7, %f1299;
	ld.const.f32 	%f1400, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1303, %f1400, %f5, %f1301;
	mul.ftz.f32 	%f126, %f1303, %f1393;
	.loc	27	61	0
	mul.ftz.f32 	%f128, %f1303, %f1394;
	.loc	27	62	0
	mul.ftz.f32 	%f130, %f1303, %f1395;
$Lt_50_244482:
	.loc	27	65	0
	mov.f32 	%f5, %f126;
	mov.f32 	%f6, %f128;
	mov.f32 	%f7, %f130;
	bra.uni 	$Lt_50_514;
$Lt_50_2050:
	// paraminFilter3 selector == 6.
	// Scales %f9 by a ramp built from four integer distances:
	//   m = max over d in {%r6, %r12, %r13-%r12, %r14-%r6} of trunc(p[+4] - d)
	//   if m > 0: %f9 *= (p[+4] - m) / p[+4]
	// NOTE(review): %r6/%r12 are presumably pixel coordinates and %r14/%r13
	// the matching extents, making this an edge feather of width p[+4] --
	// confirm against the register setup earlier in the kernel.
	.loc	28	47	0
	sub.s32 	%r768, %r13, %r12;
	sub.s32 	%r769, %r14, %r6;
	cvt.rn.f32.s32 	%f1401, %r6;
	cvt.rn.f32.s32 	%f1402, %r12;
	cvt.rn.f32.s32 	%f138, %r768;
	cvt.rn.f32.s32 	%f139, %r769;
	ld.param.f32 	%f1288, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+4];
	sub.ftz.f32 	%f1403, %f1288, %f1401;
	sub.ftz.f32 	%f1404, %f1288, %f1402;
	sub.ftz.f32 	%f1405, %f1288, %f138;
	sub.ftz.f32 	%f1406, %f1288, %f139;
	// Round toward zero before taking the maximum.
	cvt.rzi.ftz.s32.f32 	%r770, %f1403;
	cvt.rzi.ftz.s32.f32 	%r771, %f1404;
	cvt.rzi.ftz.s32.f32 	%r772, %f1405;
	cvt.rzi.ftz.s32.f32 	%r773, %f1406;
	max.s32 	%r774, %r770, %r771;
	max.s32 	%r775, %r773, %r774;
	max.s32 	%r776, %r772, %r775;
	mov.u32 	%r777, 0;
	setp.le.s32 	%p203, %r776, %r777;
	// m <= 0: %f9 is left unchanged.
	@%p203 bra 	$Lt_50_514;
	.loc	28	51	0
	cvt.rn.f32.s32 	%f1407, %r776;
	sub.ftz.f32 	%f1408, %f1288, %f1407;
	div.approx.ftz.f32 	%f1409, %f1408, %f1288;
	mul.ftz.f32 	%f9, %f9, %f1409;
	bra.uni 	$Lt_50_514;
$Lt_50_2306:
	// paraminFilter3 selector == 7.
	// Builds a soft band mask %f164 from Y = c[+4]*%f6 + c[+0]*%f7 + c[+8]*%f5
	// and writes it, scaled by k601YPbPr_To_RGB32f[+24/+12/+0], into %f5/%f6/%f7.
	//   lo = p[+8], hi = p[+12], d = p[+16]*(hi - lo)
	//   outer = [clamp01(lo - d), clamp01(hi + d)]
	//   inner = [clamp01(lo + d), clamp01(hi - d))
	//   Y outside outer -> 0; Y inside inner -> 1; otherwise a linear ramp
	//   of width 2*d measured from the nearer outer edge.
	//   p[+4] != 0 (or NaN) inverts the mask (1 - mask).
	.loc	30	50	0
	ld.const.f32 	%f1410, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1299, %f1410, %f6;
	ld.param.f32 	%f1411, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+12];
	ld.param.f32 	%f1313, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+8];
	sub.ftz.f32 	%f1412, %f1411, %f1313;
	ld.const.f32 	%f1413, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1301, %f1413, %f7, %f1299;
	ld.param.f32 	%f1414, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+16];
	mul.ftz.f32 	%f1415, %f1414, %f1412;
	ld.const.f32 	%f1416, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1303, %f1416, %f5, %f1301;
	// %f1424 = upper outer edge, %f1426 = lower outer edge (both clamped).
	add.ftz.f32 	%f1417, %f1415, %f1411;
	sub.ftz.f32 	%f1418, %f1313, %f1415;
	mov.f32 	%f1419, 0f00000000;  	// 0
	max.ftz.f32 	%f1420, %f1417, %f1419;
	mov.f32 	%f1421, 0f00000000;  	// 0
	max.ftz.f32 	%f1422, %f1418, %f1421;
	mov.f32 	%f1423, 0f3f800000;  	// 1
	min.ftz.f32 	%f1424, %f1420, %f1423;
	mov.f32 	%f1425, 0f3f800000;  	// 1
	min.ftz.f32 	%f1426, %f1422, %f1425;
	// Outside the outer band when Y < lower edge or Y >= upper edge.
	set.gt.ftz.u32.f32 	%r778, %f1426, %f1303;
	neg.s32 	%r779, %r778;
	set.le.ftz.u32.f32 	%r780, %f1424, %f1303;
	neg.s32 	%r781, %r780;
	or.b32 	%r782, %r779, %r781;
	mov.u32 	%r783, 0;
	setp.eq.s32 	%p204, %r782, %r783;
	@%p204 bra 	$Lt_50_245762;
	mov.f32 	%f164, 0f00000000;   	// 0
	bra.uni 	$Lt_50_246530;
$Lt_50_245762:
	// %f1431 = lower inner edge, %f1436 = upper inner edge (both clamped).
	add.ftz.f32 	%f1427, %f1415, %f1313;
	mov.f32 	%f1428, 0f00000000;  	// 0
	max.ftz.f32 	%f1429, %f1427, %f1428;
	mov.f32 	%f1430, 0f3f800000;  	// 1
	min.ftz.f32 	%f1431, %f1429, %f1430;
	set.le.ftz.u32.f32 	%r784, %f1431, %f1303;
	neg.s32 	%r785, %r784;
	sub.ftz.f32 	%f1432, %f1411, %f1415;
	mov.f32 	%f1433, 0f00000000;  	// 0
	max.ftz.f32 	%f1434, %f1432, %f1433;
	mov.f32 	%f1435, 0f3f800000;  	// 1
	min.ftz.f32 	%f1436, %f1434, %f1435;
	set.lt.ftz.u32.f32 	%r786, %f1303, %f1436;
	neg.s32 	%r787, %r786;
	and.b32 	%r788, %r785, %r787;
	mov.u32 	%r789, 0;
	setp.eq.s32 	%p205, %r788, %r789;
	@%p205 bra 	$Lt_50_246274;
	mov.f32 	%f164, 0f3f800000;   	// 1
	bra.uni 	$Lt_50_246530;
$Lt_50_246274:
	// Transition zone: ramp width is 2*d.
	add.ftz.f32 	%f1437, %f1415, %f1415;
	setp.gt.ftz.f32 	%p206, %f1431, %f1303;
	@!%p206 bra 	$Lt_50_246786;
	// Lower ramp: (Y - lower outer edge) / (2*d)
	.loc	30	62	0
	sub.ftz.f32 	%f1438, %f1303, %f1426;
	div.approx.ftz.f32 	%f164, %f1438, %f1437;
	bra.uni 	$Lt_50_246530;
$Lt_50_246786:
	// Upper ramp: (upper outer edge - Y) / (2*d)
	.loc	30	66	0
	sub.ftz.f32 	%f1439, %f1424, %f1303;
	div.approx.ftz.f32 	%f164, %f1439, %f1437;
$Lt_50_246530:
$Lt_50_246018:
$Lt_50_245506:
	// Optional inversion, selected by p[+4] != 0.
	.loc	30	69	0
	mov.f32 	%f1440, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1441, %f1440, %f164;
	ld.param.f32 	%f1442, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+4];
	mov.f32 	%f1443, 0f00000000;  	// 0
	setp.neu.ftz.f32 	%p207, %f1442, %f1443;
	selp.f32 	%f164, %f1441, %f164, %p207;
	.loc	30	77	0
	ld.const.f32 	%f1444, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f1444, %f164;
	ld.const.f32 	%f1445, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f1445, %f164;
	ld.const.f32 	%f1446, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f1446, %f164;
	.loc	38	79	0
	bra.uni 	$Lt_50_514;
$Lt_50_2562:
	// paraminFilter3 selector == 8.
	// Region guard on s = p[+44]; the body runs only when
	//   s >= 0 and float(%r6)  <  float(%r14)*s,   or
	//   s <  0 and float(%r12) < -(float(%r13)*s);
	// otherwise the channels are left untouched.
	// NOTE(review): looks like a split-screen style limit on where the
	// adjustment applies -- confirm the meaning of %r6/%r12/%r13/%r14.
	// Body:
	//   1. Y = c[+4]*%f6 + c[+0]*%f7 + c[+8]*%f5, optionally dithered.
	//   2. Y is remapped from input range [p[+8], p[+4]] to output range
	//      [p[+16], p[+12]], with exponent p[+20] when it is not 1.
	//   3. The two remaining kRGB32f_To_601YPbPr rows give a pair (a, b)
	//      that is rotated by angle p[+28], blended toward
	//      (p[+32], p[+36]) with weight p[+40], then scaled by p[+24].
	//   4. (Y', a', b') is multiplied back through k601YPbPr_To_RGB32f.
	.loc	38	80	0
	ld.param.f32 	%f1447, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+44];
	mov.f32 	%f1448, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p208, %f1447, %f1448;
	@!%p208 bra 	$L_50_206850;
	cvt.rn.f32.s32 	%f1449, %r6;
	cvt.rn.f32.s32 	%f1450, %r14;
	mul.ftz.f32 	%f1451, %f1450, %f1447;
	setp.lt.ftz.f32 	%p209, %f1449, %f1451;
	@%p209 bra 	$L_50_206594;
$L_50_206850:
	mov.f32 	%f1452, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p210, %f1447, %f1452;
	@!%p210 bra 	$Lt_50_514;
	cvt.rn.f32.s32 	%f1453, %r12;
	cvt.rn.f32.s32 	%f1454, %r13;
	mul.ftz.f32 	%f1455, %f1454, %f1447;
	neg.ftz.f32 	%f1456, %f1455;
	setp.lt.ftz.f32 	%p211, %f1453, %f1456;
	@!%p211 bra 	$Lt_50_514;
$L_50_206594:
	.loc	31	47	0
	ld.const.f32 	%f1457, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1458, %f1457, %f6;
	ld.const.f32 	%f1459, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1460, %f1459, %f7, %f1458;
	ld.const.f32 	%f1461, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1303, %f1461, %f5, %f1460;
	mov.f32 	%f200, %f1303;
	// %f1463 = input range (p[+4] - p[+8]); %f1466 = output range (p[+12] - p[+16]).
	ld.param.f32 	%f1313, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+8];
	ld.param.f32 	%f1462, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+4];
	sub.ftz.f32 	%f1463, %f1462, %f1313;
	ld.param.f32 	%f1464, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+16];
	ld.param.f32 	%f1465, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+12];
	sub.ftz.f32 	%f1466, %f1465, %f1464;
	// The dither is skipped only when both ranges equal exactly 1.0.
	mov.f32 	%f1467, 0f3f800000;  	// 1
	set.neu.ftz.u32.f32 	%r790, %f1466, %f1467;
	neg.s32 	%r791, %r790;
	mov.f32 	%f1468, 0f3f800000;  	// 1
	set.neu.ftz.u32.f32 	%r792, %f1463, %f1468;
	neg.s32 	%r793, %r792;
	or.b32 	%r794, %r791, %r793;
	mov.u32 	%r795, 0;
	setp.eq.s32 	%p212, %r794, %r795;
	@%p212 bra 	$Lt_50_247042;
	// Integer mixing rounds (subtract / xor / shift) seeded from %r5 and
	// %r11, producing the hash word %r114.
	.loc	20	143	0
	mov.s32 	%r796, 1;
	sub.s32 	%r79, %r796, %r5;
	shr.u32 	%r80, %r11, 13;
	sub.u32 	%r81, %r5, %r11;
	sub.u32 	%r82, %r79, %r11;
	xor.b32 	%r83, %r82, %r80;
	shl.b32 	%r84, %r83, 8;
	sub.u32 	%r85, %r81, %r83;
	sub.u32 	%r86, %r11, %r83;
	xor.b32 	%r87, %r85, %r84;
	shr.u32 	%r88, %r87, 13;
	sub.u32 	%r89, %r86, %r87;
	sub.u32 	%r90, %r83, %r87;
	xor.b32 	%r91, %r89, %r88;
	shr.u32 	%r92, %r91, 12;
	sub.u32 	%r93, %r90, %r91;
	xor.b32 	%r94, %r93, %r92;
	sub.u32 	%r797, %r87, %r91;
	sub.u32 	%r96, %r797, %r94;
	shl.b32 	%r798, %r94, 16;
	xor.b32 	%r98, %r96, %r798;
	.loc	20	144	0
	sub.u32 	%r99, %r91, %r94;
	sub.u32 	%r100, %r99, %r98;
	shr.u32 	%r799, %r98, 5;
	xor.b32 	%r102, %r100, %r799;
	.loc	20	145	0
	sub.u32 	%r103, %r94, %r98;
	sub.u32 	%r104, %r103, %r102;
	shr.u32 	%r800, %r102, 3;
	xor.b32 	%r106, %r104, %r800;
	.loc	20	146	0
	sub.u32 	%r107, %r98, %r102;
	sub.u32 	%r108, %r107, %r106;
	shl.b32 	%r801, %r106, 10;
	xor.b32 	%r110, %r108, %r801;
	.loc	20	147	0
	sub.u32 	%r111, %r102, %r106;
	sub.u32 	%r112, %r111, %r110;
	shr.u32 	%r802, %r110, 15;
	xor.b32 	%r114, %r112, %r802;
	// Two multiply-add steps on the hash each contribute 8 bits, giving a
	// 15-bit value r in [0, 32767]; Y += 0.00254902 * (r/16383.5 - 1).
	.loc	31	57	0
	mov.f32 	%f1469, 0f3b270d73;  	// 0.00254902
	mul.lo.u32 	%r803, %r114, 1103515245;
	add.u32 	%r804, %r803, 12345;
	shr.u32 	%r805, %r804, 16;
	and.b32 	%r806, %r805, 255;
	shl.b32 	%r807, %r806, 7;
	mul.lo.u32 	%r808, %r114, -1029531031;
	sub.u32 	%r809, %r808, 740551042;
	shr.u32 	%r810, %r809, 16;
	and.b32 	%r811, %r810, 255;
	xor.b32 	%r812, %r807, %r811;
	cvt.rn.f32.s32 	%f1470, %r812;
	mov.f32 	%f1471, 0f467ffe00;  	// 16383.5
	div.approx.ftz.f32 	%f1472, %f1470, %f1471;
	mov.f32 	%f1473, 0fbf800000;  	// -1
	add.ftz.f32 	%f1474, %f1472, %f1473;
	fma.rn.ftz.f32 	%f200, %f1469, %f1474, %f1303;
$Lt_50_247042:
	// %f1475 = Y - input low bound.
	sub.ftz.f32 	%f1475, %f200, %f1313;
	ld.param.f32 	%f1476, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+20];
	mov.f32 	%f1477, 0f3f800000;  	// 1
	setp.neu.ftz.f32 	%p213, %f1476, %f1477;
	@!%p213 bra 	$Lt_50_247810;
	mov.f32 	%f1478, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p214, %f1475, %f1478;
	@!%p214 bra 	$Lt_50_248322;
	// Below the input low bound: use the output low bound directly, which
	// avoids taking lg2 of a negative value.
	.loc	31	66	0
	mov.f32 	%f200, %f1464;
	bra.uni 	$Lt_50_247554;
$Lt_50_248322:
	// Y' = outRange * ((Y - inLow)/inRange)^p[+20] + outLow, via lg2/ex2.
	.loc	31	70	0
	rcp.approx.ftz.f32 	%f1479, %f1463;
	mul.ftz.f32 	%f1480, %f1475, %f1479;
	lg2.approx.ftz.f32 	%f1481, %f1480;
	mul.ftz.f32 	%f1482, %f1476, %f1481;
	ex2.approx.ftz.f32 	%f1483, %f1482;
	fma.rn.ftz.f32 	%f200, %f1466, %f1483, %f1464;
	bra.uni 	$Lt_50_247554;
$Lt_50_247810:
	// Exponent == 1: plain linear remap.
	.loc	31	77	0
	rcp.approx.ftz.f32 	%f1484, %f1463;
	mul.ftz.f32 	%f1485, %f1475, %f1484;
	fma.rn.ftz.f32 	%f200, %f1466, %f1485, %f1464;
$Lt_50_247554:
	// a = %f1500 (coefficients +12/+16/+20), b = %f1498 (+24/+28/+32).
	// Rotation: a' = a*cos - b*sin, b' = a*sin + b*cos.
	.loc	22	267	0
	ld.param.f32 	%f1486, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+28];
	sin.approx.ftz.f32 	%f1487, %f1486;
	ld.const.f32 	%f1488, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f1489, %f1488, %f6;
	ld.const.f32 	%f1490, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f1491, %f1490, %f6;
	cos.approx.ftz.f32 	%f1492, %f1486;
	ld.const.f32 	%f1493, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f1494, %f1493, %f7, %f1489;
	ld.const.f32 	%f1495, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f1496, %f1495, %f7, %f1491;
	ld.const.f32 	%f1497, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f1498, %f1497, %f5, %f1494;
	ld.const.f32 	%f1499, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f1500, %f1499, %f5, %f1496;
	mul.ftz.f32 	%f1501, %f1487, %f1498;
	mul.ftz.f32 	%f1502, %f1492, %f1498;
	mul.ftz.f32 	%f1503, %f1500, %f1492;
	sub.ftz.f32 	%f1504, %f1503, %f1501;
	fma.rn.ftz.f32 	%f1505, %f1500, %f1487, %f1502;
	// Blend toward (p[+32], p[+36]) with weight p[+40], then scale by p[+24].
	ld.param.f32 	%f1506, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+32];
	sub.ftz.f32 	%f1507, %f1506, %f1504;
	ld.param.f32 	%f1508, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+36];
	sub.ftz.f32 	%f1509, %f1508, %f1505;
	ld.param.f32 	%f1510, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+40];
	fma.rn.ftz.f32 	%f1511, %f1510, %f1507, %f1504;
	fma.rn.ftz.f32 	%f1512, %f1510, %f1509, %f1505;
	ld.param.f32 	%f1513, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+24];
	mul.ftz.f32 	%f1514, %f1511, %f1513;
	mul.ftz.f32 	%f1515, %f1512, %f1513;
	// Back-conversion: %f6 uses k[+12..+20], %f7 uses k[+0..+8],
	// %f5 uses k[+24..+32], each applied to (Y', a', b').
	ld.const.f32 	%f1516, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f1517, %f1516, %f1514;
	ld.const.f32 	%f1518, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f1519, %f1518, %f200, %f1517;
	ld.const.f32 	%f1520, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f260, %f1520, %f1515, %f1519;
	.loc	22	268	0
	ld.const.f32 	%f1521, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f1522, %f1521, %f1514;
	ld.const.f32 	%f1523, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f1524, %f1523, %f200, %f1522;
	ld.const.f32 	%f1525, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f266, %f1525, %f1515, %f1524;
	.loc	31	92	0
	ld.const.f32 	%f1526, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f1527, %f1526, %f1514;
	ld.const.f32 	%f1528, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f1529, %f1528, %f200, %f1527;
	ld.const.f32 	%f1530, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f1530, %f1515, %f1529;
	mov.f32 	%f6, %f260;
	mov.f32 	%f7, %f266;
	bra.uni 	$Lt_50_514;
$Lt_50_2818:
	// paraminFilter3 selector == 9.
	// Region guard on s = p[+44]; the body runs only when
	//   s >= 0 and float(%r6)  <  float(%r14)*s,   or
	//   s <  0 and float(%r12) < -(float(%r13)*s);
	// otherwise the channels are left untouched.
	// Body: Y = c[+4]*%f6 + c[+0]*%f7 + c[+8]*%f5 is remapped from input
	// range [p[+8], p[+4]] to output range [p[+16], p[+12]] (exponent
	// p[+20] when it is not 1), and the result is written to %f5/%f6/%f7
	// scaled by k601YPbPr_To_RGB32f[+24/+12/+0].
	.loc	38	83	0
	ld.param.f32 	%f1531, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+44];
	mov.f32 	%f1532, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p215, %f1531, %f1532;
	@!%p215 bra 	$L_50_207618;
	cvt.rn.f32.s32 	%f1533, %r6;
	cvt.rn.f32.s32 	%f1534, %r14;
	mul.ftz.f32 	%f1535, %f1534, %f1531;
	setp.lt.ftz.f32 	%p216, %f1533, %f1535;
	@%p216 bra 	$L_50_207362;
$L_50_207618:
	mov.f32 	%f1536, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p217, %f1531, %f1536;
	@!%p217 bra 	$Lt_50_514;
	cvt.rn.f32.s32 	%f1537, %r12;
	cvt.rn.f32.s32 	%f1538, %r13;
	mul.ftz.f32 	%f1539, %f1538, %f1531;
	neg.ftz.f32 	%f1540, %f1539;
	setp.lt.ftz.f32 	%p218, %f1537, %f1540;
	@!%p218 bra 	$Lt_50_514;
$L_50_207362:
	// %f1543 = input range, %f1546 = output range, %f1549 = Y - input low bound.
	.loc	31	110	0
	ld.const.f32 	%f1541, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1299, %f1541, %f6;
	ld.param.f32 	%f1313, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+8];
	ld.param.f32 	%f1542, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+4];
	sub.ftz.f32 	%f1543, %f1542, %f1313;
	ld.param.f32 	%f1544, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+16];
	ld.param.f32 	%f1545, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+12];
	sub.ftz.f32 	%f1546, %f1545, %f1544;
	ld.const.f32 	%f1547, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1301, %f1547, %f7, %f1299;
	ld.const.f32 	%f1548, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1303, %f1548, %f5, %f1301;
	sub.ftz.f32 	%f1549, %f1303, %f1313;
	ld.param.f32 	%f1550, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+20];
	mov.f32 	%f1551, 0f3f800000;  	// 1
	setp.neu.ftz.f32 	%p219, %f1550, %f1551;
	@!%p219 bra 	$Lt_50_248834;
	// Exponent != 1: the base is clamped to >= 0 before lg2, then
	// Y' = outRange * (max(Y - inLow, 0)/inRange)^p[+20] + outLow.
	.loc	42	523	0
	mov.f32 	%f1552, 0f00000000;  	// 0
	max.ftz.f32 	%f1553, %f1549, %f1552;
	div.approx.ftz.f32 	%f1554, %f1553, %f1543;
	lg2.approx.ftz.f32 	%f1555, %f1554;
	mul.ftz.f32 	%f1556, %f1550, %f1555;
	ex2.approx.ftz.f32 	%f298, %f1556;
	.loc	31	120	0
	fma.rn.ftz.f32 	%f299, %f1546, %f298, %f1544;
	bra.uni 	$Lt_50_248578;
$Lt_50_248834:
	// Exponent == 1: linear remap with no clamp on the numerator.
	.loc	31	129	0
	div.approx.ftz.f32 	%f1557, %f1549, %f1543;
	fma.rn.ftz.f32 	%f299, %f1546, %f1557, %f1544;
$Lt_50_248578:
	.loc	31	135	0
	ld.const.f32 	%f1558, [k601YPbPr_To_RGB32f+24];
	mul.ftz.f32 	%f5, %f1558, %f299;
	ld.const.f32 	%f1559, [k601YPbPr_To_RGB32f+12];
	mul.ftz.f32 	%f6, %f1559, %f299;
	ld.const.f32 	%f1560, [k601YPbPr_To_RGB32f+0];
	mul.ftz.f32 	%f7, %f1560, %f299;
	bra.uni 	$Lt_50_514;
$Lt_50_3074:
	// paraminFilter3 selector == 10.
	// Region guard on s = p[+44]; the body runs only when
	//   s >= 0 and float(%r6)  <  float(%r14)*s,   or
	//   s <  0 and float(%r12) < -(float(%r13)*s);
	// otherwise the channels are left untouched.
	// Body: overwrites %f5/%f6/%f7 with the constants
	// k601YPbPr_To_RGB32f[+24], [+12] and [+0].
	.loc	38	86	0
	ld.param.f32 	%f1561, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+44];
	mov.f32 	%f1562, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p220, %f1561, %f1562;
	@!%p220 bra 	$L_50_208386;
	cvt.rn.f32.s32 	%f1563, %r6;
	cvt.rn.f32.s32 	%f1564, %r14;
	mul.ftz.f32 	%f1565, %f1564, %f1561;
	setp.lt.ftz.f32 	%p221, %f1563, %f1565;
	@%p221 bra 	$L_50_208130;
$L_50_208386:
	mov.f32 	%f1566, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p222, %f1561, %f1566;
	@!%p222 bra 	$Lt_50_514;
	cvt.rn.f32.s32 	%f1567, %r12;
	cvt.rn.f32.s32 	%f1568, %r13;
	mul.ftz.f32 	%f1569, %f1568, %f1561;
	neg.ftz.f32 	%f1570, %f1569;
	setp.lt.ftz.f32 	%p223, %f1567, %f1570;
	@!%p223 bra 	$Lt_50_514;
$L_50_208130:
	.loc	31	160	0
	ld.const.f32 	%f5, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f6, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f7, [k601YPbPr_To_RGB32f+0];
	bra.uni 	$Lt_50_514;
$Lt_50_3330:
	// paraminFilter3 selector == 11.
	// Sign-preserving power applied to %f5, %f6, %f7 and %f9 with
	// exponent g = p[+4]:
	//   x >= 0 :  x' =  ex2(g * lg2( x))
	//   else   :  x' = -ex2(g * lg2(-x))
	// Results are staged in temporaries and committed together at the end.
	.loc	32	42	0
	ld.param.f32 	%f1571, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+4];
	mov.f32 	%f1572, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p224, %f5, %f1572;
	@!%p224 bra 	$Lt_50_249346;
	.loc	22	292	0
	mov.f32 	%f1288, %f1571;
	lg2.approx.ftz.f32 	%f1573, %f5;
	mul.ftz.f32 	%f1574, %f1288, %f1573;
	ex2.approx.ftz.f32 	%f1575, %f1574;
	bra.uni 	$Lt_50_249090;
$Lt_50_249346:
	// Negative (or NaN) input: operate on -x and negate the result.
	mov.f32 	%f1288, %f1571;
	neg.ftz.f32 	%f1576, %f5;
	lg2.approx.ftz.f32 	%f1577, %f1576;
	mul.ftz.f32 	%f1578, %f1288, %f1577;
	ex2.approx.ftz.f32 	%f1579, %f1578;
	neg.ftz.f32 	%f1575, %f1579;
$Lt_50_249090:
	mov.f32 	%f1580, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p225, %f6, %f1580;
	@!%p225 bra 	$Lt_50_249858;
	.loc	22	293	0
	lg2.approx.ftz.f32 	%f1581, %f6;
	mul.ftz.f32 	%f1582, %f1288, %f1581;
	ex2.approx.ftz.f32 	%f1583, %f1582;
	bra.uni 	$Lt_50_249602;
$Lt_50_249858:
	neg.ftz.f32 	%f1584, %f6;
	lg2.approx.ftz.f32 	%f1585, %f1584;
	mul.ftz.f32 	%f1586, %f1288, %f1585;
	ex2.approx.ftz.f32 	%f1587, %f1586;
	neg.ftz.f32 	%f1583, %f1587;
$Lt_50_249602:
	mov.f32 	%f1588, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p226, %f7, %f1588;
	@!%p226 bra 	$Lt_50_250370;
	.loc	22	294	0
	lg2.approx.ftz.f32 	%f1589, %f7;
	mul.ftz.f32 	%f1590, %f1288, %f1589;
	ex2.approx.ftz.f32 	%f1591, %f1590;
	bra.uni 	$Lt_50_250114;
$Lt_50_250370:
	neg.ftz.f32 	%f1592, %f7;
	lg2.approx.ftz.f32 	%f1593, %f1592;
	mul.ftz.f32 	%f1594, %f1288, %f1593;
	ex2.approx.ftz.f32 	%f1595, %f1594;
	neg.ftz.f32 	%f1591, %f1595;
$Lt_50_250114:
	mov.f32 	%f1596, 0f00000000;  	// 0
	setp.ge.ftz.f32 	%p227, %f9, %f1596;
	@!%p227 bra 	$Lt_50_250882;
	.loc	22	295	0
	lg2.approx.ftz.f32 	%f1597, %f9;
	mul.ftz.f32 	%f1598, %f1288, %f1597;
	ex2.approx.ftz.f32 	%f1599, %f1598;
	bra.uni 	$Lt_50_250626;
$Lt_50_250882:
	neg.ftz.f32 	%f1600, %f9;
	lg2.approx.ftz.f32 	%f1601, %f1600;
	mul.ftz.f32 	%f1602, %f1288, %f1601;
	ex2.approx.ftz.f32 	%f1603, %f1602;
	neg.ftz.f32 	%f1599, %f1603;
$Lt_50_250626:
	// Commit all four results.
	.loc	32	42	0
	mov.f32 	%f5, %f1575;
	mov.f32 	%f6, %f1583;
	mov.f32 	%f7, %f1591;
	mov.f32 	%f9, %f1599;
	.loc	38	91	0
	bra.uni 	$Lt_50_514;
$Lt_50_3586:
	// paraminFilter3 selector == 12.
	// %r6 = (%r14 - %r6) - 1; no channel value is touched here.
	// NOTE(review): looks like mirroring a coordinate within an extent
	// held in %r14 -- confirm against the register setup earlier in the kernel.
	.loc	33	41	0
	sub.s32 	%r813, %r14, %r6;
	sub.s32 	%r6, %r813, 1;
	.loc	38	94	0
	bra.uni 	$Lt_50_514;
$Lt_50_3842:
	.loc	38	96	0
	ld.param.f32 	%f1288, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+4];
	mov.f32 	%f1604, 0f3f000000;  	// 0.5
	mul.ftz.f32 	%f1605, %f1288, %f1604;
	ld.param.f32 	%f1606, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+16];
	ld.param.f32 	%f1607, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+8];
	mov.f32 	%f1608, 0f3f800000;  	// 1
	setp.eq.ftz.f32 	%p228, %f1607, %f1608;
	@!%p228 bra 	$Lt_50_251394;
	.loc	20	143	0
	cvt.rzi.ftz.u32.f32 	%r814, %f1606;
	mul.lo.s32 	%r126, %r6, 3;
	shr.u32 	%r815, %r814, 13;
	sub.u32 	%r816, %r12, %r814;
	sub.u32 	%r128, %r126, %r12;
	sub.u32 	%r817, %r128, %r814;
	xor.b32 	%r818, %r817, %r815;
	shl.b32 	%r819, %r818, 8;
	sub.u32 	%r820, %r816, %r818;
	sub.u32 	%r821, %r814, %r818;
	xor.b32 	%r822, %r819, %r820;
	shr.u32 	%r823, %r822, 13;
	sub.u32 	%r824, %r821, %r822;
	sub.u32 	%r825, %r818, %r822;
	xor.b32 	%r826, %r823, %r824;
	shr.u32 	%r827, %r826, 12;
	sub.u32 	%r828, %r825, %r826;
	xor.b32 	%r829, %r827, %r828;
	shl.b32 	%r830, %r829, 16;
	sub.u32 	%r831, %r822, %r826;
	sub.u32 	%r832, %r831, %r829;
	xor.b32 	%r833, %r830, %r832;
	.loc	20	144	0
	sub.u32 	%r834, %r826, %r829;
	sub.u32 	%r835, %r834, %r833;
	shr.u32 	%r836, %r833, 5;
	xor.b32 	%r837, %r835, %r836;
	.loc	20	145	0
	sub.u32 	%r838, %r829, %r833;
	sub.u32 	%r839, %r838, %r837;
	shr.u32 	%r840, %r837, 3;
	xor.b32 	%r155, %r839, %r840;
	.loc	20	146	0
	sub.u32 	%r841, %r833, %r837;
	sub.u32 	%r842, %r841, %r155;
	shl.b32 	%r843, %r155, 10;
	xor.b32 	%r159, %r842, %r843;
	.loc	20	147	0
	sub.u32 	%r844, %r837, %r155;
	sub.u32 	%r845, %r844, %r159;
	shr.u32 	%r846, %r159, 15;
	xor.b32 	%r163, %r845, %r846;
	.loc	34	48	0
	mul.lo.u32 	%r847, %r163, 1103515245;
	add.u32 	%r848, %r847, 12345;
	shr.u32 	%r849, %r848, 16;
	and.b32 	%r850, %r849, 255;
	shl.b32 	%r851, %r850, 7;
	mul.lo.u32 	%r852, %r163, -1029531031;
	sub.u32 	%r853, %r852, 740551042;
	shr.u32 	%r854, %r853, 16;
	and.b32 	%r855, %r854, 255;
	xor.b32 	%r856, %r851, %r855;
	cvt.rn.f32.s32 	%f1609, %r856;
	mov.f32 	%f1610, 0f46fffe00;  	// 32767
	div.approx.ftz.f32 	%f1611, %f1609, %f1610;
	mul.ftz.f32 	%f1612, %f1288, %f1611;
	sub.ftz.f32 	%f356, %f1612, %f1605;
	.loc	20	143	0
	add.u32 	%r857, %r817, 1;
	xor.b32 	%r858, %r857, %r815;
	shl.b32 	%r859, %r858, 8;
	sub.u32 	%r860, %r816, %r858;
	sub.u32 	%r861, %r814, %r858;
	xor.b32 	%r862, %r859, %r860;
	shr.u32 	%r863, %r862, 13;
	sub.u32 	%r864, %r861, %r862;
	sub.u32 	%r865, %r858, %r862;
	xor.b32 	%r866, %r863, %r864;
	shr.u32 	%r867, %r866, 12;
	sub.u32 	%r868, %r865, %r866;
	xor.b32 	%r869, %r867, %r868;
	sub.u32 	%r870, %r862, %r866;
	sub.u32 	%r871, %r870, %r869;
	shl.b32 	%r872, %r869, 16;
	xor.b32 	%r873, %r871, %r872;
	.loc	20	144	0
	sub.u32 	%r874, %r866, %r869;
	sub.u32 	%r875, %r874, %r873;
	shr.u32 	%r876, %r873, 5;
	xor.b32 	%r877, %r875, %r876;
	.loc	20	145	0
	sub.u32 	%r878, %r869, %r873;
	sub.u32 	%r879, %r878, %r877;
	shr.u32 	%r880, %r877, 3;
	xor.b32 	%r198, %r879, %r880;
	.loc	20	146	0
	sub.u32 	%r881, %r873, %r877;
	sub.u32 	%r882, %r881, %r198;
	shl.b32 	%r883, %r198, 10;
	xor.b32 	%r202, %r882, %r883;
	.loc	20	147	0
	sub.u32 	%r884, %r877, %r198;
	sub.u32 	%r885, %r884, %r202;
	shr.u32 	%r886, %r202, 15;
	xor.b32 	%r206, %r885, %r886;
	.loc	34	49	0
	mul.lo.u32 	%r887, %r206, 1103515245;
	add.u32 	%r888, %r887, 12345;
	shr.u32 	%r889, %r888, 16;
	and.b32 	%r890, %r889, 255;
	shl.b32 	%r891, %r890, 7;
	mul.lo.u32 	%r892, %r206, -1029531031;
	sub.u32 	%r893, %r892, 740551042;
	shr.u32 	%r894, %r893, 16;
	and.b32 	%r895, %r894, 255;
	xor.b32 	%r896, %r891, %r895;
	cvt.rn.f32.s32 	%f1613, %r896;
	mov.f32 	%f1614, 0f46fffe00;  	// 32767
	div.approx.ftz.f32 	%f1615, %f1613, %f1614;
	mul.ftz.f32 	%f1616, %f1288, %f1615;
	sub.ftz.f32 	%f361, %f1616, %f1605;
	.loc	20	143	0
	add.u32 	%r897, %r817, 2;
	xor.b32 	%r898, %r897, %r815;
	shl.b32 	%r899, %r898, 8;
	sub.u32 	%r900, %r816, %r898;
	sub.u32 	%r901, %r814, %r898;
	xor.b32 	%r902, %r899, %r900;
	shr.u32 	%r903, %r902, 13;
	sub.u32 	%r904, %r901, %r902;
	sub.u32 	%r905, %r898, %r902;
	xor.b32 	%r906, %r903, %r904;
	shr.u32 	%r907, %r906, 12;
	sub.u32 	%r908, %r905, %r906;
	xor.b32 	%r909, %r907, %r908;
	sub.u32 	%r910, %r902, %r906;
	sub.u32 	%r911, %r910, %r909;
	shl.b32 	%r912, %r909, 16;
	xor.b32 	%r913, %r911, %r912;
	.loc	20	144	0
	sub.u32 	%r914, %r906, %r909;
	sub.u32 	%r915, %r914, %r913;
	shr.u32 	%r916, %r913, 5;
	xor.b32 	%r917, %r915, %r916;
	.loc	20	145	0
	sub.u32 	%r918, %r909, %r913;
	sub.u32 	%r919, %r918, %r917;
	shr.u32 	%r920, %r917, 3;
	xor.b32 	%r241, %r919, %r920;
	.loc	20	146	0
	sub.u32 	%r921, %r913, %r917;
	sub.u32 	%r922, %r921, %r241;
	shl.b32 	%r923, %r241, 10;
	xor.b32 	%r245, %r922, %r923;
	.loc	20	147	0
	sub.u32 	%r924, %r917, %r241;
	sub.u32 	%r925, %r924, %r245;
	shr.u32 	%r926, %r245, 15;
	xor.b32 	%r249, %r925, %r926;
	.loc	34	50	0
	mul.lo.u32 	%r927, %r249, 1103515245;
	add.u32 	%r928, %r927, 12345;
	shr.u32 	%r929, %r928, 16;
	and.b32 	%r930, %r929, 255;
	shl.b32 	%r931, %r930, 7;
	mul.lo.u32 	%r932, %r249, -1029531031;
	sub.u32 	%r933, %r932, 740551042;
	shr.u32 	%r934, %r933, 16;
	and.b32 	%r935, %r934, 255;
	xor.b32 	%r936, %r931, %r935;
	cvt.rn.f32.s32 	%f1617, %r936;
	mov.f32 	%f1618, 0f46fffe00;  	// 32767
	div.approx.ftz.f32 	%f1619, %f1617, %f1618;
	mul.ftz.f32 	%f1620, %f1288, %f1619;
	sub.ftz.f32 	%f366, %f1620, %f1605;
	bra.uni 	$Lt_50_251138;
$Lt_50_251394:
	.loc	20	143	0
	cvt.rzi.ftz.u32.f32 	%r937, %f1606;
	sub.u32 	%r494, %r6, %r12;
	shr.u32 	%r938, %r937, 13;
	sub.u32 	%r939, %r12, %r937;
	sub.u32 	%r940, %r494, %r937;
	xor.b32 	%r941, %r940, %r938;
	shl.b32 	%r942, %r941, 8;
	sub.u32 	%r943, %r939, %r941;
	sub.u32 	%r944, %r937, %r941;
	xor.b32 	%r945, %r942, %r943;
	shr.u32 	%r946, %r945, 13;
	sub.u32 	%r947, %r944, %r945;
	sub.u32 	%r948, %r941, %r945;
	xor.b32 	%r949, %r946, %r947;
	shr.u32 	%r950, %r949, 12;
	sub.u32 	%r951, %r948, %r949;
	xor.b32 	%r952, %r950, %r951;
	shl.b32 	%r953, %r952, 16;
	sub.u32 	%r954, %r945, %r949;
	sub.u32 	%r955, %r954, %r952;
	xor.b32 	%r956, %r953, %r955;
	.loc	20	144	0
	sub.u32 	%r957, %r949, %r952;
	sub.u32 	%r958, %r957, %r956;
	shr.u32 	%r959, %r956, 5;
	xor.b32 	%r960, %r958, %r959;
	.loc	20	145	0
	sub.u32 	%r961, %r952, %r956;
	sub.u32 	%r962, %r961, %r960;
	shr.u32 	%r963, %r960, 3;
	xor.b32 	%r287, %r962, %r963;
	.loc	20	146	0
	sub.u32 	%r964, %r956, %r960;
	sub.u32 	%r965, %r964, %r287;
	shl.b32 	%r966, %r287, 10;
	xor.b32 	%r291, %r965, %r966;
	.loc	20	147	0
	sub.u32 	%r967, %r960, %r287;
	sub.u32 	%r968, %r967, %r291;
	shr.u32 	%r969, %r291, 15;
	xor.b32 	%r295, %r968, %r969;
	.loc	34	54	0
	mul.lo.u32 	%r296, %r295, 1103515245;
	mul.lo.u32 	%r297, %r295, -1029531031;
	add.u32 	%r298, %r296, 12345;
	sub.u32 	%r299, %r297, 740551042;
	shr.u32 	%r300, %r298, 16;
	shr.u32 	%r301, %r299, 16;
	and.b32 	%r302, %r300, 255;
	and.b32 	%r303, %r301, 255;
	shl.b32 	%r304, %r302, 7;
	xor.b32 	%r305, %r304, %r303;
	cvt.rn.f32.s32 	%f367, %r305;
	mov.f32 	%f1621, 0f46fffe00;  	// 32767
	div.approx.ftz.f32 	%f369, %f367, %f1621;
	mul.ftz.f32 	%f1622, %f1288, %f369;
	sub.ftz.f32 	%f1623, %f1622, %f1605;
	mov.f32 	%f366, %f1623;
	mov.f32 	%f361, %f1623;
	mov.f32 	%f356, %f1623;
$Lt_50_251138:
	.loc	34	57	0
	add.ftz.f32 	%f7, %f356, %f7;
	.loc	34	58	0
	add.ftz.f32 	%f6, %f361, %f6;
	.loc	34	59	0
	add.ftz.f32 	%f5, %f5, %f366;
	ld.param.f32 	%f1624, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+12];
	mov.f32 	%f1625, 0f3f800000;  	// 1
	setp.eq.ftz.f32 	%p229, %f1624, %f1625;
	@!%p229 bra 	$Lt_50_514;
	.loc	34	63	0
	mov.f32 	%f1626, 0f00000000;  	// 0
	max.ftz.f32 	%f1627, %f5, %f1626;
	mov.f32 	%f1628, 0f3f800000;  	// 1
	min.ftz.f32 	%f5, %f1627, %f1628;
	mov.f32 	%f1629, 0f00000000;  	// 0
	max.ftz.f32 	%f1630, %f6, %f1629;
	mov.f32 	%f1631, 0f3f800000;  	// 1
	min.ftz.f32 	%f6, %f1630, %f1631;
	mov.f32 	%f1632, 0f00000000;  	// 0
	max.ftz.f32 	%f1633, %f7, %f1632;
	mov.f32 	%f1634, 0f3f800000;  	// 1
	min.ftz.f32 	%f7, %f1633, %f1634;
	mov.f32 	%f1635, 0f00000000;  	// 0
	max.ftz.f32 	%f1636, %f9, %f1635;
	mov.f32 	%f1637, 0f3f800000;  	// 1
	min.ftz.f32 	%f9, %f1636, %f1637;
	bra.uni 	$Lt_50_514;
$Lt_50_4098:
	.loc	38	99	0
	cvt.rn.f32.s32 	%f1638, %r6;
	cvt.rn.f32.s32 	%f1639, %r14;
	ld.param.f32 	%f1640, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+20];
	mul.ftz.f32 	%f1641, %f1639, %f1640;
	setp.lt.ftz.f32 	%p230, %f1638, %f1641;
	@!%p230 bra 	$Lt_50_514;
	.loc	22	267	0
	ld.const.f32 	%f1642, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1299, %f1642, %f6;
	ld.const.f32 	%f1643, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f1644, %f1643, %f6;
	ld.const.f32 	%f1645, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f1646, %f1645, %f6;
	ld.const.f32 	%f1647, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1301, %f1647, %f7, %f1299;
	ld.const.f32 	%f1648, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f1649, %f1648, %f7, %f1644;
	ld.const.f32 	%f1650, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f1651, %f1650, %f7, %f1646;
	ld.const.f32 	%f1652, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1303, %f1652, %f5, %f1301;
	ld.const.f32 	%f1653, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f1654, %f1653, %f5, %f1649;
	ld.const.f32 	%f1655, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f1656, %f1655, %f5, %f1651;
	ld.param.f32 	%f1657, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+4];
	ld.param.f32 	%f1658, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+8];
	fma.rn.ftz.f32 	%f1659, %f1303, %f1658, %f1657;
	ld.param.f32 	%f1660, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+16];
	mul.ftz.f32 	%f1661, %f1654, %f1660;
	ld.param.f32 	%f1662, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+12];
	mul.ftz.f32 	%f1663, %f1654, %f1662;
	mul.ftz.f32 	%f1664, %f1656, %f1662;
	sub.ftz.f32 	%f1665, %f1664, %f1661;
	fma.rn.ftz.f32 	%f1666, %f1656, %f1660, %f1663;
	ld.const.f32 	%f1667, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f1668, %f1667, %f1665;
	ld.const.f32 	%f1669, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f1670, %f1669, %f1659, %f1668;
	ld.const.f32 	%f1671, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f420, %f1671, %f1666, %f1670;
	.loc	22	268	0
	ld.const.f32 	%f1672, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f1673, %f1672, %f1665;
	ld.const.f32 	%f1674, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f1675, %f1674, %f1659, %f1673;
	ld.const.f32 	%f1676, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f426, %f1676, %f1666, %f1675;
	.loc	35	56	0
	ld.const.f32 	%f1677, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f1678, %f1677, %f1665;
	ld.const.f32 	%f1679, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f1680, %f1679, %f1659, %f1678;
	ld.const.f32 	%f1681, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f5, %f1681, %f1666, %f1680;
	mov.f32 	%f6, %f420;
	mov.f32 	%f7, %f426;
	bra.uni 	$Lt_50_514;
$Lt_50_4354:
	.loc	36	46	0
	ld.const.f32 	%f1682, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1299, %f1682, %f6;
	ld.const.f32 	%f1683, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1301, %f1683, %f7, %f1299;
	ld.const.f32 	%f1684, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1303, %f1684, %f5, %f1301;
	ld.param.f32 	%f1313, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+8];
	ld.param.f32 	%f1685, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+20];
	sub.ftz.f32 	%f1686, %f1685, %f1313;
	fma.rn.ftz.f32 	%f437, %f1303, %f1686, %f1313;
	.loc	36	47	0
	ld.param.f32 	%f1687, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+12];
	ld.param.f32 	%f1688, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+24];
	sub.ftz.f32 	%f1689, %f1688, %f1687;
	fma.rn.ftz.f32 	%f441, %f1303, %f1689, %f1687;
	.loc	36	49	0
	ld.param.f32 	%f1690, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+28];
	ld.param.f32 	%f1288, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+4];
	ld.param.f32 	%f1691, [__cudaparm_PointwiseFilterHostKernel4___val_paraminFilter3+16];
	sub.ftz.f32 	%f1692, %f1691, %f1288;
	fma.rn.ftz.f32 	%f1693, %f1303, %f1692, %f1288;
	sub.ftz.f32 	%f1694, %f1693, %f5;
	fma.rn.ftz.f32 	%f5, %f1690, %f1694, %f5;
	.loc	36	50	0
	sub.ftz.f32 	%f1695, %f437, %f6;
	fma.rn.ftz.f32 	%f6, %f1690, %f1695, %f6;
	.loc	36	51	0
	sub.ftz.f32 	%f1696, %f441, %f7;
	fma.rn.ftz.f32 	%f7, %f1690, %f1696, %f7;
	.loc	38	103	0
	bra.uni 	$Lt_50_514;
$Lt_50_4610:
	.loc	37	41	0
	sub.s32 	%r970, %r13, %r12;
	sub.s32 	%r12, %r970, 1;
$Lt_50_514:
	.loc	38	209	0
	ld.param.s32 	%r971, [__cudaparm_PointwiseFilterHostKernel4_inDestPitch];
	mul.lo.s32 	%r972, %r971, %r12;
	add.s32 	%r973, %r6, %r972;
	cvt.s64.s32 	%rd8, %r973;
	ld.param.u64 	%rd9, [__cudaparm_PointwiseFilterHostKernel4_inDestImage];
	@!%p2 bra 	$Lt_50_252930;
	.loc	20	126	0
	mul.lo.u64 	%rd10, %rd8, 8;
	add.u64 	%rd11, %rd9, %rd10;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f5;
	mov.b32		%r974, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f6;
	mov.b32		%r975, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f7;
	mov.b32		%r976, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r977, %b1; }
	st.global.v4.u16 	[%rd11+0], {%r974,%r975,%r976,%r977};
	.loc	38	211	0
	bra.uni 	$Lt_50_252674;
$Lt_50_252930:
	.loc	20	126	0
	mul.lo.u64 	%rd12, %rd8, 16;
	add.u64 	%rd13, %rd9, %rd12;
	st.global.v4.f32 	[%rd13+0], {%f5,%f6,%f7,%f9};
$Lt_50_252674:
$Lt_50_208642:
	.loc	38	213	0
	exit;
$LDWend_PointwiseFilterHostKernel4:
	} // PointwiseFilterHostKernel4
	// ---------------------------------------------------------------------
	// 601 YCbCr conversion tables.
	// Each table is a 36-byte, 4-byte-aligned blob in the .const state space,
	// initialised byte by byte.
	// NOTE(review): 36 bytes = 9 x 4, and the sibling tables kRGB32f_To_601YPbPr /
	// k601YPbPr_To_RGB32f are read by PointwiseFilterHostKernel4 with ld.const.f32
	// at offsets 0,4,...,32 in groups of three, so these are presumably also 3x3
	// matrices of little-endian f32 stored row by row -- confirm against the
	// CUDA source that generated this PTX.
	// NOTE(review): "601" presumably refers to ITU-R BT.601; "32f"/"8u" presumably
	// name the RGB sample range the matrix expects or produces -- confirm.
	// ---------------------------------------------------------------------
	// RGB (32f) -> 601 YCbCr.
	.const .align 4 .b8 kRGB32f_To_601YCbCr[36] = {70,246,130,66,145,141,0,67,94,186,199,65,33,48,23,194,240,103,148,194,0,0,224,66,0,0,224,66,111,146,187,194,70,182,145,193};
	// 601 YCbCr -> RGB (32f).
	.const .align 4 .b8 k601YCbCr_To_RGB32f[36] = {37,160,149,59,0,0,0,0,182,23,205,59,37,160,149,59,40,15,201,186,156,239,80,187,37,160,149,59,236,155,1,60,0,0,0,0};
	// RGB (8u) -> 601 YCbCr.
	.const .align 4 .b8 kRGB8u_To_601YCbCr[36] = {219,121,131,62,152,14,1,63,18,131,200,61,174,199,23,190,238,252,148,190,197,224,224,62,197,224,224,62,217,78,188,190,174,71,146,189};
	// 601 YCbCr -> RGB (8u).
	.const .align 4 .b8 k601YCbCr_To_RGB8u[36] = {127,10,149,63,0,0,0,0,160,74,204,63,127,10,149,63,254,148,200,190,184,30,80,191,127,10,149,63,78,26,1,64,0,0,0,0};
	// RGB (8u) -> 601 YCbCr, full-range variant.
	// The first 12 bytes are identical to the first 12 bytes of kRGB32f_To_YIQ.
	.const .align 4 .b8 kRGB8u_To_601YCbCrFullRange[36] = {135,22,153,62,162,69,22,63,213,120,233,61,166,27,44,190,39,241,168,190,250,254,254,62,250,254,254,62,43,135,213,190,59,223,165,189};
	// 601 YCbCr full-range -> RGB (8u).
	.const .align 4 .b8 k601YCbCrFullRange_To_RGB8u[36] = {0,0,128,63,0,0,0,0,72,193,178,63,0,0,128,63,143,130,175,190,225,26,54,191,0,0,128,63,20,238,225,63,0,0,0,0};
	// RGB (32f) -> 601 YCbCr, full-range variant.
	.const .align 4 .b8 kRGB32f_To_601YCbCrFullRange[36] = {113,125,152,66,92,175,21,67,92,143,232,65,158,111,43,194,49,72,168,194,0,0,254,66,0,0,254,66,170,177,212,194,88,57,165,193};
	// 601 YCbCr full-range -> RGB (32f).
	.const .align 4 .b8 k601YCbCrFullRange_To_RGB32f[36] = {129,128,128,59,0,0,0,0,188,116,179,59,129,128,128,59,194,50,176,186,179,209,54,187,129,128,128,59,229,208,226,59,0,0,0,0};
	// ---------------------------------------------------------------------
	// 709 YPbPr / YCbCr tables for 32f RGB.
	// Each is a 36-byte, 4-byte-aligned .const blob initialised byte by byte.
	// NOTE(review): presumably 3x3 matrices of little-endian f32 stored row by
	// row, and "709" presumably refers to ITU-R BT.709 -- confirm against the
	// CUDA source that generated this PTX.
	// ---------------------------------------------------------------------
	// RGB (32f) -> 709 YPbPr.
	.const .align 4 .b8 kRGB32f_To_709YPbPr[36] = {208,179,89,62,89,23,55,63,152,221,147,61,186,164,234,189,210,86,197,190,0,0,0,63,0,0,0,63,190,134,232,190,16,202,59,189};
	// 709 YPbPr -> RGB (32f).
	.const .align 4 .b8 k709YPbPr_To_RGB32f[36] = {0,0,128,63,0,0,0,0,12,147,201,63,0,0,128,63,221,209,63,190,243,173,239,190,0,0,128,63,77,132,237,63,0,0,0,0};
	// RGB (32f) -> 709 YCbCr.
	.const .align 4 .b8 kRGB32f_To_709YCbCr[36] = {106,60,58,66,6,161,28,67,244,253,124,65,223,79,205,193,8,172,172,194,0,0,224,66,0,0,224,66,195,117,203,194,236,81,36,193};
	// 709 YCbCr -> RGB (32f).
	.const .align 4 .b8 k709YCbCr_To_RGB32f[36] = {37,160,149,59,0,0,0,0,239,94,230,59,37,160,149,59,33,57,91,186,178,245,8,187,37,160,149,59,82,185,7,60,0,0,0,0};
	// RGB (8u) -> 709 YCbCr: 36-byte, 4-byte-aligned .const blob.
	// NOTE(review): presumably a 3x3 matrix of little-endian f32 stored row by
	// row, like the sibling conversion tables -- confirm against the CUDA source.
	//
	// FIX: bytes 12..15 (element 3, the first entry of the second row) were
	// {147,24,206,61}, which as little-endian f32 is about +0.1006. Every other
	// RGB -> YCbCr table in this module has a negative value in that slot
	// (kRGB32f_To_709YCbCr holds {223,79,205,193}, about -25.66, i.e. the same
	// magnitude times 255; kRGB8u_To_601YCbCr holds {174,199,23,190}). With the
	// positive sign the second row sums to about 0.201 instead of 0. The sign
	// byte is therefore corrected from 61 to 189, giving about -0.1006.
	// This file is compiler output: the same sign fix must also be applied to
	// the constant in the originating CUDA source, or it will be lost on rebuild.
	.const .align 4 .b8 kRGB8u_To_709YCbCr[36] = {207,247,58,62,53,62,29,63,231,251,125,61,147,24,206,189,23,89,173,190,197,224,224,62,197,224,224,62,12,66,204,190,195,245,36,189};
	// ---------------------------------------------------------------------
	// 709 YCbCr -> RGB (8u), and the tables that convert between 601 and 709
	// YCbCr. Each is a 36-byte, 4-byte-aligned .const blob initialised byte by
	// byte.
	// NOTE(review): presumably 3x3 matrices of little-endian f32 stored row by
	// row -- confirm against the CUDA source that generated this PTX.
	// ---------------------------------------------------------------------
	// 709 YCbCr -> RGB (8u).
	.const .align 4 .b8 k709YCbCr_To_RGB8u[36] = {127,10,149,63,0,0,0,0,147,120,229,63,127,10,149,63,53,94,90,190,205,108,8,191,127,10,149,63,154,49,7,64,0,0,0,0};
	// 709 YCbCr -> 601 YCbCr. Bytes 0..3 are {0,0,128,63}; bytes 12..15 and
	// 24..27 are all zero.
	.const .align 4 .b8 k709YCbCr_To_601YCbCr[36] = {0,0,128,63,23,100,203,61,1,77,68,62,0,0,0,0,18,103,125,63,10,158,226,189,0,0,0,0,61,98,148,189,249,191,123,63};
	// 601 YCbCr -> 709 YCbCr. Same zero/one pattern as the table above.
	.const .align 4 .b8 k601YCbCr_To_709YCbCr[36] = {0,0,128,63,122,165,236,189,179,237,84,190,0,0,0,0,204,98,130,63,216,188,234,61,0,0,0,0,74,179,153,61,234,61,131,63};
	// ---------------------------------------------------------------------
	// YCbCr offset vectors: 12-byte, 4-byte-aligned .const blobs.
	// Read as three little-endian IEEE-754 binary32 values, the bytes decode to
	//   kYCbCrOffset          = { 16.0, 128.0, 128.0 }
	//   kYCbCrFullRangeOffset = {  0.0, 128.0, 128.0 }
	// NOTE(review): presumably these are added to / subtracted from the
	// (Y, Cb, Cr) triple around the matrix multiply -- no use is visible in this
	// part of the file, so confirm against the CUDA source.
	// ---------------------------------------------------------------------
	.const .align 4 .b8 kYCbCrOffset[12] = {0,0,128,65,0,0,0,67,0,0,0,67};
	.const .align 4 .b8 kYCbCrFullRangeOffset[12] = {0,0,0,0,0,0,0,67,0,0,0,67};
	// ---------------------------------------------------------------------
	// RGB (32f) <-> YIQ tables: 36-byte, 4-byte-aligned .const blobs initialised
	// byte by byte.
	// NOTE(review): presumably 3x3 matrices of little-endian f32 stored row by
	// row -- confirm against the CUDA source. Under that reading the first three
	// values of kRGB32f_To_YIQ decode to about 0.299, 0.587, 0.114, and
	// elements 0, 3 and 6 of kYIQ_To_RGB32f are each {0,0,128,63} = 1.0.
	// ---------------------------------------------------------------------
	// RGB (32f) -> YIQ.
	.const .align 4 .b8 kRGB32f_To_YIQ[36] = {135,22,153,62,162,69,22,63,213,120,233,61,216,128,24,63,27,133,140,190,149,124,164,190,236,135,88,62,134,200,5,191,22,77,159,62};
	// YIQ -> RGB (32f).
	.const .align 4 .b8 kYIQ_To_RGB32f[36] = {0,0,128,63,20,208,116,63,219,249,30,63,0,0,128,63,177,80,139,190,2,188,37,191,0,0,128,63,45,178,141,191,85,48,218,63};

