	.version 2.2
	.target sm_20
	// compiled with ../../../External/3rdParty/NVIDIA/CUDA/win/bin/../open64/lib//be.exe
	// nvopencc 3.2 built on 2010-11-04

	.visible .func (.param .s32 __cudaretf__Z15IntegerMultiplyii) _Z15IntegerMultiplyii (.param .s32 __cudaparmf1__Z15IntegerMultiplyii, .param .s32 __cudaparmf2__Z15IntegerMultiplyii)

	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelXv) _Z17Standard2DKernelXv ()

	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelYv) _Z17Standard2DKernelYv ()

	.visible .func (.param .align 16 .b8 __cudaretf__Z13Half4ToFloat47ushort4[16]) _Z13Half4ToFloat47ushort4 (.param .align 8 .b8 __cudaparmf1__Z13Half4ToFloat47ushort4[8])

	.visible .func (.param .align 8 .b8 __cudaretf__Z13Float4ToHalf46float4[8]) _Z13Float4ToHalf46float4 (.param .align 16 .b8 __cudaparmf1__Z13Float4ToHalf46float4[16])

	.visible .func (.param .u32 __cudaretf__Z4Mix3RjS_S_) _Z4Mix3RjS_S_ (.param .u64 __cudaparmf1__Z4Mix3RjS_S_, .param .u64 __cudaparmf2__Z4Mix3RjS_S_, .param .u64 __cudaparmf3__Z4Mix3RjS_S_)

	.visible .func (.param .s32 __cudaretf__Z4Randj) _Z4Randj (.param .u32 __cudaparmf1__Z4Randj)

	.visible .func (.param .s32 __cudaretf__Z6Rand2Djjj) _Z6Rand2Djjj (.param .u32 __cudaparmf1__Z6Rand2Djjj, .param .u32 __cudaparmf2__Z6Rand2Djjj, .param .u32 __cudaparmf3__Z6Rand2Djjj)

	.visible .func (.param .s32 __cudaretf__Z6Rand2Dj) _Z6Rand2Dj (.param .u32 __cudaparmf1__Z6Rand2Dj)

	.visible .func (.param .align 8 .b8 __cudaretf__Z6Read2DI7ushort4ET_PKS1_iii[8]) _Z6Read2DI7ushort4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI7ushort4ET_PKS1_iii)

	.visible .func (.param .align 16 .b8 __cudaretf__Z6Read2DI6float4ET_PKS1_iii[16]) _Z6Read2DI6float4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI6float4ET_PKS1_iii)

	.visible .func _Z7Write2DI7ushort4EvT_PS1_iii (.param .align 8 .b8 __cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii[8], .param .u64 __cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii)

	.visible .func _Z7Write2DI6float4EvT_PS1_iii (.param .align 16 .b8 __cudaparmf1__Z7Write2DI6float4EvT_PS1_iii[16], .param .u64 __cudaparmf2__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI6float4EvT_PS1_iii)

	.visible .func (.param .align 16 .b8 __cudaretf__Z18UnpremultiplyPixel8PixelRGB[16]) _Z18UnpremultiplyPixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z18UnpremultiplyPixel8PixelRGB[16])

	.visible .func (.param .f32 __cudaretf__Z13ToLinearColorf) _Z13ToLinearColorf (.param .f32 __cudaparmf1__Z13ToLinearColorf)

	.visible .func (.param .f32 __cudaretf__Z15FromLinearColorf) _Z15FromLinearColorf (.param .f32 __cudaparmf1__Z15FromLinearColorf)

	.visible .func (.param .align 16 .b8 __cudaretf__Z25PremultiplyLinearizePixel8PixelRGB[16]) _Z25PremultiplyLinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16]) _Z29UnpremultiplyUnlinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z20PremultiplyLinearize6float4[16]) _Z20PremultiplyLinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z20PremultiplyLinearize6float4[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z24UnpremultiplyUnlinearize6float4[16]) _Z24UnpremultiplyUnlinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z24UnpremultiplyUnlinearize6float4[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_[16]) _Z18SwapComponentOrderI6float4ET_RKS1_ (.param .u64 __cudaparmf1__Z18SwapComponentOrderI6float4ET_RKS1_)

	.visible .func (.param .f32 __cudaretf__Z17CalcShadowsWeightfff) _Z17CalcShadowsWeightfff (.param .f32 __cudaparmf1__Z17CalcShadowsWeightfff, .param .f32 __cudaparmf2__Z17CalcShadowsWeightfff, .param .f32 __cudaparmf3__Z17CalcShadowsWeightfff)

	.visible .func (.param .f32 __cudaretf__Z20CalcHighlightsWeightfff) _Z20CalcHighlightsWeightfff (.param .f32 __cudaparmf1__Z20CalcHighlightsWeightfff, .param .f32 __cudaparmf2__Z20CalcHighlightsWeightfff, .param .f32 __cudaparmf3__Z20CalcHighlightsWeightfff)

	.visible .func _Z17ConvertYPbPrToIREfffRfS_ (.param .f32 __cudaparmf1__Z17ConvertYPbPrToIREfffRfS_, .param .f32 __cudaparmf2__Z17ConvertYPbPrToIREfffRfS_, .param .f32 __cudaparmf3__Z17ConvertYPbPrToIREfffRfS_, .param .u64 __cudaparmf4__Z17ConvertYPbPrToIREfffRfS_, .param .u64 __cudaparmf5__Z17ConvertYPbPrToIREfffRfS_)

	.visible .func (.param .f32 __cudaretf__Z14ConvertIREYToYf) _Z14ConvertIREYToYf (.param .f32 __cudaparmf1__Z14ConvertIREYToYf)

	.visible .func (.param .s32 __cudaretf__Z10InHueRangefff) _Z10InHueRangefff (.param .f32 __cudaparmf1__Z10InHueRangefff, .param .f32 __cudaparmf2__Z10InHueRangefff, .param .f32 __cudaparmf3__Z10InHueRangefff)

	.visible .func _Z16ConvertPbPrToHueffRf (.param .f32 __cudaparmf1__Z16ConvertPbPrToHueffRf, .param .f32 __cudaparmf2__Z16ConvertPbPrToHueffRf, .param .u64 __cudaparmf3__Z16ConvertPbPrToHueffRf)

	.visible .func _Z26CalculateSmartLimitWeightsfffRfS_ (.param .f32 __cudaparmf1__Z26CalculateSmartLimitWeightsfffRfS_, .param .f32 __cudaparmf2__Z26CalculateSmartLimitWeightsfffRfS_, .param .f32 __cudaparmf3__Z26CalculateSmartLimitWeightsfffRfS_, .param .u64 __cudaparmf4__Z26CalculateSmartLimitWeightsfffRfS_, .param .u64 __cudaparmf5__Z26CalculateSmartLimitWeightsfffRfS_)

	.visible .func (.param .s32 __cudaretf__Z21SmartLimitRatioMethodfffffffffRfS_S_) _Z21SmartLimitRatioMethodfffffffffRfS_S_ (.param .f32 __cudaparmf1__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .f32 __cudaparmf2__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .f32 __cudaparmf3__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .f32 __cudaparmf4__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .f32 __cudaparmf5__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .f32 __cudaparmf6__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .f32 __cudaparmf7__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .f32 __cudaparmf8__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .f32 __cudaparmf9__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .u64 __cudaparmf10__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .u64 __cudaparmf11__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .u64 __cudaparmf12__Z21SmartLimitRatioMethodfffffffffRfS_S_)

	.visible .func (.param .f32 __cudaretf__Z5ClampIfET_S0_S0_S0_) _Z5ClampIfET_S0_S0_S0_ (.param .f32 __cudaparmf1__Z5ClampIfET_S0_S0_S0_, .param .f32 __cudaparmf2__Z5ClampIfET_S0_S0_S0_, .param .f32 __cudaparmf3__Z5ClampIfET_S0_S0_S0_)

	.visible .func (.param .f32 __cudaretf__Z4LERPIfET_S0_S0_S0_) _Z4LERPIfET_S0_S0_S0_ (.param .f32 __cudaparmf1__Z4LERPIfET_S0_S0_S0_, .param .f32 __cudaparmf2__Z4LERPIfET_S0_S0_S0_, .param .f32 __cudaparmf3__Z4LERPIfET_S0_S0_S0_)

	//-----------------------------------------------------------
	// Compiling C:/Users/dvaeng/AppData/Local/Temp/tmpxft_00003f24_00000000-11_VideoLimiter.cpp3.i (C:/Users/dvaeng/AppData/Local/Temp/ccBI#.a16288)
	//-----------------------------------------------------------

	//-----------------------------------------------------------
	// Options:
	//-----------------------------------------------------------
	//  Target:ptx, ISA:sm_20, Endian:little, Pointer Size:64
	//  -O3	(Optimization level)
	//  -g0	(Debug level)
	//  -m2	(Report advisories)
	//-----------------------------------------------------------

	.file	1	"C:/Users/dvaeng/AppData/Local/Temp/tmpxft_00003f24_00000000-10_VideoLimiter.cudafe2.gpu"
	.file	2	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/VideoLimiter.h"
	.file	3	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/PixelFormat.h"
	.file	4	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/PixelRGB.h"
	.file	5	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/PixelYUV.h"
	.file	6	"C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\include\crtdefs.h"
	.file	7	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\crt/device_runtime.h"
	.file	8	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\host_defines.h"
	.file	9	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\builtin_types.h"
	.file	10	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\device_types.h"
	.file	11	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\driver_types.h"
	.file	12	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\surface_types.h"
	.file	13	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\texture_types.h"
	.file	14	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\vector_types.h"
	.file	15	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\builtin_types.h"
	.file	16	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\host_defines.h"
	.file	17	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\device_launch_parameters.h"
	.file	18	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\crt\storage_class.h"
	.file	19	"C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\include\time.h"
	.file	20	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/Utils.h"
	.file	21	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/VectorUtils.h"
	.file	22	"c:/Mulder64/shared/adobe/MediaCore/Display/Src/CUDA/Effects/VideoLimiter.cu"
	.file	23	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/Numeric.h"
	.file	24	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\common_functions.h"
	.file	25	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_functions.h"
	.file	26	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_constants.h"
	.file	27	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\device_functions.h"
	.file	28	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_11_atomic_functions.h"
	.file	29	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_12_atomic_functions.h"
	.file	30	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_13_double_functions.h"
	.file	31	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_20_atomic_functions.h"
	.file	32	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_20_intrinsics.h"
	.file	33	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\surface_functions.h"
	.file	34	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\texture_fetch_functions.h"
	.file	35	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_functions_dbl_ptx3.h"
	.file	36	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/ColorSpaceConvert.h"


	// IntegerMultiply(int, int)
	//   In : two 32-bit integer parameters.
	//   Out: low 32 bits of their product (mul.lo.s32).
	.visible .func (.param .s32 __cudaretf__Z15IntegerMultiplyii) _Z15IntegerMultiplyii (.param .s32 __cudaparmf1__Z15IntegerMultiplyii, .param .s32 __cudaparmf2__Z15IntegerMultiplyii)
	{
	.reg .u32 %lhs, %rhs, %prod;
	.loc	20	60	0
$LDWbegin__Z15IntegerMultiplyii:
	// Fetch both factors straight from parameter space.
	ld.param.u32 	%lhs, [__cudaparmf1__Z15IntegerMultiplyii];
	ld.param.u32 	%rhs, [__cudaparmf2__Z15IntegerMultiplyii];
	.loc	20	64	0
	// Only the low half of the 64-bit product is kept.
	mul.lo.s32 	%prod, %lhs, %rhs;
	st.param.s32 	[__cudaretf__Z15IntegerMultiplyii], %prod;
	ret;
$LDWend__Z15IntegerMultiplyii:
	} // _Z15IntegerMultiplyii

	// Standard2DKernelX()
	//   Out: %ctaid.x * %ntid.x + %tid.x, i.e. this thread's x index
	//        across the whole grid, returned as a 32-bit integer.
	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelXv) _Z17Standard2DKernelXv ()
	{
	.reg .u32 %thr, %blk, %ext, %gidx;
	.loc	20	73	0
$LDWbegin__Z17Standard2DKernelXv:
	.loc	20	74	0
	mov.u32 	%thr, %tid.x;
	mov.u32 	%blk, %ctaid.x;
	mov.u32 	%ext, %ntid.x;
	// Low 32 bits of blk * ext, plus thr, in one fused multiply-add.
	mad.lo.s32 	%gidx, %blk, %ext, %thr;
	st.param.s32 	[__cudaretf__Z17Standard2DKernelXv], %gidx;
	ret;
$LDWend__Z17Standard2DKernelXv:
	} // _Z17Standard2DKernelXv

	// Standard2DKernelY()
	//   Out: %ctaid.y * %ntid.y + %tid.y, i.e. this thread's y index
	//        across the whole grid, returned as a 32-bit integer.
	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelYv) _Z17Standard2DKernelYv ()
	{
	.reg .u32 %thr, %blk, %ext, %gidx;
	.loc	20	77	0
$LDWbegin__Z17Standard2DKernelYv:
	.loc	20	78	0
	mov.u32 	%thr, %tid.y;
	mov.u32 	%blk, %ctaid.y;
	mov.u32 	%ext, %ntid.y;
	// Low 32 bits of blk * ext, plus thr, in one fused multiply-add.
	mad.lo.s32 	%gidx, %blk, %ext, %thr;
	st.param.s32 	[__cudaretf__Z17Standard2DKernelYv], %gidx;
	ret;
$LDWend__Z17Standard2DKernelYv:
	} // _Z17Standard2DKernelYv

	// Half4ToFloat4(ushort4)
	//   In : 8-byte aggregate holding four 16-bit half-precision bit patterns
	//        at byte offsets 0, 2, 4, 6.
	//   Out: 16-byte aggregate holding the four values widened to f32 at byte
	//        offsets 0, 4, 8, 12 (component i of the input maps to component
	//        i of the output). Conversion uses flush-to-zero (.ftz).
	.visible .func (.param .align 16 .b8 __cudaretf__Z13Half4ToFloat47ushort4[16]) _Z13Half4ToFloat47ushort4 (.param .align 8 .b8 __cudaparmf1__Z13Half4ToFloat47ushort4[8])
	{
	.reg .u32 %h0, %h1, %h2, %h3;
	.reg .b32 %raw;
	.reg .f32 %w0, %w1, %w2, %w3;
	.loc	20	86	0
$LDWbegin__Z13Half4ToFloat47ushort4:
	// ld.u16 into a 32-bit register zero-extends, so each register holds
	// exactly the 16 payload bits.
	ld.param.u16 	%h0, [__cudaparmf1__Z13Half4ToFloat47ushort4+0];
	ld.param.u16 	%h1, [__cudaparmf1__Z13Half4ToFloat47ushort4+2];
	ld.param.u16 	%h2, [__cudaparmf1__Z13Half4ToFloat47ushort4+4];
	ld.param.u16 	%h3, [__cudaparmf1__Z13Half4ToFloat47ushort4+6];
	.loc	20	87	0
	// Each component: move the bits into an untyped register, widen, store.
	mov.b32 	%raw, %h0;
	cvt.ftz.f32.f16 	%w0, %raw;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+0], %w0;
	mov.b32 	%raw, %h1;
	cvt.ftz.f32.f16 	%w1, %raw;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+4], %w1;
	mov.b32 	%raw, %h2;
	cvt.ftz.f32.f16 	%w2, %raw;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+8], %w2;
	mov.b32 	%raw, %h3;
	cvt.ftz.f32.f16 	%w3, %raw;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+12], %w3;
	ret;
$LDWend__Z13Half4ToFloat47ushort4:
	} // _Z13Half4ToFloat47ushort4

	// Float4ToHalf4(float4)
	//   In : 16-byte aggregate of four f32 values at byte offsets 0, 4, 8, 12.
	//   Out: 8-byte aggregate of four 16-bit half-precision bit patterns at
	//        byte offsets 0, 2, 4, 6 (component i maps to component i).
	//        Conversion rounds to nearest even with flush-to-zero.
	.visible .func (.param .align 8 .b8 __cudaretf__Z13Float4ToHalf46float4[8]) _Z13Float4ToHalf46float4 (.param .align 16 .b8 __cudaparmf1__Z13Float4ToHalf46float4[16])
	{
	.reg .f32 %w0, %w1, %w2, %w3;
	.reg .b32 %raw;
	.reg .u32 %h0, %h1, %h2, %h3;
	.loc	20	95	0
$LDWbegin__Z13Float4ToHalf46float4:
	ld.param.f32 	%w0, [__cudaparmf1__Z13Float4ToHalf46float4+0];
	ld.param.f32 	%w1, [__cudaparmf1__Z13Float4ToHalf46float4+4];
	ld.param.f32 	%w2, [__cudaparmf1__Z13Float4ToHalf46float4+8];
	ld.param.f32 	%w3, [__cudaparmf1__Z13Float4ToHalf46float4+12];
	.loc	20	96	0
	// Each component: narrow into an untyped register, then store the
	// low 16 bits (st.u16 from a 32-bit register keeps only those bits).
	cvt.rn.ftz.f16.f32 	%raw, %w0;
	mov.b32 	%h0, %raw;
	st.param.u16 	[__cudaretf__Z13Float4ToHalf46float4+0], %h0;
	cvt.rn.ftz.f16.f32 	%raw, %w1;
	mov.b32 	%h1, %raw;
	st.param.u16 	[__cudaretf__Z13Float4ToHalf46float4+2], %h1;
	cvt.rn.ftz.f16.f32 	%raw, %w2;
	mov.b32 	%h2, %raw;
	st.param.u16 	[__cudaretf__Z13Float4ToHalf46float4+4], %h2;
	cvt.rn.ftz.f16.f32 	%raw, %w3;
	mov.b32 	%h3, %raw;
	st.param.u16 	[__cudaretf__Z13Float4ToHalf46float4+6], %h3;
	ret;
$LDWend__Z13Float4ToHalf46float4:
	} // _Z13Float4ToHalf46float4

	// Mix3(unsigned&, unsigned&, unsigned&)
	//   In : three 64-bit addresses (parameters 1..3), each dereferenced as a
	//        32-bit unsigned word with generic-space ld/st. Below they are
	//        called a = [%rd2], b = [%rd4], c = [%rd6].
	//   Out: the final value written to c; a, b and c are all updated in
	//        memory as a side effect.
	//   Each round has the shape  X -= Y; X -= Z; X ^= shift(Z)  and rotates
	//   through a, b, c three times with a different shift per round.
	//   Every operand is re-read from memory after each store, so the result
	//   depends on the exact load/store order if the addresses overlap
	//   (presumably the reason the compiler kept the reloads -- the pointers
	//   may alias). Do not reorder or merge these accesses.
	//   NOTE(review): the shift schedule (13, 8, 13, 12, 16, 5, 3, 10, 15)
	//   looks like Bob Jenkins' 96-bit mix -- confirm against Utils.h.
	.visible .func (.param .u32 __cudaretf__Z4Mix3RjS_S_) _Z4Mix3RjS_S_ (.param .u64 __cudaparmf1__Z4Mix3RjS_S_, .param .u64 __cudaparmf2__Z4Mix3RjS_S_, .param .u64 __cudaparmf3__Z4Mix3RjS_S_)
	{
	.reg .u32 %r<75>;
	.reg .u64 %rd<8>;
	.loc	20	138	0
$LDWbegin__Z4Mix3RjS_S_:
	// %rd2 = &a, %rd4 = &b, %rd6 = &c
	ld.param.u64 	%rd1, [__cudaparmf1__Z4Mix3RjS_S_];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf2__Z4Mix3RjS_S_];
	mov.s64 	%rd4, %rd3;
	ld.param.u64 	%rd5, [__cudaparmf3__Z4Mix3RjS_S_];
	mov.s64 	%rd6, %rd5;
	.loc	20	139	0
	// a -= b; a -= c; a ^= c >> 13
	ld.u32 	%r1, [%rd2+0];
	ld.u32 	%r2, [%rd4+0];
	sub.u32 	%r3, %r1, %r2;
	st.u32 	[%rd2+0], %r3;
	ld.u32 	%r4, [%rd6+0];
	sub.u32 	%r5, %r3, %r4;
	st.u32 	[%rd2+0], %r5;
	ld.u32 	%r6, [%rd6+0];
	shr.u32 	%r7, %r6, 13;
	xor.b32 	%r8, %r5, %r7;
	st.u32 	[%rd2+0], %r8;
	.loc	20	140	0
	// b -= c; b -= a; b ^= a << 8
	ld.u32 	%r9, [%rd4+0];
	ld.u32 	%r10, [%rd6+0];
	sub.u32 	%r11, %r9, %r10;
	st.u32 	[%rd4+0], %r11;
	ld.u32 	%r12, [%rd2+0];
	sub.u32 	%r13, %r11, %r12;
	st.u32 	[%rd4+0], %r13;
	ld.u32 	%r14, [%rd2+0];
	shl.b32 	%r15, %r14, 8;
	xor.b32 	%r16, %r13, %r15;
	st.u32 	[%rd4+0], %r16;
	.loc	20	141	0
	// c -= a; c -= b; c ^= b >> 13
	ld.u32 	%r17, [%rd6+0];
	ld.u32 	%r18, [%rd2+0];
	sub.u32 	%r19, %r17, %r18;
	st.u32 	[%rd6+0], %r19;
	ld.u32 	%r20, [%rd4+0];
	sub.u32 	%r21, %r19, %r20;
	st.u32 	[%rd6+0], %r21;
	ld.u32 	%r22, [%rd4+0];
	shr.u32 	%r23, %r22, 13;
	xor.b32 	%r24, %r21, %r23;
	st.u32 	[%rd6+0], %r24;
	.loc	20	142	0
	// a -= b; a -= c; a ^= c >> 12
	ld.u32 	%r25, [%rd2+0];
	ld.u32 	%r26, [%rd4+0];
	sub.u32 	%r27, %r25, %r26;
	st.u32 	[%rd2+0], %r27;
	ld.u32 	%r28, [%rd6+0];
	sub.u32 	%r29, %r27, %r28;
	st.u32 	[%rd2+0], %r29;
	ld.u32 	%r30, [%rd6+0];
	shr.u32 	%r31, %r30, 12;
	xor.b32 	%r32, %r29, %r31;
	st.u32 	[%rd2+0], %r32;
	.loc	20	143	0
	// b -= c; b -= a; b ^= a << 16
	ld.u32 	%r33, [%rd4+0];
	ld.u32 	%r34, [%rd6+0];
	sub.u32 	%r35, %r33, %r34;
	st.u32 	[%rd4+0], %r35;
	ld.u32 	%r36, [%rd2+0];
	sub.u32 	%r37, %r35, %r36;
	st.u32 	[%rd4+0], %r37;
	ld.u32 	%r38, [%rd2+0];
	shl.b32 	%r39, %r38, 16;
	xor.b32 	%r40, %r37, %r39;
	st.u32 	[%rd4+0], %r40;
	.loc	20	144	0
	// c -= a; c -= b; c ^= b >> 5
	ld.u32 	%r41, [%rd6+0];
	ld.u32 	%r42, [%rd2+0];
	sub.u32 	%r43, %r41, %r42;
	st.u32 	[%rd6+0], %r43;
	ld.u32 	%r44, [%rd4+0];
	sub.u32 	%r45, %r43, %r44;
	st.u32 	[%rd6+0], %r45;
	ld.u32 	%r46, [%rd4+0];
	shr.u32 	%r47, %r46, 5;
	xor.b32 	%r48, %r45, %r47;
	st.u32 	[%rd6+0], %r48;
	.loc	20	145	0
	// a -= b; a -= c; a ^= c >> 3
	ld.u32 	%r49, [%rd2+0];
	ld.u32 	%r50, [%rd4+0];
	sub.u32 	%r51, %r49, %r50;
	st.u32 	[%rd2+0], %r51;
	ld.u32 	%r52, [%rd6+0];
	sub.u32 	%r53, %r51, %r52;
	st.u32 	[%rd2+0], %r53;
	ld.u32 	%r54, [%rd6+0];
	shr.u32 	%r55, %r54, 3;
	xor.b32 	%r56, %r53, %r55;
	st.u32 	[%rd2+0], %r56;
	.loc	20	146	0
	// b -= c; b -= a; b ^= a << 10
	ld.u32 	%r57, [%rd4+0];
	ld.u32 	%r58, [%rd6+0];
	sub.u32 	%r59, %r57, %r58;
	st.u32 	[%rd4+0], %r59;
	ld.u32 	%r60, [%rd2+0];
	sub.u32 	%r61, %r59, %r60;
	st.u32 	[%rd4+0], %r61;
	ld.u32 	%r62, [%rd2+0];
	shl.b32 	%r63, %r62, 10;
	xor.b32 	%r64, %r61, %r63;
	st.u32 	[%rd4+0], %r64;
	.loc	20	147	0
	// c -= a; c -= b; c ^= b >> 15
	ld.u32 	%r65, [%rd6+0];
	ld.u32 	%r66, [%rd2+0];
	sub.u32 	%r67, %r65, %r66;
	st.u32 	[%rd6+0], %r67;
	ld.u32 	%r68, [%rd4+0];
	sub.u32 	%r69, %r67, %r68;
	st.u32 	[%rd6+0], %r69;
	ld.u32 	%r70, [%rd4+0];
	shr.u32 	%r71, %r70, 15;
	xor.b32 	%r72, %r69, %r71;
	st.u32 	[%rd6+0], %r72;
	.loc	20	148	0
	// Return the value just stored to c (taken from the register, not re-read).
	mov.s32 	%r73, %r72;
	st.param.u32 	[__cudaretf__Z4Mix3RjS_S_], %r73;
	ret;
$LDWend__Z4Mix3RjS_S_:
	} // _Z4Mix3RjS_S_

	// Rand(unsigned seed)
	//   Out: (((s1 >> 16) & 255) << 7) ^ ((s2 >> 16) & 255), where
	//          s1 = seed * 1103515245 + 12345            (mod 2^32)
	//          s2 = seed * -1029531031 - 740551042       (mod 2^32)
	//        The s2 constants equal 1103515245^2 and 12345 * (1103515245 + 1)
	//        modulo 2^32, so s2 is the same linear step applied to s1.
	.visible .func (.param .s32 __cudaretf__Z4Randj) _Z4Randj (.param .u32 __cudaparmf1__Z4Randj)
	{
	.reg .u32 %seed, %step1, %step2;
	.loc	20	152	0
$LDWbegin__Z4Randj:
	ld.param.u32 	%seed, [__cudaparmf1__Z4Randj];
	.loc	20	163	0
	// First step: take bits 16..23 and move them up by 7.
	mul.lo.u32 	%step1, %seed, 1103515245;
	add.u32 	%step1, %step1, 12345;
	shr.u32 	%step1, %step1, 16;
	and.b32 	%step1, %step1, 255;
	shl.b32 	%step1, %step1, 7;
	// Second step, computed directly from the seed: take bits 16..23.
	mul.lo.u32 	%step2, %seed, -1029531031;
	sub.u32 	%step2, %step2, 740551042;
	shr.u32 	%step2, %step2, 16;
	and.b32 	%step2, %step2, 255;
	// Combine the two byte fields.
	xor.b32 	%step1, %step1, %step2;
	st.param.s32 	[__cudaretf__Z4Randj], %step1;
	ret;
$LDWend__Z4Randj:
	} // _Z4Randj

	// Rand2D(unsigned a, unsigned b, unsigned c)
	//   Mixes the three words in registers (nine rounds of
	//   X -= Y; X -= Z; X ^= shift(Z)), then derives the result from the
	//   final c:
	//     (((s1 >> 16) & 255) << 7) ^ ((s2 >> 16) & 255)
	//     s1 = c * 1103515245 + 12345,  s2 = c * -1029531031 - 740551042
	//   All arithmetic is modulo 2^32.
	.visible .func (.param .s32 __cudaretf__Z6Rand2Djjj) _Z6Rand2Djjj (.param .u32 __cudaparmf1__Z6Rand2Djjj, .param .u32 __cudaparmf2__Z6Rand2Djjj, .param .u32 __cudaparmf3__Z6Rand2Djjj)
	{
	.reg .u32 %wa, %wb, %wc, %sh, %step1, %step2;
	.loc	20	169	0
$LDWbegin__Z6Rand2Djjj:
	ld.param.u32 	%wa, [__cudaparmf1__Z6Rand2Djjj];
	ld.param.u32 	%wb, [__cudaparmf2__Z6Rand2Djjj];
	ld.param.u32 	%wc, [__cudaparmf3__Z6Rand2Djjj];
	.loc	20	139	0
	// a -= b; a -= c; a ^= c >> 13
	sub.u32 	%wa, %wa, %wb;
	sub.u32 	%wa, %wa, %wc;
	shr.u32 	%sh, %wc, 13;
	xor.b32 	%wa, %wa, %sh;
	.loc	20	140	0
	// b -= c; b -= a; b ^= a << 8
	sub.u32 	%wb, %wb, %wc;
	sub.u32 	%wb, %wb, %wa;
	shl.b32 	%sh, %wa, 8;
	xor.b32 	%wb, %wb, %sh;
	.loc	20	141	0
	// c -= a; c -= b; c ^= b >> 13
	sub.u32 	%wc, %wc, %wa;
	sub.u32 	%wc, %wc, %wb;
	shr.u32 	%sh, %wb, 13;
	xor.b32 	%wc, %wc, %sh;
	.loc	20	142	0
	// a -= b; a -= c; a ^= c >> 12
	sub.u32 	%wa, %wa, %wb;
	sub.u32 	%wa, %wa, %wc;
	shr.u32 	%sh, %wc, 12;
	xor.b32 	%wa, %wa, %sh;
	.loc	20	143	0
	// b -= c; b -= a; b ^= a << 16
	sub.u32 	%wb, %wb, %wc;
	sub.u32 	%wb, %wb, %wa;
	shl.b32 	%sh, %wa, 16;
	xor.b32 	%wb, %wb, %sh;
	.loc	20	144	0
	// c -= a; c -= b; c ^= b >> 5
	sub.u32 	%wc, %wc, %wa;
	sub.u32 	%wc, %wc, %wb;
	shr.u32 	%sh, %wb, 5;
	xor.b32 	%wc, %wc, %sh;
	.loc	20	145	0
	// a -= b; a -= c; a ^= c >> 3
	sub.u32 	%wa, %wa, %wb;
	sub.u32 	%wa, %wa, %wc;
	shr.u32 	%sh, %wc, 3;
	xor.b32 	%wa, %wa, %sh;
	.loc	20	146	0
	// b -= c; b -= a; b ^= a << 10
	sub.u32 	%wb, %wb, %wc;
	sub.u32 	%wb, %wb, %wa;
	shl.b32 	%sh, %wa, 10;
	xor.b32 	%wb, %wb, %sh;
	.loc	20	147	0
	// c -= a; c -= b; c ^= b >> 15
	sub.u32 	%wc, %wc, %wa;
	sub.u32 	%wc, %wc, %wb;
	shr.u32 	%sh, %wb, 15;
	xor.b32 	%wc, %wc, %sh;
	.loc	20	170	0
	// Two linear steps of the mixed word, one byte field from each.
	mul.lo.u32 	%step1, %wc, 1103515245;
	add.u32 	%step1, %step1, 12345;
	shr.u32 	%step1, %step1, 16;
	and.b32 	%step1, %step1, 255;
	shl.b32 	%step1, %step1, 7;
	mul.lo.u32 	%step2, %wc, -1029531031;
	sub.u32 	%step2, %step2, 740551042;
	shr.u32 	%step2, %step2, 16;
	and.b32 	%step2, %step2, 255;
	xor.b32 	%step1, %step1, %step2;
	st.param.s32 	[__cudaretf__Z6Rand2Djjj], %step1;
	ret;
$LDWend__Z6Rand2Djjj:
	} // _Z6Rand2Djjj

	// Rand2D(unsigned seed)
	//   Mixes three words in registers:
	//     a = seed
	//     b = %ctaid.x * %ntid.x + %tid.x
	//     c = %ctaid.y * %ntid.y + %tid.y
	//   using nine rounds of  X -= Y; X -= Z; X ^= shift(Z), then derives
	//   the result from the final c:
	//     (((s1 >> 16) & 255) << 7) ^ ((s2 >> 16) & 255)
	//     s1 = c * 1103515245 + 12345,  s2 = c * -1029531031 - 740551042
	//   All arithmetic is modulo 2^32.
	.visible .func (.param .s32 __cudaretf__Z6Rand2Dj) _Z6Rand2Dj (.param .u32 __cudaparmf1__Z6Rand2Dj)
	{
	.reg .u32 %wa, %wb, %wc, %sh, %blk, %ext, %thr, %step1, %step2;
	.loc	20	175	0
$LDWbegin__Z6Rand2Dj:
	ld.param.u32 	%wa, [__cudaparmf1__Z6Rand2Dj];
	// b: x index of this thread across the grid.
	mov.u32 	%blk, %ctaid.x;
	mov.u32 	%ext, %ntid.x;
	mov.u32 	%thr, %tid.x;
	mad.lo.s32 	%wb, %blk, %ext, %thr;
	// c: y index of this thread across the grid.
	mov.u32 	%blk, %ctaid.y;
	mov.u32 	%ext, %ntid.y;
	mov.u32 	%thr, %tid.y;
	mad.lo.s32 	%wc, %blk, %ext, %thr;
	.loc	20	139	0
	// a -= b; a -= c; a ^= c >> 13
	sub.u32 	%wa, %wa, %wb;
	sub.u32 	%wa, %wa, %wc;
	shr.u32 	%sh, %wc, 13;
	xor.b32 	%wa, %wa, %sh;
	.loc	20	140	0
	// b -= c; b -= a; b ^= a << 8
	sub.u32 	%wb, %wb, %wc;
	sub.u32 	%wb, %wb, %wa;
	shl.b32 	%sh, %wa, 8;
	xor.b32 	%wb, %wb, %sh;
	.loc	20	141	0
	// c -= a; c -= b; c ^= b >> 13
	sub.u32 	%wc, %wc, %wa;
	sub.u32 	%wc, %wc, %wb;
	shr.u32 	%sh, %wb, 13;
	xor.b32 	%wc, %wc, %sh;
	.loc	20	142	0
	// a -= b; a -= c; a ^= c >> 12
	sub.u32 	%wa, %wa, %wb;
	sub.u32 	%wa, %wa, %wc;
	shr.u32 	%sh, %wc, 12;
	xor.b32 	%wa, %wa, %sh;
	.loc	20	143	0
	// b -= c; b -= a; b ^= a << 16
	sub.u32 	%wb, %wb, %wc;
	sub.u32 	%wb, %wb, %wa;
	shl.b32 	%sh, %wa, 16;
	xor.b32 	%wb, %wb, %sh;
	.loc	20	144	0
	// c -= a; c -= b; c ^= b >> 5
	sub.u32 	%wc, %wc, %wa;
	sub.u32 	%wc, %wc, %wb;
	shr.u32 	%sh, %wb, 5;
	xor.b32 	%wc, %wc, %sh;
	.loc	20	145	0
	// a -= b; a -= c; a ^= c >> 3
	sub.u32 	%wa, %wa, %wb;
	sub.u32 	%wa, %wa, %wc;
	shr.u32 	%sh, %wc, 3;
	xor.b32 	%wa, %wa, %sh;
	.loc	20	146	0
	// b -= c; b -= a; b ^= a << 10
	sub.u32 	%wb, %wb, %wc;
	sub.u32 	%wb, %wb, %wa;
	shl.b32 	%sh, %wa, 10;
	xor.b32 	%wb, %wb, %sh;
	.loc	20	147	0
	// c -= a; c -= b; c ^= b >> 15
	sub.u32 	%wc, %wc, %wa;
	sub.u32 	%wc, %wc, %wb;
	shr.u32 	%sh, %wb, 15;
	xor.b32 	%wc, %wc, %sh;
	.loc	20	176	0
	// Two linear steps of the mixed word, one byte field from each.
	mul.lo.u32 	%step1, %wc, 1103515245;
	add.u32 	%step1, %step1, 12345;
	shr.u32 	%step1, %step1, 16;
	and.b32 	%step1, %step1, 255;
	shl.b32 	%step1, %step1, 7;
	mul.lo.u32 	%step2, %wc, -1029531031;
	sub.u32 	%step2, %step2, 740551042;
	shr.u32 	%step2, %step2, 16;
	and.b32 	%step2, %step2, 255;
	xor.b32 	%step1, %step1, %step2;
	st.param.s32 	[__cudaretf__Z6Rand2Dj], %step1;
	ret;
$LDWend__Z6Rand2Dj:
	} // _Z6Rand2Dj

	// Read2D<ushort4>(const ushort4* base, int p2, int p3, int p4)
	//   Out: the 8-byte element (four 16-bit lanes) at
	//        base[p2 * p4 + p3], fetched with one generic-space vector load.
	//   The element index is computed in 32-bit arithmetic and then
	//   sign-extended and scaled by the 8-byte element size.
	.visible .func (.param .align 8 .b8 __cudaretf__Z6Read2DI7ushort4ET_PKS1_iii[8]) _Z6Read2DI7ushort4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI7ushort4ET_PKS1_iii)
	{
	.reg .u32 %arg2, %arg3, %arg4, %elem, %lane0, %lane1, %lane2, %lane3;
	.reg .u64 %base, %bytes, %addr;
	.loc	20	114	0
$LDWbegin__Z6Read2DI7ushort4ET_PKS1_iii:
	ld.param.u64 	%base, [__cudaparmf1__Z6Read2DI7ushort4ET_PKS1_iii];
	ld.param.u32 	%arg2, [__cudaparmf2__Z6Read2DI7ushort4ET_PKS1_iii];
	ld.param.u32 	%arg3, [__cudaparmf3__Z6Read2DI7ushort4ET_PKS1_iii];
	ld.param.u32 	%arg4, [__cudaparmf4__Z6Read2DI7ushort4ET_PKS1_iii];
	.loc	20	115	0
	// elem = arg2 * arg4 + arg3 (low 32 bits)
	mad.lo.s32 	%elem, %arg2, %arg4, %arg3;
	// Byte offset = (s64)elem * 8
	mul.wide.s32 	%bytes, %elem, 8;
	add.u64 	%addr, %base, %bytes;
	ld.v4.u16 	{%lane0,%lane1,%lane2,%lane3}, [%addr+0];
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+0], %lane0;
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+2], %lane1;
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+4], %lane2;
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+6], %lane3;
	ret;
$LDWend__Z6Read2DI7ushort4ET_PKS1_iii:
	} // _Z6Read2DI7ushort4ET_PKS1_iii

	// Read2D<float4>(const float4* base, int p2, int p3, int p4)
	//   Out: the 16-byte element (four f32 lanes) at
	//        base[p2 * p4 + p3], fetched with one generic-space vector load.
	//   The element index is computed in 32-bit arithmetic and then
	//   sign-extended and scaled by the 16-byte element size.
	.visible .func (.param .align 16 .b8 __cudaretf__Z6Read2DI6float4ET_PKS1_iii[16]) _Z6Read2DI6float4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI6float4ET_PKS1_iii)
	{
	.reg .u32 %arg2, %arg3, %arg4, %elem;
	.reg .u64 %base, %bytes, %addr;
	.reg .f32 %lane0, %lane1, %lane2, %lane3;
	.loc	20	114	0
$LDWbegin__Z6Read2DI6float4ET_PKS1_iii:
	ld.param.u64 	%base, [__cudaparmf1__Z6Read2DI6float4ET_PKS1_iii];
	ld.param.u32 	%arg2, [__cudaparmf2__Z6Read2DI6float4ET_PKS1_iii];
	ld.param.u32 	%arg3, [__cudaparmf3__Z6Read2DI6float4ET_PKS1_iii];
	ld.param.u32 	%arg4, [__cudaparmf4__Z6Read2DI6float4ET_PKS1_iii];
	.loc	20	115	0
	// elem = arg2 * arg4 + arg3 (low 32 bits)
	mad.lo.s32 	%elem, %arg2, %arg4, %arg3;
	// Byte offset = (s64)elem * 16
	mul.wide.s32 	%bytes, %elem, 16;
	add.u64 	%addr, %base, %bytes;
	ld.v4.f32 	{%lane0,%lane1,%lane2,%lane3}, [%addr+0];
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+0], %lane0;
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+4], %lane1;
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+8], %lane2;
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+12], %lane3;
	ret;
$LDWend__Z6Read2DI6float4ET_PKS1_iii:
	} // _Z6Read2DI6float4ET_PKS1_iii

	// Write2D<ushort4>(ushort4 value, ushort4* base, int p3, int p4, int p5)
	//   Stores the 8-byte value (four 16-bit lanes) to
	//   base[p3 * p5 + p4] with one generic-space vector store.
	//   The element index is computed in 32-bit arithmetic and then
	//   sign-extended and scaled by the 8-byte element size. No return value.
	.visible .func _Z7Write2DI7ushort4EvT_PS1_iii (.param .align 8 .b8 __cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii[8], .param .u64 __cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii)
	{
	.reg .u32 %lane0, %lane1, %lane2, %lane3, %arg3, %arg4, %arg5, %elem;
	.reg .u64 %base, %bytes, %addr;
	.loc	20	125	0
$LDWbegin__Z7Write2DI7ushort4EvT_PS1_iii:
	// Unpack the by-value aggregate, one 16-bit lane per register.
	ld.param.u16 	%lane0, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+0];
	ld.param.u16 	%lane1, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+2];
	ld.param.u16 	%lane2, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+4];
	ld.param.u16 	%lane3, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+6];
	ld.param.u64 	%base, [__cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii];
	ld.param.u32 	%arg3, [__cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii];
	ld.param.u32 	%arg4, [__cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii];
	ld.param.u32 	%arg5, [__cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii];
	.loc	20	126	0
	// elem = arg3 * arg5 + arg4 (low 32 bits)
	mad.lo.s32 	%elem, %arg3, %arg5, %arg4;
	// Byte offset = (s64)elem * 8
	mul.wide.s32 	%bytes, %elem, 8;
	add.u64 	%addr, %base, %bytes;
	st.v4.u16 	[%addr+0], {%lane0,%lane1,%lane2,%lane3};
	.loc	20	127	0
	ret;
$LDWend__Z7Write2DI7ushort4EvT_PS1_iii:
	} // _Z7Write2DI7ushort4EvT_PS1_iii

	// Write2D<float4>(float4 value, float4* base, int p3, int p4, int p5)
	//   Stores the 16-byte value (four f32 lanes) to
	//   base[p3 * p5 + p4] with one generic-space vector store.
	//   The element index is computed in 32-bit arithmetic and then
	//   sign-extended and scaled by the 16-byte element size. No return value.
	.visible .func _Z7Write2DI6float4EvT_PS1_iii (.param .align 16 .b8 __cudaparmf1__Z7Write2DI6float4EvT_PS1_iii[16], .param .u64 __cudaparmf2__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI6float4EvT_PS1_iii)
	{
	.reg .u32 %arg3, %arg4, %arg5, %elem;
	.reg .u64 %base, %bytes, %addr;
	.reg .f32 %lane0, %lane1, %lane2, %lane3;
	.loc	20	125	0
$LDWbegin__Z7Write2DI6float4EvT_PS1_iii:
	// Unpack the by-value aggregate, one f32 lane per register.
	ld.param.f32 	%lane0, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+0];
	ld.param.f32 	%lane1, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+4];
	ld.param.f32 	%lane2, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+8];
	ld.param.f32 	%lane3, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+12];
	ld.param.u64 	%base, [__cudaparmf2__Z7Write2DI6float4EvT_PS1_iii];
	ld.param.u32 	%arg3, [__cudaparmf3__Z7Write2DI6float4EvT_PS1_iii];
	ld.param.u32 	%arg4, [__cudaparmf4__Z7Write2DI6float4EvT_PS1_iii];
	ld.param.u32 	%arg5, [__cudaparmf5__Z7Write2DI6float4EvT_PS1_iii];
	.loc	20	126	0
	// elem = arg3 * arg5 + arg4 (low 32 bits)
	mad.lo.s32 	%elem, %arg3, %arg5, %arg4;
	// Byte offset = (s64)elem * 16
	mul.wide.s32 	%bytes, %elem, 16;
	add.u64 	%addr, %base, %bytes;
	st.v4.f32 	[%addr+0], {%lane0,%lane1,%lane2,%lane3};
	.loc	20	127	0
	ret;
$LDWend__Z7Write2DI6float4EvT_PS1_iii:
	} // _Z7Write2DI6float4EvT_PS1_iii

	// UnpremultiplyPixel(PixelRGB)
	//   In : 16-byte aggregate of four f32 values; the value at byte offset
	//        12 is used as the divisor (alpha), offsets 0/4/8 are the three
	//        components to scale.
	//   Out: alpha is first saturated to [0, 1]. If (alpha - 8e-6) <= 0 all
	//        four outputs are 0. Otherwise components 0..2 are multiplied by
	//        the approximate reciprocal of alpha and offset 12 carries the
	//        saturated alpha.
	.visible .func (.param .align 16 .b8 __cudaretf__Z18UnpremultiplyPixel8PixelRGB[16]) _Z18UnpremultiplyPixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z18UnpremultiplyPixel8PixelRGB[16])
	{
	.reg .f32 %in0, %in1, %in2, %in3;
	.reg .f32 %alpha, %bias, %biased, %zero, %inv;
	.reg .f32 %out0, %out1, %out2, %out3;
	.reg .pred %tiny;
	.loc	4	206	0
$LDWbegin__Z18UnpremultiplyPixel8PixelRGB:
	ld.param.f32 	%in0, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+0];
	ld.param.f32 	%in1, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+4];
	ld.param.f32 	%in2, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+8];
	ld.param.f32 	%in3, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+12];
	.loc	4	208	0
	// Clamp alpha to [0, 1], then test it against the 8e-6 threshold.
	cvt.ftz.sat.f32.f32 	%alpha, %in3;
	mov.f32 	%bias, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%biased, %alpha, %bias;
	mov.f32 	%zero, 0f00000000;    	// 0
	setp.le.ftz.f32 	%tiny, %biased, %zero;
	.loc	4	219	0
	// Start from the all-zero result used when alpha is at or below the
	// threshold.
	mov.f32 	%out0, 0f00000000;    	// 0
	mov.f32 	%out1, 0f00000000;    	// 0
	mov.f32 	%out2, 0f00000000;    	// 0
	mov.f32 	%out3, 0f00000000;    	// 0
	.loc	4	213	0
	// Otherwise overwrite it with the components scaled by 1 / alpha.
	@!%tiny rcp.approx.ftz.f32 	%inv, %alpha;
	@!%tiny mul.ftz.f32 	%out2, %inv, %in2;
	.loc	4	214	0
	@!%tiny mul.ftz.f32 	%out1, %inv, %in1;
	.loc	4	215	0
	@!%tiny mul.ftz.f32 	%out0, %inv, %in0;
	@!%tiny mov.f32 	%out3, %alpha;
	.loc	4	224	0
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+0], %out0;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+4], %out1;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+8], %out2;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+12], %out3;
	ret;
$LDWend__Z18UnpremultiplyPixel8PixelRGB:
	} // _Z18UnpremultiplyPixel8PixelRGB

	// ToLinearColor(float v)
	//   Out: v raised to the power 2.2 with the sign of v carried through:
	//          v <  0 : -exp2(2.2 * log2(-v))
	//          else   :  exp2(2.2 * log2( v))
	//        log2/exp2 are the approximate, flush-to-zero hardware forms.
	.visible .func (.param .f32 __cudaretf__Z13ToLinearColorf) _Z13ToLinearColorf (.param .f32 __cudaparmf1__Z13ToLinearColorf)
	{
	.reg .f32 %val, %zero, %flipped, %mag, %lg, %gamma, %scaled, %pw, %negpw, %res;
	.reg .pred %isneg;
	.loc	4	231	0
$LDWbegin__Z13ToLinearColorf:
	ld.param.f32 	%val, [__cudaparmf1__Z13ToLinearColorf];
	mov.f32 	%zero, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%isneg, %val, %zero;
	.loc	4	234	0
	// Feed log2 the negated input when v < 0, the untouched input otherwise.
	neg.ftz.f32 	%flipped, %val;
	selp.f32 	%mag, %flipped, %val, %isneg;
	lg2.approx.ftz.f32 	%lg, %mag;
	mov.f32 	%gamma, 0f400ccccd;     	// 2.2
	mul.ftz.f32 	%scaled, %lg, %gamma;
	ex2.approx.ftz.f32 	%pw, %scaled;
	// Restore the sign only on the v < 0 path.
	neg.ftz.f32 	%negpw, %pw;
	selp.f32 	%res, %negpw, %pw, %isneg;
	st.param.f32 	[__cudaretf__Z13ToLinearColorf], %res;
	ret;
$LDWend__Z13ToLinearColorf:
	} // _Z13ToLinearColorf

	// FromLinearColor(float v)
	//   Out: v raised to the power 0.454545 (0f3ee8ba2e) with the sign of v
	//        carried through:
	//          v <  0 : -exp2(0.454545 * log2(-v))
	//          else   :  exp2(0.454545 * log2( v))
	//        log2/exp2 are the approximate, flush-to-zero hardware forms.
	.visible .func (.param .f32 __cudaretf__Z15FromLinearColorf) _Z15FromLinearColorf (.param .f32 __cudaparmf1__Z15FromLinearColorf)
	{
	.reg .f32 %val, %zero, %flipped, %mag, %lg, %gamma, %scaled, %pw, %negpw, %res;
	.reg .pred %isneg;
	.loc	4	239	0
$LDWbegin__Z15FromLinearColorf:
	ld.param.f32 	%val, [__cudaparmf1__Z15FromLinearColorf];
	mov.f32 	%zero, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%isneg, %val, %zero;
	.loc	4	242	0
	// Feed log2 the negated input when v < 0, the untouched input otherwise.
	neg.ftz.f32 	%flipped, %val;
	selp.f32 	%mag, %flipped, %val, %isneg;
	lg2.approx.ftz.f32 	%lg, %mag;
	mov.f32 	%gamma, 0f3ee8ba2e;     	// 0.454545
	mul.ftz.f32 	%scaled, %lg, %gamma;
	ex2.approx.ftz.f32 	%pw, %scaled;
	// Restore the sign only on the v < 0 path.
	neg.ftz.f32 	%negpw, %pw;
	selp.f32 	%res, %negpw, %pw, %isneg;
	st.param.f32 	[__cudaretf__Z15FromLinearColorf], %res;
	ret;
$LDWend__Z15FromLinearColorf:
	} // _Z15FromLinearColorf

	// PremultiplyLinearizePixel(PixelRGB)  [mangled: _Z25PremultiplyLinearizePixel8PixelRGB]
	//
	// Takes a 16-byte struct passed by value as four f32 components at byte
	// offsets +0, +4, +8, +12 and returns a struct of the same layout:
	//     s       = component[+12] clamped to [0, 1]  (cvt .sat)
	//     out[+0] = curve(in[+0]) * s
	//     out[+4] = curve(in[+4]) * s
	//     out[+8] = curve(in[+8]) * s
	//     out[+12]= s
	// where curve(v) is the sign-preserving 2.2 power evaluated as
	// ex2(2.2 * lg2(|v|)) with the sign of v restored. The three curve
	// sequences carry the same .loc lines (4:234 / 4:236) as the standalone
	// _Z13ToLinearColorf body, i.e. they look like inlined copies of it.
	// NOTE(review): presumably +0/+4/+8 are colour channels and +12 is alpha;
	// the channel order is not visible here -- confirm against PixelRGB.
	.visible .func (.param .align 16 .b8 __cudaretf__Z25PremultiplyLinearizePixel8PixelRGB[16]) _Z25PremultiplyLinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB[16])
	{
	.reg .f32 %f<47>;
	.reg .pred %p<5>;
	.loc	4	252	0
$LDWbegin__Z25PremultiplyLinearizePixel8PixelRGB:
	// %f2, %f4, %f6, %f8 = input components at +0, +4, +8, +12
	ld.param.f32 	%f1, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+12];
	mov.f32 	%f8, %f7;
	.loc	4	254	0
	// %f9 = s = saturate(component +12)
	cvt.ftz.sat.f32.f32 	%f9, %f8;
	.loc	4	255	0
	// ---- component +0: %f16 = curve(%f2) ----
	mov.f32 	%f10, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p1, %f2, %f10;
	@!%p1 bra 	$Lt_16_4098;
	.loc	4	234	0
	neg.ftz.f32 	%f11, %f2;
	lg2.approx.ftz.f32 	%f12, %f11;
	mov.f32 	%f13, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f15, %f14;
	neg.ftz.f32 	%f16, %f15;
	bra.uni 	$LDWendi___log2f_193_5;
$Lt_16_4098:
	.loc	4	236	0
	lg2.approx.ftz.f32 	%f17, %f2;
	mov.f32 	%f18, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f16, %f19;
$LDWendi___log2f_193_5:
	.loc	4	256	0
	// ---- component +4: %f26 = curve(%f4) ----
	mov.f32 	%f20, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p2, %f4, %f20;
	@!%p2 bra 	$Lt_16_4610;
	.loc	4	234	0
	neg.ftz.f32 	%f21, %f4;
	lg2.approx.ftz.f32 	%f22, %f21;
	mov.f32 	%f23, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f25, %f24;
	neg.ftz.f32 	%f26, %f25;
	bra.uni 	$LDWendi___log2f_193_3;
$Lt_16_4610:
	.loc	4	236	0
	lg2.approx.ftz.f32 	%f27, %f4;
	mov.f32 	%f28, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f26, %f29;
$LDWendi___log2f_193_3:
	.loc	4	257	0
	// ---- component +8: %f36 = curve(%f6) ----
	mov.f32 	%f30, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f6, %f30;
	@!%p3 bra 	$Lt_16_5122;
	.loc	4	234	0
	neg.ftz.f32 	%f31, %f6;
	lg2.approx.ftz.f32 	%f32, %f31;
	mov.f32 	%f33, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f35, %f34;
	neg.ftz.f32 	%f36, %f35;
	bra.uni 	$LDWendi___log2f_193_1;
$Lt_16_5122:
	.loc	4	236	0
	lg2.approx.ftz.f32 	%f37, %f6;
	mov.f32 	%f38, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f39, %f37, %f38;
	ex2.approx.ftz.f32 	%f36, %f39;
$LDWendi___log2f_193_1:
	.loc	4	259	0
	// Scale the three curved components by s and write the result struct;
	// slot +12 receives s itself (the clamped value, not the raw input).
	mul.ftz.f32 	%f40, %f36, %f9;
	mul.ftz.f32 	%f41, %f26, %f9;
	mul.ftz.f32 	%f42, %f16, %f9;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+0], %f42;
	mov.f32 	%f43, %f41;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+4], %f43;
	mov.f32 	%f44, %f40;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+8], %f44;
	mov.f32 	%f45, %f9;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+12], %f45;
	ret;
$LDWend__Z25PremultiplyLinearizePixel8PixelRGB:
	} // _Z25PremultiplyLinearizePixel8PixelRGB

	// UnpremultiplyUnlinearizePixel(PixelRGB)  [mangled: _Z29UnpremultiplyUnlinearizePixel8PixelRGB]
	//
	// Takes a 16-byte struct passed by value as four f32 components at byte
	// offsets +0, +4, +8, +12 and returns a struct of the same layout.
	//   Step 1 (divide): s = component[+12] clamped to [0, 1].
	//       If s - 8e-6 <= 0, all four working values become 0.
	//       Otherwise components +0/+4/+8 are multiplied by rcp.approx(s)
	//       and the fourth working value is s.
	//   Step 2 (curve): each of the three scaled components goes through the
	//       sign-preserving 0.454545 power, ex2(0.454545 * lg2(|v|)) with the
	//       sign restored (same .loc lines 4:242 / 4:244 as the standalone
	//       _Z15FromLinearColorf body, so apparently inlined copies).
	//   Output: +0/+4/+8 = curved components, +12 = s (or 0 on the guard path).
	// NOTE(review): presumably +12 is alpha and +0..+8 are colour channels;
	// confirm against the PixelRGB definition.
	.visible .func (.param .align 16 .b8 __cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16]) _Z29UnpremultiplyUnlinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16])
	{
	.reg .f32 %f<53>;
	.reg .pred %p<6>;
	.loc	4	263	0
$LDWbegin__Z29UnpremultiplyUnlinearizePixel8PixelRGB:
	// %f2, %f4, %f6, %f8 = input components at +0, +4, +8, +12
	ld.param.f32 	%f1, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+12];
	mov.f32 	%f8, %f7;
	.loc	4	208	0
	// %f9 = s = saturate(component +12); %f10 carries the value returned in +12.
	cvt.ftz.sat.f32.f32 	%f9, %f8;
	mov.f32 	%f10, %f9;
	// Guard: skip the reciprocal when s - 8e-6 <= 0.
	mov.f32 	%f11, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f12, %f9, %f11;
	mov.f32 	%f13, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f12, %f13;
	@%p1 bra 	$Lt_17_5122;
	.loc	4	213	0
	// %f14 = 1/s (approximate); %f15/%f16/%f17 = components +8/+4/+0 divided by s.
	rcp.approx.ftz.f32 	%f14, %f9;
	mul.ftz.f32 	%f15, %f14, %f6;
	.loc	4	214	0
	mul.ftz.f32 	%f16, %f14, %f4;
	.loc	4	215	0
	mul.ftz.f32 	%f17, %f14, %f2;
	bra.uni 	$Lt_17_4866;
$Lt_17_5122:
	.loc	4	219	0
	// Guard path: everything, including the +12 output, is forced to 0.
	mov.f32 	%f15, 0f00000000;    	// 0
	mov.f32 	%f16, 0f00000000;    	// 0
	mov.f32 	%f17, 0f00000000;    	// 0
	mov.f32 	%f10, 0f00000000;    	// 0
$Lt_17_4866:
	.loc	4	266	0
	// ---- component +0: %f24 = curve(%f17) ----
	mov.f32 	%f18, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p2, %f17, %f18;
	@!%p2 bra 	$Lt_17_5378;
	.loc	4	242	0
	neg.ftz.f32 	%f19, %f17;
	lg2.approx.ftz.f32 	%f20, %f19;
	mov.f32 	%f21, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f22, %f20, %f21;
	ex2.approx.ftz.f32 	%f23, %f22;
	neg.ftz.f32 	%f24, %f23;
	bra.uni 	$LDWendi___log2f_194_5;
$Lt_17_5378:
	.loc	4	244	0
	lg2.approx.ftz.f32 	%f25, %f17;
	mov.f32 	%f26, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f27, %f25, %f26;
	ex2.approx.ftz.f32 	%f24, %f27;
$LDWendi___log2f_194_5:
	.loc	4	267	0
	// ---- component +4: %f34 = curve(%f16) ----
	mov.f32 	%f28, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f16, %f28;
	@!%p3 bra 	$Lt_17_5890;
	.loc	4	242	0
	neg.ftz.f32 	%f29, %f16;
	lg2.approx.ftz.f32 	%f30, %f29;
	mov.f32 	%f31, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f32, %f30, %f31;
	ex2.approx.ftz.f32 	%f33, %f32;
	neg.ftz.f32 	%f34, %f33;
	bra.uni 	$LDWendi___log2f_194_3;
$Lt_17_5890:
	.loc	4	244	0
	lg2.approx.ftz.f32 	%f35, %f16;
	mov.f32 	%f36, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f37, %f35, %f36;
	ex2.approx.ftz.f32 	%f34, %f37;
$LDWendi___log2f_194_3:
	.loc	4	268	0
	// ---- component +8: %f44 = curve(%f15) ----
	mov.f32 	%f38, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f15, %f38;
	@!%p4 bra 	$Lt_17_6402;
	.loc	4	242	0
	neg.ftz.f32 	%f39, %f15;
	lg2.approx.ftz.f32 	%f40, %f39;
	mov.f32 	%f41, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f42, %f40, %f41;
	ex2.approx.ftz.f32 	%f43, %f42;
	neg.ftz.f32 	%f44, %f43;
	bra.uni 	$LDWendi___log2f_194_1;
$Lt_17_6402:
	.loc	4	244	0
	lg2.approx.ftz.f32 	%f45, %f15;
	mov.f32 	%f46, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f47, %f45, %f46;
	ex2.approx.ftz.f32 	%f44, %f47;
$LDWendi___log2f_194_1:
	.loc	4	269	0
	// Write the result struct: curved components, then %f10 in slot +12.
	mov.f32 	%f48, %f24;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+0], %f48;
	mov.f32 	%f49, %f34;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+4], %f49;
	mov.f32 	%f50, %f44;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+8], %f50;
	mov.f32 	%f51, %f10;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+12], %f51;
	ret;
$LDWend__Z29UnpremultiplyUnlinearizePixel8PixelRGB:
	} // _Z29UnpremultiplyUnlinearizePixel8PixelRGB

	// PremultiplyLinearize(float4)  [mangled: _Z20PremultiplyLinearize6float4]
	//
	// Takes a 16-byte value passed as four f32 components at byte offsets
	// +0, +4, +8, +12 and returns a value of the same layout:
	//     s       = component[+12] clamped to [0, 1]  (cvt .sat)
	//     out[+0] = curve(in[+0]) * s
	//     out[+4] = curve(in[+4]) * s
	//     out[+8] = curve(in[+8]) * s
	//     out[+12]= s
	// where curve(v) is the sign-preserving 2.2 power evaluated as
	// ex2(2.2 * lg2(|v|)) with the sign of v restored. The instruction
	// sequence is the same as _Z25PremultiplyLinearizePixel8PixelRGB and
	// carries that function's .loc lines (4:254-259), so this appears to be
	// a wrapper with the pixel routine inlined.
	.visible .func (.param .align 16 .b8 __cudaretf__Z20PremultiplyLinearize6float4[16]) _Z20PremultiplyLinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z20PremultiplyLinearize6float4[16])
	{
	.reg .f32 %f<47>;
	.reg .pred %p<5>;
	.loc	4	277	0
$LDWbegin__Z20PremultiplyLinearize6float4:
	// %f2, %f4, %f6, %f8 = input components at +0, +4, +8, +12
	ld.param.f32 	%f1, [__cudaparmf1__Z20PremultiplyLinearize6float4+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z20PremultiplyLinearize6float4+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z20PremultiplyLinearize6float4+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z20PremultiplyLinearize6float4+12];
	mov.f32 	%f8, %f7;
	.loc	4	254	0
	// %f9 = s = saturate(component +12)
	cvt.ftz.sat.f32.f32 	%f9, %f8;
	.loc	4	255	0
	// ---- component +0: %f16 = curve(%f2) ----
	mov.f32 	%f10, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p1, %f2, %f10;
	@!%p1 bra 	$Lt_18_4098;
	.loc	4	234	0
	neg.ftz.f32 	%f11, %f2;
	lg2.approx.ftz.f32 	%f12, %f11;
	mov.f32 	%f13, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f15, %f14;
	neg.ftz.f32 	%f16, %f15;
	bra.uni 	$LDWendi___log2f_195_5;
$Lt_18_4098:
	.loc	4	236	0
	lg2.approx.ftz.f32 	%f17, %f2;
	mov.f32 	%f18, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f16, %f19;
$LDWendi___log2f_195_5:
	.loc	4	256	0
	// ---- component +4: %f26 = curve(%f4) ----
	mov.f32 	%f20, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p2, %f4, %f20;
	@!%p2 bra 	$Lt_18_4610;
	.loc	4	234	0
	neg.ftz.f32 	%f21, %f4;
	lg2.approx.ftz.f32 	%f22, %f21;
	mov.f32 	%f23, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f25, %f24;
	neg.ftz.f32 	%f26, %f25;
	bra.uni 	$LDWendi___log2f_195_3;
$Lt_18_4610:
	.loc	4	236	0
	lg2.approx.ftz.f32 	%f27, %f4;
	mov.f32 	%f28, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f26, %f29;
$LDWendi___log2f_195_3:
	.loc	4	257	0
	// ---- component +8: %f36 = curve(%f6) ----
	mov.f32 	%f30, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f6, %f30;
	@!%p3 bra 	$Lt_18_5122;
	.loc	4	234	0
	neg.ftz.f32 	%f31, %f6;
	lg2.approx.ftz.f32 	%f32, %f31;
	mov.f32 	%f33, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f35, %f34;
	neg.ftz.f32 	%f36, %f35;
	bra.uni 	$LDWendi___log2f_195_1;
$Lt_18_5122:
	.loc	4	236	0
	lg2.approx.ftz.f32 	%f37, %f6;
	mov.f32 	%f38, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f39, %f37, %f38;
	ex2.approx.ftz.f32 	%f36, %f39;
$LDWendi___log2f_195_1:
	.loc	4	259	0
	// Scale the three curved components by s and write the result;
	// slot +12 receives s itself (the clamped value, not the raw input).
	mul.ftz.f32 	%f40, %f36, %f9;
	mul.ftz.f32 	%f41, %f26, %f9;
	.loc	4	278	0
	mul.ftz.f32 	%f42, %f16, %f9;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+0], %f42;
	mov.f32 	%f43, %f41;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+4], %f43;
	mov.f32 	%f44, %f40;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+8], %f44;
	mov.f32 	%f45, %f9;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+12], %f45;
	ret;
$LDWend__Z20PremultiplyLinearize6float4:
	} // _Z20PremultiplyLinearize6float4

	// UnpremultiplyUnlinearize(float4)  [mangled: _Z24UnpremultiplyUnlinearize6float4]
	//
	// Takes a 16-byte value passed as four f32 components at byte offsets
	// +0, +4, +8, +12 and returns a value of the same layout.
	//   Step 1 (divide): s = component[+12] clamped to [0, 1].
	//       If s - 8e-6 <= 0, all four working values become 0.
	//       Otherwise components +0/+4/+8 are multiplied by rcp.approx(s)
	//       and the fourth working value is s.
	//   Step 2 (curve): each scaled component goes through the sign-preserving
	//       0.454545 power, ex2(0.454545 * lg2(|v|)) with the sign restored.
	//   Output: +0/+4/+8 = curved components, +12 = s (or 0 on the guard path).
	// The instruction sequence is the same as
	// _Z29UnpremultiplyUnlinearizePixel8PixelRGB and carries that function's
	// .loc lines (4:266-268), so this appears to be a wrapper with the pixel
	// routine inlined.
	.visible .func (.param .align 16 .b8 __cudaretf__Z24UnpremultiplyUnlinearize6float4[16]) _Z24UnpremultiplyUnlinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z24UnpremultiplyUnlinearize6float4[16])
	{
	.reg .f32 %f<53>;
	.reg .pred %p<6>;
	.loc	4	284	0
$LDWbegin__Z24UnpremultiplyUnlinearize6float4:
	// %f2, %f4, %f6, %f8 = input components at +0, +4, +8, +12
	ld.param.f32 	%f1, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+12];
	mov.f32 	%f8, %f7;
	.loc	4	208	0
	// %f9 = s = saturate(component +12); %f10 carries the value returned in +12.
	cvt.ftz.sat.f32.f32 	%f9, %f8;
	mov.f32 	%f10, %f9;
	// Guard: skip the reciprocal when s - 8e-6 <= 0.
	mov.f32 	%f11, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f12, %f9, %f11;
	mov.f32 	%f13, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f12, %f13;
	@%p1 bra 	$Lt_19_5122;
	.loc	4	213	0
	// %f14 = 1/s (approximate); %f15/%f16/%f17 = components +8/+4/+0 divided by s.
	rcp.approx.ftz.f32 	%f14, %f9;
	mul.ftz.f32 	%f15, %f14, %f6;
	.loc	4	214	0
	mul.ftz.f32 	%f16, %f14, %f4;
	.loc	4	215	0
	mul.ftz.f32 	%f17, %f14, %f2;
	bra.uni 	$Lt_19_4866;
$Lt_19_5122:
	.loc	4	219	0
	// Guard path: everything, including the +12 output, is forced to 0.
	mov.f32 	%f15, 0f00000000;    	// 0
	mov.f32 	%f16, 0f00000000;    	// 0
	mov.f32 	%f17, 0f00000000;    	// 0
	mov.f32 	%f10, 0f00000000;    	// 0
$Lt_19_4866:
	.loc	4	266	0
	// ---- component +0: %f24 = curve(%f17) ----
	mov.f32 	%f18, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p2, %f17, %f18;
	@!%p2 bra 	$Lt_19_5378;
	.loc	4	242	0
	neg.ftz.f32 	%f19, %f17;
	lg2.approx.ftz.f32 	%f20, %f19;
	mov.f32 	%f21, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f22, %f20, %f21;
	ex2.approx.ftz.f32 	%f23, %f22;
	neg.ftz.f32 	%f24, %f23;
	bra.uni 	$LDWendi___log2f_196_5;
$Lt_19_5378:
	.loc	4	244	0
	lg2.approx.ftz.f32 	%f25, %f17;
	mov.f32 	%f26, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f27, %f25, %f26;
	ex2.approx.ftz.f32 	%f24, %f27;
$LDWendi___log2f_196_5:
	.loc	4	267	0
	// ---- component +4: %f34 = curve(%f16) ----
	mov.f32 	%f28, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f16, %f28;
	@!%p3 bra 	$Lt_19_5890;
	.loc	4	242	0
	neg.ftz.f32 	%f29, %f16;
	lg2.approx.ftz.f32 	%f30, %f29;
	mov.f32 	%f31, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f32, %f30, %f31;
	ex2.approx.ftz.f32 	%f33, %f32;
	neg.ftz.f32 	%f34, %f33;
	bra.uni 	$LDWendi___log2f_196_3;
$Lt_19_5890:
	.loc	4	244	0
	lg2.approx.ftz.f32 	%f35, %f16;
	mov.f32 	%f36, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f37, %f35, %f36;
	ex2.approx.ftz.f32 	%f34, %f37;
$LDWendi___log2f_196_3:
	.loc	4	268	0
	// ---- component +8: %f44 = curve(%f15) ----
	mov.f32 	%f38, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f15, %f38;
	@!%p4 bra 	$Lt_19_6402;
	.loc	4	242	0
	neg.ftz.f32 	%f39, %f15;
	lg2.approx.ftz.f32 	%f40, %f39;
	mov.f32 	%f41, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f42, %f40, %f41;
	ex2.approx.ftz.f32 	%f43, %f42;
	neg.ftz.f32 	%f44, %f43;
	bra.uni 	$LDWendi___log2f_196_1;
$Lt_19_6402:
	.loc	4	244	0
	lg2.approx.ftz.f32 	%f45, %f15;
	mov.f32 	%f46, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f47, %f45, %f46;
	ex2.approx.ftz.f32 	%f44, %f47;
$LDWendi___log2f_196_1:
	.loc	4	285	0
	// Write the result: curved components, then %f10 in slot +12.
	mov.f32 	%f48, %f24;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+0], %f48;
	mov.f32 	%f49, %f34;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+4], %f49;
	mov.f32 	%f50, %f44;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+8], %f50;
	mov.f32 	%f51, %f10;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+12], %f51;
	ret;
$LDWend__Z24UnpremultiplyUnlinearize6float4:
	} // _Z24UnpremultiplyUnlinearize6float4

	// SwapComponentOrder<float4>(const float4&)  [mangled: _Z18SwapComponentOrderI6float4ET_RKS1_]
	//
	// Receives a 64-bit address and returns a 16-byte value whose four f32
	// components are the pointed-to components in reverse order:
	//     out[+0] = in[+12], out[+4] = in[+8], out[+8] = in[+4], out[+12] = in[+0]
	// The loads use plain `ld.f32` (no state-space qualifier), i.e. generic
	// addressing. The pointed-to memory is only read, never written.
	.visible .func (.param .align 16 .b8 __cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_[16]) _Z18SwapComponentOrderI6float4ET_RKS1_ (.param .u64 __cudaparmf1__Z18SwapComponentOrderI6float4ET_RKS1_)
	{
	.reg .u64 %rd<4>;
	.reg .f32 %f<6>;
	.loc	21	264	0
$LDWbegin__Z18SwapComponentOrderI6float4ET_RKS1_:
	// %rd2 = address of the source value
	ld.param.u64 	%rd1, [__cudaparmf1__Z18SwapComponentOrderI6float4ET_RKS1_];
	mov.s64 	%rd2, %rd1;
	.loc	21	270	0
	// Copy components last-to-first into the return slots.
	ld.f32 	%f1, [%rd2+12];
	st.param.f32 	[__cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_+0], %f1;
	ld.f32 	%f2, [%rd2+8];
	st.param.f32 	[__cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_+4], %f2;
	ld.f32 	%f3, [%rd2+4];
	st.param.f32 	[__cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_+8], %f3;
	ld.f32 	%f4, [%rd2+0];
	st.param.f32 	[__cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_+12], %f4;
	ret;
$LDWend__Z18SwapComponentOrderI6float4ET_RKS1_:
	} // _Z18SwapComponentOrderI6float4ET_RKS1_

	// CalcShadowsWeight(float, float, float)  [mangled: _Z17CalcShadowsWeightfff]
	//
	// With v = arg1, a = arg2, w = arg3, returns an f32 weight:
	//     v <= a        ->  1.0
	//     v >= a + w    ->  0.0
	//     otherwise     ->  (v - a) / w        (div.approx)
	// NOTE(review): the middle branch evaluates to ~0 just above a and ~1
	// just below a + w, while the neighbouring constant branches return 1 and
	// 0 respectively, so the function is discontinuous at both ends of the
	// ramp. _Z20CalcHighlightsWeightfff in this file is continuous. This looks
	// like an inverted ramp (1 - (v - a)/w may have been intended) -- confirm
	// against the CUDA source before changing anything; this PTX is generated
	// and the same logic may be inlined elsewhere.
	.visible .func (.param .f32 __cudaretf__Z17CalcShadowsWeightfff) _Z17CalcShadowsWeightfff (.param .f32 __cudaparmf1__Z17CalcShadowsWeightfff, .param .f32 __cudaparmf2__Z17CalcShadowsWeightfff, .param .f32 __cudaparmf3__Z17CalcShadowsWeightfff)
	{
	.reg .f32 %f<12>;
	.reg .pred %p<4>;
	.loc	22	36	0
$LDWbegin__Z17CalcShadowsWeightfff:
	// %f2 = v (arg1), %f4 = a (arg2), %f6 = w (arg3)
	ld.param.f32 	%f1, [__cudaparmf1__Z17CalcShadowsWeightfff];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf2__Z17CalcShadowsWeightfff];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf3__Z17CalcShadowsWeightfff];
	mov.f32 	%f6, %f5;
	.loc	22	37	0
	// v <= a: weight is 1.
	setp.le.ftz.f32 	%p1, %f2, %f4;
	@!%p1 bra 	$Lt_21_1282;
	.loc	22	39	0
	mov.f32 	%f7, 0f3f800000;     	// 1
	bra.uni 	$LBB6__Z17CalcShadowsWeightfff;
$Lt_21_1282:
	.loc	22	41	0
	// v >= a + w: weight is 0.
	add.ftz.f32 	%f8, %f4, %f6;
	setp.ge.ftz.f32 	%p2, %f2, %f8;
	@!%p2 bra 	$Lt_21_1538;
	.loc	22	43	0
	mov.f32 	%f7, 0f00000000;     	// 0
	bra.uni 	$LBB6__Z17CalcShadowsWeightfff;
$Lt_21_1538:
	.loc	22	47	0
	// In between: weight = (v - a) / w.
	sub.ftz.f32 	%f9, %f2, %f4;
	div.approx.ftz.f32 	%f7, %f9, %f6;
$LBB6__Z17CalcShadowsWeightfff:
	// All three paths join here with the weight in %f7.
	mov.f32 	%f10, %f7;
	st.param.f32 	[__cudaretf__Z17CalcShadowsWeightfff], %f10;
	ret;
$LDWend__Z17CalcShadowsWeightfff:
	} // _Z17CalcShadowsWeightfff

	// CalcHighlightsWeight(float, float, float)  [mangled: _Z20CalcHighlightsWeightfff]
	//
	// With v = arg1, a = arg2, w = arg3 and lo = a - w, returns an f32 weight:
	//     lo > v        ->  0.0
	//     v  > a        ->  1.0
	//     otherwise     ->  (v - lo) / w       (div.approx)
	// The middle branch runs from 0 at v == lo up to 1 at v == a, so the
	// three pieces meet at both ends of the ramp.
	.visible .func (.param .f32 __cudaretf__Z20CalcHighlightsWeightfff) _Z20CalcHighlightsWeightfff (.param .f32 __cudaparmf1__Z20CalcHighlightsWeightfff, .param .f32 __cudaparmf2__Z20CalcHighlightsWeightfff, .param .f32 __cudaparmf3__Z20CalcHighlightsWeightfff)
	{
	.reg .f32 %f<12>;
	.reg .pred %p<4>;
	.loc	22	54	0
$LDWbegin__Z20CalcHighlightsWeightfff:
	// %f2 = v (arg1), %f4 = a (arg2), %f6 = w (arg3)
	ld.param.f32 	%f1, [__cudaparmf1__Z20CalcHighlightsWeightfff];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf2__Z20CalcHighlightsWeightfff];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf3__Z20CalcHighlightsWeightfff];
	mov.f32 	%f6, %f5;
	.loc	22	55	0
	// %f7 = lo = a - w; below lo the weight is 0.
	sub.ftz.f32 	%f7, %f4, %f6;
	setp.gt.ftz.f32 	%p1, %f7, %f2;
	@!%p1 bra 	$Lt_22_1282;
	.loc	22	57	0
	mov.f32 	%f8, 0f00000000;     	// 0
	bra.uni 	$LBB6__Z20CalcHighlightsWeightfff;
$Lt_22_1282:
	.loc	22	59	0
	// Above a the weight is 1.
	setp.gt.ftz.f32 	%p2, %f2, %f4;
	@!%p2 bra 	$Lt_22_1538;
	.loc	22	61	0
	mov.f32 	%f8, 0f3f800000;     	// 1
	bra.uni 	$LBB6__Z20CalcHighlightsWeightfff;
$Lt_22_1538:
	.loc	22	65	0
	// In between: weight = (v - lo) / w.
	sub.ftz.f32 	%f9, %f2, %f7;
	div.approx.ftz.f32 	%f8, %f9, %f6;
$LBB6__Z20CalcHighlightsWeightfff:
	// All three paths join here with the weight in %f8.
	mov.f32 	%f10, %f8;
	st.param.f32 	[__cudaretf__Z20CalcHighlightsWeightfff], %f10;
	ret;
$LDWend__Z20CalcHighlightsWeightfff:
	} // _Z20CalcHighlightsWeightfff

	// ConvertYPbPrToIRE(float, float, float, float&, float&)  [mangled: _Z17ConvertYPbPrToIREfffRfS_]
	//
	// No return value; writes two f32 results through the addresses passed
	// as arg4 and arg5 (plain `st.f32`, i.e. generic addressing):
	//     *arg4 = arg1 * 100
	//     *arg5 = sqrt( (arg2 * 0.87202)^2 + (arg3 * 1.23)^2 ) * 100
	// The square root is sqrt.approx.
	// NOTE(review): by the function name arg1/arg2/arg3 are presumably
	// Y/Pb/Pr and the outputs are IRE-scaled luma and chroma magnitude --
	// confirm against the CUDA source.
	.visible .func _Z17ConvertYPbPrToIREfffRfS_ (.param .f32 __cudaparmf1__Z17ConvertYPbPrToIREfffRfS_, .param .f32 __cudaparmf2__Z17ConvertYPbPrToIREfffRfS_, .param .f32 __cudaparmf3__Z17ConvertYPbPrToIREfffRfS_, .param .u64 __cudaparmf4__Z17ConvertYPbPrToIREfffRfS_, .param .u64 __cudaparmf5__Z17ConvertYPbPrToIREfffRfS_)
	{
	.reg .u64 %rd<6>;
	.reg .f32 %f<19>;
	.loc	22	79	0
$LDWbegin__Z17ConvertYPbPrToIREfffRfS_:
	// %f2/%f4/%f6 = arg1/arg2/arg3; %rd2/%rd4 = output addresses arg4/arg5
	ld.param.f32 	%f1, [__cudaparmf1__Z17ConvertYPbPrToIREfffRfS_];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf2__Z17ConvertYPbPrToIREfffRfS_];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf3__Z17ConvertYPbPrToIREfffRfS_];
	mov.f32 	%f6, %f5;
	ld.param.u64 	%rd1, [__cudaparmf4__Z17ConvertYPbPrToIREfffRfS_];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf5__Z17ConvertYPbPrToIREfffRfS_];
	mov.s64 	%rd4, %rd3;
	.loc	22	85	0
	// First output: arg1 scaled by 100.
	mov.f32 	%f7, 0f42c80000;     	// 100
	mul.ftz.f32 	%f8, %f2, %f7;
	st.f32 	[%rd2+0], %f8;
	.loc	22	86	0
	// Second output: 100 * length of the vector (0.87202*arg2, 1.23*arg3).
	mov.f32 	%f9, 0f3f9d70a4;     	// 1.23
	mul.ftz.f32 	%f10, %f6, %f9;
	mov.f32 	%f11, 0f3f5f3cb4;    	// 0.87202
	mul.ftz.f32 	%f12, %f4, %f11;
	mul.ftz.f32 	%f13, %f10, %f10;
	fma.rn.ftz.f32 	%f14, %f12, %f12, %f13;
	sqrt.approx.ftz.f32 	%f15, %f14;
	mov.f32 	%f16, 0f42c80000;    	// 100
	mul.ftz.f32 	%f17, %f15, %f16;
	st.f32 	[%rd4+0], %f17;
	.loc	22	87	0
	ret;
$LDWend__Z17ConvertYPbPrToIREfffRfS_:
	} // _Z17ConvertYPbPrToIREfffRfS_

	// ConvertIREYToY(float)  [mangled: _Z14ConvertIREYToYf]
	//
	// Returns the single f32 argument divided by 100 (div.approx.ftz), the
	// inverse of the "* 100" scaling applied to the first output of
	// _Z17ConvertYPbPrToIREfffRfS_ in this file.
	.visible .func (.param .f32 __cudaretf__Z14ConvertIREYToYf) _Z14ConvertIREYToYf (.param .f32 __cudaparmf1__Z14ConvertIREYToYf)
	{
	.reg .f32 %f<6>;
	.loc	22	90	0
$LDWbegin__Z14ConvertIREYToYf:
	ld.param.f32 	%f1, [__cudaparmf1__Z14ConvertIREYToYf];
	mov.f32 	%f2, %f1;
	.loc	22	91	0
	// result = arg / 100
	mov.f32 	%f3, 0f42c80000;     	// 100
	div.approx.ftz.f32 	%f4, %f2, %f3;
	st.param.f32 	[__cudaretf__Z14ConvertIREYToYf], %f4;
	ret;
$LDWend__Z14ConvertIREYToYf:
	} // _Z14ConvertIREYToYf

	// InHueRange(float, float, float)  [mangled: _Z10InHueRangefff]
	//
	// Returns an s32 that is 1 when both of the following hold, else 0:
	//     arg2 >  arg3
	//     arg1 <= arg3
	// Each `set` produces an all-ones mask (0xFFFFFFFF) or 0; `neg.s32`
	// turns the mask into 1 or 0 before the two flags are ANDed.
	// NOTE(review): the first comparison does not involve arg1 at all, which
	// is unusual for a "value within [min, max]" style test -- confirm the
	// intended condition against the CUDA source.
	.visible .func (.param .s32 __cudaretf__Z10InHueRangefff) _Z10InHueRangefff (.param .f32 __cudaparmf1__Z10InHueRangefff, .param .f32 __cudaparmf2__Z10InHueRangefff, .param .f32 __cudaparmf3__Z10InHueRangefff)
	{
	.reg .u32 %r<7>;
	.reg .f32 %f<8>;
	.loc	22	101	0
$LDWbegin__Z10InHueRangefff:
	// %f2 = arg1, %f4 = arg2, %f6 = arg3
	ld.param.f32 	%f1, [__cudaparmf1__Z10InHueRangefff];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf2__Z10InHueRangefff];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf3__Z10InHueRangefff];
	mov.f32 	%f6, %f5;
	.loc	22	102	0
	// %r2 = (arg2 > arg3) as 0/1
	set.gt.ftz.u32.f32 	%r1, %f4, %f6;
	neg.s32 	%r2, %r1;
	// %r4 = (arg1 <= arg3) as 0/1
	set.le.ftz.u32.f32 	%r3, %f2, %f6;
	neg.s32 	%r4, %r3;
	and.b32 	%r5, %r2, %r4;
	st.param.s32 	[__cudaretf__Z10InHueRangefff], %r5;
	ret;
$LDWend__Z10InHueRangefff:
	} // _Z10InHueRangefff

	// ConvertPbPrToHue(float, float, float&)  [mangled: _Z16ConvertPbPrToHueffRf]
	//
	// No return value; writes one f32 through the address passed as arg3.
	// With p = arg1 and q = arg2 the code forms
	//     X = 0.9475*q - 0.319755*p            (%f10)
	//     Y = -0.9475*p - 0.319755*q           (%f13)
	// then computes an angle from (Y, X) with a branchy sequence that has the
	// shape of an inlined atan2f(Y, X) (its .loc entries point into file 25,
	// not the application source), wraps negative angles by adding 6.28319,
	// and finally divides by 6.28319. The value left in *arg3 is therefore
	// the angle expressed as a fraction of a full turn.
	// Note that *arg3 is stored twice: first the wrapped angle, then the
	// divided value, which overwrites it.
	.visible .func _Z16ConvertPbPrToHueffRf (.param .f32 __cudaparmf1__Z16ConvertPbPrToHueffRf, .param .f32 __cudaparmf2__Z16ConvertPbPrToHueffRf, .param .u64 __cudaparmf3__Z16ConvertPbPrToHueffRf)
	{
	.reg .u32 %r<30>;
	.reg .u64 %rd<4>;
	.reg .f32 %f<57>;
	.reg .pred %p<10>;
	.loc	22	108	0
$LDWbegin__Z16ConvertPbPrToHueffRf:
	// %f2 = p (arg1), %f4 = q (arg2), %rd2 = output address (arg3)
	ld.param.f32 	%f1, [__cudaparmf1__Z16ConvertPbPrToHueffRf];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf2__Z16ConvertPbPrToHueffRf];
	mov.f32 	%f4, %f3;
	ld.param.u64 	%rd1, [__cudaparmf3__Z16ConvertPbPrToHueffRf];
	mov.s64 	%rd2, %rd1;
	.loc	22	122	0
	// %f10 = X, %f13 = Y (see header); %f14 = |X|, %f15 = |Y|.
	mov.f32 	%f5, 0f3f728f61;     	// 0.9475
	mul.ftz.f32 	%f6, %f4, %f5;
	mov.f32 	%f7, 0f3ea3b6e9;     	// 0.319755
	mul.ftz.f32 	%f8, %f4, %f7;
	mov.f32 	%f9, 0fbea3b6e9;     	// -0.319755
	fma.rn.ftz.f32 	%f10, %f9, %f2, %f6;
	mov.f32 	%f11, 0fbf728f61;    	// -0.9475
	mul.ftz.f32 	%f12, %f11, %f2;
	sub.ftz.f32 	%f13, %f12, %f8;
	abs.ftz.f32 	%f14, %f10;
	abs.ftz.f32 	%f15, %f13;
	// %r2 = sign bit of Y, ORed into every result below.
	mov.b32 	%r1, %f13;
	and.b32 	%r2, %r1, -2147483648;
	// %r7 != 0 only when |X| == 0 and |Y| == 0.
	mov.f32 	%f16, 0f00000000;    	// 0
	set.eq.ftz.u32.f32 	%r3, %f14, %f16;
	neg.s32 	%r4, %r3;
	mov.f32 	%f17, 0f00000000;    	// 0
	set.eq.ftz.u32.f32 	%r5, %f15, %f17;
	neg.s32 	%r6, %r5;
	and.b32 	%r7, %r4, %r6;
	mov.u32 	%r8, 0;
	setp.eq.s32 	%p1, %r7, %r8;
	@%p1 bra 	$Lt_26_7170;
	.loc	25	1842	0
	// Both zero: magnitude is 1078530011 (bit pattern 0x40490FDB, the same
	// as the 3.14159 constant below) when X's sign bit is set, else 0.
	mov.s32 	%r9, 1078530011;
	mov.s32 	%r10, 0;
	mov.b32 	%r11, %f10;
	mov.s32 	%r12, 0;
	setp.lt.s32 	%p2, %r11, %r12;
	selp.s32 	%r13, %r9, %r10, %p2;
	or.b32 	%r14, %r13, %r2;
	mov.b32 	%f18, %r14;
	bra.uni 	$Lt_26_6914;
$Lt_26_7170:
	// %r19 != 0 only when |X| == +Inf and |Y| == +Inf.
	mov.f32 	%f19, 0f7f800000;    	// 1.#INF
	set.eq.ftz.u32.f32 	%r15, %f14, %f19;
	neg.s32 	%r16, %r15;
	mov.f32 	%f20, 0f7f800000;    	// 1.#INF
	set.eq.ftz.u32.f32 	%r17, %f15, %f20;
	neg.s32 	%r18, %r17;
	and.b32 	%r19, %r16, %r18;
	mov.u32 	%r20, 0;
	setp.eq.s32 	%p3, %r19, %r20;
	@%p3 bra 	$Lt_26_7682;
	.loc	25	1845	0
	// Both infinite: magnitude is 1075235812 (0x4016CBE4, ~2.35619) when
	// X's sign bit is set, else 1061752795 (0x3F490FDB, ~0.785398).
	mov.s32 	%r21, 1075235812;
	mov.s32 	%r22, 1061752795;
	mov.b32 	%r23, %f10;
	mov.s32 	%r24, 0;
	setp.lt.s32 	%p4, %r23, %r24;
	selp.s32 	%r25, %r21, %r22, %p4;
	or.b32 	%r26, %r25, %r2;
	mov.b32 	%f18, %r26;
	bra.uni 	$Lt_26_7426;
$Lt_26_7682:
	.loc	25	1207	0
	// General case: %f23 = t = min(|X|,|Y|) / max(|X|,|Y|), %f24 = t^2.
	// %f31 = t * t^2 * numerator polynomial in t^2.
	min.ftz.f32 	%f21, %f14, %f15;
	max.ftz.f32 	%f22, %f14, %f15;
	div.full.ftz.f32 	%f23, %f21, %f22;
	mul.ftz.f32 	%f24, %f23, %f23;
	mov.f32 	%f25, 0fbf52c7ea;    	// -0.823363
	mov.f32 	%f26, 0fc0b59883;    	// -5.67487
	fma.rn.ftz.f32 	%f27, %f24, %f25, %f26;
	mov.f32 	%f28, 0fc0d21907;    	// -6.56556
	fma.rn.ftz.f32 	%f29, %f27, %f24, %f28;
	mul.ftz.f32 	%f30, %f24, %f29;
	mul.ftz.f32 	%f31, %f23, %f30;
	.loc	25	1211	0
	// %f38 = reciprocal of the denominator polynomial in t^2.
	mov.f32 	%f32, 0f41355dc0;    	// 11.3354
	add.ftz.f32 	%f33, %f24, %f32;
	mov.f32 	%f34, 0f41e6bd60;    	// 28.8425
	fma.rn.ftz.f32 	%f35, %f33, %f24, %f34;
	mov.f32 	%f36, 0f419d92c8;    	// 19.6967
	fma.rn.ftz.f32 	%f37, %f35, %f24, %f36;
	rcp.approx.ftz.f32 	%f38, %f37;
	.loc	25	1856	0
	// %f39 = t + numerator/denominator; use 1.5708 - %f39 when |X| < |Y|,
	// then 3.14159 - that when X < 0, then OR in the sign bit of Y.
	setp.lt.ftz.f32 	%p5, %f14, %f15;
	fma.rn.ftz.f32 	%f39, %f31, %f38, %f23;
	mov.f32 	%f40, 0f3fc90fdb;    	// 1.5708
	sub.ftz.f32 	%f41, %f40, %f39;
	selp.f32 	%f42, %f41, %f39, %p5;
	mov.f32 	%f43, 0f40490fdb;    	// 3.14159
	sub.ftz.f32 	%f44, %f43, %f42;
	mov.f32 	%f45, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p6, %f10, %f45;
	selp.f32 	%f46, %f44, %f42, %p6;
	mov.b32 	%r27, %f46;
	or.b32 	%r28, %r2, %r27;
	mov.b32 	%f47, %r28;
	// Keep the computed angle unless X + Y fails "<= +Inf" (i.e. the sum is
	// NaN), in which case the sum itself becomes the result.
	add.ftz.f32 	%f48, %f10, %f13;
	mov.f32 	%f49, 0f7f800000;    	// 1.#INF
	setp.le.ftz.f32 	%p7, %f48, %f49;
	selp.f32 	%f18, %f47, %f48, %p7;
$Lt_26_7426:
$Lt_26_6914:
	.loc	25	1863	0
	// All three cases join here with the angle in %f18.
	// Negative angles are shifted up by 6.28319; first store to *arg3.
	mov.f32 	%f50, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p8, %f18, %f50;
	mov.f32 	%f51, 0f40c90fdb;    	// 6.28319
	add.ftz.f32 	%f52, %f18, %f51;
	selp.f32 	%f53, %f52, %f18, %p8;
	st.f32 	[%rd2+0], %f53;
	.loc	22	137	0
	// Divide by 6.28319 and store again; this overwrites the previous store.
	mov.f32 	%f54, 0f40c90fdb;    	// 6.28319
	div.approx.ftz.f32 	%f55, %f53, %f54;
	st.f32 	[%rd2+0], %f55;
	.loc	22	138	0
	ret;
$LDWend__Z16ConvertPbPrToHueffRf:
	} // _Z16ConvertPbPrToHueffRf

	// CalculateSmartLimitWeights(float, float, float, float&, float&)  [mangled: _Z26CalculateSmartLimitWeightsfffRfS_]
	//
	// No return value; writes a pair of f32 weights through the addresses
	// passed as arg4 and arg5. arg1 is declared but never loaded in this body.
	//
	// Step 1: with p = arg2 and q = arg3, computes the same quantity as
	//   _Z16ConvertPbPrToHueffRf in this file (same constants, same .loc
	//   lines, so apparently an inlined copy): an atan2-shaped angle of
	//   (Y, X) = (-0.9475*p - 0.319755*q, 0.9475*q - 0.319755*p), wrapped to
	//   be non-negative and divided by 6.28319. Call the result h (%f55).
	// Step 2: picks the weights by testing h against fixed ranges, in order:
	//     [0.9167, 1.01) or [0, 0.0833)  -> *arg4 = 0.2, *arg5 = 0.8
	//     [0.0837, 0.2503)               -> *arg4 = 0.5, *arg5 = 0.5
	//     [0.2497, 0.4163)               -> *arg4 = 0.2, *arg5 = 0.8
	//     [0.4167, 0.5833)               -> *arg4 = 0.2, *arg5 = 0.8
	//     [0.5837, 0.7503)               -> *arg4 = 0.2, *arg5 = 0.8
	//     [0.7497, 0.9163)               -> *arg4 = 0.2, *arg5 = 0.8
	//     anything else                  -> *arg4 = 0.2, *arg5 = 0.8
	//   As generated, only the [0.0837, 0.2503) range produces a different
	//   pair; every other path stores 0.2 / 0.8.
	.visible .func _Z26CalculateSmartLimitWeightsfffRfS_ (.param .f32 __cudaparmf1__Z26CalculateSmartLimitWeightsfffRfS_, .param .f32 __cudaparmf2__Z26CalculateSmartLimitWeightsfffRfS_, .param .f32 __cudaparmf3__Z26CalculateSmartLimitWeightsfffRfS_, .param .u64 __cudaparmf4__Z26CalculateSmartLimitWeightsfffRfS_, .param .u64 __cudaparmf5__Z26CalculateSmartLimitWeightsfffRfS_)
	{
	.reg .u32 %r<72>;
	.reg .u64 %rd<6>;
	.reg .f32 %f<85>;
	.reg .pred %p<17>;
	.loc	22	145	0
$LDWbegin__Z26CalculateSmartLimitWeightsfffRfS_:
	// %f2 = p (arg2), %f4 = q (arg3), %rd2/%rd4 = output addresses arg4/arg5
	ld.param.f32 	%f1, [__cudaparmf2__Z26CalculateSmartLimitWeightsfffRfS_];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf3__Z26CalculateSmartLimitWeightsfffRfS_];
	mov.f32 	%f4, %f3;
	ld.param.u64 	%rd1, [__cudaparmf4__Z26CalculateSmartLimitWeightsfffRfS_];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf5__Z26CalculateSmartLimitWeightsfffRfS_];
	mov.s64 	%rd4, %rd3;
	.loc	22	122	0
	// %f10 = X, %f13 = Y (see header); %f14 = |X|, %f15 = |Y|.
	mov.f32 	%f5, 0f3f728f61;     	// 0.9475
	mul.ftz.f32 	%f6, %f4, %f5;
	mov.f32 	%f7, 0f3ea3b6e9;     	// 0.319755
	mul.ftz.f32 	%f8, %f4, %f7;
	mov.f32 	%f9, 0fbea3b6e9;     	// -0.319755
	fma.rn.ftz.f32 	%f10, %f9, %f2, %f6;
	mov.f32 	%f11, 0fbf728f61;    	// -0.9475
	mul.ftz.f32 	%f12, %f11, %f2;
	sub.ftz.f32 	%f13, %f12, %f8;
	abs.ftz.f32 	%f14, %f10;
	abs.ftz.f32 	%f15, %f13;
	// %r2 = sign bit of Y, ORed into every angle result below.
	mov.b32 	%r1, %f13;
	and.b32 	%r2, %r1, -2147483648;
	// %r7 != 0 only when |X| == 0 and |Y| == 0.
	mov.f32 	%f16, 0f00000000;    	// 0
	set.eq.ftz.u32.f32 	%r3, %f14, %f16;
	neg.s32 	%r4, %r3;
	mov.f32 	%f17, 0f00000000;    	// 0
	set.eq.ftz.u32.f32 	%r5, %f15, %f17;
	neg.s32 	%r6, %r5;
	and.b32 	%r7, %r4, %r6;
	mov.u32 	%r8, 0;
	setp.eq.s32 	%p1, %r7, %r8;
	@%p1 bra 	$Lt_27_17410;
	.loc	25	1842	0
	// Both zero: magnitude is 1078530011 (bit pattern 0x40490FDB, the same
	// as the 3.14159 constant below) when X's sign bit is set, else 0.
	mov.s32 	%r9, 1078530011;
	mov.s32 	%r10, 0;
	mov.b32 	%r11, %f10;
	mov.s32 	%r12, 0;
	setp.lt.s32 	%p2, %r11, %r12;
	selp.s32 	%r13, %r9, %r10, %p2;
	or.b32 	%r14, %r13, %r2;
	mov.b32 	%f18, %r14;
	bra.uni 	$Lt_27_17154;
$Lt_27_17410:
	// %r19 != 0 only when |X| == +Inf and |Y| == +Inf.
	mov.f32 	%f19, 0f7f800000;    	// 1.#INF
	set.eq.ftz.u32.f32 	%r15, %f14, %f19;
	neg.s32 	%r16, %r15;
	mov.f32 	%f20, 0f7f800000;    	// 1.#INF
	set.eq.ftz.u32.f32 	%r17, %f15, %f20;
	neg.s32 	%r18, %r17;
	and.b32 	%r19, %r16, %r18;
	mov.u32 	%r20, 0;
	setp.eq.s32 	%p3, %r19, %r20;
	@%p3 bra 	$Lt_27_17922;
	.loc	25	1845	0
	// Both infinite: magnitude is 1075235812 (0x4016CBE4, ~2.35619) when
	// X's sign bit is set, else 1061752795 (0x3F490FDB, ~0.785398).
	mov.s32 	%r21, 1075235812;
	mov.s32 	%r22, 1061752795;
	mov.b32 	%r23, %f10;
	mov.s32 	%r24, 0;
	setp.lt.s32 	%p4, %r23, %r24;
	selp.s32 	%r25, %r21, %r22, %p4;
	or.b32 	%r26, %r25, %r2;
	mov.b32 	%f18, %r26;
	bra.uni 	$Lt_27_17666;
$Lt_27_17922:
	.loc	25	1207	0
	// General case: %f23 = t = min(|X|,|Y|) / max(|X|,|Y|), %f24 = t^2.
	// %f31 = t * t^2 * numerator polynomial in t^2.
	min.ftz.f32 	%f21, %f14, %f15;
	max.ftz.f32 	%f22, %f14, %f15;
	div.full.ftz.f32 	%f23, %f21, %f22;
	mul.ftz.f32 	%f24, %f23, %f23;
	mov.f32 	%f25, 0fbf52c7ea;    	// -0.823363
	mov.f32 	%f26, 0fc0b59883;    	// -5.67487
	fma.rn.ftz.f32 	%f27, %f24, %f25, %f26;
	mov.f32 	%f28, 0fc0d21907;    	// -6.56556
	fma.rn.ftz.f32 	%f29, %f27, %f24, %f28;
	mul.ftz.f32 	%f30, %f24, %f29;
	mul.ftz.f32 	%f31, %f23, %f30;
	.loc	25	1211	0
	// %f38 = reciprocal of the denominator polynomial in t^2.
	mov.f32 	%f32, 0f41355dc0;    	// 11.3354
	add.ftz.f32 	%f33, %f24, %f32;
	mov.f32 	%f34, 0f41e6bd60;    	// 28.8425
	fma.rn.ftz.f32 	%f35, %f33, %f24, %f34;
	mov.f32 	%f36, 0f419d92c8;    	// 19.6967
	fma.rn.ftz.f32 	%f37, %f35, %f24, %f36;
	rcp.approx.ftz.f32 	%f38, %f37;
	.loc	25	1856	0
	// %f39 = t + numerator/denominator; use 1.5708 - %f39 when |X| < |Y|,
	// then 3.14159 - that when X < 0, then OR in the sign bit of Y.
	setp.lt.ftz.f32 	%p5, %f14, %f15;
	fma.rn.ftz.f32 	%f39, %f31, %f38, %f23;
	mov.f32 	%f40, 0f3fc90fdb;    	// 1.5708
	sub.ftz.f32 	%f41, %f40, %f39;
	selp.f32 	%f42, %f41, %f39, %p5;
	mov.f32 	%f43, 0f40490fdb;    	// 3.14159
	sub.ftz.f32 	%f44, %f43, %f42;
	mov.f32 	%f45, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p6, %f10, %f45;
	selp.f32 	%f46, %f44, %f42, %p6;
	mov.b32 	%r27, %f46;
	or.b32 	%r28, %r2, %r27;
	mov.b32 	%f47, %r28;
	// Keep the computed angle unless X + Y fails "<= +Inf" (i.e. the sum is
	// NaN), in which case the sum itself becomes the result.
	add.ftz.f32 	%f48, %f10, %f13;
	mov.f32 	%f49, 0f7f800000;    	// 1.#INF
	setp.le.ftz.f32 	%p7, %f48, %f49;
	selp.f32 	%f18, %f47, %f48, %p7;
$Lt_27_17666:
$Lt_27_17154:
	.loc	22	179	0
	// Angle in %f18: add 6.28319 if negative, then %f55 = h = angle / 6.28319.
	mov.f32 	%f50, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p8, %f18, %f50;
	mov.f32 	%f51, 0f40c90fdb;    	// 6.28319
	add.ftz.f32 	%f52, %f18, %f51;
	selp.f32 	%f53, %f52, %f18, %p8;
	mov.f32 	%f54, 0f40c90fdb;    	// 6.28319
	div.approx.ftz.f32 	%f55, %f53, %f54;
	// Range test 1a: 0.9167 <= h < 1.01
	mov.f32 	%f56, 0f3f6aacda;    	// 0.9167
	set.ge.ftz.u32.f32 	%r29, %f55, %f56;
	neg.s32 	%r30, %r29;
	mov.f32 	%f57, 0f3f8147ae;    	// 1.01
	set.lt.ftz.u32.f32 	%r31, %f55, %f57;
	neg.s32 	%r32, %r31;
	and.b32 	%r33, %r30, %r32;
	mov.u32 	%r34, 0;
	setp.ne.s32 	%p9, %r33, %r34;
	@%p9 bra 	$Lt_27_11266;
	// Range test 1b: 0 <= h < 0.0833 (falls through into the same stores)
	mov.f32 	%f58, 0f00000000;    	// 0
	set.ge.ftz.u32.f32 	%r35, %f55, %f58;
	neg.s32 	%r36, %r35;
	mov.f32 	%f59, 0f3daa9931;    	// 0.0833
	set.lt.ftz.u32.f32 	%r37, %f55, %f59;
	neg.s32 	%r38, %r37;
	and.b32 	%r39, %r36, %r38;
	mov.u32 	%r40, 0;
	setp.eq.s32 	%p10, %r39, %r40;
	@%p10 bra 	$Lt_27_11522;
$Lt_27_11266:
	.loc	22	182	0
	mov.f32 	%f60, 0f3e4ccccd;    	// 0.2
	st.f32 	[%rd2+0], %f60;
	.loc	22	183	0
	mov.f32 	%f61, 0f3f4ccccd;    	// 0.8
	st.f32 	[%rd4+0], %f61;
	.loc	22	179	0
	bra.uni 	$Lt_27_13570;
$Lt_27_11522:
	.loc	22	185	0
	// Range test 2: 0.0837 <= h < 0.2503 -- the only range storing 0.5 / 0.5
	mov.f32 	%f62, 0f3dab6ae7;    	// 0.0837
	set.ge.ftz.u32.f32 	%r41, %f55, %f62;
	neg.s32 	%r42, %r41;
	mov.f32 	%f63, 0f3e802752;    	// 0.2503
	set.lt.ftz.u32.f32 	%r43, %f55, %f63;
	neg.s32 	%r44, %r43;
	and.b32 	%r45, %r42, %r44;
	mov.u32 	%r46, 0;
	setp.eq.s32 	%p11, %r45, %r46;
	@%p11 bra 	$Lt_27_18434;
	.loc	22	187	0
	mov.f32 	%f64, 0f3f000000;    	// 0.5
	st.f32 	[%rd2+0], %f64;
	.loc	22	188	0
	mov.f32 	%f65, 0f3f000000;    	// 0.5
	st.f32 	[%rd4+0], %f65;
	bra.uni 	$Lt_27_18178;
$Lt_27_18434:
	.loc	22	190	0
	// Range test 3: 0.2497 <= h < 0.4163
	mov.f32 	%f66, 0f3e7fb15c;    	// 0.2497
	set.ge.ftz.u32.f32 	%r47, %f55, %f66;
	neg.s32 	%r48, %r47;
	mov.f32 	%f67, 0f3ed52546;    	// 0.4163
	set.lt.ftz.u32.f32 	%r49, %f55, %f67;
	neg.s32 	%r50, %r49;
	and.b32 	%r51, %r48, %r50;
	mov.u32 	%r52, 0;
	setp.eq.s32 	%p12, %r51, %r52;
	@%p12 bra 	$Lt_27_18946;
	.loc	22	192	0
	mov.f32 	%f68, 0f3e4ccccd;    	// 0.2
	st.f32 	[%rd2+0], %f68;
	.loc	22	193	0
	mov.f32 	%f69, 0f3f4ccccd;    	// 0.8
	st.f32 	[%rd4+0], %f69;
	bra.uni 	$Lt_27_18690;
$Lt_27_18946:
	.loc	22	195	0
	// Range test 4: 0.4167 <= h < 0.5833
	mov.f32 	%f70, 0f3ed559b4;    	// 0.4167
	set.ge.ftz.u32.f32 	%r53, %f55, %f70;
	neg.s32 	%r54, %r53;
	mov.f32 	%f71, 0f3f155326;    	// 0.5833
	set.lt.ftz.u32.f32 	%r55, %f55, %f71;
	neg.s32 	%r56, %r55;
	and.b32 	%r57, %r54, %r56;
	mov.u32 	%r58, 0;
	setp.eq.s32 	%p13, %r57, %r58;
	@%p13 bra 	$Lt_27_19458;
	.loc	22	197	0
	mov.f32 	%f72, 0f3e4ccccd;    	// 0.2
	st.f32 	[%rd2+0], %f72;
	.loc	22	198	0
	mov.f32 	%f73, 0f3f4ccccd;    	// 0.8
	st.f32 	[%rd4+0], %f73;
	bra.uni 	$Lt_27_19202;
$Lt_27_19458:
	.loc	22	200	0
	// Range test 5: 0.5837 <= h < 0.7503
	mov.f32 	%f74, 0f3f156d5d;    	// 0.5837
	set.ge.ftz.u32.f32 	%r59, %f55, %f74;
	neg.s32 	%r60, %r59;
	mov.f32 	%f75, 0f3f4013a9;    	// 0.7503
	set.lt.ftz.u32.f32 	%r61, %f55, %f75;
	neg.s32 	%r62, %r61;
	and.b32 	%r63, %r60, %r62;
	mov.u32 	%r64, 0;
	setp.eq.s32 	%p14, %r63, %r64;
	@%p14 bra 	$Lt_27_19970;
	.loc	22	202	0
	mov.f32 	%f76, 0f3e4ccccd;    	// 0.2
	st.f32 	[%rd2+0], %f76;
	.loc	22	203	0
	mov.f32 	%f77, 0f3f4ccccd;    	// 0.8
	st.f32 	[%rd4+0], %f77;
	bra.uni 	$Lt_27_19714;
$Lt_27_19970:
	.loc	22	205	0
	// Range test 6: 0.7497 <= h < 0.9163
	mov.f32 	%f78, 0f3f3fec57;    	// 0.7497
	set.ge.ftz.u32.f32 	%r65, %f55, %f78;
	neg.s32 	%r66, %r65;
	mov.f32 	%f79, 0f3f6a92a3;    	// 0.9163
	set.lt.ftz.u32.f32 	%r67, %f55, %f79;
	neg.s32 	%r68, %r67;
	and.b32 	%r69, %r66, %r68;
	mov.u32 	%r70, 0;
	setp.eq.s32 	%p15, %r69, %r70;
	@%p15 bra 	$Lt_27_20482;
	.loc	22	207	0
	mov.f32 	%f80, 0f3e4ccccd;    	// 0.2
	st.f32 	[%rd2+0], %f80;
	.loc	22	208	0
	mov.f32 	%f81, 0f3f4ccccd;    	// 0.8
	st.f32 	[%rd4+0], %f81;
	bra.uni 	$Lt_27_20226;
$Lt_27_20482:
	.loc	22	212	0
	// Fallback: h matched none of the ranges (this includes the small gaps
	// between adjacent ranges and any h for which every comparison is false).
	mov.f32 	%f82, 0f3e4ccccd;    	// 0.2
	st.f32 	[%rd2+0], %f82;
	.loc	22	213	0
	mov.f32 	%f83, 0f3f4ccccd;    	// 0.8
	st.f32 	[%rd4+0], %f83;
$Lt_27_20226:
$Lt_27_19714:
$Lt_27_19202:
$Lt_27_18690:
$Lt_27_18178:
$Lt_27_13570:
	.loc	22	215	0
	ret;
$LDWend__Z26CalculateSmartLimitWeightsfffRfS_:
	} // _Z26CalculateSmartLimitWeightsfffRfS_

	// ---------------------------------------------------------------------
	// _Z21SmartLimitRatioMethodfffffffffRfS_S_
	//
	// Device function with nine f32 value parameters and three 64-bit address
	// parameters (parm10..parm12). Always returns the s32 constant 1.
	//
	// What the body does, in order:
	//   1. Builds two linear combinations of parm2/parm3 and derives an angle
	//      from them with what looks like an inlined atan2f (the .loc 25 code;
	//      NOTE(review): confirm file 25 is the CUDA math header).
	//   2. Wraps the angle into a non-negative value, divides by 2*pi, and picks
	//      a weight pair (%f72, %f73) of either (0.8, 0.2) or (0.5, 0.5) based
	//      on which range the normalised angle falls in.
	//   3. If parm7 exceeds parm5, computes a scale factor and writes three
	//      results through parm10/parm11/parm12.
	//   4. If parm4 exceeds parm8, does the same with the opposite sign on the
	//      first result, overwriting anything written in step 3.
	//
	// parm6 is declared but never loaded by this body.
	// When neither step 3 nor step 4 fires, nothing is stored through the three
	// addresses.
	// NOTE(review): the meaning of each parameter (luma/chroma/limits) is not
	// visible here - confirm against the CUDA source before relying on names.
	// ---------------------------------------------------------------------
	.visible .func (.param .s32 __cudaretf__Z21SmartLimitRatioMethodfffffffffRfS_S_) _Z21SmartLimitRatioMethodfffffffffRfS_S_ (.param .f32 __cudaparmf1__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .f32 __cudaparmf2__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .f32 __cudaparmf3__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .f32 __cudaparmf4__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .f32 __cudaparmf5__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .f32 __cudaparmf6__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .f32 __cudaparmf7__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .f32 __cudaparmf8__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .f32 __cudaparmf9__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .u64 __cudaparmf10__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .u64 __cudaparmf11__Z21SmartLimitRatioMethodfffffffffRfS_S_, .param .u64 __cudaparmf12__Z21SmartLimitRatioMethodfffffffffRfS_S_)
	{
	.reg .u32 %r<49>;
	.reg .u64 %rd<8>;
	.reg .f32 %f<111>;
	.reg .pred %p<21>;
	.loc	22	229	0
$LDWbegin__Z21SmartLimitRatioMethodfffffffffRfS_S_:
	// Parameter fetch. Working copies after the movs:
	//   %f2=parm1  %f4=parm2  %f6=parm3  %f8=parm4  %f10=parm5
	//   %f12=parm7 %f14=parm8 %f16=parm9
	//   %rd2=parm10 %rd4=parm11 %rd6=parm12 (output addresses)
	ld.param.f32 	%f1, [__cudaparmf1__Z21SmartLimitRatioMethodfffffffffRfS_S_];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf2__Z21SmartLimitRatioMethodfffffffffRfS_S_];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf3__Z21SmartLimitRatioMethodfffffffffRfS_S_];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf4__Z21SmartLimitRatioMethodfffffffffRfS_S_];
	mov.f32 	%f8, %f7;
	ld.param.f32 	%f9, [__cudaparmf5__Z21SmartLimitRatioMethodfffffffffRfS_S_];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf7__Z21SmartLimitRatioMethodfffffffffRfS_S_];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf8__Z21SmartLimitRatioMethodfffffffffRfS_S_];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf9__Z21SmartLimitRatioMethodfffffffffRfS_S_];
	mov.f32 	%f16, %f15;
	ld.param.u64 	%rd1, [__cudaparmf10__Z21SmartLimitRatioMethodfffffffffRfS_S_];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf11__Z21SmartLimitRatioMethodfffffffffRfS_S_];
	mov.s64 	%rd4, %rd3;
	ld.param.u64 	%rd5, [__cudaparmf12__Z21SmartLimitRatioMethodfffffffffRfS_S_];
	mov.s64 	%rd6, %rd5;
	.loc	22	122	0
	// Two linear combinations of parm2 (%f4) and parm3 (%f6):
	//   %f22 =  0.9475*parm3 - 0.319755*parm2   (used as the "x" operand below)
	//   %f25 = -0.9475*parm2 - 0.319755*parm3   (used as the "y" operand below)
	mov.f32 	%f17, 0f3f728f61;    	// 0.9475
	mul.ftz.f32 	%f18, %f6, %f17;
	mov.f32 	%f19, 0f3ea3b6e9;    	// 0.319755
	mul.ftz.f32 	%f20, %f6, %f19;
	mov.f32 	%f21, 0fbea3b6e9;    	// -0.319755
	fma.rn.ftz.f32 	%f22, %f21, %f4, %f18;
	mov.f32 	%f23, 0fbf728f61;    	// -0.9475
	mul.ftz.f32 	%f24, %f23, %f4;
	sub.ftz.f32 	%f25, %f24, %f20;
	// %f26 = |x|, %f27 = |y|, %r2 = sign bit of y (mask 0x80000000).
	abs.ftz.f32 	%f26, %f22;
	abs.ftz.f32 	%f27, %f25;
	mov.b32 	%r1, %f25;
	and.b32 	%r2, %r1, -2147483648;
	// Special case 1: both |x| and |y| are zero.
	mov.f32 	%f28, 0f00000000;    	// 0
	set.eq.ftz.u32.f32 	%r3, %f26, %f28;
	neg.s32 	%r4, %r3;
	mov.f32 	%f29, 0f00000000;    	// 0
	set.eq.ftz.u32.f32 	%r5, %f27, %f29;
	neg.s32 	%r6, %r5;
	and.b32 	%r7, %r4, %r6;
	mov.u32 	%r8, 0;
	setp.eq.s32 	%p1, %r7, %r8;
	@%p1 bra 	$Lt_28_25602;
	.loc	25	1842	0
	// Both zero: result is pi (1078530011 == 0x40490FDB) when x's raw bits are
	// negative, otherwise 0; y's sign bit is then OR-ed in.
	mov.s32 	%r9, 1078530011;
	mov.s32 	%r10, 0;
	mov.b32 	%r11, %f22;
	mov.s32 	%r12, 0;
	setp.lt.s32 	%p2, %r11, %r12;
	selp.s32 	%r13, %r9, %r10, %p2;
	or.b32 	%r14, %r13, %r2;
	mov.b32 	%f30, %r14;
	bra.uni 	$Lt_28_25346;
$Lt_28_25602:
	// Special case 2: both |x| and |y| are +infinity.
	mov.f32 	%f31, 0f7f800000;    	// 1.#INF
	set.eq.ftz.u32.f32 	%r15, %f26, %f31;
	neg.s32 	%r16, %r15;
	mov.f32 	%f32, 0f7f800000;    	// 1.#INF
	set.eq.ftz.u32.f32 	%r17, %f27, %f32;
	neg.s32 	%r18, %r17;
	and.b32 	%r19, %r16, %r18;
	mov.u32 	%r20, 0;
	setp.eq.s32 	%p3, %r19, %r20;
	@%p3 bra 	$Lt_28_26114;
	.loc	25	1845	0
	// Both infinite: 3*pi/4 (1075235812 == 0x4016CBE4) when x's raw bits are
	// negative, else pi/4 (1061752795 == 0x3F490FDB); y's sign bit is OR-ed in.
	mov.s32 	%r21, 1075235812;
	mov.s32 	%r22, 1061752795;
	mov.b32 	%r23, %f22;
	mov.s32 	%r24, 0;
	setp.lt.s32 	%p4, %r23, %r24;
	selp.s32 	%r25, %r21, %r22, %p4;
	or.b32 	%r26, %r25, %r2;
	mov.b32 	%f30, %r26;
	bra.uni 	$Lt_28_25858;
$Lt_28_26114:
	.loc	25	1207	0
	// General case: %f35 = min(|x|,|y|) / max(|x|,|y|), %f36 = %f35^2, then a
	// rational approximation: numerator polynomial in %f36 (into %f43) ...
	min.ftz.f32 	%f33, %f26, %f27;
	max.ftz.f32 	%f34, %f26, %f27;
	div.full.ftz.f32 	%f35, %f33, %f34;
	mul.ftz.f32 	%f36, %f35, %f35;
	mov.f32 	%f37, 0fbf52c7ea;    	// -0.823363
	mov.f32 	%f38, 0fc0b59883;    	// -5.67487
	fma.rn.ftz.f32 	%f39, %f36, %f37, %f38;
	mov.f32 	%f40, 0fc0d21907;    	// -6.56556
	fma.rn.ftz.f32 	%f41, %f39, %f36, %f40;
	mul.ftz.f32 	%f42, %f36, %f41;
	mul.ftz.f32 	%f43, %f35, %f42;
	.loc	25	1211	0
	// ... and the reciprocal of the denominator polynomial (into %f50).
	mov.f32 	%f44, 0f41355dc0;    	// 11.3354
	add.ftz.f32 	%f45, %f36, %f44;
	mov.f32 	%f46, 0f41e6bd60;    	// 28.8425
	fma.rn.ftz.f32 	%f47, %f45, %f36, %f46;
	mov.f32 	%f48, 0f419d92c8;    	// 19.6967
	fma.rn.ftz.f32 	%f49, %f47, %f36, %f48;
	rcp.approx.ftz.f32 	%f50, %f49;
	.loc	25	1856	0
	// Quadrant fix-up: use pi/2 - r when |x| < |y|, then pi - r when x < 0,
	// then OR in y's sign bit.
	setp.lt.ftz.f32 	%p5, %f26, %f27;
	fma.rn.ftz.f32 	%f51, %f43, %f50, %f35;
	mov.f32 	%f52, 0f3fc90fdb;    	// 1.5708
	sub.ftz.f32 	%f53, %f52, %f51;
	selp.f32 	%f54, %f53, %f51, %p5;
	mov.f32 	%f55, 0f40490fdb;    	// 3.14159
	sub.ftz.f32 	%f56, %f55, %f54;
	mov.f32 	%f57, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p6, %f22, %f57;
	selp.f32 	%f58, %f56, %f54, %p6;
	mov.b32 	%r27, %f58;
	or.b32 	%r28, %r2, %r27;
	mov.b32 	%f59, %r28;
	// x + y <= +inf is false only when the sum is NaN; in that case the sum
	// itself is selected instead of the computed angle.
	add.ftz.f32 	%f60, %f22, %f25;
	mov.f32 	%f61, 0f7f800000;    	// 1.#INF
	setp.le.ftz.f32 	%p7, %f60, %f61;
	selp.f32 	%f30, %f59, %f60, %p7;
$Lt_28_25858:
$Lt_28_25346:
	.loc	22	179	0
	// %f30 holds the angle. Add 2*pi when it is negative, then divide by 2*pi
	// so %f67 is the angle as a fraction of a full turn.
	mov.f32 	%f62, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p8, %f30, %f62;
	mov.f32 	%f63, 0f40c90fdb;    	// 6.28319
	add.ftz.f32 	%f64, %f30, %f63;
	selp.f32 	%f65, %f64, %f30, %p8;
	mov.f32 	%f66, 0f40c90fdb;    	// 6.28319
	div.approx.ftz.f32 	%f67, %f65, %f66;
	// Range test A: 0.9167 <= %f67 < 1.01  -> weights (0.8, 0.2).
	mov.f32 	%f68, 0f3f6aacda;    	// 0.9167
	set.ge.ftz.u32.f32 	%r29, %f67, %f68;
	neg.s32 	%r30, %r29;
	mov.f32 	%f69, 0f3f8147ae;    	// 1.01
	set.lt.ftz.u32.f32 	%r31, %f67, %f69;
	neg.s32 	%r32, %r31;
	and.b32 	%r33, %r30, %r32;
	mov.u32 	%r34, 0;
	setp.ne.s32 	%p9, %r33, %r34;
	@%p9 bra 	$Lt_28_11266;
	// Range test B: 0 <= %f67 < 0.0833 -> same weights (falls into the label
	// below); otherwise continue with range test C.
	mov.f32 	%f70, 0f00000000;    	// 0
	set.ge.ftz.u32.f32 	%r35, %f67, %f70;
	neg.s32 	%r36, %r35;
	mov.f32 	%f71, 0f3daa9931;    	// 0.0833
	set.lt.ftz.u32.f32 	%r37, %f67, %f71;
	neg.s32 	%r38, %r37;
	and.b32 	%r39, %r36, %r38;
	mov.u32 	%r40, 0;
	setp.eq.s32 	%p10, %r39, %r40;
	@%p10 bra 	$Lt_28_11522;
$Lt_28_11266:
	// %f72 scales the ratio term, %f73 scales the offset term (see uses below).
	mov.f32 	%f72, 0f3f4ccccd;    	// 0.8
	mov.f32 	%f73, 0f3e4ccccd;    	// 0.2
	bra.uni 	$Lt_28_13570;
$Lt_28_11522:
	.loc	22	185	0
	// Range test C: 0.0837 <= %f67 < 0.2503 -> weights (0.5, 0.5);
	// every remaining value gets (0.8, 0.2).
	mov.f32 	%f74, 0f3dab6ae7;    	// 0.0837
	set.ge.ftz.u32.f32 	%r41, %f67, %f74;
	neg.s32 	%r42, %r41;
	mov.f32 	%f75, 0f3e802752;    	// 0.2503
	set.lt.ftz.u32.f32 	%r43, %f67, %f75;
	neg.s32 	%r44, %r43;
	and.b32 	%r45, %r42, %r44;
	mov.u32 	%r46, 0;
	setp.eq.s32 	%p11, %r45, %r46;
	@%p11 bra 	$Lt_28_26626;
	mov.f32 	%f72, 0f3f000000;    	// 0.5
	mov.f32 	%f73, 0f3f000000;    	// 0.5
	bra.uni 	$Lt_28_26370;
$Lt_28_26626:
	.loc	22	190	0
	mov.f32 	%f72, 0f3f4ccccd;    	// 0.8
	mov.f32 	%f73, 0f3e4ccccd;    	// 0.2
$Lt_28_26370:
$Lt_28_13570:
	.loc	22	233	0
	// First limit check: %f78 = (parm5 < parm7) ? parm7 - parm5 : 0.
	// Skip the whole section unless %f78 > 0.
	setp.lt.ftz.f32 	%p12, %f10, %f12;
	sub.ftz.f32 	%f76, %f12, %f10;
	mov.f32 	%f77, 0f00000000;    	// 0
	selp.f32 	%f78, %f76, %f77, %p12;
	mov.f32 	%f79, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p13, %f78, %f79;
	@!%p13 bra 	$Lt_28_28930;
	// Guard the division: when parm9 compares equal to 0 the scale stays 1.
	mov.f32 	%f80, 0f00000000;    	// 0
	setp.neu.ftz.f32 	%p14, %f16, %f80;
	@!%p14 bra 	$Lt_28_29698;
	.loc	22	249	0
	// %f84 = 1 - (%f78 * %f72) / parm9
	mov.f32 	%f81, 0f3f800000;    	// 1
	mul.ftz.f32 	%f82, %f78, %f72;
	div.approx.ftz.f32 	%f83, %f82, %f16;
	sub.ftz.f32 	%f84, %f81, %f83;
	.loc	22	250	0
	bra.uni 	$Lt_28_29442;
$Lt_28_29698:
	mov.f32 	%f84, 0f3f800000;    	// 1
$Lt_28_29442:
	.loc	22	260	0
	// Offset term: the full %f78 when the scale is >= 0.99, otherwise
	// %f78 * %f73. *parm10 = parm1 - offset/100.
	mul.ftz.f32 	%f85, %f78, %f73;
	mov.f32 	%f86, 0f3f7d70a4;    	// 0.99
	setp.ge.ftz.f32 	%p15, %f84, %f86;
	selp.f32 	%f87, %f78, %f85, %p15;
	mov.f32 	%f88, 0f42c80000;    	// 100
	div.approx.ftz.f32 	%f89, %f87, %f88;
	sub.ftz.f32 	%f90, %f2, %f89;
	st.f32 	[%rd2+0], %f90;
	.loc	22	261	0
	// *parm11 = parm2 * scale
	mul.ftz.f32 	%f91, %f4, %f84;
	st.f32 	[%rd4+0], %f91;
	.loc	22	262	0
	// *parm12 = parm3 * scale
	mul.ftz.f32 	%f92, %f6, %f84;
	st.f32 	[%rd6+0], %f92;
$Lt_28_28930:
	// Second limit check: %f95 = (parm4 > parm8) ? parm4 - parm8 : 0.
	// Skip the whole section unless %f95 > 0.
	setp.gt.ftz.f32 	%p16, %f8, %f14;
	sub.ftz.f32 	%f93, %f8, %f14;
	mov.f32 	%f94, 0f00000000;    	// 0
	selp.f32 	%f95, %f93, %f94, %p16;
	mov.f32 	%f96, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p17, %f95, %f96;
	@!%p17 bra 	$Lt_28_29954;
	// Same division guard on parm9 as above.
	mov.f32 	%f97, 0f00000000;    	// 0
	setp.neu.ftz.f32 	%p18, %f16, %f97;
	@!%p18 bra 	$Lt_28_30722;
	.loc	22	275	0
	// %f101 = 1 - (%f95 * %f72) / parm9
	mov.f32 	%f98, 0f3f800000;    	// 1
	mul.ftz.f32 	%f99, %f95, %f72;
	div.approx.ftz.f32 	%f100, %f99, %f16;
	sub.ftz.f32 	%f101, %f98, %f100;
	.loc	22	276	0
	bra.uni 	$Lt_28_30466;
$Lt_28_30722:
	mov.f32 	%f101, 0f3f800000;   	// 1
$Lt_28_30466:
	.loc	22	286	0
	// Same offset selection, but added: *parm10 = parm1 + offset/100.
	// The stores below start from the original parm1/parm2/parm3 values, so
	// they replace (not compound with) anything the first section stored.
	mul.ftz.f32 	%f102, %f95, %f73;
	mov.f32 	%f103, 0f3f7d70a4;   	// 0.99
	setp.ge.ftz.f32 	%p19, %f101, %f103;
	selp.f32 	%f104, %f95, %f102, %p19;
	mov.f32 	%f105, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f106, %f104, %f105;
	add.ftz.f32 	%f107, %f2, %f106;
	st.f32 	[%rd2+0], %f107;
	.loc	22	287	0
	// *parm11 = parm2 * scale
	mul.ftz.f32 	%f108, %f4, %f101;
	st.f32 	[%rd4+0], %f108;
	.loc	22	288	0
	// *parm12 = parm3 * scale
	mul.ftz.f32 	%f109, %f6, %f101;
	st.f32 	[%rd6+0], %f109;
$Lt_28_29954:
	.loc	22	291	0
	// Unconditional return value of 1.
	mov.s32 	%r47, 1;
	st.param.s32 	[__cudaretf__Z21SmartLimitRatioMethodfffffffffRfS_S_], %r47;
	ret;
$LDWend__Z21SmartLimitRatioMethodfffffffffRfS_S_:
	} // _Z21SmartLimitRatioMethodfffffffffRfS_S_

	.visible .func (.param .f32 __cudaretf__Z5ClampIfET_S0_S0_S0_) _Z5ClampIfET_S0_S0_S0_ (.param .f32 __cudaparmf1__Z5ClampIfET_S0_S0_S0_, .param .f32 __cudaparmf2__Z5ClampIfET_S0_S0_S0_, .param .f32 __cudaparmf3__Z5ClampIfET_S0_S0_S0_)
	{
	// Returns min(parm3, max(parm1, parm2)) using flush-to-zero f32 min/max.
	.reg .f32 %arg1, %arg2, %arg3;
	.reg .f32 %lifted, %result;
	.loc	23	72	0
$LDWbegin__Z5ClampIfET_S0_S0_S0_:
	// Load each argument directly into its working register.
	ld.param.f32 	%arg1, [__cudaparmf1__Z5ClampIfET_S0_S0_S0_];
	ld.param.f32 	%arg2, [__cudaparmf2__Z5ClampIfET_S0_S0_S0_];
	ld.param.f32 	%arg3, [__cudaparmf3__Z5ClampIfET_S0_S0_S0_];
	.loc	23	73	0
	// Raise parm1 to at least parm2, then cap the result at parm3.
	max.ftz.f32 	%lifted, %arg1, %arg2;
	min.ftz.f32 	%result, %arg3, %lifted;
	st.param.f32 	[__cudaretf__Z5ClampIfET_S0_S0_S0_], %result;
	ret;
$LDWend__Z5ClampIfET_S0_S0_S0_:
	} // _Z5ClampIfET_S0_S0_S0_

	.visible .func (.param .f32 __cudaretf__Z4LERPIfET_S0_S0_S0_) _Z4LERPIfET_S0_S0_S0_ (.param .f32 __cudaparmf1__Z4LERPIfET_S0_S0_S0_, .param .f32 __cudaparmf2__Z4LERPIfET_S0_S0_S0_, .param .f32 __cudaparmf3__Z4LERPIfET_S0_S0_S0_)
	{
	// Returns parm1 + parm3 * (parm2 - parm1), with the multiply-add fused
	// (round-to-nearest, flush-to-zero).
	.reg .f32 %arg1, %arg2, %arg3;
	.reg .f32 %span, %result;
	.loc	23	78	0
$LDWbegin__Z4LERPIfET_S0_S0_S0_:
	// Load each argument directly into its working register.
	ld.param.f32 	%arg1, [__cudaparmf1__Z4LERPIfET_S0_S0_S0_];
	ld.param.f32 	%arg2, [__cudaparmf2__Z4LERPIfET_S0_S0_S0_];
	ld.param.f32 	%arg3, [__cudaparmf3__Z4LERPIfET_S0_S0_S0_];
	.loc	23	79	0
	// span = parm2 - parm1; result = parm3 * span + parm1.
	sub.ftz.f32 	%span, %arg2, %arg1;
	fma.rn.ftz.f32 	%result, %arg3, %span, %arg1;
	st.param.f32 	[__cudaretf__Z4LERPIfET_S0_S0_S0_], %result;
	ret;
$LDWend__Z4LERPIfET_S0_S0_S0_:
	} // _Z4LERPIfET_S0_S0_S0_
	// Constant-space tables, each 36 bytes = nine f32 values stored as raw bytes.
	// Decoded as little-endian IEEE-754 single precision, in byte order:
	//
	// kRGB32f_To_601YPbPr:
	//   [+0]  0.299      [+4]  0.587      [+8]  0.114
	//   [+12] -0.168736  [+16] -0.331264  [+20] 0.5
	//   [+24] 0.5        [+28] -0.418688  [+32] -0.081312
	//
	// k601YPbPr_To_RGB32f:
	//   [+0]  1.0        [+4]  0.0        [+8]  1.402
	//   [+12] 1.0        [+16] -0.344136  [+20] -0.714136
	//   [+24] 1.0        [+28] 1.772      [+32] 0.0
	//
	// NOTE(review): the values match the usual ITU-R BT.601 RGB <-> YPbPr
	// coefficients, laid out as 3x3 row-major matrices - confirm against the
	// CUDA source that declares these arrays.
	.const .align 4 .b8 kRGB32f_To_601YPbPr[36] = {135,22,153,62,162,69,22,63,213,120,233,61,33,201,44,190,111,155,169,190,0,0,0,63,0,0,0,63,70,94,214,190,232,134,166,189};
	.const .align 4 .b8 k601YPbPr_To_RGB32f[36] = {0,0,128,63,0,0,0,0,188,116,179,63,0,0,128,63,152,50,176,190,158,209,54,191,0,0,128,63,229,208,226,63,0,0,0,0};

	.entry VideoLimiter (
		.param .u64 __cudaparm_VideoLimiter_inImage,
		.param .s32 __cudaparm_VideoLimiter_inPitch,
		.param .u32 __cudaparm_VideoLimiter_inDeviceFormat,
		.param .s32 __cudaparm_VideoLimiter_inWidth,
		.param .s32 __cudaparm_VideoLimiter_inHeight,
		.param .u32 __cudaparm_VideoLimiter_inReductionAxis,
		.param .u32 __cudaparm_VideoLimiter_inReductionMethod,
		.param .f32 __cudaparm_VideoLimiter_inLumaMin,
		.param .f32 __cudaparm_VideoLimiter_inLumaMax,
		.param .f32 __cudaparm_VideoLimiter_inChromaMin,
		.param .f32 __cudaparm_VideoLimiter_inChromaMax,
		.param .f32 __cudaparm_VideoLimiter_inSignalMin,
		.param .f32 __cudaparm_VideoLimiter_inSignalMax,
		.param .f32 __cudaparm_VideoLimiter_inShadowThreshold,
		.param .f32 __cudaparm_VideoLimiter_inShadowThresholdSoftness,
		.param .f32 __cudaparm_VideoLimiter_inHighlightThreshold,
		.param .f32 __cudaparm_VideoLimiter_inHighlightThresholdSoftness)
	{
	.reg .u32 %r<321>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<657>;
	.reg .pred %p<145>;
	.loc	22	315	0
$LDWbegin_VideoLimiter:
	.loc	22	318	0
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	ld.param.s32 	%r11, [__cudaparm_VideoLimiter_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_VideoLimiter_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_31_227330;
	ld.param.s32 	%r19, [__cudaparm_VideoLimiter_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	ld.param.s32 	%r21, [__cudaparm_VideoLimiter_inPitch];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_VideoLimiter_inImage];
	@!%p2 bra 	$Lt_31_175362;
	.loc	20	115	0
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	321	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_31_175106;
$Lt_31_175362:
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_31_175106:
	ld.const.f32 	%f5, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f6, %f5, %f2;
	ld.const.f32 	%f7, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f8, %f7, %f3, %f6;
	ld.const.f32 	%f9, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f10, %f9, %f1, %f8;
	mov.f32 	%f11, %f10;
	ld.const.f32 	%f12, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f13, %f12, %f2;
	ld.const.f32 	%f14, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f15, %f14, %f3, %f13;
	ld.const.f32 	%f16, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f17, %f16, %f1, %f15;
	mov.f32 	%f18, %f17;
	ld.const.f32 	%f19, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f20, %f19, %f2;
	ld.const.f32 	%f21, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f22, %f21, %f3, %f20;
	ld.const.f32 	%f23, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f24, %f23, %f1, %f22;
	mov.f32 	%f25, %f24;
	.loc	22	37	0
	ld.param.f32 	%f26, [__cudaparm_VideoLimiter_inHighlightThresholdSoftness];
	ld.param.f32 	%f27, [__cudaparm_VideoLimiter_inHighlightThreshold];
	setp.le.ftz.f32 	%p3, %f24, %f27;
	@!%p3 bra 	$Lt_31_133122;
	.loc	22	39	0
	mov.f32 	%f28, 0f3f800000;    	// 1
	bra.uni 	$LDWendi_fdividef_208_11;
$Lt_31_133122:
	.loc	22	41	0
	add.ftz.f32 	%f29, %f26, %f27;
	setp.ge.ftz.f32 	%p4, %f24, %f29;
	@!%p4 bra 	$Lt_31_133378;
	.loc	22	43	0
	mov.f32 	%f28, 0f00000000;    	// 0
	bra.uni 	$LDWendi_fdividef_208_11;
$Lt_31_133378:
	.loc	22	47	0
	sub.ftz.f32 	%f30, %f24, %f27;
	div.approx.ftz.f32 	%f28, %f30, %f26;
$LDWendi_fdividef_208_11:
	.loc	22	55	0
	sub.ftz.f32 	%f31, %f27, %f26;
	setp.gt.ftz.f32 	%p5, %f31, %f24;
	@!%p5 bra 	$Lt_31_133634;
	.loc	22	57	0
	mov.f32 	%f32, 0f00000000;    	// 0
	bra.uni 	$LDWendi_fdividef_208_9;
$Lt_31_133634:
	.loc	22	59	0
	setp.gt.ftz.f32 	%p6, %f24, %f27;
	@!%p6 bra 	$Lt_31_133890;
	.loc	22	61	0
	mov.f32 	%f32, 0f3f800000;    	// 1
	bra.uni 	$LDWendi_fdividef_208_9;
$Lt_31_133890:
	.loc	22	65	0
	sub.ftz.f32 	%f33, %f24, %f31;
	div.approx.ftz.f32 	%f32, %f33, %f26;
$LDWendi_fdividef_208_9:
	.loc	22	351	0
	ld.param.s32 	%r28, [__cudaparm_VideoLimiter_inReductionMethod];
	mov.u32 	%r29, 0;
	setp.eq.s32 	%p7, %r28, %r29;
	@%p7 bra 	$Lt_31_258;
	mov.u32 	%r30, 1;
	setp.eq.s32 	%p8, %r28, %r30;
	@%p8 bra 	$Lt_31_770;
	mov.u32 	%r31, 2;
	setp.eq.s32 	%p9, %r28, %r31;
	@%p9 bra 	$Lt_31_1026;
	mov.u32 	%r32, 3;
	setp.eq.s32 	%p10, %r28, %r32;
	@%p10 bra 	$Lt_31_1282;
	mov.u32 	%r33, 4;
	setp.eq.s32 	%p11, %r28, %r33;
	@%p11 bra 	$Lt_31_1538;
	bra.uni 	$Lt_31_221186;
$Lt_31_258:
	ld.param.s32 	%r34, [__cudaparm_VideoLimiter_inReductionAxis];
	mov.s32 	%r35, 0;
	set.eq.u32.s32 	%r36, %r34, %r35;
	neg.s32 	%r37, %r36;
	mov.s32 	%r38, 2;
	set.eq.u32.s32 	%r39, %r34, %r38;
	neg.s32 	%r40, %r39;
	or.b32 	%r41, %r37, %r40;
	mov.u32 	%r42, 0;
	setp.eq.s32 	%p12, %r41, %r42;
	@%p12 bra 	$Lt_31_176642;
	mov.f32 	%f34, 0f42c80000;    	// 100
	mul.ftz.f32 	%f35, %f24, %f34;
	ld.param.f32 	%f36, [__cudaparm_VideoLimiter_inLumaMax];
	setp.gt.ftz.f32 	%p13, %f35, %f36;
	@!%p13 bra 	$Lt_31_176386;
	.loc	22	360	0
	mov.f32 	%f37, 0f42c80000;    	// 100
	div.approx.ftz.f32 	%f38, %f36, %f37;
	sub.ftz.f32 	%f39, %f24, %f38;
	mov.f32 	%f40, 0f40000000;    	// 2
	mov.f32 	%f41, 0f3f800000;    	// 1
	ld.param.f32 	%f42, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p14, %f42, %f26;
	selp.f32 	%f43, %f40, %f41, %p14;
	div.approx.ftz.f32 	%f44, %f32, %f43;
	mul.ftz.f32 	%f45, %f39, %f44;
	sub.ftz.f32 	%f25, %f24, %f45;
	bra.uni 	$Lt_31_176642;
$Lt_31_176386:
	ld.param.f32 	%f46, [__cudaparm_VideoLimiter_inLumaMin];
	setp.lt.ftz.f32 	%p15, %f35, %f46;
	@!%p15 bra 	$Lt_31_176642;
	.loc	22	365	0
	mov.f32 	%f47, 0f42c80000;    	// 100
	div.approx.ftz.f32 	%f48, %f46, %f47;
	sub.ftz.f32 	%f49, %f48, %f24;
	mov.f32 	%f50, 0f40000000;    	// 2
	mov.f32 	%f51, 0f3f800000;    	// 1
	ld.param.f32 	%f52, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p16, %f52, %f26;
	selp.f32 	%f53, %f50, %f51, %p16;
	div.approx.ftz.f32 	%f54, %f32, %f53;
	fma.rn.ftz.f32 	%f25, %f49, %f54, %f24;
$Lt_31_176642:
$Lt_31_176130:
$Lt_31_175618:
	sub.u32 	%r43, %r34, 1;
	mov.u32 	%r44, 1;
	setp.gt.u32 	%p17, %r43, %r44;
	@%p17 bra 	$Lt_31_178690;
	mov.f32 	%f55, 0f3f9d70a4;    	// 1.23
	mul.ftz.f32 	%f56, %f10, %f55;
	mov.f32 	%f57, 0f3f5f3cb4;    	// 0.87202
	mul.ftz.f32 	%f58, %f17, %f57;
	mov.f32 	%f59, 0f42c80000;    	// 100
	mul.ftz.f32 	%f35, %f24, %f59;
	mul.ftz.f32 	%f60, %f56, %f56;
	fma.rn.ftz.f32 	%f61, %f58, %f58, %f60;
	sqrt.approx.ftz.f32 	%f62, %f61;
	mov.f32 	%f63, 0f42c80000;    	// 100
	mul.ftz.f32 	%f64, %f62, %f63;
	add.ftz.f32 	%f65, %f64, %f35;
	ld.param.f32 	%f66, [__cudaparm_VideoLimiter_inChromaMax];
	setp.gt.ftz.f32 	%p18, %f65, %f66;
	@!%p18 bra 	$Lt_31_177666;
	mov.f32 	%f67, 0f00000000;    	// 0
	setp.neu.ftz.f32 	%p19, %f64, %f67;
	@!%p19 bra 	$Lt_31_178434;
	.loc	27	529	0
	sub.ftz.f32 	%f68, %f65, %f66;
	div.approx.ftz.f32 	%f69, %f68, %f64;
	.loc	22	375	0
	mov.f32 	%f70, 0f3f800000;    	// 1
	sub.ftz.f32 	%f71, %f70, %f69;
	bra.uni 	$Lt_31_178178;
$Lt_31_178434:
	mov.f32 	%f71, 0f00000000;    	// 0
$Lt_31_178178:
	.loc	22	377	0
	ld.param.f32 	%f72, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p20, %f72, %f26;
	mov.f32 	%f73, 0f40000000;    	// 2
	mov.f32 	%f74, 0f3f800000;    	// 1
	selp.f32 	%f75, %f73, %f74, %p20;
	div.approx.ftz.f32 	%f76, %f32, %f75;
	mul.ftz.f32 	%f77, %f17, %f71;
	sub.ftz.f32 	%f78, %f17, %f77;
	mul.ftz.f32 	%f79, %f76, %f78;
	sub.ftz.f32 	%f18, %f17, %f79;
	.loc	22	378	0
	mul.ftz.f32 	%f80, %f10, %f71;
	sub.ftz.f32 	%f81, %f10, %f80;
	mul.ftz.f32 	%f82, %f76, %f81;
	sub.ftz.f32 	%f11, %f10, %f82;
$Lt_31_177666:
	sub.ftz.f32 	%f83, %f35, %f64;
	ld.param.f32 	%f84, [__cudaparm_VideoLimiter_inChromaMin];
	setp.lt.ftz.f32 	%p21, %f83, %f84;
	@!%p21 bra 	$Lt_31_178690;
	mov.f32 	%f85, 0f00000000;    	// 0
	setp.neu.ftz.f32 	%p22, %f64, %f85;
	@!%p22 bra 	$Lt_31_179458;
	.loc	27	529	0
	sub.ftz.f32 	%f86, %f84, %f83;
	div.approx.ftz.f32 	%f87, %f86, %f64;
	.loc	22	384	0
	mov.f32 	%f88, 0f3f800000;    	// 1
	sub.ftz.f32 	%f89, %f88, %f87;
	bra.uni 	$Lt_31_179202;
$Lt_31_179458:
	mov.f32 	%f89, 0f00000000;    	// 0
$Lt_31_179202:
	.loc	22	386	0
	ld.param.f32 	%f90, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p20, %f90, %f26;
	mov.f32 	%f91, 0f40000000;    	// 2
	mov.f32 	%f92, 0f3f800000;    	// 1
	selp.f32 	%f75, %f91, %f92, %p20;
	div.approx.ftz.f32 	%f76, %f32, %f75;
	mul.ftz.f32 	%f93, %f18, %f89;
	sub.ftz.f32 	%f94, %f18, %f93;
	fma.rn.ftz.f32 	%f18, %f76, %f94, %f18;
	.loc	22	387	0
	mul.ftz.f32 	%f95, %f11, %f89;
	sub.ftz.f32 	%f96, %f11, %f95;
	fma.rn.ftz.f32 	%f11, %f76, %f96, %f11;
$Lt_31_178690:
$Lt_31_177154:
	mov.u32 	%r45, 3;
	setp.ne.s32 	%p23, %r34, %r45;
	@%p23 bra 	$Lt_31_221186;
	.loc	22	393	0
	mov.f32 	%f97, %f25;
	.loc	22	394	0
	mov.f32 	%f98, %f18;
	.loc	22	395	0
	mov.f32 	%f99, %f11;
	.loc	22	122	0
	mov.f32 	%f100, 0f3f728f61;   	// 0.9475
	mul.ftz.f32 	%f101, %f11, %f100;
	mov.f32 	%f102, 0f3ea3b6e9;   	// 0.319755
	mul.ftz.f32 	%f103, %f11, %f102;
	mov.f32 	%f104, 0fbea3b6e9;   	// -0.319755
	fma.rn.ftz.f32 	%f105, %f104, %f18, %f101;
	mov.f32 	%f106, 0fbf728f61;   	// -0.9475
	mul.ftz.f32 	%f107, %f106, %f18;
	sub.ftz.f32 	%f108, %f107, %f103;
	abs.ftz.f32 	%f109, %f105;
	abs.ftz.f32 	%f110, %f108;
	mov.b32 	%r46, %f108;
	and.b32 	%r47, %r46, -2147483648;
	mov.f32 	%f111, 0f00000000;   	// 0
	set.eq.ftz.u32.f32 	%r48, %f109, %f111;
	neg.s32 	%r49, %r48;
	mov.f32 	%f112, 0f00000000;   	// 0
	set.eq.ftz.u32.f32 	%r50, %f110, %f112;
	neg.s32 	%r51, %r50;
	and.b32 	%r52, %r49, %r51;
	mov.u32 	%r53, 0;
	setp.eq.s32 	%p24, %r52, %r53;
	@%p24 bra 	$Lt_31_180482;
	.loc	25	1842	0
	mov.s32 	%r54, 1078530011;
	mov.s32 	%r55, 0;
	mov.b32 	%r56, %f105;
	mov.s32 	%r57, 0;
	setp.lt.s32 	%p25, %r56, %r57;
	selp.s32 	%r58, %r54, %r55, %p25;
	or.b32 	%r59, %r58, %r47;
	mov.b32 	%f113, %r59;
	bra.uni 	$Lt_31_180738;
$Lt_31_180482:
	mov.f32 	%f114, 0f7f800000;   	// 1.#INF
	set.eq.ftz.u32.f32 	%r60, %f109, %f114;
	neg.s32 	%r61, %r60;
	mov.f32 	%f115, 0f7f800000;   	// 1.#INF
	set.eq.ftz.u32.f32 	%r62, %f110, %f115;
	neg.s32 	%r63, %r62;
	and.b32 	%r64, %r61, %r63;
	mov.u32 	%r65, 0;
	setp.eq.s32 	%p26, %r64, %r65;
	@%p26 bra 	$Lt_31_180994;
	.loc	25	1845	0
	mov.s32 	%r66, 1075235812;
	mov.s32 	%r67, 1061752795;
	mov.b32 	%r68, %f105;
	mov.s32 	%r69, 0;
	setp.lt.s32 	%p27, %r68, %r69;
	selp.s32 	%r70, %r66, %r67, %p27;
	or.b32 	%r71, %r70, %r47;
	mov.b32 	%f113, %r71;
	bra.uni 	$Lt_31_180738;
$Lt_31_180994:
	.loc	25	1207	0
	min.ftz.f32 	%f116, %f109, %f110;
	max.ftz.f32 	%f117, %f109, %f110;
	div.full.ftz.f32 	%f118, %f116, %f117;
	mul.ftz.f32 	%f119, %f118, %f118;
	mov.f32 	%f120, 0fbf52c7ea;   	// -0.823363
	mov.f32 	%f121, 0fc0b59883;   	// -5.67487
	fma.rn.ftz.f32 	%f122, %f119, %f120, %f121;
	mov.f32 	%f123, 0fc0d21907;   	// -6.56556
	fma.rn.ftz.f32 	%f124, %f122, %f119, %f123;
	mul.ftz.f32 	%f125, %f119, %f124;
	mul.ftz.f32 	%f126, %f118, %f125;
	.loc	25	1211	0
	mov.f32 	%f127, 0f41355dc0;   	// 11.3354
	add.ftz.f32 	%f128, %f119, %f127;
	mov.f32 	%f129, 0f41e6bd60;   	// 28.8425
	fma.rn.ftz.f32 	%f130, %f128, %f119, %f129;
	mov.f32 	%f131, 0f419d92c8;   	// 19.6967
	fma.rn.ftz.f32 	%f132, %f130, %f119, %f131;
	rcp.approx.ftz.f32 	%f133, %f132;
	.loc	25	1856	0
	setp.lt.ftz.f32 	%p28, %f109, %f110;
	fma.rn.ftz.f32 	%f134, %f126, %f133, %f118;
	mov.f32 	%f135, 0f3fc90fdb;   	// 1.5708
	sub.ftz.f32 	%f136, %f135, %f134;
	selp.f32 	%f137, %f136, %f134, %p28;
	mov.f32 	%f138, 0f40490fdb;   	// 3.14159
	sub.ftz.f32 	%f139, %f138, %f137;
	mov.f32 	%f140, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p29, %f105, %f140;
	selp.f32 	%f141, %f139, %f137, %p29;
	mov.b32 	%r72, %f141;
	or.b32 	%r73, %r47, %r72;
	mov.b32 	%f142, %r73;
	add.ftz.f32 	%f143, %f105, %f108;
	mov.f32 	%f144, 0f7f800000;   	// 1.#INF
	setp.le.ftz.f32 	%p30, %f143, %f144;
	selp.f32 	%f113, %f142, %f143, %p30;
$Lt_31_180738:
$Lt_31_180226:
	.loc	22	179	0
	mov.f32 	%f145, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p31, %f113, %f145;
	mov.f32 	%f146, 0f40c90fdb;   	// 6.28319
	add.ftz.f32 	%f147, %f113, %f146;
	selp.f32 	%f148, %f147, %f113, %p31;
	mov.f32 	%f149, 0f40c90fdb;   	// 6.28319
	div.approx.ftz.f32 	%f150, %f148, %f149;
	mov.f32 	%f151, 0f3f6aacda;   	// 0.9167
	set.ge.ftz.u32.f32 	%r74, %f150, %f151;
	neg.s32 	%r75, %r74;
	mov.f32 	%f152, 0f3f8147ae;   	// 1.01
	set.lt.ftz.u32.f32 	%r76, %f150, %f152;
	neg.s32 	%r77, %r76;
	and.b32 	%r78, %r75, %r77;
	mov.u32 	%r79, 0;
	setp.ne.s32 	%p32, %r78, %r79;
	@%p32 bra 	$Lt_31_21762;
	mov.f32 	%f153, 0f00000000;   	// 0
	set.ge.ftz.u32.f32 	%r80, %f150, %f153;
	neg.s32 	%r81, %r80;
	mov.f32 	%f154, 0f3daa9931;   	// 0.0833
	set.lt.ftz.u32.f32 	%r82, %f150, %f154;
	neg.s32 	%r83, %r82;
	and.b32 	%r84, %r81, %r83;
	mov.u32 	%r85, 0;
	setp.eq.s32 	%p33, %r84, %r85;
	@%p33 bra 	$Lt_31_22018;
$Lt_31_21762:
	mov.f32 	%f155, 0f3f4ccccd;   	// 0.8
	mov.f32 	%f156, 0f3e4ccccd;   	// 0.2
	bra.uni 	$Lt_31_181250;
$Lt_31_22018:
	.loc	22	185	0
	mov.f32 	%f157, 0f3dab6ae7;   	// 0.0837
	set.ge.ftz.u32.f32 	%r86, %f150, %f157;
	neg.s32 	%r87, %r86;
	mov.f32 	%f158, 0f3e802752;   	// 0.2503
	set.lt.ftz.u32.f32 	%r88, %f150, %f158;
	neg.s32 	%r89, %r88;
	and.b32 	%r90, %r87, %r89;
	mov.u32 	%r91, 0;
	setp.eq.s32 	%p34, %r90, %r91;
	@%p34 bra 	$Lt_31_181506;
	mov.f32 	%f155, 0f3f000000;   	// 0.5
	mov.f32 	%f156, 0f3f000000;   	// 0.5
	bra.uni 	$Lt_31_181250;
$Lt_31_181506:
	.loc	22	190	0
	mov.f32 	%f155, 0f3f4ccccd;   	// 0.8
	mov.f32 	%f156, 0f3e4ccccd;   	// 0.2
$Lt_31_181250:
$Lt_31_24066:
	.loc	22	233	0
	mov.f32 	%f159, 0f3f9d70a4;   	// 1.23
	mul.ftz.f32 	%f56, %f10, %f159;
	mov.f32 	%f160, 0f3f5f3cb4;   	// 0.87202
	mul.ftz.f32 	%f58, %f17, %f160;
	mov.f32 	%f161, 0f42c80000;   	// 100
	mul.ftz.f32 	%f35, %f24, %f161;
	mul.ftz.f32 	%f60, %f56, %f56;
	fma.rn.ftz.f32 	%f61, %f58, %f58, %f60;
	sqrt.approx.ftz.f32 	%f62, %f61;
	mov.f32 	%f162, 0f42c80000;   	// 100
	mul.ftz.f32 	%f64, %f62, %f162;
	add.ftz.f32 	%f65, %f64, %f35;
	ld.param.f32 	%f163, [__cudaparm_VideoLimiter_inSignalMax];
	setp.gt.ftz.f32 	%p35, %f65, %f163;
	sub.ftz.f32 	%f164, %f65, %f163;
	mov.f32 	%f165, 0f00000000;   	// 0
	selp.f32 	%f166, %f164, %f165, %p35;
	mov.f32 	%f167, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p36, %f166, %f167;
	@!%p36 bra 	$Lt_31_183810;
	.loc	22	241	0
	mul.ftz.f32 	%f168, %f166, %f156;
	.loc	22	242	0
	mul.ftz.f32 	%f169, %f166, %f155;
	mov.f32 	%f170, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p37, %f64, %f170;
	@!%p37 bra 	$Lt_31_184578;
	.loc	22	249	0
	mov.f32 	%f171, 0f3f800000;   	// 1
	div.approx.ftz.f32 	%f172, %f169, %f64;
	sub.ftz.f32 	%f173, %f171, %f172;
	.loc	22	250	0
	bra.uni 	$Lt_31_184322;
$Lt_31_184578:
	mov.f32 	%f173, 0f3f800000;   	// 1
$Lt_31_184322:
	.loc	22	255	0
	mov.f32 	%f174, 0f3f7d70a4;   	// 0.99
	setp.ge.ftz.f32 	%p38, %f173, %f174;
	selp.f32 	%f168, %f166, %f168, %p38;
	.loc	22	260	0
	mov.f32 	%f175, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f176, %f168, %f175;
	sub.ftz.f32 	%f97, %f25, %f176;
	.loc	22	261	0
	mul.ftz.f32 	%f98, %f18, %f173;
	.loc	22	262	0
	mul.ftz.f32 	%f99, %f11, %f173;
$Lt_31_183810:
	sub.ftz.f32 	%f83, %f35, %f64;
	ld.param.f32 	%f177, [__cudaparm_VideoLimiter_inSignalMin];
	setp.lt.ftz.f32 	%p39, %f83, %f177;
	sub.ftz.f32 	%f178, %f177, %f83;
	mov.f32 	%f179, 0f00000000;   	// 0
	selp.f32 	%f180, %f178, %f179, %p39;
	mov.f32 	%f181, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p40, %f180, %f181;
	@!%p40 bra 	$Lt_31_184834;
	.loc	22	267	0
	mul.ftz.f32 	%f182, %f180, %f156;
	.loc	22	268	0
	mul.ftz.f32 	%f183, %f180, %f155;
	mov.f32 	%f184, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p41, %f64, %f184;
	@!%p41 bra 	$Lt_31_185602;
	.loc	22	275	0
	mov.f32 	%f185, 0f3f800000;   	// 1
	div.approx.ftz.f32 	%f186, %f183, %f64;
	sub.ftz.f32 	%f187, %f185, %f186;
	.loc	22	276	0
	bra.uni 	$Lt_31_185346;
$Lt_31_185602:
	mov.f32 	%f187, 0f3f800000;   	// 1
$Lt_31_185346:
	.loc	22	281	0
	mov.f32 	%f188, 0f3f7d70a4;   	// 0.99
	setp.ge.ftz.f32 	%p42, %f187, %f188;
	selp.f32 	%f182, %f180, %f182, %p42;
	.loc	22	286	0
	mov.f32 	%f189, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f190, %f182, %f189;
	add.ftz.f32 	%f97, %f25, %f190;
	.loc	22	287	0
	mul.ftz.f32 	%f98, %f18, %f187;
	.loc	22	288	0
	mul.ftz.f32 	%f99, %f11, %f187;
$Lt_31_184834:
	.loc	22	411	0
	ld.param.f32 	%f191, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p20, %f191, %f26;
	mov.f32 	%f192, 0f40000000;   	// 2
	mov.f32 	%f193, 0f3f800000;   	// 1
	selp.f32 	%f75, %f192, %f193, %p20;
	div.approx.ftz.f32 	%f76, %f32, %f75;
	sub.ftz.f32 	%f194, %f97, %f25;
	fma.rn.ftz.f32 	%f25, %f76, %f194, %f25;
	.loc	22	412	0
	sub.ftz.f32 	%f195, %f98, %f18;
	fma.rn.ftz.f32 	%f18, %f76, %f195, %f18;
	.loc	22	413	0
	sub.ftz.f32 	%f196, %f99, %f11;
	fma.rn.ftz.f32 	%f11, %f76, %f196, %f11;
	bra.uni 	$Lt_31_221186;
$Lt_31_770:
	.loc	22	418	0
	ld.param.s32 	%r34, [__cudaparm_VideoLimiter_inReductionAxis];
	mov.s32 	%r92, 0;
	set.eq.u32.s32 	%r93, %r34, %r92;
	neg.s32 	%r94, %r93;
	mov.s32 	%r95, 2;
	set.eq.u32.s32 	%r96, %r34, %r95;
	neg.s32 	%r97, %r96;
	or.b32 	%r98, %r94, %r97;
	mov.u32 	%r99, 0;
	setp.eq.s32 	%p43, %r98, %r99;
	@%p43 bra 	$Lt_31_186882;
	mov.f32 	%f197, 0f42c80000;   	// 100
	mul.ftz.f32 	%f35, %f24, %f197;
	ld.param.f32 	%f36, [__cudaparm_VideoLimiter_inLumaMax];
	setp.gt.ftz.f32 	%p44, %f35, %f36;
	@!%p44 bra 	$Lt_31_186626;
	.loc	22	425	0
	ld.param.f32 	%f198, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p20, %f198, %f26;
	mov.f32 	%f199, 0f40000000;   	// 2
	mov.f32 	%f200, 0f3f800000;   	// 1
	selp.f32 	%f75, %f199, %f200, %p20;
	mov.f32 	%f201, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f202, %f36, %f201;
	sub.ftz.f32 	%f203, %f24, %f202;
	sub.ftz.f32 	%f204, %f75, %f28;
	sub.ftz.f32 	%f205, %f204, %f32;
	div.approx.ftz.f32 	%f206, %f205, %f75;
	mul.ftz.f32 	%f207, %f203, %f206;
	sub.ftz.f32 	%f25, %f24, %f207;
	bra.uni 	$Lt_31_186882;
$Lt_31_186626:
	ld.param.f32 	%f46, [__cudaparm_VideoLimiter_inLumaMin];
	setp.lt.ftz.f32 	%p45, %f35, %f46;
	@!%p45 bra 	$Lt_31_186882;
	.loc	22	430	0
	ld.param.f32 	%f208, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p20, %f208, %f26;
	mov.f32 	%f209, 0f40000000;   	// 2
	mov.f32 	%f210, 0f3f800000;   	// 1
	selp.f32 	%f75, %f209, %f210, %p20;
	mov.f32 	%f211, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f212, %f46, %f211;
	sub.ftz.f32 	%f213, %f212, %f24;
	sub.ftz.f32 	%f214, %f75, %f28;
	sub.ftz.f32 	%f215, %f214, %f32;
	div.approx.ftz.f32 	%f216, %f215, %f75;
	fma.rn.ftz.f32 	%f25, %f213, %f216, %f24;
$Lt_31_186882:
$Lt_31_186370:
$Lt_31_185858:
	sub.u32 	%r100, %r34, 1;
	mov.u32 	%r101, 1;
	setp.gt.u32 	%p46, %r100, %r101;
	@%p46 bra 	$Lt_31_188930;
	mov.f32 	%f217, 0f3f9d70a4;   	// 1.23
	mul.ftz.f32 	%f56, %f10, %f217;
	mov.f32 	%f218, 0f3f5f3cb4;   	// 0.87202
	mul.ftz.f32 	%f58, %f17, %f218;
	mov.f32 	%f219, 0f42c80000;   	// 100
	mul.ftz.f32 	%f35, %f24, %f219;
	mul.ftz.f32 	%f60, %f56, %f56;
	fma.rn.ftz.f32 	%f61, %f58, %f58, %f60;
	sqrt.approx.ftz.f32 	%f62, %f61;
	mov.f32 	%f220, 0f42c80000;   	// 100
	mul.ftz.f32 	%f64, %f62, %f220;
	add.ftz.f32 	%f65, %f64, %f35;
	ld.param.f32 	%f66, [__cudaparm_VideoLimiter_inChromaMax];
	setp.gt.ftz.f32 	%p47, %f65, %f66;
	@!%p47 bra 	$Lt_31_187906;
	mov.f32 	%f221, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p48, %f64, %f221;
	@!%p48 bra 	$Lt_31_188674;
	.loc	27	529	0
	sub.ftz.f32 	%f222, %f65, %f66;
	div.approx.ftz.f32 	%f69, %f222, %f64;
	.loc	22	440	0
	mov.f32 	%f223, 0f3f800000;   	// 1
	sub.ftz.f32 	%f224, %f223, %f69;
	bra.uni 	$Lt_31_188418;
$Lt_31_188674:
	mov.f32 	%f224, 0f00000000;   	// 0
$Lt_31_188418:
	.loc	22	442	0
	ld.param.f32 	%f225, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p20, %f225, %f26;
	mov.f32 	%f226, 0f40000000;   	// 2
	mov.f32 	%f227, 0f3f800000;   	// 1
	selp.f32 	%f75, %f226, %f227, %p20;
	sub.ftz.f32 	%f228, %f75, %f28;
	sub.ftz.f32 	%f229, %f228, %f32;
	div.approx.ftz.f32 	%f230, %f229, %f75;
	mul.ftz.f32 	%f231, %f17, %f224;
	sub.ftz.f32 	%f232, %f17, %f231;
	mul.ftz.f32 	%f233, %f230, %f232;
	sub.ftz.f32 	%f18, %f17, %f233;
	.loc	22	443	0
	mul.ftz.f32 	%f234, %f10, %f224;
	sub.ftz.f32 	%f235, %f10, %f234;
	mul.ftz.f32 	%f236, %f230, %f235;
	sub.ftz.f32 	%f11, %f10, %f236;
$Lt_31_187906:
	sub.ftz.f32 	%f83, %f35, %f64;
	ld.param.f32 	%f84, [__cudaparm_VideoLimiter_inChromaMin];
	setp.lt.ftz.f32 	%p49, %f83, %f84;
	@!%p49 bra 	$Lt_31_188930;
	mov.f32 	%f237, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p50, %f64, %f237;
	@!%p50 bra 	$Lt_31_189698;
	.loc	27	529	0
	sub.ftz.f32 	%f238, %f84, %f83;
	div.approx.ftz.f32 	%f87, %f238, %f64;
	.loc	22	449	0
	mov.f32 	%f239, 0f3f800000;   	// 1
	sub.ftz.f32 	%f240, %f239, %f87;
	bra.uni 	$Lt_31_189442;
$Lt_31_189698:
	mov.f32 	%f240, 0f00000000;   	// 0
$Lt_31_189442:
	.loc	22	451	0
	ld.param.f32 	%f241, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p20, %f241, %f26;
	mov.f32 	%f242, 0f40000000;   	// 2
	mov.f32 	%f243, 0f3f800000;   	// 1
	selp.f32 	%f75, %f242, %f243, %p20;
	sub.ftz.f32 	%f228, %f75, %f28;
	sub.ftz.f32 	%f229, %f228, %f32;
	div.approx.ftz.f32 	%f230, %f229, %f75;
	mul.ftz.f32 	%f244, %f18, %f240;
	sub.ftz.f32 	%f245, %f18, %f244;
	fma.rn.ftz.f32 	%f18, %f230, %f245, %f18;
	.loc	22	452	0
	mul.ftz.f32 	%f246, %f11, %f240;
	sub.ftz.f32 	%f247, %f11, %f246;
	fma.rn.ftz.f32 	%f11, %f230, %f247, %f11;
$Lt_31_188930:
$Lt_31_187394:
	mov.u32 	%r102, 3;
	setp.ne.s32 	%p51, %r34, %r102;
	@%p51 bra 	$Lt_31_221186;
	.loc	22	458	0
	mov.f32 	%f248, %f25;
	.loc	22	459	0
	mov.f32 	%f249, %f18;
	.loc	22	460	0
	mov.f32 	%f250, %f11;
	.loc	22	122	0
	mov.f32 	%f251, 0f3f728f61;   	// 0.9475
	mul.ftz.f32 	%f101, %f11, %f251;
	mov.f32 	%f252, 0f3ea3b6e9;   	// 0.319755
	mul.ftz.f32 	%f103, %f11, %f252;
	mov.f32 	%f253, 0fbea3b6e9;   	// -0.319755
	fma.rn.ftz.f32 	%f105, %f253, %f18, %f101;
	mov.f32 	%f254, 0fbf728f61;   	// -0.9475
	mul.ftz.f32 	%f255, %f254, %f18;
	sub.ftz.f32 	%f108, %f255, %f103;
	abs.ftz.f32 	%f109, %f105;
	abs.ftz.f32 	%f110, %f108;
	mov.b32 	%r46, %f108;
	and.b32 	%r47, %r46, -2147483648;
	mov.f32 	%f256, 0f00000000;   	// 0
	set.eq.ftz.u32.f32 	%r103, %f109, %f256;
	neg.s32 	%r104, %r103;
	mov.f32 	%f257, 0f00000000;   	// 0
	set.eq.ftz.u32.f32 	%r105, %f110, %f257;
	neg.s32 	%r106, %r105;
	and.b32 	%r107, %r104, %r106;
	mov.u32 	%r108, 0;
	setp.eq.s32 	%p52, %r107, %r108;
	@%p52 bra 	$Lt_31_190722;
	.loc	25	1842	0
	mov.s32 	%r109, 1078530011;
	mov.s32 	%r110, 0;
	mov.b32 	%r111, %f105;
	mov.s32 	%r112, 0;
	setp.lt.s32 	%p53, %r111, %r112;
	selp.s32 	%r113, %r109, %r110, %p53;
	or.b32 	%r114, %r113, %r47;
	mov.b32 	%f113, %r114;
	bra.uni 	$Lt_31_190978;
$Lt_31_190722:
	mov.f32 	%f258, 0f7f800000;   	// 1.#INF
	set.eq.ftz.u32.f32 	%r115, %f109, %f258;
	neg.s32 	%r116, %r115;
	mov.f32 	%f259, 0f7f800000;   	// 1.#INF
	set.eq.ftz.u32.f32 	%r117, %f110, %f259;
	neg.s32 	%r118, %r117;
	and.b32 	%r119, %r116, %r118;
	mov.u32 	%r120, 0;
	setp.eq.s32 	%p54, %r119, %r120;
	@%p54 bra 	$Lt_31_191234;
	.loc	25	1845	0
	mov.s32 	%r121, 1075235812;
	mov.s32 	%r122, 1061752795;
	mov.b32 	%r123, %f105;
	mov.s32 	%r124, 0;
	setp.lt.s32 	%p55, %r123, %r124;
	selp.s32 	%r125, %r121, %r122, %p55;
	or.b32 	%r126, %r125, %r47;
	mov.b32 	%f113, %r126;
	bra.uni 	$Lt_31_190978;
$Lt_31_191234:
	.loc	25	1207	0
	min.ftz.f32 	%f116, %f109, %f110;
	max.ftz.f32 	%f117, %f109, %f110;
	div.full.ftz.f32 	%f118, %f116, %f117;
	mul.ftz.f32 	%f119, %f118, %f118;
	mov.f32 	%f260, 0fbf52c7ea;   	// -0.823363
	mov.f32 	%f261, 0fc0b59883;   	// -5.67487
	fma.rn.ftz.f32 	%f262, %f119, %f260, %f261;
	mov.f32 	%f263, 0fc0d21907;   	// -6.56556
	fma.rn.ftz.f32 	%f264, %f262, %f119, %f263;
	mul.ftz.f32 	%f265, %f119, %f264;
	mul.ftz.f32 	%f126, %f118, %f265;
	.loc	25	1211	0
	mov.f32 	%f266, 0f41355dc0;   	// 11.3354
	add.ftz.f32 	%f267, %f119, %f266;
	mov.f32 	%f268, 0f41e6bd60;   	// 28.8425
	fma.rn.ftz.f32 	%f269, %f267, %f119, %f268;
	mov.f32 	%f270, 0f419d92c8;   	// 19.6967
	fma.rn.ftz.f32 	%f271, %f269, %f119, %f270;
	rcp.approx.ftz.f32 	%f133, %f271;
	.loc	25	1856	0
	setp.lt.ftz.f32 	%p28, %f109, %f110;
	fma.rn.ftz.f32 	%f134, %f126, %f133, %f118;
	mov.f32 	%f272, 0f3fc90fdb;   	// 1.5708
	sub.ftz.f32 	%f136, %f272, %f134;
	selp.f32 	%f137, %f136, %f134, %p28;
	mov.f32 	%f273, 0f40490fdb;   	// 3.14159
	sub.ftz.f32 	%f274, %f273, %f137;
	mov.f32 	%f275, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p56, %f105, %f275;
	selp.f32 	%f276, %f274, %f137, %p56;
	mov.b32 	%r127, %f276;
	or.b32 	%r128, %r47, %r127;
	mov.b32 	%f277, %r128;
	add.ftz.f32 	%f143, %f105, %f108;
	mov.f32 	%f278, 0f7f800000;   	// 1.#INF
	setp.le.ftz.f32 	%p57, %f143, %f278;
	selp.f32 	%f113, %f277, %f143, %p57;
$Lt_31_190978:
$Lt_31_190466:
	.loc	22	179	0
	mov.f32 	%f279, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p31, %f113, %f279;
	mov.f32 	%f280, 0f40c90fdb;   	// 6.28319
	add.ftz.f32 	%f147, %f113, %f280;
	selp.f32 	%f148, %f147, %f113, %p31;
	mov.f32 	%f281, 0f40c90fdb;   	// 6.28319
	div.approx.ftz.f32 	%f150, %f148, %f281;
	mov.f32 	%f282, 0f3f6aacda;   	// 0.9167
	set.ge.ftz.u32.f32 	%r129, %f150, %f282;
	neg.s32 	%r130, %r129;
	mov.f32 	%f283, 0f3f8147ae;   	// 1.01
	set.lt.ftz.u32.f32 	%r131, %f150, %f283;
	neg.s32 	%r132, %r131;
	and.b32 	%r133, %r130, %r132;
	mov.u32 	%r134, 0;
	setp.ne.s32 	%p58, %r133, %r134;
	@%p58 bra 	$Lt_31_46338;
	mov.f32 	%f284, 0f00000000;   	// 0
	set.ge.ftz.u32.f32 	%r135, %f150, %f284;
	neg.s32 	%r136, %r135;
	mov.f32 	%f285, 0f3daa9931;   	// 0.0833
	set.lt.ftz.u32.f32 	%r137, %f150, %f285;
	neg.s32 	%r138, %r137;
	and.b32 	%r139, %r136, %r138;
	mov.u32 	%r140, 0;
	setp.eq.s32 	%p59, %r139, %r140;
	@%p59 bra 	$Lt_31_46594;
$Lt_31_46338:
	mov.f32 	%f155, 0f3f4ccccd;   	// 0.8
	mov.f32 	%f156, 0f3e4ccccd;   	// 0.2
	bra.uni 	$Lt_31_191490;
$Lt_31_46594:
	.loc	22	185	0
	mov.f32 	%f286, 0f3dab6ae7;   	// 0.0837
	set.ge.ftz.u32.f32 	%r141, %f150, %f286;
	neg.s32 	%r142, %r141;
	mov.f32 	%f287, 0f3e802752;   	// 0.2503
	set.lt.ftz.u32.f32 	%r143, %f150, %f287;
	neg.s32 	%r144, %r143;
	and.b32 	%r145, %r142, %r144;
	mov.u32 	%r146, 0;
	setp.eq.s32 	%p60, %r145, %r146;
	@%p60 bra 	$Lt_31_191746;
	mov.f32 	%f155, 0f3f000000;   	// 0.5
	mov.f32 	%f156, 0f3f000000;   	// 0.5
	bra.uni 	$Lt_31_191490;
$Lt_31_191746:
	.loc	22	190	0
	mov.f32 	%f155, 0f3f4ccccd;   	// 0.8
	mov.f32 	%f156, 0f3e4ccccd;   	// 0.2
$Lt_31_191490:
$Lt_31_48642:
	.loc	22	233	0
	mov.f32 	%f288, 0f3f9d70a4;   	// 1.23
	mul.ftz.f32 	%f56, %f10, %f288;
	mov.f32 	%f289, 0f3f5f3cb4;   	// 0.87202
	mul.ftz.f32 	%f58, %f17, %f289;
	mov.f32 	%f290, 0f42c80000;   	// 100
	mul.ftz.f32 	%f35, %f24, %f290;
	mul.ftz.f32 	%f60, %f56, %f56;
	fma.rn.ftz.f32 	%f61, %f58, %f58, %f60;
	sqrt.approx.ftz.f32 	%f62, %f61;
	mov.f32 	%f291, 0f42c80000;   	// 100
	mul.ftz.f32 	%f64, %f62, %f291;
	add.ftz.f32 	%f65, %f64, %f35;
	ld.param.f32 	%f163, [__cudaparm_VideoLimiter_inSignalMax];
	setp.gt.ftz.f32 	%p35, %f65, %f163;
	sub.ftz.f32 	%f164, %f65, %f163;
	mov.f32 	%f292, 0f00000000;   	// 0
	selp.f32 	%f166, %f164, %f292, %p35;
	mov.f32 	%f293, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p61, %f166, %f293;
	@!%p61 bra 	$Lt_31_194050;
	.loc	22	241	0
	mul.ftz.f32 	%f168, %f166, %f156;
	.loc	22	242	0
	mul.ftz.f32 	%f169, %f166, %f155;
	mov.f32 	%f294, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p62, %f64, %f294;
	@!%p62 bra 	$Lt_31_194818;
	.loc	22	249	0
	mov.f32 	%f295, 0f3f800000;   	// 1
	div.approx.ftz.f32 	%f296, %f169, %f64;
	sub.ftz.f32 	%f173, %f295, %f296;
	.loc	22	250	0
	bra.uni 	$Lt_31_194562;
$Lt_31_194818:
	mov.f32 	%f173, 0f3f800000;   	// 1
$Lt_31_194562:
	.loc	22	255	0
	mov.f32 	%f297, 0f3f7d70a4;   	// 0.99
	setp.ge.ftz.f32 	%p63, %f173, %f297;
	selp.f32 	%f168, %f166, %f168, %p63;
	.loc	22	260	0
	mov.f32 	%f298, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f299, %f168, %f298;
	sub.ftz.f32 	%f248, %f25, %f299;
	.loc	22	261	0
	mul.ftz.f32 	%f249, %f18, %f173;
	.loc	22	262	0
	mul.ftz.f32 	%f250, %f11, %f173;
$Lt_31_194050:
	sub.ftz.f32 	%f83, %f35, %f64;
	ld.param.f32 	%f177, [__cudaparm_VideoLimiter_inSignalMin];
	setp.lt.ftz.f32 	%p39, %f83, %f177;
	sub.ftz.f32 	%f178, %f177, %f83;
	mov.f32 	%f300, 0f00000000;   	// 0
	selp.f32 	%f180, %f178, %f300, %p39;
	mov.f32 	%f301, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p64, %f180, %f301;
	@!%p64 bra 	$Lt_31_195074;
	.loc	22	267	0
	mul.ftz.f32 	%f182, %f180, %f156;
	.loc	22	268	0
	mul.ftz.f32 	%f183, %f180, %f155;
	mov.f32 	%f302, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p65, %f64, %f302;
	@!%p65 bra 	$Lt_31_195842;
	.loc	22	275	0
	mov.f32 	%f303, 0f3f800000;   	// 1
	div.approx.ftz.f32 	%f304, %f183, %f64;
	sub.ftz.f32 	%f187, %f303, %f304;
	.loc	22	276	0
	bra.uni 	$Lt_31_195586;
$Lt_31_195842:
	mov.f32 	%f187, 0f3f800000;   	// 1
$Lt_31_195586:
	.loc	22	281	0
	mov.f32 	%f305, 0f3f7d70a4;   	// 0.99
	setp.ge.ftz.f32 	%p66, %f187, %f305;
	selp.f32 	%f182, %f180, %f182, %p66;
	.loc	22	286	0
	mov.f32 	%f306, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f307, %f182, %f306;
	add.ftz.f32 	%f248, %f25, %f307;
	.loc	22	287	0
	mul.ftz.f32 	%f249, %f18, %f187;
	.loc	22	288	0
	mul.ftz.f32 	%f250, %f11, %f187;
$Lt_31_195074:
	.loc	22	476	0
	ld.param.f32 	%f308, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p20, %f308, %f26;
	mov.f32 	%f309, 0f40000000;   	// 2
	mov.f32 	%f310, 0f3f800000;   	// 1
	selp.f32 	%f75, %f309, %f310, %p20;
	sub.ftz.f32 	%f228, %f75, %f28;
	sub.ftz.f32 	%f229, %f228, %f32;
	div.approx.ftz.f32 	%f230, %f229, %f75;
	sub.ftz.f32 	%f311, %f248, %f25;
	fma.rn.ftz.f32 	%f25, %f230, %f311, %f25;
	.loc	22	477	0
	sub.ftz.f32 	%f312, %f249, %f18;
	fma.rn.ftz.f32 	%f18, %f230, %f312, %f18;
	.loc	22	478	0
	sub.ftz.f32 	%f313, %f250, %f11;
	fma.rn.ftz.f32 	%f11, %f230, %f313, %f11;
	bra.uni 	$Lt_31_221186;
// Section $Lt_31_1026: limit against inLumaMax / inLumaMin.
// Loads inReductionAxis into %r34 (reused by the code that follows).
// Only when %r34 is 0 or 2: v = %f24 * 100 is compared with the two limits and,
// if out of range, %f25 is written as %f24 moved part of the way to limit/100.
// The fraction moved is w = %f28 / s, with s = 2 when
// inShadowThresholdSoftness > %f26 and s = 1 otherwise.
// NOTE(review): %f24, %f26 and %f28 are produced before this chunk; presumably
// the pixel luma and blend weights -- confirm against the kernel source.
$Lt_31_1026:
	.loc	22	483	0
	ld.param.s32 	%r34, [__cudaparm_VideoLimiter_inReductionAxis];
	// %r153 is nonzero exactly when %r34 == 0 or %r34 == 2.
	mov.s32 	%r147, 0;
	set.eq.u32.s32 	%r148, %r34, %r147;
	neg.s32 	%r149, %r148;
	mov.s32 	%r150, 2;
	set.eq.u32.s32 	%r151, %r34, %r150;
	neg.s32 	%r152, %r151;
	or.b32 	%r153, %r149, %r152;
	mov.u32 	%r154, 0;
	setp.eq.s32 	%p67, %r153, %r154;
	// Any other axis value skips the whole luma step.
	@%p67 bra 	$Lt_31_197122;
	mov.f32 	%f314, 0f42c80000;   	// 100
	mul.ftz.f32 	%f35, %f24, %f314;
	ld.param.f32 	%f36, [__cudaparm_VideoLimiter_inLumaMax];
	setp.gt.ftz.f32 	%p68, %f35, %f36;
	@!%p68 bra 	$Lt_31_196866;
	.loc	22	490	0
	// Above the maximum: %f25 = %f24 - (%f24 - inLumaMax/100) * w.
	mov.f32 	%f315, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f316, %f36, %f315;
	sub.ftz.f32 	%f317, %f24, %f316;
	mov.f32 	%f318, 0f40000000;   	// 2
	mov.f32 	%f319, 0f3f800000;   	// 1
	ld.param.f32 	%f320, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p69, %f320, %f26;
	selp.f32 	%f321, %f318, %f319, %p69;
	div.approx.ftz.f32 	%f322, %f28, %f321;
	mul.ftz.f32 	%f323, %f317, %f322;
	sub.ftz.f32 	%f25, %f24, %f323;
	bra.uni 	$Lt_31_197122;
$Lt_31_196866:
	// Not above the maximum: test the minimum instead.
	ld.param.f32 	%f46, [__cudaparm_VideoLimiter_inLumaMin];
	setp.lt.ftz.f32 	%p70, %f35, %f46;
	@!%p70 bra 	$Lt_31_197122;
	.loc	22	495	0
	// Below the minimum: %f25 = %f24 + (inLumaMin/100 - %f24) * w.
	mov.f32 	%f324, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f325, %f46, %f324;
	sub.ftz.f32 	%f326, %f325, %f24;
	mov.f32 	%f327, 0f40000000;   	// 2
	mov.f32 	%f328, 0f3f800000;   	// 1
	ld.param.f32 	%f329, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p71, %f329, %f26;
	selp.f32 	%f330, %f327, %f328, %p71;
	div.approx.ftz.f32 	%f331, %f28, %f330;
	fma.rn.ftz.f32 	%f25, %f326, %f331, %f24;
// Join point after the luma step, followed by the inChromaMax / inChromaMin
// limit. Runs only when %r34 is 1 or 2 (unsigned test (%r34 - 1) <= 1).
// Derived values:
//   %f64 = 100 * sqrt((%f17 * 0.87202)^2 + (%f10 * 1.23)^2)
//   %f35 = 100 * %f24
//   %f65 = %f35 + %f64   (compared with inChromaMax)
//   %f83 = %f35 - %f64   (compared with inChromaMin)
// Results are written to %f18 and %f11.
// NOTE(review): %f10 / %f17 look like the two chroma components and %f24 the
// luma of the input pixel; they are defined before this chunk -- confirm.
$Lt_31_197122:
$Lt_31_196610:
$Lt_31_196098:
	sub.u32 	%r155, %r34, 1;
	mov.u32 	%r156, 1;
	setp.gt.u32 	%p72, %r155, %r156;
	@%p72 bra 	$Lt_31_199170;
	mov.f32 	%f332, 0f3f9d70a4;   	// 1.23
	mul.ftz.f32 	%f56, %f10, %f332;
	mov.f32 	%f333, 0f3f5f3cb4;   	// 0.87202
	mul.ftz.f32 	%f58, %f17, %f333;
	mov.f32 	%f334, 0f42c80000;   	// 100
	mul.ftz.f32 	%f35, %f24, %f334;
	mul.ftz.f32 	%f60, %f56, %f56;
	fma.rn.ftz.f32 	%f61, %f58, %f58, %f60;
	sqrt.approx.ftz.f32 	%f62, %f61;
	mov.f32 	%f335, 0f42c80000;   	// 100
	mul.ftz.f32 	%f64, %f62, %f335;
	add.ftz.f32 	%f65, %f64, %f35;
	ld.param.f32 	%f66, [__cudaparm_VideoLimiter_inChromaMax];
	setp.gt.ftz.f32 	%p73, %f65, %f66;
	@!%p73 bra 	$Lt_31_198146;
	// Upper limit exceeded. Scale factor %f339 = 1 - (%f65 - inChromaMax) / %f64,
	// or 0 when %f64 compares equal to zero (avoids the division).
	mov.f32 	%f336, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p74, %f64, %f336;
	@!%p74 bra 	$Lt_31_198914;
	.loc	27	529	0
	sub.ftz.f32 	%f337, %f65, %f66;
	div.approx.ftz.f32 	%f69, %f337, %f64;
	.loc	22	506	0
	mov.f32 	%f338, 0f3f800000;   	// 1
	sub.ftz.f32 	%f339, %f338, %f69;
	bra.uni 	$Lt_31_198658;
$Lt_31_198914:
	mov.f32 	%f339, 0f00000000;   	// 0
$Lt_31_198658:
	.loc	22	508	0
	// w = %f28 / s, s = 2 if inShadowThresholdSoftness > %f26 else 1.
	ld.param.f32 	%f340, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p20, %f340, %f26;
	mov.f32 	%f341, 0f40000000;   	// 2
	mov.f32 	%f342, 0f3f800000;   	// 1
	selp.f32 	%f75, %f341, %f342, %p20;
	div.approx.ftz.f32 	%f343, %f28, %f75;
	// %f18 = %f17 - w * (%f17 - %f17 * %f339)
	mul.ftz.f32 	%f344, %f17, %f339;
	sub.ftz.f32 	%f345, %f17, %f344;
	mul.ftz.f32 	%f346, %f343, %f345;
	sub.ftz.f32 	%f18, %f17, %f346;
	.loc	22	509	0
	// %f11 = %f10 - w * (%f10 - %f10 * %f339)
	mul.ftz.f32 	%f347, %f10, %f339;
	sub.ftz.f32 	%f348, %f10, %f347;
	mul.ftz.f32 	%f349, %f343, %f348;
	sub.ftz.f32 	%f11, %f10, %f349;
$Lt_31_198146:
	// Lower limit test on %f83 = %f35 - %f64.
	sub.ftz.f32 	%f83, %f35, %f64;
	ld.param.f32 	%f84, [__cudaparm_VideoLimiter_inChromaMin];
	setp.lt.ftz.f32 	%p75, %f83, %f84;
	@!%p75 bra 	$Lt_31_199170;
	// Scale factor %f353 = 1 - (inChromaMin - %f83) / %f64, or 0 when %f64 == 0.
	mov.f32 	%f350, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p76, %f64, %f350;
	@!%p76 bra 	$Lt_31_199938;
	.loc	27	529	0
	sub.ftz.f32 	%f351, %f84, %f83;
	div.approx.ftz.f32 	%f87, %f351, %f64;
	.loc	22	515	0
	mov.f32 	%f352, 0f3f800000;   	// 1
	sub.ftz.f32 	%f353, %f352, %f87;
	bra.uni 	$Lt_31_199682;
$Lt_31_199938:
	mov.f32 	%f353, 0f00000000;   	// 0
$Lt_31_199682:
	.loc	22	517	0
	ld.param.f32 	%f354, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p20, %f354, %f26;
	mov.f32 	%f355, 0f40000000;   	// 2
	mov.f32 	%f356, 0f3f800000;   	// 1
	selp.f32 	%f75, %f355, %f356, %p20;
	div.approx.ftz.f32 	%f343, %f28, %f75;
	// Here the update is applied in place and added, not subtracted:
	// %f18 = %f18 + w * (%f18 - %f18 * %f353)
	mul.ftz.f32 	%f357, %f18, %f353;
	sub.ftz.f32 	%f358, %f18, %f357;
	fma.rn.ftz.f32 	%f18, %f343, %f358, %f18;
	.loc	22	518	0
	// %f11 = %f11 + w * (%f11 - %f11 * %f353)
	mul.ftz.f32 	%f359, %f11, %f353;
	sub.ftz.f32 	%f360, %f11, %f359;
	fma.rn.ftz.f32 	%f11, %f343, %f360, %f11;
// Join point after the chroma step. Everything from here to the branch to
// $Lt_31_221186 runs only when %r34 == 3.
// This part seeds %f361/%f362/%f363 with the current %f25/%f18/%f11, forms the
// pair (%f105, %f108) from %f11 and %f18, and leaves an angle in %f113.
// The angle computation has the shape of an inlined atan2(y = %f108, x = %f105):
// special cases for both-zero and both-infinite inputs, then a rational
// approximation on min/max with quadrant fix-ups.
// NOTE(review): identified as atan2f from its structure and constants only.
$Lt_31_199170:
$Lt_31_197634:
	mov.u32 	%r157, 3;
	setp.ne.s32 	%p77, %r34, %r157;
	@%p77 bra 	$Lt_31_221186;
	.loc	22	524	0
	// Defaults used later if neither signal limit triggers.
	mov.f32 	%f361, %f25;
	.loc	22	525	0
	mov.f32 	%f362, %f18;
	.loc	22	526	0
	mov.f32 	%f363, %f11;
	.loc	22	122	0
	// %f105 =  0.9475 * %f11 - 0.319755 * %f18
	// %f108 = -0.9475 * %f18 - 0.319755 * %f11
	mov.f32 	%f364, 0f3f728f61;   	// 0.9475
	mul.ftz.f32 	%f101, %f11, %f364;
	mov.f32 	%f365, 0f3ea3b6e9;   	// 0.319755
	mul.ftz.f32 	%f103, %f11, %f365;
	mov.f32 	%f366, 0fbea3b6e9;   	// -0.319755
	fma.rn.ftz.f32 	%f105, %f366, %f18, %f101;
	mov.f32 	%f367, 0fbf728f61;   	// -0.9475
	mul.ftz.f32 	%f368, %f367, %f18;
	sub.ftz.f32 	%f108, %f368, %f103;
	abs.ftz.f32 	%f109, %f105;
	abs.ftz.f32 	%f110, %f108;
	// %r47 keeps only the sign bit of %f108; it is OR-ed into every result below.
	mov.b32 	%r46, %f108;
	and.b32 	%r47, %r46, -2147483648;
	// Case 1: both magnitudes are zero.
	mov.f32 	%f369, 0f00000000;   	// 0
	set.eq.ftz.u32.f32 	%r158, %f109, %f369;
	neg.s32 	%r159, %r158;
	mov.f32 	%f370, 0f00000000;   	// 0
	set.eq.ftz.u32.f32 	%r160, %f110, %f370;
	neg.s32 	%r161, %r160;
	and.b32 	%r162, %r159, %r161;
	mov.u32 	%r163, 0;
	setp.eq.s32 	%p78, %r162, %r163;
	@%p78 bra 	$Lt_31_200962;
	.loc	25	1842	0
	// 1078530011 = 0x40490FDB, the float bit pattern of pi.
	// Result is pi when the sign bit of %f105 is set, else 0, with the sign of %f108.
	mov.s32 	%r164, 1078530011;
	mov.s32 	%r165, 0;
	mov.b32 	%r166, %f105;
	mov.s32 	%r167, 0;
	setp.lt.s32 	%p79, %r166, %r167;
	selp.s32 	%r168, %r164, %r165, %p79;
	or.b32 	%r169, %r168, %r47;
	mov.b32 	%f113, %r169;
	bra.uni 	$Lt_31_201218;
$Lt_31_200962:
	// Case 2: both magnitudes are +infinity.
	mov.f32 	%f371, 0f7f800000;   	// 1.#INF
	set.eq.ftz.u32.f32 	%r170, %f109, %f371;
	neg.s32 	%r171, %r170;
	mov.f32 	%f372, 0f7f800000;   	// 1.#INF
	set.eq.ftz.u32.f32 	%r172, %f110, %f372;
	neg.s32 	%r173, %r172;
	and.b32 	%r174, %r171, %r173;
	mov.u32 	%r175, 0;
	setp.eq.s32 	%p80, %r174, %r175;
	@%p80 bra 	$Lt_31_201474;
	.loc	25	1845	0
	// 1075235812 = 0x4016CBE4 (3*pi/4), 1061752795 = 0x3F490FDB (pi/4).
	mov.s32 	%r176, 1075235812;
	mov.s32 	%r177, 1061752795;
	mov.b32 	%r178, %f105;
	mov.s32 	%r179, 0;
	setp.lt.s32 	%p81, %r178, %r179;
	selp.s32 	%r180, %r176, %r177, %p81;
	or.b32 	%r181, %r180, %r47;
	mov.b32 	%f113, %r181;
	bra.uni 	$Lt_31_201218;
$Lt_31_201474:
	.loc	25	1207	0
	// General case: t = min(|x|,|y|) / max(|x|,|y|), t2 = t*t.
	// Numerator: %f126 = t * t2 * ((t2 * -0.823363 - 5.67487) * t2 - 6.56556)
	min.ftz.f32 	%f116, %f109, %f110;
	max.ftz.f32 	%f117, %f109, %f110;
	div.full.ftz.f32 	%f118, %f116, %f117;
	mul.ftz.f32 	%f119, %f118, %f118;
	mov.f32 	%f373, 0fbf52c7ea;   	// -0.823363
	mov.f32 	%f374, 0fc0b59883;   	// -5.67487
	fma.rn.ftz.f32 	%f375, %f119, %f373, %f374;
	mov.f32 	%f376, 0fc0d21907;   	// -6.56556
	fma.rn.ftz.f32 	%f377, %f375, %f119, %f376;
	mul.ftz.f32 	%f378, %f119, %f377;
	mul.ftz.f32 	%f126, %f118, %f378;
	.loc	25	1211	0
	// Denominator: ((t2 + 11.3354) * t2 + 28.8425) * t2 + 19.6967; %f133 is its reciprocal.
	mov.f32 	%f379, 0f41355dc0;   	// 11.3354
	add.ftz.f32 	%f380, %f119, %f379;
	mov.f32 	%f381, 0f41e6bd60;   	// 28.8425
	fma.rn.ftz.f32 	%f382, %f380, %f119, %f381;
	mov.f32 	%f383, 0f419d92c8;   	// 19.6967
	fma.rn.ftz.f32 	%f384, %f382, %f119, %f383;
	rcp.approx.ftz.f32 	%f133, %f384;
	.loc	25	1856	0
	// a = t + numerator/denominator; use pi/2 - a when |x| < |y|,
	// then pi - (that) when x < 0, then apply the sign of y.
	setp.lt.ftz.f32 	%p28, %f109, %f110;
	fma.rn.ftz.f32 	%f134, %f126, %f133, %f118;
	mov.f32 	%f385, 0f3fc90fdb;   	// 1.5708
	sub.ftz.f32 	%f136, %f385, %f134;
	selp.f32 	%f137, %f136, %f134, %p28;
	mov.f32 	%f386, 0f40490fdb;   	// 3.14159
	sub.ftz.f32 	%f387, %f386, %f137;
	mov.f32 	%f388, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p82, %f105, %f388;
	selp.f32 	%f389, %f387, %f137, %p82;
	mov.b32 	%r182, %f389;
	or.b32 	%r183, %r47, %r182;
	mov.b32 	%f390, %r183;
	// If x + y does not compare <= +inf (unordered, i.e. NaN), return the sum itself.
	add.ftz.f32 	%f143, %f105, %f108;
	mov.f32 	%f391, 0f7f800000;   	// 1.#INF
	setp.le.ftz.f32 	%p83, %f143, %f391;
	selp.f32 	%f113, %f390, %f143, %p83;
// Join point of the angle computation (%f113 holds the angle in radians).
// Normalises the angle to a fraction of a full turn and picks a weight pair:
//   h = (%f113 < 0 ? %f113 + 2*pi : %f113) / (2*pi)
//   0.9167 <= h < 1.01  or  0 <= h < 0.0833  ->  %f155 = 0.8, %f156 = 0.2
//   0.0837 <= h < 0.2503                     ->  %f155 = 0.5, %f156 = 0.5
//   anything else                            ->  %f155 = 0.8, %f156 = 0.2
// Note: the first and last cases assign the same values.
$Lt_31_201218:
$Lt_31_200706:
	.loc	22	179	0
	mov.f32 	%f392, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p31, %f113, %f392;
	mov.f32 	%f393, 0f40c90fdb;   	// 6.28319
	add.ftz.f32 	%f147, %f113, %f393;
	selp.f32 	%f148, %f147, %f113, %p31;
	mov.f32 	%f394, 0f40c90fdb;   	// 6.28319
	div.approx.ftz.f32 	%f150, %f148, %f394;
	// Range test 1: 0.9167 <= h < 1.01.
	mov.f32 	%f395, 0f3f6aacda;   	// 0.9167
	set.ge.ftz.u32.f32 	%r184, %f150, %f395;
	neg.s32 	%r185, %r184;
	mov.f32 	%f396, 0f3f8147ae;   	// 1.01
	set.lt.ftz.u32.f32 	%r186, %f150, %f396;
	neg.s32 	%r187, %r186;
	and.b32 	%r188, %r185, %r187;
	mov.u32 	%r189, 0;
	setp.ne.s32 	%p84, %r188, %r189;
	@%p84 bra 	$Lt_31_70914;
	// Range test 2: 0 <= h < 0.0833 (shares the target of test 1).
	mov.f32 	%f397, 0f00000000;   	// 0
	set.ge.ftz.u32.f32 	%r190, %f150, %f397;
	neg.s32 	%r191, %r190;
	mov.f32 	%f398, 0f3daa9931;   	// 0.0833
	set.lt.ftz.u32.f32 	%r192, %f150, %f398;
	neg.s32 	%r193, %r192;
	and.b32 	%r194, %r191, %r193;
	mov.u32 	%r195, 0;
	setp.eq.s32 	%p85, %r194, %r195;
	@%p85 bra 	$Lt_31_71170;
$Lt_31_70914:
	mov.f32 	%f155, 0f3f4ccccd;   	// 0.8
	mov.f32 	%f156, 0f3e4ccccd;   	// 0.2
	bra.uni 	$Lt_31_201730;
$Lt_31_71170:
	.loc	22	185	0
	// Range test 3: 0.0837 <= h < 0.2503.
	mov.f32 	%f399, 0f3dab6ae7;   	// 0.0837
	set.ge.ftz.u32.f32 	%r196, %f150, %f399;
	neg.s32 	%r197, %r196;
	mov.f32 	%f400, 0f3e802752;   	// 0.2503
	set.lt.ftz.u32.f32 	%r198, %f150, %f400;
	neg.s32 	%r199, %r198;
	and.b32 	%r200, %r197, %r199;
	mov.u32 	%r201, 0;
	setp.eq.s32 	%p86, %r200, %r201;
	@%p86 bra 	$Lt_31_201986;
	mov.f32 	%f155, 0f3f000000;   	// 0.5
	mov.f32 	%f156, 0f3f000000;   	// 0.5
	bra.uni 	$Lt_31_201730;
$Lt_31_201986:
	.loc	22	190	0
	// Fallback for every remaining h.
	mov.f32 	%f155, 0f3f4ccccd;   	// 0.8
	mov.f32 	%f156, 0f3e4ccccd;   	// 0.2
// Join point after weight selection (%f155, %f156), followed by the
// inSignalMax / inSignalMin limit.
// Recomputes, from the unmodified %f10 / %f17 / %f24:
//   %f64 = 100 * sqrt((%f17 * 0.87202)^2 + (%f10 * 1.23)^2),  %f35 = 100 * %f24
//   %f65 = %f35 + %f64,  %f83 = %f35 - %f64
// Targets are written to %f361/%f362/%f363, which keep their earlier values
// when neither limit is exceeded. The minimum check runs after the maximum
// check and overwrites its results if both trigger.
$Lt_31_201730:
$Lt_31_73218:
	.loc	22	233	0
	mov.f32 	%f401, 0f3f9d70a4;   	// 1.23
	mul.ftz.f32 	%f56, %f10, %f401;
	mov.f32 	%f402, 0f3f5f3cb4;   	// 0.87202
	mul.ftz.f32 	%f58, %f17, %f402;
	mov.f32 	%f403, 0f42c80000;   	// 100
	mul.ftz.f32 	%f35, %f24, %f403;
	mul.ftz.f32 	%f60, %f56, %f56;
	fma.rn.ftz.f32 	%f61, %f58, %f58, %f60;
	sqrt.approx.ftz.f32 	%f62, %f61;
	mov.f32 	%f404, 0f42c80000;   	// 100
	mul.ftz.f32 	%f64, %f62, %f404;
	add.ftz.f32 	%f65, %f64, %f35;
	// Excess over the maximum: %f166 = max-exceeding amount, or 0.
	ld.param.f32 	%f163, [__cudaparm_VideoLimiter_inSignalMax];
	setp.gt.ftz.f32 	%p35, %f65, %f163;
	sub.ftz.f32 	%f164, %f65, %f163;
	mov.f32 	%f405, 0f00000000;   	// 0
	selp.f32 	%f166, %f164, %f405, %p35;
	mov.f32 	%f406, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p87, %f166, %f406;
	@!%p87 bra 	$Lt_31_204290;
	.loc	22	241	0
	// Split the excess: %f168 = excess * %f156, %f169 = excess * %f155.
	mul.ftz.f32 	%f168, %f166, %f156;
	.loc	22	242	0
	mul.ftz.f32 	%f169, %f166, %f155;
	// Scale factor %f173 = 1 - %f169 / %f64, or 1 when %f64 == 0.
	mov.f32 	%f407, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p88, %f64, %f407;
	@!%p88 bra 	$Lt_31_205058;
	.loc	22	249	0
	mov.f32 	%f408, 0f3f800000;   	// 1
	div.approx.ftz.f32 	%f409, %f169, %f64;
	sub.ftz.f32 	%f173, %f408, %f409;
	.loc	22	250	0
	bra.uni 	$Lt_31_204802;
$Lt_31_205058:
	mov.f32 	%f173, 0f3f800000;   	// 1
$Lt_31_204802:
	.loc	22	255	0
	// When the scale factor is >= 0.99, the whole excess goes into %f168.
	mov.f32 	%f410, 0f3f7d70a4;   	// 0.99
	setp.ge.ftz.f32 	%p89, %f173, %f410;
	selp.f32 	%f168, %f166, %f168, %p89;
	.loc	22	260	0
	// %f361 = %f25 - %f168 / 100; %f362 = %f18 * %f173; %f363 = %f11 * %f173.
	mov.f32 	%f411, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f412, %f168, %f411;
	sub.ftz.f32 	%f361, %f25, %f412;
	.loc	22	261	0
	mul.ftz.f32 	%f362, %f18, %f173;
	.loc	22	262	0
	mul.ftz.f32 	%f363, %f11, %f173;
$Lt_31_204290:
	// Shortfall below the minimum: %f180 = inSignalMin - %f83, or 0.
	sub.ftz.f32 	%f83, %f35, %f64;
	ld.param.f32 	%f177, [__cudaparm_VideoLimiter_inSignalMin];
	setp.lt.ftz.f32 	%p39, %f83, %f177;
	sub.ftz.f32 	%f178, %f177, %f83;
	mov.f32 	%f413, 0f00000000;   	// 0
	selp.f32 	%f180, %f178, %f413, %p39;
	mov.f32 	%f414, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p90, %f180, %f414;
	@!%p90 bra 	$Lt_31_205314;
	.loc	22	267	0
	mul.ftz.f32 	%f182, %f180, %f156;
	.loc	22	268	0
	mul.ftz.f32 	%f183, %f180, %f155;
	// Scale factor %f187 = 1 - %f183 / %f64, or 1 when %f64 == 0.
	mov.f32 	%f415, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p91, %f64, %f415;
	@!%p91 bra 	$Lt_31_206082;
	.loc	22	275	0
	mov.f32 	%f416, 0f3f800000;   	// 1
	div.approx.ftz.f32 	%f417, %f183, %f64;
	sub.ftz.f32 	%f187, %f416, %f417;
	.loc	22	276	0
	bra.uni 	$Lt_31_205826;
$Lt_31_206082:
	mov.f32 	%f187, 0f3f800000;   	// 1
$Lt_31_205826:
	.loc	22	281	0
	mov.f32 	%f418, 0f3f7d70a4;   	// 0.99
	setp.ge.ftz.f32 	%p92, %f187, %f418;
	selp.f32 	%f182, %f180, %f182, %p92;
	.loc	22	286	0
	// %f361 = %f25 + %f182 / 100; %f362 = %f18 * %f187; %f363 = %f11 * %f187.
	mov.f32 	%f419, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f420, %f182, %f419;
	add.ftz.f32 	%f361, %f25, %f420;
	.loc	22	287	0
	mul.ftz.f32 	%f362, %f18, %f187;
	.loc	22	288	0
	mul.ftz.f32 	%f363, %f11, %f187;
// Final blend for the %r34 == 3 path: move %f25 / %f18 / %f11 towards the
// targets %f361 / %f362 / %f363 by the weight w = %f28 / s, where s = 2 if
// inShadowThresholdSoftness > %f26 and s = 1 otherwise:
//   r = r + w * (target - r)
// Then jumps to $Lt_31_221186, which is outside this chunk.
$Lt_31_205314:
	.loc	22	542	0
	ld.param.f32 	%f421, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p20, %f421, %f26;
	mov.f32 	%f422, 0f40000000;   	// 2
	mov.f32 	%f423, 0f3f800000;   	// 1
	selp.f32 	%f75, %f422, %f423, %p20;
	div.approx.ftz.f32 	%f343, %f28, %f75;
	sub.ftz.f32 	%f424, %f361, %f25;
	fma.rn.ftz.f32 	%f25, %f343, %f424, %f25;
	.loc	22	543	0
	sub.ftz.f32 	%f425, %f362, %f18;
	fma.rn.ftz.f32 	%f18, %f343, %f425, %f18;
	.loc	22	544	0
	sub.ftz.f32 	%f426, %f363, %f11;
	fma.rn.ftz.f32 	%f11, %f343, %f426, %f11;
	bra.uni 	$Lt_31_221186;
$Lt_31_1282:
	.loc	22	549	0
	ld.param.s32 	%r34, [__cudaparm_VideoLimiter_inReductionAxis];
	mov.s32 	%r202, 0;
	set.eq.u32.s32 	%r203, %r34, %r202;
	neg.s32 	%r204, %r203;
	mov.s32 	%r205, 2;
	set.eq.u32.s32 	%r206, %r34, %r205;
	neg.s32 	%r207, %r206;
	or.b32 	%r208, %r204, %r207;
	mov.u32 	%r209, 0;
	setp.eq.s32 	%p93, %r208, %r209;
	@%p93 bra 	$Lt_31_207362;
	mov.f32 	%f427, 0f42c80000;   	// 100
	mul.ftz.f32 	%f35, %f24, %f427;
	ld.param.f32 	%f36, [__cudaparm_VideoLimiter_inLumaMax];
	setp.gt.ftz.f32 	%p94, %f35, %f36;
	@!%p94 bra 	$Lt_31_207106;
	.loc	22	555	0
	mov.f32 	%f428, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f429, %f36, %f428;
	sub.ftz.f32 	%f430, %f24, %f429;
	.loc	22	556	0
	ld.param.f32 	%f431, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p20, %f431, %f26;
	mov.f32 	%f432, 0f40000000;   	// 2
	mov.f32 	%f433, 0f3f800000;   	// 1
	selp.f32 	%f75, %f432, %f433, %p20;
	div.approx.ftz.f32 	%f434, %f32, %f75;
	mul.ftz.f32 	%f435, %f430, %f434;
	sub.ftz.f32 	%f436, %f24, %f435;
	.loc	22	557	0
	div.approx.ftz.f32 	%f437, %f28, %f75;
	mul.ftz.f32 	%f438, %f430, %f437;
	sub.ftz.f32 	%f25, %f436, %f438;
	bra.uni 	$Lt_31_207362;
$Lt_31_207106:
	ld.param.f32 	%f46, [__cudaparm_VideoLimiter_inLumaMin];
	setp.lt.ftz.f32 	%p95, %f35, %f46;
	@!%p95 bra 	$Lt_31_207362;
	.loc	22	561	0
	mov.f32 	%f439, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f440, %f46, %f439;
	sub.ftz.f32 	%f441, %f440, %f24;
	.loc	22	562	0
	ld.param.f32 	%f442, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p20, %f442, %f26;
	mov.f32 	%f443, 0f40000000;   	// 2
	mov.f32 	%f444, 0f3f800000;   	// 1
	selp.f32 	%f75, %f443, %f444, %p20;
	div.approx.ftz.f32 	%f445, %f32, %f75;
	fma.rn.ftz.f32 	%f446, %f441, %f445, %f24;
	.loc	22	563	0
	div.approx.ftz.f32 	%f447, %f28, %f75;
	fma.rn.ftz.f32 	%f25, %f441, %f447, %f446;
$Lt_31_207362:
$Lt_31_206850:
$Lt_31_206338:
	sub.u32 	%r210, %r34, 1;
	mov.u32 	%r211, 1;
	setp.gt.u32 	%p96, %r210, %r211;
	@%p96 bra 	$Lt_31_209410;
	mov.f32 	%f448, 0f3f9d70a4;   	// 1.23
	mul.ftz.f32 	%f56, %f10, %f448;
	mov.f32 	%f449, 0f3f5f3cb4;   	// 0.87202
	mul.ftz.f32 	%f58, %f17, %f449;
	mov.f32 	%f450, 0f42c80000;   	// 100
	mul.ftz.f32 	%f35, %f24, %f450;
	mul.ftz.f32 	%f60, %f56, %f56;
	fma.rn.ftz.f32 	%f61, %f58, %f58, %f60;
	sqrt.approx.ftz.f32 	%f62, %f61;
	mov.f32 	%f451, 0f42c80000;   	// 100
	mul.ftz.f32 	%f64, %f62, %f451;
	add.ftz.f32 	%f65, %f64, %f35;
	ld.param.f32 	%f66, [__cudaparm_VideoLimiter_inChromaMax];
	setp.gt.ftz.f32 	%p97, %f65, %f66;
	@!%p97 bra 	$Lt_31_208386;
	mov.f32 	%f452, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p98, %f64, %f452;
	@!%p98 bra 	$Lt_31_209154;
	.loc	27	529	0
	sub.ftz.f32 	%f453, %f65, %f66;
	div.approx.ftz.f32 	%f69, %f453, %f64;
	.loc	22	573	0
	mov.f32 	%f454, 0f3f800000;   	// 1
	sub.ftz.f32 	%f455, %f454, %f69;
	bra.uni 	$Lt_31_208898;
$Lt_31_209154:
	mov.f32 	%f455, 0f00000000;   	// 0
$Lt_31_208898:
	.loc	22	575	0
	ld.param.f32 	%f456, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p20, %f456, %f26;
	mov.f32 	%f457, 0f40000000;   	// 2
	mov.f32 	%f458, 0f3f800000;   	// 1
	selp.f32 	%f75, %f457, %f458, %p20;
	div.approx.ftz.f32 	%f76, %f32, %f75;
	mul.ftz.f32 	%f459, %f17, %f455;
	sub.ftz.f32 	%f460, %f17, %f459;
	mul.ftz.f32 	%f461, %f76, %f460;
	sub.ftz.f32 	%f462, %f17, %f461;
	.loc	22	576	0
	mul.ftz.f32 	%f463, %f10, %f455;
	sub.ftz.f32 	%f464, %f10, %f463;
	mul.ftz.f32 	%f465, %f76, %f464;
	sub.ftz.f32 	%f466, %f10, %f465;
	.loc	22	577	0
	div.approx.ftz.f32 	%f343, %f28, %f75;
	mul.ftz.f32 	%f467, %f462, %f455;
	sub.ftz.f32 	%f468, %f462, %f467;
	mul.ftz.f32 	%f469, %f343, %f468;
	sub.ftz.f32 	%f18, %f462, %f469;
	.loc	22	578	0
	mul.ftz.f32 	%f470, %f466, %f455;
	sub.ftz.f32 	%f471, %f466, %f470;
	mul.ftz.f32 	%f472, %f343, %f471;
	sub.ftz.f32 	%f11, %f466, %f472;
$Lt_31_208386:
	sub.ftz.f32 	%f83, %f35, %f64;
	ld.param.f32 	%f84, [__cudaparm_VideoLimiter_inChromaMin];
	setp.lt.ftz.f32 	%p99, %f83, %f84;
	@!%p99 bra 	$Lt_31_209410;
	mov.f32 	%f473, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p100, %f64, %f473;
	@!%p100 bra 	$Lt_31_210178;
	.loc	27	529	0
	sub.ftz.f32 	%f474, %f84, %f83;
	div.approx.ftz.f32 	%f87, %f474, %f64;
	.loc	22	584	0
	mov.f32 	%f475, 0f3f800000;   	// 1
	sub.ftz.f32 	%f476, %f475, %f87;
	bra.uni 	$Lt_31_209922;
$Lt_31_210178:
	mov.f32 	%f476, 0f00000000;   	// 0
$Lt_31_209922:
	.loc	22	586	0
	ld.param.f32 	%f477, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p20, %f477, %f26;
	mov.f32 	%f478, 0f40000000;   	// 2
	mov.f32 	%f479, 0f3f800000;   	// 1
	selp.f32 	%f75, %f478, %f479, %p20;
	div.approx.ftz.f32 	%f76, %f32, %f75;
	mul.ftz.f32 	%f480, %f18, %f476;
	sub.ftz.f32 	%f481, %f18, %f480;
	fma.rn.ftz.f32 	%f482, %f76, %f481, %f18;
	.loc	22	587	0
	mul.ftz.f32 	%f483, %f11, %f476;
	sub.ftz.f32 	%f484, %f11, %f483;
	fma.rn.ftz.f32 	%f485, %f76, %f484, %f11;
	.loc	22	588	0
	div.approx.ftz.f32 	%f343, %f28, %f75;
	mul.ftz.f32 	%f486, %f482, %f476;
	sub.ftz.f32 	%f487, %f482, %f486;
	fma.rn.ftz.f32 	%f18, %f343, %f487, %f482;
	.loc	22	589	0
	mul.ftz.f32 	%f488, %f485, %f476;
	sub.ftz.f32 	%f489, %f485, %f488;
	fma.rn.ftz.f32 	%f11, %f343, %f489, %f485;
$Lt_31_209410:
$Lt_31_207874:
	mov.u32 	%r212, 3;
	setp.ne.s32 	%p101, %r34, %r212;
	@%p101 bra 	$Lt_31_221186;
	.loc	22	595	0
	mov.f32 	%f490, %f25;
	.loc	22	596	0
	mov.f32 	%f491, %f18;
	.loc	22	597	0
	mov.f32 	%f492, %f11;
	.loc	22	122	0
	mov.f32 	%f493, 0f3f728f61;   	// 0.9475
	mul.ftz.f32 	%f101, %f11, %f493;
	mov.f32 	%f494, 0f3ea3b6e9;   	// 0.319755
	mul.ftz.f32 	%f103, %f11, %f494;
	mov.f32 	%f495, 0fbea3b6e9;   	// -0.319755
	fma.rn.ftz.f32 	%f105, %f495, %f18, %f101;
	mov.f32 	%f496, 0fbf728f61;   	// -0.9475
	mul.ftz.f32 	%f497, %f496, %f18;
	sub.ftz.f32 	%f108, %f497, %f103;
	abs.ftz.f32 	%f109, %f105;
	abs.ftz.f32 	%f110, %f108;
	mov.b32 	%r46, %f108;
	and.b32 	%r47, %r46, -2147483648;
	mov.f32 	%f498, 0f00000000;   	// 0
	set.eq.ftz.u32.f32 	%r213, %f109, %f498;
	neg.s32 	%r214, %r213;
	mov.f32 	%f499, 0f00000000;   	// 0
	set.eq.ftz.u32.f32 	%r215, %f110, %f499;
	neg.s32 	%r216, %r215;
	and.b32 	%r217, %r214, %r216;
	mov.u32 	%r218, 0;
	setp.eq.s32 	%p102, %r217, %r218;
	@%p102 bra 	$Lt_31_211202;
	.loc	25	1842	0
	mov.s32 	%r219, 1078530011;
	mov.s32 	%r220, 0;
	mov.b32 	%r221, %f105;
	mov.s32 	%r222, 0;
	setp.lt.s32 	%p103, %r221, %r222;
	selp.s32 	%r223, %r219, %r220, %p103;
	or.b32 	%r224, %r223, %r47;
	mov.b32 	%f113, %r224;
	bra.uni 	$Lt_31_211458;
$Lt_31_211202:
	mov.f32 	%f500, 0f7f800000;   	// 1.#INF
	set.eq.ftz.u32.f32 	%r225, %f109, %f500;
	neg.s32 	%r226, %r225;
	mov.f32 	%f501, 0f7f800000;   	// 1.#INF
	set.eq.ftz.u32.f32 	%r227, %f110, %f501;
	neg.s32 	%r228, %r227;
	and.b32 	%r229, %r226, %r228;
	mov.u32 	%r230, 0;
	setp.eq.s32 	%p104, %r229, %r230;
	@%p104 bra 	$Lt_31_211714;
	.loc	25	1845	0
	mov.s32 	%r231, 1075235812;
	mov.s32 	%r232, 1061752795;
	mov.b32 	%r233, %f105;
	mov.s32 	%r234, 0;
	setp.lt.s32 	%p105, %r233, %r234;
	selp.s32 	%r235, %r231, %r232, %p105;
	or.b32 	%r236, %r235, %r47;
	mov.b32 	%f113, %r236;
	bra.uni 	$Lt_31_211458;
$Lt_31_211714:
	.loc	25	1207	0
	min.ftz.f32 	%f116, %f109, %f110;
	max.ftz.f32 	%f117, %f109, %f110;
	div.full.ftz.f32 	%f118, %f116, %f117;
	mul.ftz.f32 	%f119, %f118, %f118;
	mov.f32 	%f502, 0fbf52c7ea;   	// -0.823363
	mov.f32 	%f503, 0fc0b59883;   	// -5.67487
	fma.rn.ftz.f32 	%f504, %f119, %f502, %f503;
	mov.f32 	%f505, 0fc0d21907;   	// -6.56556
	fma.rn.ftz.f32 	%f506, %f504, %f119, %f505;
	mul.ftz.f32 	%f507, %f119, %f506;
	mul.ftz.f32 	%f126, %f118, %f507;
	.loc	25	1211	0
	mov.f32 	%f508, 0f41355dc0;   	// 11.3354
	add.ftz.f32 	%f509, %f119, %f508;
	mov.f32 	%f510, 0f41e6bd60;   	// 28.8425
	fma.rn.ftz.f32 	%f511, %f509, %f119, %f510;
	mov.f32 	%f512, 0f419d92c8;   	// 19.6967
	fma.rn.ftz.f32 	%f513, %f511, %f119, %f512;
	rcp.approx.ftz.f32 	%f133, %f513;
	.loc	25	1856	0
	setp.lt.ftz.f32 	%p28, %f109, %f110;
	fma.rn.ftz.f32 	%f134, %f126, %f133, %f118;
	mov.f32 	%f514, 0f3fc90fdb;   	// 1.5708
	sub.ftz.f32 	%f136, %f514, %f134;
	selp.f32 	%f137, %f136, %f134, %p28;
	mov.f32 	%f515, 0f40490fdb;   	// 3.14159
	sub.ftz.f32 	%f516, %f515, %f137;
	mov.f32 	%f517, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p106, %f105, %f517;
	selp.f32 	%f518, %f516, %f137, %p106;
	mov.b32 	%r237, %f518;
	or.b32 	%r238, %r47, %r237;
	mov.b32 	%f519, %r238;
	add.ftz.f32 	%f143, %f105, %f108;
	mov.f32 	%f520, 0f7f800000;   	// 1.#INF
	setp.le.ftz.f32 	%p107, %f143, %f520;
	selp.f32 	%f113, %f519, %f143, %p107;
$Lt_31_211458:
$Lt_31_210946:
	.loc	22	179	0
	mov.f32 	%f521, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p31, %f113, %f521;
	mov.f32 	%f522, 0f40c90fdb;   	// 6.28319
	add.ftz.f32 	%f147, %f113, %f522;
	selp.f32 	%f148, %f147, %f113, %p31;
	mov.f32 	%f523, 0f40c90fdb;   	// 6.28319
	div.approx.ftz.f32 	%f150, %f148, %f523;
	mov.f32 	%f524, 0f3f6aacda;   	// 0.9167
	set.ge.ftz.u32.f32 	%r239, %f150, %f524;
	neg.s32 	%r240, %r239;
	mov.f32 	%f525, 0f3f8147ae;   	// 1.01
	set.lt.ftz.u32.f32 	%r241, %f150, %f525;
	neg.s32 	%r242, %r241;
	and.b32 	%r243, %r240, %r242;
	mov.u32 	%r244, 0;
	setp.ne.s32 	%p108, %r243, %r244;
	@%p108 bra 	$Lt_31_95490;
	mov.f32 	%f526, 0f00000000;   	// 0
	set.ge.ftz.u32.f32 	%r245, %f150, %f526;
	neg.s32 	%r246, %r245;
	mov.f32 	%f527, 0f3daa9931;   	// 0.0833
	set.lt.ftz.u32.f32 	%r247, %f150, %f527;
	neg.s32 	%r248, %r247;
	and.b32 	%r249, %r246, %r248;
	mov.u32 	%r250, 0;
	setp.eq.s32 	%p109, %r249, %r250;
	@%p109 bra 	$Lt_31_95746;
$Lt_31_95490:
	mov.f32 	%f155, 0f3f4ccccd;   	// 0.8
	mov.f32 	%f156, 0f3e4ccccd;   	// 0.2
	bra.uni 	$Lt_31_211970;
$Lt_31_95746:
	.loc	22	185	0
	mov.f32 	%f528, 0f3dab6ae7;   	// 0.0837
	set.ge.ftz.u32.f32 	%r251, %f150, %f528;
	neg.s32 	%r252, %r251;
	mov.f32 	%f529, 0f3e802752;   	// 0.2503
	set.lt.ftz.u32.f32 	%r253, %f150, %f529;
	neg.s32 	%r254, %r253;
	and.b32 	%r255, %r252, %r254;
	mov.u32 	%r256, 0;
	setp.eq.s32 	%p110, %r255, %r256;
	@%p110 bra 	$Lt_31_212226;
	mov.f32 	%f155, 0f3f000000;   	// 0.5
	mov.f32 	%f156, 0f3f000000;   	// 0.5
	bra.uni 	$Lt_31_211970;
$Lt_31_212226:
	.loc	22	190	0
	mov.f32 	%f155, 0f3f4ccccd;   	// 0.8
	mov.f32 	%f156, 0f3e4ccccd;   	// 0.2
$Lt_31_211970:
$Lt_31_97794:
	.loc	22	233	0
	mov.f32 	%f530, 0f3f9d70a4;   	// 1.23
	mul.ftz.f32 	%f56, %f10, %f530;
	mov.f32 	%f531, 0f3f5f3cb4;   	// 0.87202
	mul.ftz.f32 	%f58, %f17, %f531;
	mov.f32 	%f532, 0f42c80000;   	// 100
	mul.ftz.f32 	%f35, %f24, %f532;
	mul.ftz.f32 	%f60, %f56, %f56;
	fma.rn.ftz.f32 	%f61, %f58, %f58, %f60;
	sqrt.approx.ftz.f32 	%f62, %f61;
	mov.f32 	%f533, 0f42c80000;   	// 100
	mul.ftz.f32 	%f64, %f62, %f533;
	add.ftz.f32 	%f65, %f64, %f35;
	ld.param.f32 	%f163, [__cudaparm_VideoLimiter_inSignalMax];
	setp.gt.ftz.f32 	%p35, %f65, %f163;
	sub.ftz.f32 	%f164, %f65, %f163;
	mov.f32 	%f534, 0f00000000;   	// 0
	selp.f32 	%f166, %f164, %f534, %p35;
	mov.f32 	%f535, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p111, %f166, %f535;
	@!%p111 bra 	$Lt_31_214530;
	.loc	22	241	0
	mul.ftz.f32 	%f168, %f166, %f156;
	.loc	22	242	0
	mul.ftz.f32 	%f169, %f166, %f155;
	mov.f32 	%f536, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p112, %f64, %f536;
	@!%p112 bra 	$Lt_31_215298;
	.loc	22	249	0
	mov.f32 	%f537, 0f3f800000;   	// 1
	div.approx.ftz.f32 	%f538, %f169, %f64;
	sub.ftz.f32 	%f173, %f537, %f538;
	.loc	22	250	0
	bra.uni 	$Lt_31_215042;
$Lt_31_215298:
	mov.f32 	%f173, 0f3f800000;   	// 1
$Lt_31_215042:
	.loc	22	255	0
	mov.f32 	%f539, 0f3f7d70a4;   	// 0.99
	setp.ge.ftz.f32 	%p113, %f173, %f539;
	selp.f32 	%f168, %f166, %f168, %p113;
	.loc	22	260	0
	mov.f32 	%f540, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f541, %f168, %f540;
	sub.ftz.f32 	%f490, %f25, %f541;
	.loc	22	261	0
	mul.ftz.f32 	%f491, %f18, %f173;
	.loc	22	262	0
	mul.ftz.f32 	%f492, %f11, %f173;
$Lt_31_214530:
	sub.ftz.f32 	%f83, %f35, %f64;
	ld.param.f32 	%f177, [__cudaparm_VideoLimiter_inSignalMin];
	setp.lt.ftz.f32 	%p39, %f83, %f177;
	sub.ftz.f32 	%f178, %f177, %f83;
	mov.f32 	%f542, 0f00000000;   	// 0
	selp.f32 	%f180, %f178, %f542, %p39;
	mov.f32 	%f543, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p114, %f180, %f543;
	@!%p114 bra 	$Lt_31_215554;
	.loc	22	267	0
	mul.ftz.f32 	%f182, %f180, %f156;
	.loc	22	268	0
	mul.ftz.f32 	%f183, %f180, %f155;
	mov.f32 	%f544, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p115, %f64, %f544;
	@!%p115 bra 	$Lt_31_216322;
	.loc	22	275	0
	mov.f32 	%f545, 0f3f800000;   	// 1
	div.approx.ftz.f32 	%f546, %f183, %f64;
	sub.ftz.f32 	%f187, %f545, %f546;
	.loc	22	276	0
	bra.uni 	$Lt_31_216066;
$Lt_31_216322:
	mov.f32 	%f187, 0f3f800000;   	// 1
$Lt_31_216066:
	.loc	22	281	0
	mov.f32 	%f547, 0f3f7d70a4;   	// 0.99
	setp.ge.ftz.f32 	%p116, %f187, %f547;
	selp.f32 	%f182, %f180, %f182, %p116;
	.loc	22	286	0
	mov.f32 	%f548, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f549, %f182, %f548;
	add.ftz.f32 	%f490, %f25, %f549;
	.loc	22	287	0
	mul.ftz.f32 	%f491, %f18, %f187;
	.loc	22	288	0
	mul.ftz.f32 	%f492, %f11, %f187;
$Lt_31_215554:
	.loc	22	613	0
	ld.param.f32 	%f550, [__cudaparm_VideoLimiter_inShadowThresholdSoftness];
	setp.gt.ftz.f32 	%p20, %f550, %f26;
	mov.f32 	%f551, 0f40000000;   	// 2
	mov.f32 	%f552, 0f3f800000;   	// 1
	selp.f32 	%f75, %f551, %f552, %p20;
	div.approx.ftz.f32 	%f76, %f32, %f75;
	div.approx.ftz.f32 	%f343, %f28, %f75;
	add.ftz.f32 	%f553, %f343, %f76;
	sub.ftz.f32 	%f554, %f490, %f25;
	fma.rn.ftz.f32 	%f25, %f553, %f554, %f25;
	.loc	22	614	0
	sub.ftz.f32 	%f555, %f491, %f18;
	fma.rn.ftz.f32 	%f18, %f553, %f555, %f18;
	.loc	22	615	0
	sub.ftz.f32 	%f556, %f492, %f11;
	fma.rn.ftz.f32 	%f11, %f553, %f556, %f11;
	bra.uni 	$Lt_31_221186;
$Lt_31_1538:
	.loc	22	620	0
	ld.param.s32 	%r34, [__cudaparm_VideoLimiter_inReductionAxis];
	mov.s32 	%r257, 0;
	set.eq.u32.s32 	%r258, %r34, %r257;
	neg.s32 	%r259, %r258;
	mov.s32 	%r260, 2;
	set.eq.u32.s32 	%r261, %r34, %r260;
	neg.s32 	%r262, %r261;
	or.b32 	%r263, %r259, %r262;
	mov.u32 	%r264, 0;
	setp.eq.s32 	%p117, %r263, %r264;
	@%p117 bra 	$Lt_31_217602;
	mov.f32 	%f557, 0f42c80000;   	// 100
	mul.ftz.f32 	%f35, %f24, %f557;
	ld.param.f32 	%f36, [__cudaparm_VideoLimiter_inLumaMax];
	setp.gt.ftz.f32 	%p118, %f35, %f36;
	@!%p118 bra 	$Lt_31_217346;
	.loc	22	626	0
	mov.f32 	%f558, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f25, %f36, %f558;
	bra.uni 	$Lt_31_217602;
$Lt_31_217346:
	ld.param.f32 	%f46, [__cudaparm_VideoLimiter_inLumaMin];
	setp.lt.ftz.f32 	%p119, %f35, %f46;
	@!%p119 bra 	$Lt_31_217602;
	.loc	22	630	0
	mov.f32 	%f559, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f25, %f46, %f559;
$Lt_31_217602:
$Lt_31_217090:
$Lt_31_216578:
	sub.u32 	%r265, %r34, 1;
	mov.u32 	%r266, 1;
	setp.gt.u32 	%p120, %r265, %r266;
	@%p120 bra 	$Lt_31_219650;
	mov.f32 	%f560, 0f3f9d70a4;   	// 1.23
	mul.ftz.f32 	%f56, %f10, %f560;
	mov.f32 	%f561, 0f3f5f3cb4;   	// 0.87202
	mul.ftz.f32 	%f58, %f17, %f561;
	mov.f32 	%f562, 0f42c80000;   	// 100
	mul.ftz.f32 	%f35, %f24, %f562;
	mul.ftz.f32 	%f60, %f56, %f56;
	fma.rn.ftz.f32 	%f61, %f58, %f58, %f60;
	sqrt.approx.ftz.f32 	%f62, %f61;
	mov.f32 	%f563, 0f42c80000;   	// 100
	mul.ftz.f32 	%f64, %f62, %f563;
	add.ftz.f32 	%f65, %f64, %f35;
	ld.param.f32 	%f66, [__cudaparm_VideoLimiter_inChromaMax];
	setp.gt.ftz.f32 	%p121, %f65, %f66;
	@!%p121 bra 	$Lt_31_218626;
	mov.f32 	%f564, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p122, %f64, %f564;
	@!%p122 bra 	$Lt_31_219394;
	.loc	27	529	0
	sub.ftz.f32 	%f565, %f65, %f66;
	div.approx.ftz.f32 	%f69, %f565, %f64;
	.loc	22	640	0
	mov.f32 	%f566, 0f3f800000;   	// 1
	sub.ftz.f32 	%f567, %f566, %f69;
	bra.uni 	$Lt_31_219138;
$Lt_31_219394:
	mov.f32 	%f567, 0f3f800000;   	// 1
$Lt_31_219138:
	.loc	22	645	0
	mov.f32 	%f568, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p123, %f567, %f568;
	mov.f32 	%f569, 0f00000000;   	// 0
	selp.f32 	%f570, %f569, %f567, %p123;
	mul.ftz.f32 	%f18, %f570, %f17;
	.loc	22	646	0
	mul.ftz.f32 	%f11, %f570, %f10;
$Lt_31_218626:
	sub.ftz.f32 	%f83, %f35, %f64;
	ld.param.f32 	%f84, [__cudaparm_VideoLimiter_inChromaMin];
	setp.lt.ftz.f32 	%p124, %f83, %f84;
	@!%p124 bra 	$Lt_31_219650;
	mov.f32 	%f571, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p125, %f64, %f571;
	@!%p125 bra 	$Lt_31_220418;
	.loc	27	529	0
	sub.ftz.f32 	%f572, %f84, %f83;
	div.approx.ftz.f32 	%f87, %f572, %f64;
	.loc	22	652	0
	mov.f32 	%f573, 0f3f800000;   	// 1
	sub.ftz.f32 	%f574, %f573, %f87;
	bra.uni 	$Lt_31_220162;
$Lt_31_220418:
	mov.f32 	%f574, 0f3f800000;   	// 1
$Lt_31_220162:
	.loc	22	657	0
	mov.f32 	%f575, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p126, %f574, %f575;
	mov.f32 	%f576, 0f00000000;   	// 0
	selp.f32 	%f577, %f576, %f574, %p126;
	mul.ftz.f32 	%f18, %f577, %f18;
	.loc	22	658	0
	mul.ftz.f32 	%f11, %f577, %f11;
$Lt_31_219650:
$Lt_31_218114:
	mov.u32 	%r267, 3;
	setp.ne.s32 	%p127, %r34, %r267;
	@%p127 bra 	$Lt_31_221186;
	mov.f32 	%f578, 0f3f9d70a4;   	// 1.23
	mul.ftz.f32 	%f56, %f10, %f578;
	mov.f32 	%f579, 0f3f5f3cb4;   	// 0.87202
	mul.ftz.f32 	%f58, %f17, %f579;
	mov.f32 	%f580, 0f42c80000;   	// 100
	mul.ftz.f32 	%f35, %f24, %f580;
	mul.ftz.f32 	%f60, %f56, %f56;
	fma.rn.ftz.f32 	%f61, %f58, %f58, %f60;
	sqrt.approx.ftz.f32 	%f62, %f61;
	mov.f32 	%f581, 0f42c80000;   	// 100
	mul.ftz.f32 	%f64, %f62, %f581;
	add.ftz.f32 	%f65, %f64, %f35;
	sub.ftz.f32 	%f83, %f35, %f64;
	ld.param.f32 	%f163, [__cudaparm_VideoLimiter_inSignalMax];
	setp.gt.ftz.f32 	%p35, %f65, %f163;
	ld.param.f32 	%f177, [__cudaparm_VideoLimiter_inSignalMin];
	setp.lt.ftz.f32 	%p39, %f83, %f177;
	selp.s32 	%r268, 1, 0, %p39;
	selp.s32 	%r269, 1, 0, %p35;
	or.b32 	%r270, %r268, %r269;
	mov.u32 	%r271, 0;
	setp.eq.s32 	%p128, %r270, %r271;
	@%p128 bra 	$Lt_31_221186;
	.loc	22	666	0
	mov.f32 	%f582, %f25;
	.loc	22	667	0
	mov.f32 	%f583, %f18;
	.loc	22	668	0
	mov.f32 	%f584, %f11;
	.loc	22	122	0
	mov.f32 	%f585, 0f3f728f61;   	// 0.9475
	mul.ftz.f32 	%f101, %f11, %f585;
	mov.f32 	%f586, 0f3ea3b6e9;   	// 0.319755
	mul.ftz.f32 	%f103, %f11, %f586;
	mov.f32 	%f587, 0fbea3b6e9;   	// -0.319755
	fma.rn.ftz.f32 	%f105, %f587, %f18, %f101;
	mov.f32 	%f588, 0fbf728f61;   	// -0.9475
	mul.ftz.f32 	%f589, %f588, %f18;
	sub.ftz.f32 	%f108, %f589, %f103;
	abs.ftz.f32 	%f109, %f105;
	abs.ftz.f32 	%f110, %f108;
	mov.b32 	%r46, %f108;
	and.b32 	%r47, %r46, -2147483648;
	mov.f32 	%f590, 0f00000000;   	// 0
	set.eq.ftz.u32.f32 	%r272, %f109, %f590;
	neg.s32 	%r273, %r272;
	mov.f32 	%f591, 0f00000000;   	// 0
	set.eq.ftz.u32.f32 	%r274, %f110, %f591;
	neg.s32 	%r275, %r274;
	and.b32 	%r276, %r273, %r275;
	mov.u32 	%r277, 0;
	setp.eq.s32 	%p129, %r276, %r277;
	@%p129 bra 	$Lt_31_221954;
	.loc	25	1842	0
	mov.s32 	%r278, 1078530011;
	mov.s32 	%r279, 0;
	mov.b32 	%r280, %f105;
	mov.s32 	%r281, 0;
	setp.lt.s32 	%p130, %r280, %r281;
	selp.s32 	%r282, %r278, %r279, %p130;
	or.b32 	%r283, %r282, %r47;
	mov.b32 	%f113, %r283;
	bra.uni 	$Lt_31_222210;
$Lt_31_221954:
	mov.f32 	%f592, 0f7f800000;   	// 1.#INF
	set.eq.ftz.u32.f32 	%r284, %f109, %f592;
	neg.s32 	%r285, %r284;
	mov.f32 	%f593, 0f7f800000;   	// 1.#INF
	set.eq.ftz.u32.f32 	%r286, %f110, %f593;
	neg.s32 	%r287, %r286;
	and.b32 	%r288, %r285, %r287;
	mov.u32 	%r289, 0;
	setp.eq.s32 	%p131, %r288, %r289;
	@%p131 bra 	$Lt_31_222466;
	.loc	25	1845	0
	mov.s32 	%r290, 1075235812;
	mov.s32 	%r291, 1061752795;
	mov.b32 	%r292, %f105;
	mov.s32 	%r293, 0;
	setp.lt.s32 	%p132, %r292, %r293;
	selp.s32 	%r294, %r290, %r291, %p132;
	or.b32 	%r295, %r294, %r47;
	mov.b32 	%f113, %r295;
	bra.uni 	$Lt_31_222210;
$Lt_31_222466:
	.loc	25	1207	0
	min.ftz.f32 	%f116, %f109, %f110;
	max.ftz.f32 	%f117, %f109, %f110;
	div.full.ftz.f32 	%f118, %f116, %f117;
	mul.ftz.f32 	%f119, %f118, %f118;
	mov.f32 	%f594, 0fbf52c7ea;   	// -0.823363
	mov.f32 	%f595, 0fc0b59883;   	// -5.67487
	fma.rn.ftz.f32 	%f596, %f119, %f594, %f595;
	mov.f32 	%f597, 0fc0d21907;   	// -6.56556
	fma.rn.ftz.f32 	%f598, %f596, %f119, %f597;
	mul.ftz.f32 	%f599, %f119, %f598;
	mul.ftz.f32 	%f126, %f118, %f599;
	.loc	25	1211	0
	mov.f32 	%f600, 0f41355dc0;   	// 11.3354
	add.ftz.f32 	%f601, %f119, %f600;
	mov.f32 	%f602, 0f41e6bd60;   	// 28.8425
	fma.rn.ftz.f32 	%f603, %f601, %f119, %f602;
	mov.f32 	%f604, 0f419d92c8;   	// 19.6967
	fma.rn.ftz.f32 	%f605, %f603, %f119, %f604;
	rcp.approx.ftz.f32 	%f133, %f605;
	.loc	25	1856	0
	setp.lt.ftz.f32 	%p28, %f109, %f110;
	fma.rn.ftz.f32 	%f134, %f126, %f133, %f118;
	mov.f32 	%f606, 0f3fc90fdb;   	// 1.5708
	sub.ftz.f32 	%f136, %f606, %f134;
	selp.f32 	%f137, %f136, %f134, %p28;
	mov.f32 	%f607, 0f40490fdb;   	// 3.14159
	sub.ftz.f32 	%f608, %f607, %f137;
	mov.f32 	%f609, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p133, %f105, %f609;
	selp.f32 	%f610, %f608, %f137, %p133;
	mov.b32 	%r296, %f610;
	or.b32 	%r297, %r47, %r296;
	mov.b32 	%f611, %r297;
	add.ftz.f32 	%f143, %f105, %f108;
	mov.f32 	%f612, 0f7f800000;   	// 1.#INF
	setp.le.ftz.f32 	%p134, %f143, %f612;
	selp.f32 	%f113, %f611, %f143, %p134;
$Lt_31_222210:
$Lt_31_221698:
	.loc	22	179	0
	mov.f32 	%f613, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p31, %f113, %f613;
	mov.f32 	%f614, 0f40c90fdb;   	// 6.28319
	add.ftz.f32 	%f147, %f113, %f614;
	selp.f32 	%f148, %f147, %f113, %p31;
	mov.f32 	%f615, 0f40c90fdb;   	// 6.28319
	div.approx.ftz.f32 	%f150, %f148, %f615;
	mov.f32 	%f616, 0f3f6aacda;   	// 0.9167
	set.ge.ftz.u32.f32 	%r298, %f150, %f616;
	neg.s32 	%r299, %r298;
	mov.f32 	%f617, 0f3f8147ae;   	// 1.01
	set.lt.ftz.u32.f32 	%r300, %f150, %f617;
	neg.s32 	%r301, %r300;
	and.b32 	%r302, %r299, %r301;
	mov.u32 	%r303, 0;
	setp.ne.s32 	%p135, %r302, %r303;
	@%p135 bra 	$Lt_31_121090;
	mov.f32 	%f618, 0f00000000;   	// 0
	set.ge.ftz.u32.f32 	%r304, %f150, %f618;
	neg.s32 	%r305, %r304;
	mov.f32 	%f619, 0f3daa9931;   	// 0.0833
	set.lt.ftz.u32.f32 	%r306, %f150, %f619;
	neg.s32 	%r307, %r306;
	and.b32 	%r308, %r305, %r307;
	mov.u32 	%r309, 0;
	setp.eq.s32 	%p136, %r308, %r309;
	@%p136 bra 	$Lt_31_121346;
$Lt_31_121090:
	mov.f32 	%f155, 0f3f4ccccd;   	// 0.8
	mov.f32 	%f156, 0f3e4ccccd;   	// 0.2
	bra.uni 	$Lt_31_222722;
$Lt_31_121346:
	.loc	22	185	0
	mov.f32 	%f620, 0f3dab6ae7;   	// 0.0837
	set.ge.ftz.u32.f32 	%r310, %f150, %f620;
	neg.s32 	%r311, %r310;
	mov.f32 	%f621, 0f3e802752;   	// 0.2503
	set.lt.ftz.u32.f32 	%r312, %f150, %f621;
	neg.s32 	%r313, %r312;
	and.b32 	%r314, %r311, %r313;
	mov.u32 	%r315, 0;
	setp.eq.s32 	%p137, %r314, %r315;
	@%p137 bra 	$Lt_31_222978;
	mov.f32 	%f155, 0f3f000000;   	// 0.5
	mov.f32 	%f156, 0f3f000000;   	// 0.5
	bra.uni 	$Lt_31_222722;
$Lt_31_222978:
	.loc	22	190	0
	mov.f32 	%f155, 0f3f4ccccd;   	// 0.8
	mov.f32 	%f156, 0f3e4ccccd;   	// 0.2
$Lt_31_222722:
$Lt_31_123394:
	.loc	22	233	0
	sub.ftz.f32 	%f164, %f65, %f163;
	mov.f32 	%f622, 0f00000000;   	// 0
	selp.f32 	%f166, %f164, %f622, %p35;
	mov.f32 	%f623, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p138, %f166, %f623;
	@!%p138 bra 	$Lt_31_225282;
	.loc	22	241	0
	mul.ftz.f32 	%f168, %f166, %f156;
	.loc	22	242	0
	mul.ftz.f32 	%f169, %f166, %f155;
	mov.f32 	%f624, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p139, %f64, %f624;
	@!%p139 bra 	$Lt_31_226050;
	.loc	22	249	0
	mov.f32 	%f625, 0f3f800000;   	// 1
	div.approx.ftz.f32 	%f626, %f169, %f64;
	sub.ftz.f32 	%f173, %f625, %f626;
	.loc	22	250	0
	bra.uni 	$Lt_31_225794;
$Lt_31_226050:
	mov.f32 	%f173, 0f3f800000;   	// 1
$Lt_31_225794:
	.loc	22	255	0
	mov.f32 	%f627, 0f3f7d70a4;   	// 0.99
	setp.ge.ftz.f32 	%p140, %f173, %f627;
	selp.f32 	%f168, %f166, %f168, %p140;
	.loc	22	260	0
	mov.f32 	%f628, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f629, %f168, %f628;
	sub.ftz.f32 	%f582, %f25, %f629;
	.loc	22	261	0
	mul.ftz.f32 	%f583, %f18, %f173;
	.loc	22	262	0
	mul.ftz.f32 	%f584, %f11, %f173;
$Lt_31_225282:
	sub.ftz.f32 	%f178, %f177, %f83;
	mov.f32 	%f630, 0f00000000;   	// 0
	selp.f32 	%f180, %f178, %f630, %p39;
	mov.f32 	%f631, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p141, %f180, %f631;
	@!%p141 bra 	$Lt_31_226306;
	.loc	22	267	0
	mul.ftz.f32 	%f182, %f180, %f156;
	.loc	22	268	0
	mul.ftz.f32 	%f183, %f180, %f155;
	mov.f32 	%f632, 0f00000000;   	// 0
	setp.neu.ftz.f32 	%p142, %f64, %f632;
	@!%p142 bra 	$Lt_31_227074;
	.loc	22	275	0
	mov.f32 	%f633, 0f3f800000;   	// 1
	div.approx.ftz.f32 	%f634, %f183, %f64;
	sub.ftz.f32 	%f187, %f633, %f634;
	.loc	22	276	0
	bra.uni 	$Lt_31_226818;
$Lt_31_227074:
	mov.f32 	%f187, 0f3f800000;   	// 1
$Lt_31_226818:
	.loc	22	281	0
	mov.f32 	%f635, 0f3f7d70a4;   	// 0.99
	setp.ge.ftz.f32 	%p143, %f187, %f635;
	selp.f32 	%f182, %f180, %f182, %p143;
	.loc	22	286	0
	mov.f32 	%f636, 0f42c80000;   	// 100
	div.approx.ftz.f32 	%f637, %f182, %f636;
	add.ftz.f32 	%f582, %f25, %f637;
	.loc	22	287	0
	mul.ftz.f32 	%f583, %f18, %f187;
	.loc	22	288	0
	mul.ftz.f32 	%f584, %f11, %f187;
$Lt_31_226306:
	.loc	22	684	0
	mov.f32 	%f25, %f582;
	.loc	22	685	0
	mov.f32 	%f18, %f583;
	.loc	22	686	0
	mov.f32 	%f11, %f584;
$Lt_31_221186:
$Lt_31_220674:
$Lt_31_514:
	.loc	22	693	0
	ld.const.f32 	%f638, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f639, %f638, %f18;
	ld.const.f32 	%f640, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f641, %f640, %f18;
	ld.const.f32 	%f642, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f643, %f642, %f18;
	ld.const.f32 	%f644, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f645, %f644, %f25, %f639;
	ld.const.f32 	%f646, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f647, %f646, %f25, %f641;
	ld.const.f32 	%f648, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f649, %f648, %f25, %f643;
	ld.const.f32 	%f650, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f651, %f650, %f11, %f645;
	ld.const.f32 	%f652, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f653, %f652, %f11, %f647;
	ld.const.f32 	%f654, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f655, %f654, %f11, %f649;
	@!%p2 bra 	$Lt_31_227586;
	.loc	20	126	0
	mul.lo.u64 	%rd7, %rd1, 8;
	add.u64 	%rd8, %rd2, %rd7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f651;
	mov.b32		%r316, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f653;
	mov.b32		%r317, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f655;
	mov.b32		%r318, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r319, %b1; }
	st.global.v4.u16 	[%rd8+0], {%r316,%r317,%r318,%r319};
	.loc	22	693	0
	bra.uni 	$Lt_31_227330;
$Lt_31_227586:
	.loc	20	126	0
	mul.lo.u64 	%rd9, %rd1, 16;
	add.u64 	%rd10, %rd2, %rd9;
	st.global.v4.f32 	[%rd10+0], {%f651,%f653,%f655,%f4};
$Lt_31_227330:
$Lt_31_174594:
	.loc	22	695	0
	exit;
$LDWend_VideoLimiter:
	} // VideoLimiter
	// ---------------------------------------------------------------------
	// Lookup tables in the .const state space, emitted as raw bytes.
	// The numbers in the comments are the initializer bytes read as
	// little-endian IEEE-754 binary32, shown three per line in storage order
	// (rounded, "~" = approximate).  The VideoLimiter kernel above reads a
	// same-style table (k601YPbPr_To_RGB32f) with ld.const.f32 at byte
	// offsets 0..32 in steps of 4.
	// NOTE(review): the names suggest 3x3 colour-conversion matrices; which
	// kernels consume each table is not visible in this part of the file.
	// ---------------------------------------------------------------------
	// kRGB32f_To_601YCbCr:
	//   [  65.481   128.553    24.966 ]
	//   [ -37.797   -74.203   112.0   ]
	//   [ 112.0     -93.786   -18.214 ]
	.const .align 4 .b8 kRGB32f_To_601YCbCr[36] = {70,246,130,66,145,141,0,67,94,186,199,65,33,48,23,194,240,103,148,194,0,0,224,66,0,0,224,66,111,146,187,194,70,182,145,193};
	// k601YCbCr_To_RGB32f:
	//   [ ~4.566e-3    0.0         ~6.259e-3 ]
	//   [ ~4.566e-3   ~-1.534e-3   ~-3.188e-3 ]
	//   [ ~4.566e-3   ~7.911e-3     0.0      ]
	.const .align 4 .b8 k601YCbCr_To_RGB32f[36] = {37,160,149,59,0,0,0,0,182,23,205,59,37,160,149,59,40,15,201,186,156,239,80,187,37,160,149,59,236,155,1,60,0,0,0,0};
	// kRGB8u_To_601YCbCr:
	//   [ ~0.2568   ~0.5041   ~0.0979 ]
	//   [ ~-0.1482  ~-0.2910  ~0.4392 ]
	//   [ ~0.4392   ~-0.3678  ~-0.0714 ]
	.const .align 4 .b8 kRGB8u_To_601YCbCr[36] = {219,121,131,62,152,14,1,63,18,131,200,61,174,199,23,190,238,252,148,190,197,224,224,62,197,224,224,62,217,78,188,190,174,71,146,189};
	// k601YCbCr_To_RGB8u:
	//   [ ~1.1644    0.0      ~1.5960 ]
	//   [ ~1.1644   ~-0.3918  ~-0.8130 ]
	//   [ ~1.1644   ~2.0172    0.0    ]
	.const .align 4 .b8 k601YCbCr_To_RGB8u[36] = {127,10,149,63,0,0,0,0,160,74,204,63,127,10,149,63,254,148,200,190,184,30,80,191,127,10,149,63,78,26,1,64,0,0,0,0};
	// kRGB8u_To_601YCbCrFullRange:
	//   [ ~0.2990   ~0.5870   ~0.1140 ]
	//   [ ~-0.1681  ~-0.3300  ~0.4980 ]
	//   [ ~0.4980   ~-0.4170  ~-0.0810 ]
	.const .align 4 .b8 kRGB8u_To_601YCbCrFullRange[36] = {135,22,153,62,162,69,22,63,213,120,233,61,166,27,44,190,39,241,168,190,250,254,254,62,250,254,254,62,43,135,213,190,59,223,165,189};
	// k601YCbCrFullRange_To_RGB8u:
	//   [ 1.0    0.0      ~1.3965 ]
	//   [ 1.0   ~-0.3428  ~-0.7113 ]
	//   [ 1.0   ~1.7651    0.0    ]
	.const .align 4 .b8 k601YCbCrFullRange_To_RGB8u[36] = {0,0,128,63,0,0,0,0,72,193,178,63,0,0,128,63,143,130,175,190,225,26,54,191,0,0,128,63,20,238,225,63,0,0,0,0};
	// kRGB32f_To_601YCbCrFullRange:
	//   [  ~76.245  ~149.685   ~29.070 ]
	//   [ ~-42.859  ~-84.141   127.0   ]
	//   [  127.0   ~-106.347  ~-20.653 ]
	.const .align 4 .b8 kRGB32f_To_601YCbCrFullRange[36] = {113,125,152,66,92,175,21,67,92,143,232,65,158,111,43,194,49,72,168,194,0,0,254,66,0,0,254,66,170,177,212,194,88,57,165,193};
	// k601YCbCrFullRange_To_RGB32f:
	//   [ ~3.922e-3    0.0         ~5.477e-3 ]
	//   [ ~3.922e-3   ~-1.344e-3   ~-2.790e-3 ]
	//   [ ~3.922e-3   ~6.922e-3     0.0      ]
	.const .align 4 .b8 k601YCbCrFullRange_To_RGB32f[36] = {129,128,128,59,0,0,0,0,188,116,179,59,129,128,128,59,194,50,176,186,179,209,54,187,129,128,128,59,229,208,226,59,0,0,0,0};
	// kRGB32f_To_709YPbPr:
	//   [ ~0.2126   ~0.7152   ~0.0722 ]
	//   [ ~-0.1146  ~-0.3854   0.5    ]
	//   [  0.5      ~-0.4542  ~-0.0458 ]
	.const .align 4 .b8 kRGB32f_To_709YPbPr[36] = {208,179,89,62,89,23,55,63,152,221,147,61,186,164,234,189,210,86,197,190,0,0,0,63,0,0,0,63,190,134,232,190,16,202,59,189};
	// k709YPbPr_To_RGB32f:
	//   [ 1.0    0.0      ~1.5748 ]
	//   [ 1.0   ~-0.1873  ~-0.4681 ]
	//   [ 1.0   ~1.8556    0.0    ]
	.const .align 4 .b8 k709YPbPr_To_RGB32f[36] = {0,0,128,63,0,0,0,0,12,147,201,63,0,0,128,63,221,209,63,190,243,173,239,190,0,0,128,63,77,132,237,63,0,0,0,0};
	// kRGB32f_To_709YCbCr:
	//   [  ~46.559  ~156.629   ~15.812 ]
	//   [ ~-25.664  ~-86.336   112.0   ]
	//   [  112.0   ~-101.730  ~-10.270 ]
	.const .align 4 .b8 kRGB32f_To_709YCbCr[36] = {106,60,58,66,6,161,28,67,244,253,124,65,223,79,205,193,8,172,172,194,0,0,224,66,0,0,224,66,195,117,203,194,236,81,36,193};
	// k709YCbCr_To_RGB32f:
	//   [ ~4.566e-3    0.0         ~7.031e-3 ]
	//   [ ~4.566e-3   ~-8.363e-4   ~-2.090e-3 ]
	//   [ ~4.566e-3   ~8.284e-3     0.0      ]
	.const .align 4 .b8 k709YCbCr_To_RGB32f[36] = {37,160,149,59,0,0,0,0,239,94,230,59,37,160,149,59,33,57,91,186,178,245,8,187,37,160,149,59,82,185,7,60,0,0,0,0};
	// kRGB8u_To_709YCbCr: nine little-endian IEEE-754 binary32 values, shown
	// three per line in storage order (rounded):
	//   [ ~0.1826   ~0.6142   ~0.0620 ]
	//   [ ~-0.1006  ~-0.3386  ~0.4392 ]
	//   [ ~0.4392   ~-0.3989  ~-0.0403 ]
	// FIX: the 4th value (bytes 12..15) was emitted as 147,24,206,61
	// (0x3DCE1893 = +0.1006).  Its sign bit was wrong:
	//   * kRGB32f_To_709YCbCr stores 223,79,205,193 (0xC1CD4FDF = -25.664) in
	//     the same slot, and every other entry of this table equals the
	//     corresponding kRGB32f_To_709YCbCr entry divided by 255
	//     (-25.664 / 255 = -0.1006);
	//   * with +0.1006 the second line summed to ~+0.2012, whereas the third
	//     line of this table and the second line of kRGB8u_To_601YCbCr sum to ~0.
	// The high byte is therefore 189 (0xBD) instead of 61 (0x3D).
	// NOTE(review): this file is compiler output; the same constant should be
	// corrected in the originating CUDA source so a rebuild keeps the fix.
	.const .align 4 .b8 kRGB8u_To_709YCbCr[36] = {207,247,58,62,53,62,29,63,231,251,125,61,147,24,206,189,23,89,173,190,197,224,224,62,197,224,224,62,12,66,204,190,195,245,36,189};
	// ---------------------------------------------------------------------
	// Lookup tables in the .const state space, emitted as raw bytes.
	// The numbers in the comments are the initializer bytes read as
	// little-endian IEEE-754 binary32, shown in storage order (rounded,
	// "~" = approximate).
	// NOTE(review): the names suggest 3x3 colour-conversion matrices and
	// 3-element offset vectors; which kernels consume each table is not
	// visible in this part of the file.
	// ---------------------------------------------------------------------
	// k709YCbCr_To_RGB8u:
	//   [ ~1.1644    0.0       ~1.7927 ]
	//   [ ~1.1644   ~-0.21325  ~-0.5329 ]
	//   [ ~1.1644   ~2.1124     0.0    ]
	.const .align 4 .b8 k709YCbCr_To_RGB8u[36] = {127,10,149,63,0,0,0,0,147,120,229,63,127,10,149,63,53,94,90,190,205,108,8,191,127,10,149,63,154,49,7,64,0,0,0,0};
	// k709YCbCr_To_601YCbCr:
	//   [ 1.0   ~0.0993   ~0.1917 ]
	//   [ 0.0   ~0.9899   ~-0.1107 ]
	//   [ 0.0   ~-0.0725  ~0.9834 ]
	.const .align 4 .b8 k709YCbCr_To_601YCbCr[36] = {0,0,128,63,23,100,203,61,1,77,68,62,0,0,0,0,18,103,125,63,10,158,226,189,0,0,0,0,61,98,148,189,249,191,123,63};
	// k601YCbCr_To_709YCbCr:
	//   [ 1.0   ~-0.11555  ~-0.2079 ]
	//   [ 0.0   ~1.0186    ~0.1146 ]
	//   [ 0.0   ~0.0750    ~1.0253 ]
	.const .align 4 .b8 k601YCbCr_To_709YCbCr[36] = {0,0,128,63,122,165,236,189,179,237,84,190,0,0,0,0,204,98,130,63,216,188,234,61,0,0,0,0,74,179,153,61,234,61,131,63};
	// kYCbCrOffset: three values { 16.0, 128.0, 128.0 }.
	.const .align 4 .b8 kYCbCrOffset[12] = {0,0,128,65,0,0,0,67,0,0,0,67};
	// kYCbCrFullRangeOffset: three values { 0.0, 128.0, 128.0 }.
	.const .align 4 .b8 kYCbCrFullRangeOffset[12] = {0,0,0,0,0,0,0,67,0,0,0,67};
	// kRGB32f_To_YIQ:
	//   [ ~0.2990   ~0.5870   ~0.1140 ]
	//   [ ~0.5957   ~-0.2745  ~-0.3213 ]
	//   [ ~0.2115   ~-0.5226  ~0.3111 ]
	.const .align 4 .b8 kRGB32f_To_YIQ[36] = {135,22,153,62,162,69,22,63,213,120,233,61,216,128,24,63,27,133,140,190,149,124,164,190,236,135,88,62,134,200,5,191,22,77,159,62};
	// kYIQ_To_RGB32f:
	//   [ 1.0   ~0.9563   ~0.6210 ]
	//   [ 1.0   ~-0.2721  ~-0.6474 ]
	//   [ 1.0   ~-1.1070  ~1.7046 ]
	.const .align 4 .b8 kYIQ_To_RGB32f[36] = {0,0,128,63,20,208,116,63,219,249,30,63,0,0,128,63,177,80,139,190,2,188,37,191,0,0,128,63,45,178,141,191,85,48,218,63};

