	.version 2.2
	.target sm_20
	// compiled with ../../../External/3rdParty/NVIDIA/CUDA/win/bin/../open64/lib//be.exe
	// nvopencc 3.2 built on 2010-11-04

	.visible .func (.param .f32 __cudaretf__Z22CalculateShadowsWeightfff) _Z22CalculateShadowsWeightfff (.param .f32 __cudaparmf1__Z22CalculateShadowsWeightfff, .param .f32 __cudaparmf2__Z22CalculateShadowsWeightfff, .param .f32 __cudaparmf3__Z22CalculateShadowsWeightfff)

	.visible .func (.param .f32 __cudaretf__Z25CalculateHighlightsWeightfff) _Z25CalculateHighlightsWeightfff (.param .f32 __cudaparmf1__Z25CalculateHighlightsWeightfff, .param .f32 __cudaparmf2__Z25CalculateHighlightsWeightfff, .param .f32 __cudaparmf3__Z25CalculateHighlightsWeightfff)

	.visible .func (.param .s32 __cudaretf__Z15IntegerMultiplyii) _Z15IntegerMultiplyii (.param .s32 __cudaparmf1__Z15IntegerMultiplyii, .param .s32 __cudaparmf2__Z15IntegerMultiplyii)

	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelXv) _Z17Standard2DKernelXv ()

	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelYv) _Z17Standard2DKernelYv ()

	.visible .func (.param .align 16 .b8 __cudaretf__Z13Half4ToFloat47ushort4[16]) _Z13Half4ToFloat47ushort4 (.param .align 8 .b8 __cudaparmf1__Z13Half4ToFloat47ushort4[8])

	.visible .func (.param .align 8 .b8 __cudaretf__Z13Float4ToHalf46float4[8]) _Z13Float4ToHalf46float4 (.param .align 16 .b8 __cudaparmf1__Z13Float4ToHalf46float4[16])

	.visible .func (.param .u32 __cudaretf__Z4Mix3RjS_S_) _Z4Mix3RjS_S_ (.param .u64 __cudaparmf1__Z4Mix3RjS_S_, .param .u64 __cudaparmf2__Z4Mix3RjS_S_, .param .u64 __cudaparmf3__Z4Mix3RjS_S_)

	.visible .func (.param .s32 __cudaretf__Z4Randj) _Z4Randj (.param .u32 __cudaparmf1__Z4Randj)

	.visible .func (.param .s32 __cudaretf__Z6Rand2Djjj) _Z6Rand2Djjj (.param .u32 __cudaparmf1__Z6Rand2Djjj, .param .u32 __cudaparmf2__Z6Rand2Djjj, .param .u32 __cudaparmf3__Z6Rand2Djjj)

	.visible .func (.param .s32 __cudaretf__Z6Rand2Dj) _Z6Rand2Dj (.param .u32 __cudaparmf1__Z6Rand2Dj)

	.visible .func (.param .align 8 .b8 __cudaretf__Z6Read2DI7ushort4ET_PKS1_iii[8]) _Z6Read2DI7ushort4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI7ushort4ET_PKS1_iii)

	.visible .func (.param .align 16 .b8 __cudaretf__Z6Read2DI6float4ET_PKS1_iii[16]) _Z6Read2DI6float4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI6float4ET_PKS1_iii)

	.visible .func _Z7Write2DI7ushort4EvT_PS1_iii (.param .align 8 .b8 __cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii[8], .param .u64 __cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii)

	.visible .func _Z7Write2DI6float4EvT_PS1_iii (.param .align 16 .b8 __cudaparmf1__Z7Write2DI6float4EvT_PS1_iii[16], .param .u64 __cudaparmf2__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI6float4EvT_PS1_iii)

	.visible .func (.param .align 16 .b8 __cudaretf__Z18UnpremultiplyPixel8PixelRGB[16]) _Z18UnpremultiplyPixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z18UnpremultiplyPixel8PixelRGB[16])

	.visible .func (.param .f32 __cudaretf__Z13ToLinearColorf) _Z13ToLinearColorf (.param .f32 __cudaparmf1__Z13ToLinearColorf)

	.visible .func (.param .f32 __cudaretf__Z15FromLinearColorf) _Z15FromLinearColorf (.param .f32 __cudaparmf1__Z15FromLinearColorf)

	.visible .func (.param .align 16 .b8 __cudaretf__Z25PremultiplyLinearizePixel8PixelRGB[16]) _Z25PremultiplyLinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16]) _Z29UnpremultiplyUnlinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z20PremultiplyLinearize6float4[16]) _Z20PremultiplyLinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z20PremultiplyLinearize6float4[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z24UnpremultiplyUnlinearize6float4[16]) _Z24UnpremultiplyUnlinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z24UnpremultiplyUnlinearize6float4[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_[16]) _Z18SwapComponentOrderI6float4ET_RKS1_ (.param .u64 __cudaparmf1__Z18SwapComponentOrderI6float4ET_RKS1_)

	.visible .func (.param .align 16 .b8 __cudaretf__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow[16]) _Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow (.param .align 16 .b8 __cudaparmf1__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow[16], .param .f32 __cudaparmf2__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow, .param .u64 __cudaparmf3__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow, .param .u64 __cudaparmf4__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow, .param .u64 __cudaparmf5__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow)

	.visible .func (.param .f32 __cudaretf__Z4LERPIfET_S0_S0_S0_) _Z4LERPIfET_S0_S0_S0_ (.param .f32 __cudaparmf1__Z4LERPIfET_S0_S0_S0_, .param .f32 __cudaparmf2__Z4LERPIfET_S0_S0_S0_, .param .f32 __cudaparmf3__Z4LERPIfET_S0_S0_S0_)

	.visible .func (.param .f32 __cudaretf__Z6Read2DIfET_PKS0_iii) _Z6Read2DIfET_PKS0_iii (.param .u64 __cudaparmf1__Z6Read2DIfET_PKS0_iii, .param .s32 __cudaparmf2__Z6Read2DIfET_PKS0_iii, .param .s32 __cudaparmf3__Z6Read2DIfET_PKS0_iii, .param .s32 __cudaparmf4__Z6Read2DIfET_PKS0_iii)

	//-----------------------------------------------------------
	// Compiling C:/Users/dvaeng/AppData/Local/Temp/tmpxft_00003908_00000000-11_RGBColorCorrector.cpp3.i (C:/Users/dvaeng/AppData/Local/Temp/ccBI#.a15404)
	//-----------------------------------------------------------

	//-----------------------------------------------------------
	// Options:
	//-----------------------------------------------------------
	//  Target:ptx, ISA:sm_20, Endian:little, Pointer Size:64
	//  -O3	(Optimization level)
	//  -g0	(Debug level)
	//  -m2	(Report advisories)
	//-----------------------------------------------------------

	.file	1	"C:/Users/dvaeng/AppData/Local/Temp/tmpxft_00003908_00000000-10_RGBColorCorrector.cudafe2.gpu"
	.file	2	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/PixelFormat.h"
	.file	3	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/RGBColorCorrector.h"
	.file	4	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/PixelRGB.h"
	.file	5	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/PixelYUV.h"
	.file	6	"C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\include\crtdefs.h"
	.file	7	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\crt/device_runtime.h"
	.file	8	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\host_defines.h"
	.file	9	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\builtin_types.h"
	.file	10	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\device_types.h"
	.file	11	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\driver_types.h"
	.file	12	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\surface_types.h"
	.file	13	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\texture_types.h"
	.file	14	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\vector_types.h"
	.file	15	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\builtin_types.h"
	.file	16	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\host_defines.h"
	.file	17	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\device_launch_parameters.h"
	.file	18	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\crt\storage_class.h"
	.file	19	"C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\include\time.h"
	.file	20	"c:\Mulder64\shared\adobe\MediaCore\Display\Inc\CUDA/Effects/ColorCorrector_Common.h"
	.file	21	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/Utils.h"
	.file	22	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/VectorUtils.h"
	.file	23	"c:/Mulder64/shared/adobe/MediaCore/Display/Src/CUDA/Effects/RGBColorCorrector.cu"
	.file	24	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/Numeric.h"
	.file	25	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\common_functions.h"
	.file	26	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_functions.h"
	.file	27	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_constants.h"
	.file	28	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\device_functions.h"
	.file	29	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_11_atomic_functions.h"
	.file	30	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_12_atomic_functions.h"
	.file	31	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_13_double_functions.h"
	.file	32	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_20_atomic_functions.h"
	.file	33	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_20_intrinsics.h"
	.file	34	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\surface_functions.h"
	.file	35	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\texture_fetch_functions.h"
	.file	36	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_functions_dbl_ptx3.h"
	.file	37	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/ColorSpaceConvert.h"


	// _Z22CalculateShadowsWeightfff (f32 parm1, f32 parm2, f32 parm3) -> f32
	// Piecewise-linear weight of parm1 against the interval [parm2, parm2 + parm3]
	// (generated from file 20, lines 34-45, per the .loc directives):
	//   parm1 <= parm2          -> 1.0
	//   parm1 >= parm2 + parm3  -> 0.0
	//   otherwise               -> ((parm2 + parm3) - parm1) / parm3
	// The in-between branch used to return (parm1 - parm2) / parm3, which rises
	// from 0 to 1 and therefore jumped at both ends of the interval (1 -> ~0 just
	// above parm2, ~1 -> 0 at parm2 + parm3). It now falls from 1 to 0 so the
	// three pieces join, matching the continuous ramp used by
	// _Z25CalculateHighlightsWeightfff.
	// NOTE(review): this file is compiler output; the C source it was generated
	// from needs the same change or a rebuild will undo it.
	.visible .func (.param .f32 __cudaretf__Z22CalculateShadowsWeightfff) _Z22CalculateShadowsWeightfff (.param .f32 __cudaparmf1__Z22CalculateShadowsWeightfff, .param .f32 __cudaparmf2__Z22CalculateShadowsWeightfff, .param .f32 __cudaparmf3__Z22CalculateShadowsWeightfff)
	{
	.reg .f32 %x, %lo, %span, %hi, %dist, %weight;
	.reg .pred %below, %above;
	.loc	20	34	0
$LDWbegin__Z22CalculateShadowsWeightfff:
	ld.param.f32 	%x, [__cudaparmf1__Z22CalculateShadowsWeightfff];
	ld.param.f32 	%lo, [__cudaparmf2__Z22CalculateShadowsWeightfff];
	ld.param.f32 	%span, [__cudaparmf3__Z22CalculateShadowsWeightfff];
	.loc	20	35	0
	setp.le.ftz.f32 	%below, %x, %lo;
	@!%below bra 	$Lt_0_1282;
	.loc	20	37	0
	// At or below the lower bound: full weight.
	mov.f32 	%weight, 0f3f800000;     	// 1
	bra.uni 	$LBB6__Z22CalculateShadowsWeightfff;
$Lt_0_1282:
	.loc	20	39	0
	add.ftz.f32 	%hi, %lo, %span;
	setp.ge.ftz.f32 	%above, %x, %hi;
	@!%above bra 	$Lt_0_1538;
	.loc	20	41	0
	// At or above the upper bound: no weight.
	mov.f32 	%weight, 0f00000000;     	// 0
	bra.uni 	$LBB6__Z22CalculateShadowsWeightfff;
$Lt_0_1538:
	.loc	20	45	0
	// Strictly inside the interval, so %span is non-zero here unless an input
	// is NaN. Distance to the upper bound over the span: ~1 near %lo, ~0 near %hi.
	sub.ftz.f32 	%dist, %hi, %x;
	div.approx.ftz.f32 	%weight, %dist, %span;
$LBB6__Z22CalculateShadowsWeightfff:
	st.param.f32 	[__cudaretf__Z22CalculateShadowsWeightfff], %weight;
	ret;
$LDWend__Z22CalculateShadowsWeightfff:
	} // _Z22CalculateShadowsWeightfff

	// _Z25CalculateHighlightsWeightfff (f32 parm1, f32 parm2, f32 parm3) -> f32
	// Piecewise-linear weight of parm1 against the interval [parm2 - parm3, parm2]
	// (generated from file 20, lines 52-63, per the .loc directives):
	//   (parm2 - parm3) > parm1 -> 0.0
	//   parm1 >= parm2          -> 1.0
	//   otherwise               -> (parm1 - (parm2 - parm3)) / parm3
	// The upper test used to be a strict '>'. With parm3 == 0 and parm1 == parm2
	// neither clamp fired and the ramp evaluated 0 / 0 = NaN. Using '>=' routes
	// that case to 1.0; for parm3 > 0 the ramp already evaluates to ~1.0 at
	// parm1 == parm2, so other results are unchanged.
	// NOTE(review): this file is compiler output; the C source it was generated
	// from needs the same change or a rebuild will undo it.
	.visible .func (.param .f32 __cudaretf__Z25CalculateHighlightsWeightfff) _Z25CalculateHighlightsWeightfff (.param .f32 __cudaparmf1__Z25CalculateHighlightsWeightfff, .param .f32 __cudaparmf2__Z25CalculateHighlightsWeightfff, .param .f32 __cudaparmf3__Z25CalculateHighlightsWeightfff)
	{
	.reg .f32 %x, %hi, %span, %lo, %dist, %weight;
	.reg .pred %below, %above;
	.loc	20	52	0
$LDWbegin__Z25CalculateHighlightsWeightfff:
	ld.param.f32 	%x, [__cudaparmf1__Z25CalculateHighlightsWeightfff];
	ld.param.f32 	%hi, [__cudaparmf2__Z25CalculateHighlightsWeightfff];
	ld.param.f32 	%span, [__cudaparmf3__Z25CalculateHighlightsWeightfff];
	.loc	20	53	0
	sub.ftz.f32 	%lo, %hi, %span;
	setp.gt.ftz.f32 	%below, %lo, %x;
	@!%below bra 	$Lt_1_1282;
	.loc	20	55	0
	// Below the lower bound: no weight.
	mov.f32 	%weight, 0f00000000;     	// 0
	bra.uni 	$LBB6__Z25CalculateHighlightsWeightfff;
$Lt_1_1282:
	.loc	20	57	0
	// '>=' (not '>') so a zero-width interval never reaches the division.
	setp.ge.ftz.f32 	%above, %x, %hi;
	@!%above bra 	$Lt_1_1538;
	.loc	20	59	0
	// At or above the upper bound: full weight.
	mov.f32 	%weight, 0f3f800000;     	// 1
	bra.uni 	$LBB6__Z25CalculateHighlightsWeightfff;
$Lt_1_1538:
	.loc	20	63	0
	// Distance from the lower bound over the span: ~0 near %lo, ~1 near %hi.
	sub.ftz.f32 	%dist, %x, %lo;
	div.approx.ftz.f32 	%weight, %dist, %span;
$LBB6__Z25CalculateHighlightsWeightfff:
	st.param.f32 	[__cudaretf__Z25CalculateHighlightsWeightfff], %weight;
	ret;
$LDWend__Z25CalculateHighlightsWeightfff:
	} // _Z25CalculateHighlightsWeightfff

	// _Z15IntegerMultiplyii (s32 parm1, s32 parm2) -> s32
	// Returns the low 32 bits of parm1 * parm2.
	.visible .func (.param .s32 __cudaretf__Z15IntegerMultiplyii) _Z15IntegerMultiplyii (.param .s32 __cudaparmf1__Z15IntegerMultiplyii, .param .s32 __cudaparmf2__Z15IntegerMultiplyii)
	{
	.reg .s32 %lhs, %rhs, %product;
	.loc	21	60	0
$LDWbegin__Z15IntegerMultiplyii:
	ld.param.s32 	%lhs, [__cudaparmf1__Z15IntegerMultiplyii];
	ld.param.s32 	%rhs, [__cudaparmf2__Z15IntegerMultiplyii];
	.loc	21	64	0
	mul.lo.s32 	%product, %lhs, %rhs;
	st.param.s32 	[__cudaretf__Z15IntegerMultiplyii], %product;
	ret;
$LDWend__Z15IntegerMultiplyii:
	} // _Z15IntegerMultiplyii

	// _Z17Standard2DKernelXv () -> s32
	// Returns %ctaid.x * %ntid.x + %tid.x (low 32 bits).
	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelXv) _Z17Standard2DKernelXv ()
	{
	.reg .u32 %r_tid, %r_cta, %r_ntid, %r_idx;
	.loc	21	73	0
$LDWbegin__Z17Standard2DKernelXv:
	.loc	21	74	0
	mov.u32 	%r_cta, %ctaid.x;
	mov.u32 	%r_ntid, %ntid.x;
	mov.u32 	%r_tid, %tid.x;
	// Single multiply-add; the low 32 bits match a separate mul.lo + add.
	mad.lo.u32 	%r_idx, %r_cta, %r_ntid, %r_tid;
	st.param.s32 	[__cudaretf__Z17Standard2DKernelXv], %r_idx;
	ret;
$LDWend__Z17Standard2DKernelXv:
	} // _Z17Standard2DKernelXv

	// _Z17Standard2DKernelYv () -> s32
	// Returns %ctaid.y * %ntid.y + %tid.y (low 32 bits).
	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelYv) _Z17Standard2DKernelYv ()
	{
	.reg .u32 %r_tid, %r_cta, %r_ntid, %r_idx;
	.loc	21	77	0
$LDWbegin__Z17Standard2DKernelYv:
	.loc	21	78	0
	mov.u32 	%r_cta, %ctaid.y;
	mov.u32 	%r_ntid, %ntid.y;
	mov.u32 	%r_tid, %tid.y;
	// Single multiply-add; the low 32 bits match a separate mul.lo + add.
	mad.lo.u32 	%r_idx, %r_cta, %r_ntid, %r_tid;
	st.param.s32 	[__cudaretf__Z17Standard2DKernelYv], %r_idx;
	ret;
$LDWend__Z17Standard2DKernelYv:
	} // _Z17Standard2DKernelYv

	// _Z13Half4ToFloat47ushort4 (8-byte parm1: four 16-bit values) -> 16 bytes: four f32
	// Converts each 16-bit element of parm1, read as an f16 bit pattern, to f32.
	// Output element i (byte offset 4*i) comes from input element i (byte offset 2*i).
	.visible .func (.param .align 16 .b8 __cudaretf__Z13Half4ToFloat47ushort4[16]) _Z13Half4ToFloat47ushort4 (.param .align 8 .b8 __cudaparmf1__Z13Half4ToFloat47ushort4[8])
	{
	.reg .b16 %h<4>;
	.reg .f32 %f<4>;
	.loc	21	86	0
$LDWbegin__Z13Half4ToFloat47ushort4:
	// Half values are kept in 16-bit registers, so no widen/truncate pair is needed.
	ld.param.b16 	%h0, [__cudaparmf1__Z13Half4ToFloat47ushort4+0];
	ld.param.b16 	%h1, [__cudaparmf1__Z13Half4ToFloat47ushort4+2];
	ld.param.b16 	%h2, [__cudaparmf1__Z13Half4ToFloat47ushort4+4];
	ld.param.b16 	%h3, [__cudaparmf1__Z13Half4ToFloat47ushort4+6];
	.loc	21	87	0
	cvt.ftz.f32.f16 	%f0, %h0;
	cvt.ftz.f32.f16 	%f1, %h1;
	cvt.ftz.f32.f16 	%f2, %h2;
	cvt.ftz.f32.f16 	%f3, %h3;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+0], %f0;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+4], %f1;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+8], %f2;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+12], %f3;
	ret;
$LDWend__Z13Half4ToFloat47ushort4:
	} // _Z13Half4ToFloat47ushort4

	// _Z13Float4ToHalf46float4 (16-byte parm1: four f32) -> 8 bytes: four 16-bit values
	// Converts each f32 element of parm1 to an f16 bit pattern (round-to-nearest-even,
	// flush-to-zero on the f32 input). Output element i (byte offset 2*i) comes from
	// input element i (byte offset 4*i).
	.visible .func (.param .align 8 .b8 __cudaretf__Z13Float4ToHalf46float4[8]) _Z13Float4ToHalf46float4 (.param .align 16 .b8 __cudaparmf1__Z13Float4ToHalf46float4[16])
	{
	.reg .f32 %f<4>;
	.reg .b16 %h<4>;
	.loc	21	95	0
$LDWbegin__Z13Float4ToHalf46float4:
	ld.param.f32 	%f0, [__cudaparmf1__Z13Float4ToHalf46float4+0];
	ld.param.f32 	%f1, [__cudaparmf1__Z13Float4ToHalf46float4+4];
	ld.param.f32 	%f2, [__cudaparmf1__Z13Float4ToHalf46float4+8];
	ld.param.f32 	%f3, [__cudaparmf1__Z13Float4ToHalf46float4+12];
	.loc	21	96	0
	// Results land directly in 16-bit registers; only those 16 bits are stored.
	cvt.rn.ftz.f16.f32 	%h0, %f0;
	cvt.rn.ftz.f16.f32 	%h1, %f1;
	cvt.rn.ftz.f16.f32 	%h2, %f2;
	cvt.rn.ftz.f16.f32 	%h3, %f3;
	st.param.b16 	[__cudaretf__Z13Float4ToHalf46float4+0], %h0;
	st.param.b16 	[__cudaretf__Z13Float4ToHalf46float4+2], %h1;
	st.param.b16 	[__cudaretf__Z13Float4ToHalf46float4+4], %h2;
	st.param.b16 	[__cudaretf__Z13Float4ToHalf46float4+6], %h3;
	ret;
$LDWend__Z13Float4ToHalf46float4:
	} // _Z13Float4ToHalf46float4

	// _Z4Mix3RjS_S_ (u64 parm1, u64 parm2, u64 parm3) -> u32
	// Takes three 64-bit addresses of u32 values (called a, b, c below, in
	// parameter order), mixes the three values in place through nine
	// subtract/subtract/shift-xor steps, and returns the final value of c.
	// Accesses use ld.u32/st.u32 with no state-space qualifier, and every
	// intermediate result is stored and then re-loaded rather than kept in a
	// register -- presumably because the three addresses may alias, so the
	// load/store order must not be changed.
	// The shift sequence (>>13, <<8, >>13, >>12, <<16, >>5, >>3, <<10, >>15)
	// matches Bob Jenkins' 96-bit mix().
	.visible .func (.param .u32 __cudaretf__Z4Mix3RjS_S_) _Z4Mix3RjS_S_ (.param .u64 __cudaparmf1__Z4Mix3RjS_S_, .param .u64 __cudaparmf2__Z4Mix3RjS_S_, .param .u64 __cudaparmf3__Z4Mix3RjS_S_)
	{
	.reg .u32 %r<75>;
	.reg .u64 %rd<8>;
	.loc	21	138	0
$LDWbegin__Z4Mix3RjS_S_:
	// %rd2 = &a, %rd4 = &b, %rd6 = &c
	ld.param.u64 	%rd1, [__cudaparmf1__Z4Mix3RjS_S_];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf2__Z4Mix3RjS_S_];
	mov.s64 	%rd4, %rd3;
	ld.param.u64 	%rd5, [__cudaparmf3__Z4Mix3RjS_S_];
	mov.s64 	%rd6, %rd5;
	.loc	21	139	0
	// a -= b; a -= c; a ^= c >> 13;
	ld.u32 	%r1, [%rd2+0];
	ld.u32 	%r2, [%rd4+0];
	sub.u32 	%r3, %r1, %r2;
	st.u32 	[%rd2+0], %r3;
	ld.u32 	%r4, [%rd6+0];
	sub.u32 	%r5, %r3, %r4;
	st.u32 	[%rd2+0], %r5;
	ld.u32 	%r6, [%rd6+0];
	shr.u32 	%r7, %r6, 13;
	xor.b32 	%r8, %r5, %r7;
	st.u32 	[%rd2+0], %r8;
	.loc	21	140	0
	// b -= c; b -= a; b ^= a << 8;
	ld.u32 	%r9, [%rd4+0];
	ld.u32 	%r10, [%rd6+0];
	sub.u32 	%r11, %r9, %r10;
	st.u32 	[%rd4+0], %r11;
	ld.u32 	%r12, [%rd2+0];
	sub.u32 	%r13, %r11, %r12;
	st.u32 	[%rd4+0], %r13;
	ld.u32 	%r14, [%rd2+0];
	shl.b32 	%r15, %r14, 8;
	xor.b32 	%r16, %r13, %r15;
	st.u32 	[%rd4+0], %r16;
	.loc	21	141	0
	// c -= a; c -= b; c ^= b >> 13;
	ld.u32 	%r17, [%rd6+0];
	ld.u32 	%r18, [%rd2+0];
	sub.u32 	%r19, %r17, %r18;
	st.u32 	[%rd6+0], %r19;
	ld.u32 	%r20, [%rd4+0];
	sub.u32 	%r21, %r19, %r20;
	st.u32 	[%rd6+0], %r21;
	ld.u32 	%r22, [%rd4+0];
	shr.u32 	%r23, %r22, 13;
	xor.b32 	%r24, %r21, %r23;
	st.u32 	[%rd6+0], %r24;
	.loc	21	142	0
	// a -= b; a -= c; a ^= c >> 12;
	ld.u32 	%r25, [%rd2+0];
	ld.u32 	%r26, [%rd4+0];
	sub.u32 	%r27, %r25, %r26;
	st.u32 	[%rd2+0], %r27;
	ld.u32 	%r28, [%rd6+0];
	sub.u32 	%r29, %r27, %r28;
	st.u32 	[%rd2+0], %r29;
	ld.u32 	%r30, [%rd6+0];
	shr.u32 	%r31, %r30, 12;
	xor.b32 	%r32, %r29, %r31;
	st.u32 	[%rd2+0], %r32;
	.loc	21	143	0
	// b -= c; b -= a; b ^= a << 16;
	ld.u32 	%r33, [%rd4+0];
	ld.u32 	%r34, [%rd6+0];
	sub.u32 	%r35, %r33, %r34;
	st.u32 	[%rd4+0], %r35;
	ld.u32 	%r36, [%rd2+0];
	sub.u32 	%r37, %r35, %r36;
	st.u32 	[%rd4+0], %r37;
	ld.u32 	%r38, [%rd2+0];
	shl.b32 	%r39, %r38, 16;
	xor.b32 	%r40, %r37, %r39;
	st.u32 	[%rd4+0], %r40;
	.loc	21	144	0
	// c -= a; c -= b; c ^= b >> 5;
	ld.u32 	%r41, [%rd6+0];
	ld.u32 	%r42, [%rd2+0];
	sub.u32 	%r43, %r41, %r42;
	st.u32 	[%rd6+0], %r43;
	ld.u32 	%r44, [%rd4+0];
	sub.u32 	%r45, %r43, %r44;
	st.u32 	[%rd6+0], %r45;
	ld.u32 	%r46, [%rd4+0];
	shr.u32 	%r47, %r46, 5;
	xor.b32 	%r48, %r45, %r47;
	st.u32 	[%rd6+0], %r48;
	.loc	21	145	0
	// a -= b; a -= c; a ^= c >> 3;
	ld.u32 	%r49, [%rd2+0];
	ld.u32 	%r50, [%rd4+0];
	sub.u32 	%r51, %r49, %r50;
	st.u32 	[%rd2+0], %r51;
	ld.u32 	%r52, [%rd6+0];
	sub.u32 	%r53, %r51, %r52;
	st.u32 	[%rd2+0], %r53;
	ld.u32 	%r54, [%rd6+0];
	shr.u32 	%r55, %r54, 3;
	xor.b32 	%r56, %r53, %r55;
	st.u32 	[%rd2+0], %r56;
	.loc	21	146	0
	// b -= c; b -= a; b ^= a << 10;
	ld.u32 	%r57, [%rd4+0];
	ld.u32 	%r58, [%rd6+0];
	sub.u32 	%r59, %r57, %r58;
	st.u32 	[%rd4+0], %r59;
	ld.u32 	%r60, [%rd2+0];
	sub.u32 	%r61, %r59, %r60;
	st.u32 	[%rd4+0], %r61;
	ld.u32 	%r62, [%rd2+0];
	shl.b32 	%r63, %r62, 10;
	xor.b32 	%r64, %r61, %r63;
	st.u32 	[%rd4+0], %r64;
	.loc	21	147	0
	// c -= a; c -= b; c ^= b >> 15;
	ld.u32 	%r65, [%rd6+0];
	ld.u32 	%r66, [%rd2+0];
	sub.u32 	%r67, %r65, %r66;
	st.u32 	[%rd6+0], %r67;
	ld.u32 	%r68, [%rd4+0];
	sub.u32 	%r69, %r67, %r68;
	st.u32 	[%rd6+0], %r69;
	ld.u32 	%r70, [%rd4+0];
	shr.u32 	%r71, %r70, 15;
	xor.b32 	%r72, %r69, %r71;
	st.u32 	[%rd6+0], %r72;
	.loc	21	148	0
	// Return the value just stored to c (taken from the register, not re-loaded).
	mov.s32 	%r73, %r72;
	st.param.u32 	[__cudaretf__Z4Mix3RjS_S_], %r73;
	ret;
$LDWend__Z4Mix3RjS_S_:
	} // _Z4Mix3RjS_S_

	// _Z4Randj (u32 parm1) -> s32
	// Builds a 15-bit value from two linear-congruential results derived from parm1:
	//   s1 = parm1 * 1103515245 + 12345
	//   s2 = parm1 * -1029531031 - 740551042   (all arithmetic mod 2^32)
	//   result = (((s1 >> 16) & 255) << 7) ^ ((s2 >> 16) & 255)
	// The s2 constants equal 1103515245^2 and 12345 * (1103515245 + 1) mod 2^32,
	// so s2 is the s1 step applied twice.
	.visible .func (.param .s32 __cudaretf__Z4Randj) _Z4Randj (.param .u32 __cudaparmf1__Z4Randj)
	{
	.reg .u32 %seed, %s1, %s2, %hi, %lo, %out;
	.loc	21	152	0
$LDWbegin__Z4Randj:
	ld.param.u32 	%seed, [__cudaparmf1__Z4Randj];
	.loc	21	163	0
	// Both generator states first, then the two byte extractions.
	mad.lo.u32 	%s1, %seed, 1103515245, 12345;
	mul.lo.u32 	%s2, %seed, -1029531031;
	sub.u32 	%s2, %s2, 740551042;
	shr.u32 	%hi, %s1, 16;
	and.b32 	%hi, %hi, 255;
	shl.b32 	%hi, %hi, 7;
	shr.u32 	%lo, %s2, 16;
	and.b32 	%lo, %lo, 255;
	xor.b32 	%out, %hi, %lo;
	st.param.s32 	[__cudaretf__Z4Randj], %out;
	ret;
$LDWend__Z4Randj:
	} // _Z4Randj

	// _Z6Rand2Djjj (u32 parm1, u32 parm2, u32 parm3) -> s32
	// Mixes the three inputs (a, b, c in parameter order) through nine
	// subtract/subtract/shift-xor steps, then derives a 15-bit value from the
	// final c using the two linear-congruential steps shown at the end.
	// All arithmetic is modulo 2^32.
	.visible .func (.param .s32 __cudaretf__Z6Rand2Djjj) _Z6Rand2Djjj (.param .u32 __cudaparmf1__Z6Rand2Djjj, .param .u32 __cudaparmf2__Z6Rand2Djjj, .param .u32 __cudaparmf3__Z6Rand2Djjj)
	{
	.reg .u32 %a, %b, %c, %sh, %s1, %s2, %hi, %lo, %out;
	.loc	21	169	0
$LDWbegin__Z6Rand2Djjj:
	ld.param.u32 	%a, [__cudaparmf1__Z6Rand2Djjj];
	ld.param.u32 	%b, [__cudaparmf2__Z6Rand2Djjj];
	ld.param.u32 	%c, [__cudaparmf3__Z6Rand2Djjj];
	.loc	21	139	0
	// a -= b; a -= c; a ^= c >> 13;
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%sh, %c, 13;
	xor.b32 	%a, %a, %sh;
	.loc	21	140	0
	// b -= c; b -= a; b ^= a << 8;
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%sh, %a, 8;
	xor.b32 	%b, %b, %sh;
	.loc	21	141	0
	// c -= a; c -= b; c ^= b >> 13;
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%sh, %b, 13;
	xor.b32 	%c, %c, %sh;
	.loc	21	142	0
	// a -= b; a -= c; a ^= c >> 12;
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%sh, %c, 12;
	xor.b32 	%a, %a, %sh;
	.loc	21	143	0
	// b -= c; b -= a; b ^= a << 16;
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%sh, %a, 16;
	xor.b32 	%b, %b, %sh;
	.loc	21	144	0
	// c -= a; c -= b; c ^= b >> 5;
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%sh, %b, 5;
	xor.b32 	%c, %c, %sh;
	.loc	21	145	0
	// a -= b; a -= c; a ^= c >> 3;
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%sh, %c, 3;
	xor.b32 	%a, %a, %sh;
	.loc	21	146	0
	// b -= c; b -= a; b ^= a << 10;
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%sh, %a, 10;
	xor.b32 	%b, %b, %sh;
	.loc	21	147	0
	// c -= a; c -= b; c ^= b >> 15;
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%sh, %b, 15;
	xor.b32 	%c, %c, %sh;
	.loc	21	170	0
	// result = (((s1 >> 16) & 255) << 7) ^ ((s2 >> 16) & 255), with c as the seed.
	mad.lo.u32 	%s1, %c, 1103515245, 12345;
	mul.lo.u32 	%s2, %c, -1029531031;
	sub.u32 	%s2, %s2, 740551042;
	shr.u32 	%hi, %s1, 16;
	and.b32 	%hi, %hi, 255;
	shl.b32 	%hi, %hi, 7;
	shr.u32 	%lo, %s2, 16;
	and.b32 	%lo, %lo, 255;
	xor.b32 	%out, %hi, %lo;
	st.param.s32 	[__cudaretf__Z6Rand2Djjj], %out;
	ret;
$LDWend__Z6Rand2Djjj:
	} // _Z6Rand2Djjj

	// _Z6Rand2Dj (u32 parm1) -> s32
	// Mixes three values through nine subtract/subtract/shift-xor steps:
	//   a = parm1
	//   b = %ctaid.x * %ntid.x + %tid.x
	//   c = %ctaid.y * %ntid.y + %tid.y
	// then derives a 15-bit value from the final c using the two
	// linear-congruential steps shown at the end. All arithmetic is modulo 2^32.
	.visible .func (.param .s32 __cudaretf__Z6Rand2Dj) _Z6Rand2Dj (.param .u32 __cudaparmf1__Z6Rand2Dj)
	{
	.reg .u32 %a, %b, %c, %sh, %r_cta, %r_ntid, %r_tid, %s1, %s2, %hi, %lo, %out;
	.loc	21	175	0
$LDWbegin__Z6Rand2Dj:
	ld.param.u32 	%a, [__cudaparmf1__Z6Rand2Dj];
	.loc	21	143	0
	// c = global y index
	mov.u32 	%r_cta, %ctaid.y;
	mov.u32 	%r_ntid, %ntid.y;
	mov.u32 	%r_tid, %tid.y;
	mad.lo.u32 	%c, %r_cta, %r_ntid, %r_tid;
	// b = global x index
	mov.u32 	%r_cta, %ctaid.x;
	mov.u32 	%r_ntid, %ntid.x;
	mov.u32 	%r_tid, %tid.x;
	mad.lo.u32 	%b, %r_cta, %r_ntid, %r_tid;
	// a -= b; a -= c; a ^= c >> 13;
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%sh, %c, 13;
	xor.b32 	%a, %a, %sh;
	// b -= c; b -= a; b ^= a << 8;
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%sh, %a, 8;
	xor.b32 	%b, %b, %sh;
	// c -= a; c -= b; c ^= b >> 13;
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%sh, %b, 13;
	xor.b32 	%c, %c, %sh;
	// a -= b; a -= c; a ^= c >> 12;
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%sh, %c, 12;
	xor.b32 	%a, %a, %sh;
	// b -= c; b -= a; b ^= a << 16;
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%sh, %a, 16;
	xor.b32 	%b, %b, %sh;
	.loc	21	144	0
	// c -= a; c -= b; c ^= b >> 5;
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%sh, %b, 5;
	xor.b32 	%c, %c, %sh;
	.loc	21	145	0
	// a -= b; a -= c; a ^= c >> 3;
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%sh, %c, 3;
	xor.b32 	%a, %a, %sh;
	.loc	21	146	0
	// b -= c; b -= a; b ^= a << 10;
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%sh, %a, 10;
	xor.b32 	%b, %b, %sh;
	.loc	21	147	0
	// c -= a; c -= b; c ^= b >> 15;
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%sh, %b, 15;
	xor.b32 	%c, %c, %sh;
	.loc	21	176	0
	// result = (((s1 >> 16) & 255) << 7) ^ ((s2 >> 16) & 255), with c as the seed.
	mad.lo.u32 	%s1, %c, 1103515245, 12345;
	mul.lo.u32 	%s2, %c, -1029531031;
	sub.u32 	%s2, %s2, 740551042;
	shr.u32 	%hi, %s1, 16;
	and.b32 	%hi, %hi, 255;
	shl.b32 	%hi, %hi, 7;
	shr.u32 	%lo, %s2, 16;
	and.b32 	%lo, %lo, 255;
	xor.b32 	%out, %hi, %lo;
	st.param.s32 	[__cudaretf__Z6Rand2Dj], %out;
	ret;
$LDWend__Z6Rand2Dj:
	} // _Z6Rand2Dj

	// _Z6Read2DI7ushort4ET_PKS1_iii (u64 parm1, s32 parm2, s32 parm3, s32 parm4) -> 8 bytes
	// Loads the 8-byte element (four u16) at
	//   parm1 + 8 * (parm3 + parm2 * parm4)
	// and returns it. The element index is computed in 32-bit arithmetic and
	// sign-extended to 64 bits when scaled by the element size.
	.visible .func (.param .align 8 .b8 __cudaretf__Z6Read2DI7ushort4ET_PKS1_iii[8]) _Z6Read2DI7ushort4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI7ushort4ET_PKS1_iii)
	{
	.reg .u16 %c<4>;
	.reg .s32 %fac_a, %offset, %fac_b, %index;
	.reg .u64 %base, %addr;
	.loc	21	114	0
$LDWbegin__Z6Read2DI7ushort4ET_PKS1_iii:
	ld.param.u64 	%base, [__cudaparmf1__Z6Read2DI7ushort4ET_PKS1_iii];
	ld.param.s32 	%fac_a, [__cudaparmf2__Z6Read2DI7ushort4ET_PKS1_iii];
	ld.param.s32 	%offset, [__cudaparmf3__Z6Read2DI7ushort4ET_PKS1_iii];
	ld.param.s32 	%fac_b, [__cudaparmf4__Z6Read2DI7ushort4ET_PKS1_iii];
	.loc	21	115	0
	mad.lo.s32 	%index, %fac_a, %fac_b, %offset;
	mad.wide.s32 	%addr, %index, 8, %base;
	ld.v4.u16 	{%c0,%c1,%c2,%c3}, [%addr+0];
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+0], %c0;
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+2], %c1;
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+4], %c2;
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+6], %c3;
	ret;
$LDWend__Z6Read2DI7ushort4ET_PKS1_iii:
	} // _Z6Read2DI7ushort4ET_PKS1_iii

	// _Z6Read2DI6float4ET_PKS1_iii (u64 parm1, s32 parm2, s32 parm3, s32 parm4) -> 16 bytes
	// Loads the 16-byte element (four f32) at
	//   parm1 + 16 * (parm3 + parm2 * parm4)
	// and returns it. The element index is computed in 32-bit arithmetic and
	// sign-extended to 64 bits when scaled by the element size.
	.visible .func (.param .align 16 .b8 __cudaretf__Z6Read2DI6float4ET_PKS1_iii[16]) _Z6Read2DI6float4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI6float4ET_PKS1_iii)
	{
	.reg .f32 %c<4>;
	.reg .s32 %fac_a, %offset, %fac_b, %index;
	.reg .u64 %base, %addr;
	.loc	21	114	0
$LDWbegin__Z6Read2DI6float4ET_PKS1_iii:
	ld.param.u64 	%base, [__cudaparmf1__Z6Read2DI6float4ET_PKS1_iii];
	ld.param.s32 	%fac_a, [__cudaparmf2__Z6Read2DI6float4ET_PKS1_iii];
	ld.param.s32 	%offset, [__cudaparmf3__Z6Read2DI6float4ET_PKS1_iii];
	ld.param.s32 	%fac_b, [__cudaparmf4__Z6Read2DI6float4ET_PKS1_iii];
	.loc	21	115	0
	mad.lo.s32 	%index, %fac_a, %fac_b, %offset;
	mad.wide.s32 	%addr, %index, 16, %base;
	ld.v4.f32 	{%c0,%c1,%c2,%c3}, [%addr+0];
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+0], %c0;
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+4], %c1;
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+8], %c2;
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+12], %c3;
	ret;
$LDWend__Z6Read2DI6float4ET_PKS1_iii:
	} // _Z6Read2DI6float4ET_PKS1_iii

	// _Z7Write2DI7ushort4EvT_PS1_iii (8-byte parm1, u64 parm2, s32 parm3, s32 parm4, s32 parm5)
	// Stores the 8-byte value parm1 (four u16) to
	//   parm2 + 8 * (parm4 + parm3 * parm5).
	// The element index is computed in 32-bit arithmetic and sign-extended to
	// 64 bits when scaled by the element size. No return value.
	.visible .func _Z7Write2DI7ushort4EvT_PS1_iii (.param .align 8 .b8 __cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii[8], .param .u64 __cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii)
	{
	.reg .u16 %c<4>;
	.reg .s32 %fac_a, %offset, %fac_b, %index;
	.reg .u64 %base, %addr;
	.loc	21	125	0
$LDWbegin__Z7Write2DI7ushort4EvT_PS1_iii:
	ld.param.u16 	%c0, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+0];
	ld.param.u16 	%c1, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+2];
	ld.param.u16 	%c2, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+4];
	ld.param.u16 	%c3, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+6];
	ld.param.u64 	%base, [__cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii];
	ld.param.s32 	%fac_a, [__cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii];
	ld.param.s32 	%offset, [__cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii];
	ld.param.s32 	%fac_b, [__cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii];
	.loc	21	126	0
	mad.lo.s32 	%index, %fac_a, %fac_b, %offset;
	mad.wide.s32 	%addr, %index, 8, %base;
	st.v4.u16 	[%addr+0], {%c0,%c1,%c2,%c3};
	.loc	21	127	0
	ret;
$LDWend__Z7Write2DI7ushort4EvT_PS1_iii:
	} // _Z7Write2DI7ushort4EvT_PS1_iii

	// _Z7Write2DI6float4EvT_PS1_iii (16-byte parm1, u64 parm2, s32 parm3, s32 parm4, s32 parm5)
	// Stores the 16-byte value parm1 (four f32) to
	//   parm2 + 16 * (parm4 + parm3 * parm5).
	// The element index is computed in 32-bit arithmetic and sign-extended to
	// 64 bits when scaled by the element size. No return value.
	.visible .func _Z7Write2DI6float4EvT_PS1_iii (.param .align 16 .b8 __cudaparmf1__Z7Write2DI6float4EvT_PS1_iii[16], .param .u64 __cudaparmf2__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI6float4EvT_PS1_iii)
	{
	.reg .f32 %c<4>;
	.reg .s32 %fac_a, %offset, %fac_b, %index;
	.reg .u64 %base, %addr;
	.loc	21	125	0
$LDWbegin__Z7Write2DI6float4EvT_PS1_iii:
	ld.param.f32 	%c0, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+0];
	ld.param.f32 	%c1, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+4];
	ld.param.f32 	%c2, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+8];
	ld.param.f32 	%c3, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+12];
	ld.param.u64 	%base, [__cudaparmf2__Z7Write2DI6float4EvT_PS1_iii];
	ld.param.s32 	%fac_a, [__cudaparmf3__Z7Write2DI6float4EvT_PS1_iii];
	ld.param.s32 	%offset, [__cudaparmf4__Z7Write2DI6float4EvT_PS1_iii];
	ld.param.s32 	%fac_b, [__cudaparmf5__Z7Write2DI6float4EvT_PS1_iii];
	.loc	21	126	0
	mad.lo.s32 	%index, %fac_a, %fac_b, %offset;
	mad.wide.s32 	%addr, %index, 16, %base;
	st.v4.f32 	[%addr+0], {%c0,%c1,%c2,%c3};
	.loc	21	127	0
	ret;
$LDWend__Z7Write2DI6float4EvT_PS1_iii:
	} // _Z7Write2DI6float4EvT_PS1_iii

	// _Z18UnpremultiplyPixel8PixelRGB (16-byte parm1: four f32) -> 16 bytes: four f32
	// Uses the element at byte offset 12 as the divisor:
	//   d = saturate(parm1[12])                 (clamped to [0, 1])
	//   if d - 8e-6 <= 0: all four outputs are 0
	//   else: outputs at offsets 0/4/8 are the matching inputs times 1/d
	//         (approximate reciprocal); the output at offset 12 is d.
	.visible .func (.param .align 16 .b8 __cudaretf__Z18UnpremultiplyPixel8PixelRGB[16]) _Z18UnpremultiplyPixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z18UnpremultiplyPixel8PixelRGB[16])
	{
	.reg .f32 %in<4>;
	.reg .f32 %out<4>;
	.reg .f32 %div, %margin, %inv;
	.reg .pred %tiny;
	.loc	4	206	0
$LDWbegin__Z18UnpremultiplyPixel8PixelRGB:
	ld.param.f32 	%in0, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+0];
	ld.param.f32 	%in1, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+4];
	ld.param.f32 	%in2, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+8];
	ld.param.f32 	%in3, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+12];
	.loc	4	219	0
	// Start from the all-zero result; it is overwritten only when the divisor
	// is large enough.
	mov.f32 	%out0, 0f00000000;    	// 0
	mov.f32 	%out1, 0f00000000;    	// 0
	mov.f32 	%out2, 0f00000000;    	// 0
	mov.f32 	%out3, 0f00000000;    	// 0
	.loc	4	208	0
	cvt.ftz.sat.f32.f32 	%div, %in3;
	add.ftz.f32 	%margin, %div, 0fb70637bd;    	// -8e-006
	setp.le.ftz.f32 	%tiny, %margin, 0f00000000;
	@%tiny bra 	$Lt_15_1026;
	.loc	4	213	0
	rcp.approx.ftz.f32 	%inv, %div;
	mul.ftz.f32 	%out2, %inv, %in2;
	.loc	4	214	0
	mul.ftz.f32 	%out1, %inv, %in1;
	.loc	4	215	0
	mul.ftz.f32 	%out0, %inv, %in0;
	mov.f32 	%out3, %div;
$Lt_15_1026:
	.loc	4	224	0
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+0], %out0;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+4], %out1;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+8], %out2;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+12], %out3;
	ret;
$LDWend__Z18UnpremultiplyPixel8PixelRGB:
	} // _Z18UnpremultiplyPixel8PixelRGB

	// ToLinearColor(float) -> float (signature as encoded in the mangled name).
	// Sign-preserving power curve with exponent 2.2, evaluated as
	// ex2(2.2 * lg2(|x|)) using the approximate, flush-to-zero instructions:
	//   x <  0 : -ex2(2.2 * lg2(-x))
	//   else   :  ex2(2.2 * lg2(x))
	.visible .func (.param .f32 __cudaretf__Z13ToLinearColorf) _Z13ToLinearColorf (.param .f32 __cudaparmf1__Z13ToLinearColorf)
	{
	.reg .f32 %x, %t, %result;
	.reg .pred %is_neg;
	.loc	4	231	0
$LDWbegin__Z13ToLinearColorf:
	ld.param.f32	%x, [__cudaparmf1__Z13ToLinearColorf];
	setp.lt.ftz.f32	%is_neg, %x, 0f00000000;
	@!%is_neg bra	$Lt_16_1026;
	.loc	4	234	0
	// Negative input: work on the magnitude, then restore the sign.
	neg.ftz.f32	%t, %x;
	lg2.approx.ftz.f32	%t, %t;
	mul.ftz.f32	%t, %t, 0f400ccccd;	// * 2.2
	ex2.approx.ftz.f32	%t, %t;
	neg.ftz.f32	%result, %t;
	bra.uni	$LBB4__Z13ToLinearColorf;
$Lt_16_1026:
	.loc	4	236	0
	lg2.approx.ftz.f32	%t, %x;
	mul.ftz.f32	%t, %t, 0f400ccccd;	// * 2.2
	ex2.approx.ftz.f32	%result, %t;
$LBB4__Z13ToLinearColorf:
	st.param.f32	[__cudaretf__Z13ToLinearColorf], %result;
	ret;
$LDWend__Z13ToLinearColorf:
	} // _Z13ToLinearColorf

	// FromLinearColor(float) -> float (signature as encoded in the mangled name).
	// Sign-preserving power curve with exponent 0.454545, evaluated as
	// ex2(0.454545 * lg2(|x|)) using the approximate, flush-to-zero instructions:
	//   x <  0 : -ex2(0.454545 * lg2(-x))
	//   else   :  ex2(0.454545 * lg2(x))
	.visible .func (.param .f32 __cudaretf__Z15FromLinearColorf) _Z15FromLinearColorf (.param .f32 __cudaparmf1__Z15FromLinearColorf)
	{
	.reg .f32 %x, %t, %result;
	.reg .pred %is_neg;
	.loc	4	239	0
$LDWbegin__Z15FromLinearColorf:
	ld.param.f32	%x, [__cudaparmf1__Z15FromLinearColorf];
	setp.lt.ftz.f32	%is_neg, %x, 0f00000000;
	@!%is_neg bra	$Lt_17_1026;
	.loc	4	242	0
	// Negative input: work on the magnitude, then restore the sign.
	neg.ftz.f32	%t, %x;
	lg2.approx.ftz.f32	%t, %t;
	mul.ftz.f32	%t, %t, 0f3ee8ba2e;	// * 0.454545
	ex2.approx.ftz.f32	%t, %t;
	neg.ftz.f32	%result, %t;
	bra.uni	$LBB4__Z15FromLinearColorf;
$Lt_17_1026:
	.loc	4	244	0
	lg2.approx.ftz.f32	%t, %x;
	mul.ftz.f32	%t, %t, 0f3ee8ba2e;	// * 0.454545
	ex2.approx.ftz.f32	%result, %t;
$LBB4__Z15FromLinearColorf:
	st.param.f32	[__cudaretf__Z15FromLinearColorf], %result;
	ret;
$LDWend__Z15FromLinearColorf:
	} // _Z15FromLinearColorf

	// PremultiplyLinearizePixel(PixelRGB) -> PixelRGB, a 16-byte struct of
	// four f32 (signature as encoded in the mangled name).
	// a = saturate(in[+12]). Each of in[+0], in[+4], in[+8] goes through the
	// sign-preserving 2.2 power curve (ex2(2.2 * lg2(|v|)), negated when
	// v < 0) and is then multiplied by a. The +12 output field is a.
	.visible .func (.param .align 16 .b8 __cudaretf__Z25PremultiplyLinearizePixel8PixelRGB[16]) _Z25PremultiplyLinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB[16])
	{
	.reg .f32 %in_c0, %in_c1, %in_c2, %alpha;
	.reg .f32 %t, %lin_c0, %lin_c1, %lin_c2;
	.reg .pred %neg_c0, %neg_c1, %neg_c2;
	.loc	4	252	0
$LDWbegin__Z25PremultiplyLinearizePixel8PixelRGB:
	ld.param.f32	%in_c0, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+0];
	ld.param.f32	%in_c1, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+4];
	ld.param.f32	%in_c2, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+8];
	ld.param.f32	%alpha, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+12];
	.loc	4	254	0
	cvt.ftz.sat.f32.f32	%alpha, %alpha;
	.loc	4	255	0
	// ---- field at +0 ----
	setp.lt.ftz.f32	%neg_c0, %in_c0, 0f00000000;
	@!%neg_c0 bra	$Lt_18_4098;
	.loc	4	234	0
	neg.ftz.f32	%t, %in_c0;
	lg2.approx.ftz.f32	%t, %t;
	mul.ftz.f32	%t, %t, 0f400ccccd;	// * 2.2
	ex2.approx.ftz.f32	%t, %t;
	neg.ftz.f32	%lin_c0, %t;
	bra.uni	$LDWendi___log2f_195_5;
$Lt_18_4098:
	.loc	4	236	0
	lg2.approx.ftz.f32	%t, %in_c0;
	mul.ftz.f32	%t, %t, 0f400ccccd;	// * 2.2
	ex2.approx.ftz.f32	%lin_c0, %t;
$LDWendi___log2f_195_5:
	.loc	4	256	0
	// ---- field at +4 ----
	setp.lt.ftz.f32	%neg_c1, %in_c1, 0f00000000;
	@!%neg_c1 bra	$Lt_18_4610;
	.loc	4	234	0
	neg.ftz.f32	%t, %in_c1;
	lg2.approx.ftz.f32	%t, %t;
	mul.ftz.f32	%t, %t, 0f400ccccd;	// * 2.2
	ex2.approx.ftz.f32	%t, %t;
	neg.ftz.f32	%lin_c1, %t;
	bra.uni	$LDWendi___log2f_195_3;
$Lt_18_4610:
	.loc	4	236	0
	lg2.approx.ftz.f32	%t, %in_c1;
	mul.ftz.f32	%t, %t, 0f400ccccd;	// * 2.2
	ex2.approx.ftz.f32	%lin_c1, %t;
$LDWendi___log2f_195_3:
	.loc	4	257	0
	// ---- field at +8 ----
	setp.lt.ftz.f32	%neg_c2, %in_c2, 0f00000000;
	@!%neg_c2 bra	$Lt_18_5122;
	.loc	4	234	0
	neg.ftz.f32	%t, %in_c2;
	lg2.approx.ftz.f32	%t, %t;
	mul.ftz.f32	%t, %t, 0f400ccccd;	// * 2.2
	ex2.approx.ftz.f32	%t, %t;
	neg.ftz.f32	%lin_c2, %t;
	bra.uni	$LDWendi___log2f_195_1;
$Lt_18_5122:
	.loc	4	236	0
	lg2.approx.ftz.f32	%t, %in_c2;
	mul.ftz.f32	%t, %t, 0f400ccccd;	// * 2.2
	ex2.approx.ftz.f32	%lin_c2, %t;
$LDWendi___log2f_195_1:
	.loc	4	259	0
	// Scale the three curved fields by the clamped +12 field.
	mul.ftz.f32	%lin_c2, %lin_c2, %alpha;
	mul.ftz.f32	%lin_c1, %lin_c1, %alpha;
	mul.ftz.f32	%lin_c0, %lin_c0, %alpha;
	st.param.f32	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+0], %lin_c0;
	st.param.f32	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+4], %lin_c1;
	st.param.f32	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+8], %lin_c2;
	st.param.f32	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+12], %alpha;
	ret;
$LDWend__Z25PremultiplyLinearizePixel8PixelRGB:
	} // _Z25PremultiplyLinearizePixel8PixelRGB

	// UnpremultiplyUnlinearizePixel(PixelRGB) -> PixelRGB, a 16-byte struct of
	// four f32 (signature as encoded in the mangled name).
	// Step 1: a = saturate(in[+12]). When a - 8e-006 <= 0 all four values
	//         become zero; otherwise the +0/+4/+8 fields are multiplied by
	//         the approximate reciprocal of a.
	// Step 2: each of the three fields goes through the sign-preserving
	//         0.454545 power curve (ex2(0.454545 * lg2(|v|)), negated when v < 0).
	// The +12 output field is a (or zero on the small-divisor path).
	.visible .func (.param .align 16 .b8 __cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16]) _Z29UnpremultiplyUnlinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16])
	{
	.reg .f32 %c0, %c1, %c2, %alpha;
	.reg .f32 %biased, %inv, %t, %out_c0, %out_c1, %out_c2;
	.reg .pred %tiny, %neg_c0, %neg_c1, %neg_c2;
	.loc	4	263	0
$LDWbegin__Z29UnpremultiplyUnlinearizePixel8PixelRGB:
	ld.param.f32	%c0, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+0];
	ld.param.f32	%c1, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+4];
	ld.param.f32	%c2, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+8];
	ld.param.f32	%alpha, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+12];
	.loc	4	208	0
	// Clamp the divisor to [0, 1], then test it against the 8e-006 threshold.
	cvt.ftz.sat.f32.f32	%alpha, %alpha;
	add.ftz.f32	%biased, %alpha, 0fb70637bd;	// + (-8e-006)
	setp.le.ftz.f32	%tiny, %biased, 0f00000000;
	@%tiny bra	$Lt_19_5122;
	.loc	4	213	0
	rcp.approx.ftz.f32	%inv, %alpha;
	mul.ftz.f32	%c2, %inv, %c2;
	.loc	4	214	0
	mul.ftz.f32	%c1, %inv, %c1;
	.loc	4	215	0
	mul.ftz.f32	%c0, %inv, %c0;
	bra.uni	$Lt_19_4866;
$Lt_19_5122:
	.loc	4	219	0
	// Divisor too small: everything is zeroed before the power curve.
	mov.f32	%c2, 0f00000000;
	mov.f32	%c1, 0f00000000;
	mov.f32	%c0, 0f00000000;
	mov.f32	%alpha, 0f00000000;
$Lt_19_4866:
	.loc	4	266	0
	// ---- field at +0 ----
	setp.lt.ftz.f32	%neg_c0, %c0, 0f00000000;
	@!%neg_c0 bra	$Lt_19_5378;
	.loc	4	242	0
	neg.ftz.f32	%t, %c0;
	lg2.approx.ftz.f32	%t, %t;
	mul.ftz.f32	%t, %t, 0f3ee8ba2e;	// * 0.454545
	ex2.approx.ftz.f32	%t, %t;
	neg.ftz.f32	%out_c0, %t;
	bra.uni	$LDWendi___log2f_196_5;
$Lt_19_5378:
	.loc	4	244	0
	lg2.approx.ftz.f32	%t, %c0;
	mul.ftz.f32	%t, %t, 0f3ee8ba2e;	// * 0.454545
	ex2.approx.ftz.f32	%out_c0, %t;
$LDWendi___log2f_196_5:
	.loc	4	267	0
	// ---- field at +4 ----
	setp.lt.ftz.f32	%neg_c1, %c1, 0f00000000;
	@!%neg_c1 bra	$Lt_19_5890;
	.loc	4	242	0
	neg.ftz.f32	%t, %c1;
	lg2.approx.ftz.f32	%t, %t;
	mul.ftz.f32	%t, %t, 0f3ee8ba2e;	// * 0.454545
	ex2.approx.ftz.f32	%t, %t;
	neg.ftz.f32	%out_c1, %t;
	bra.uni	$LDWendi___log2f_196_3;
$Lt_19_5890:
	.loc	4	244	0
	lg2.approx.ftz.f32	%t, %c1;
	mul.ftz.f32	%t, %t, 0f3ee8ba2e;	// * 0.454545
	ex2.approx.ftz.f32	%out_c1, %t;
$LDWendi___log2f_196_3:
	.loc	4	268	0
	// ---- field at +8 ----
	setp.lt.ftz.f32	%neg_c2, %c2, 0f00000000;
	@!%neg_c2 bra	$Lt_19_6402;
	.loc	4	242	0
	neg.ftz.f32	%t, %c2;
	lg2.approx.ftz.f32	%t, %t;
	mul.ftz.f32	%t, %t, 0f3ee8ba2e;	// * 0.454545
	ex2.approx.ftz.f32	%t, %t;
	neg.ftz.f32	%out_c2, %t;
	bra.uni	$LDWendi___log2f_196_1;
$Lt_19_6402:
	.loc	4	244	0
	lg2.approx.ftz.f32	%t, %c2;
	mul.ftz.f32	%t, %t, 0f3ee8ba2e;	// * 0.454545
	ex2.approx.ftz.f32	%out_c2, %t;
$LDWendi___log2f_196_1:
	.loc	4	269	0
	st.param.f32	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+0], %out_c0;
	st.param.f32	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+4], %out_c1;
	st.param.f32	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+8], %out_c2;
	st.param.f32	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+12], %alpha;
	ret;
$LDWend__Z29UnpremultiplyUnlinearizePixel8PixelRGB:
	} // _Z29UnpremultiplyUnlinearizePixel8PixelRGB

	// PremultiplyLinearize(float4) -> float4 (signature as encoded in the
	// mangled name).
	// w = saturate(in[+12]). Each of in[+0], in[+4], in[+8] goes through the
	// sign-preserving 2.2 power curve (ex2(2.2 * lg2(|v|)), negated when
	// v < 0) and is then multiplied by w. The +12 output lane is w.
	.visible .func (.param .align 16 .b8 __cudaretf__Z20PremultiplyLinearize6float4[16]) _Z20PremultiplyLinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z20PremultiplyLinearize6float4[16])
	{
	.reg .f32 %src0, %src1, %src2, %w;
	.reg .f32 %tmp, %pow0, %pow1, %pow2;
	.reg .pred %lt0, %lt1, %lt2;
	.loc	4	277	0
$LDWbegin__Z20PremultiplyLinearize6float4:
	ld.param.f32	%src0, [__cudaparmf1__Z20PremultiplyLinearize6float4+0];
	ld.param.f32	%src1, [__cudaparmf1__Z20PremultiplyLinearize6float4+4];
	ld.param.f32	%src2, [__cudaparmf1__Z20PremultiplyLinearize6float4+8];
	ld.param.f32	%w, [__cudaparmf1__Z20PremultiplyLinearize6float4+12];
	.loc	4	254	0
	cvt.ftz.sat.f32.f32	%w, %w;
	.loc	4	255	0
	// ---- lane at +0 ----
	setp.lt.ftz.f32	%lt0, %src0, 0f00000000;
	@!%lt0 bra	$Lt_20_4098;
	.loc	4	234	0
	neg.ftz.f32	%tmp, %src0;
	lg2.approx.ftz.f32	%tmp, %tmp;
	mul.ftz.f32	%tmp, %tmp, 0f400ccccd;	// * 2.2
	ex2.approx.ftz.f32	%tmp, %tmp;
	neg.ftz.f32	%pow0, %tmp;
	bra.uni	$LDWendi___log2f_197_5;
$Lt_20_4098:
	.loc	4	236	0
	lg2.approx.ftz.f32	%tmp, %src0;
	mul.ftz.f32	%tmp, %tmp, 0f400ccccd;	// * 2.2
	ex2.approx.ftz.f32	%pow0, %tmp;
$LDWendi___log2f_197_5:
	.loc	4	256	0
	// ---- lane at +4 ----
	setp.lt.ftz.f32	%lt1, %src1, 0f00000000;
	@!%lt1 bra	$Lt_20_4610;
	.loc	4	234	0
	neg.ftz.f32	%tmp, %src1;
	lg2.approx.ftz.f32	%tmp, %tmp;
	mul.ftz.f32	%tmp, %tmp, 0f400ccccd;	// * 2.2
	ex2.approx.ftz.f32	%tmp, %tmp;
	neg.ftz.f32	%pow1, %tmp;
	bra.uni	$LDWendi___log2f_197_3;
$Lt_20_4610:
	.loc	4	236	0
	lg2.approx.ftz.f32	%tmp, %src1;
	mul.ftz.f32	%tmp, %tmp, 0f400ccccd;	// * 2.2
	ex2.approx.ftz.f32	%pow1, %tmp;
$LDWendi___log2f_197_3:
	.loc	4	257	0
	// ---- lane at +8 ----
	setp.lt.ftz.f32	%lt2, %src2, 0f00000000;
	@!%lt2 bra	$Lt_20_5122;
	.loc	4	234	0
	neg.ftz.f32	%tmp, %src2;
	lg2.approx.ftz.f32	%tmp, %tmp;
	mul.ftz.f32	%tmp, %tmp, 0f400ccccd;	// * 2.2
	ex2.approx.ftz.f32	%tmp, %tmp;
	neg.ftz.f32	%pow2, %tmp;
	bra.uni	$LDWendi___log2f_197_1;
$Lt_20_5122:
	.loc	4	236	0
	lg2.approx.ftz.f32	%tmp, %src2;
	mul.ftz.f32	%tmp, %tmp, 0f400ccccd;	// * 2.2
	ex2.approx.ftz.f32	%pow2, %tmp;
$LDWendi___log2f_197_1:
	.loc	4	259	0
	// Scale the three curved lanes by the clamped +12 lane.
	mul.ftz.f32	%pow2, %pow2, %w;
	mul.ftz.f32	%pow1, %pow1, %w;
	.loc	4	278	0
	mul.ftz.f32	%pow0, %pow0, %w;
	st.param.f32	[__cudaretf__Z20PremultiplyLinearize6float4+0], %pow0;
	st.param.f32	[__cudaretf__Z20PremultiplyLinearize6float4+4], %pow1;
	st.param.f32	[__cudaretf__Z20PremultiplyLinearize6float4+8], %pow2;
	st.param.f32	[__cudaretf__Z20PremultiplyLinearize6float4+12], %w;
	ret;
$LDWend__Z20PremultiplyLinearize6float4:
	} // _Z20PremultiplyLinearize6float4

	// UnpremultiplyUnlinearize(float4) -> float4 (signature as encoded in the
	// mangled name).
	// Step 1: w = saturate(in[+12]). When w - 8e-006 <= 0 all four lanes
	//         become zero; otherwise the +0/+4/+8 lanes are multiplied by the
	//         approximate reciprocal of w.
	// Step 2: each of the three lanes goes through the sign-preserving
	//         0.454545 power curve (ex2(0.454545 * lg2(|v|)), negated when v < 0).
	// The +12 output lane is w (or zero on the small-divisor path).
	.visible .func (.param .align 16 .b8 __cudaretf__Z24UnpremultiplyUnlinearize6float4[16]) _Z24UnpremultiplyUnlinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z24UnpremultiplyUnlinearize6float4[16])
	{
	.reg .f32 %lane0, %lane1, %lane2, %w;
	.reg .f32 %shifted, %recip, %tmp, %res0, %res1, %res2;
	.reg .pred %small, %lt0, %lt1, %lt2;
	.loc	4	284	0
$LDWbegin__Z24UnpremultiplyUnlinearize6float4:
	ld.param.f32	%lane0, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+0];
	ld.param.f32	%lane1, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+4];
	ld.param.f32	%lane2, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+8];
	ld.param.f32	%w, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+12];
	.loc	4	208	0
	// Clamp the divisor to [0, 1], then test it against the 8e-006 threshold.
	cvt.ftz.sat.f32.f32	%w, %w;
	add.ftz.f32	%shifted, %w, 0fb70637bd;	// + (-8e-006)
	setp.le.ftz.f32	%small, %shifted, 0f00000000;
	@%small bra	$Lt_21_5122;
	.loc	4	213	0
	rcp.approx.ftz.f32	%recip, %w;
	mul.ftz.f32	%lane2, %recip, %lane2;
	.loc	4	214	0
	mul.ftz.f32	%lane1, %recip, %lane1;
	.loc	4	215	0
	mul.ftz.f32	%lane0, %recip, %lane0;
	bra.uni	$Lt_21_4866;
$Lt_21_5122:
	.loc	4	219	0
	// Divisor too small: everything is zeroed before the power curve.
	mov.f32	%lane2, 0f00000000;
	mov.f32	%lane1, 0f00000000;
	mov.f32	%lane0, 0f00000000;
	mov.f32	%w, 0f00000000;
$Lt_21_4866:
	.loc	4	266	0
	// ---- lane at +0 ----
	setp.lt.ftz.f32	%lt0, %lane0, 0f00000000;
	@!%lt0 bra	$Lt_21_5378;
	.loc	4	242	0
	neg.ftz.f32	%tmp, %lane0;
	lg2.approx.ftz.f32	%tmp, %tmp;
	mul.ftz.f32	%tmp, %tmp, 0f3ee8ba2e;	// * 0.454545
	ex2.approx.ftz.f32	%tmp, %tmp;
	neg.ftz.f32	%res0, %tmp;
	bra.uni	$LDWendi___log2f_198_5;
$Lt_21_5378:
	.loc	4	244	0
	lg2.approx.ftz.f32	%tmp, %lane0;
	mul.ftz.f32	%tmp, %tmp, 0f3ee8ba2e;	// * 0.454545
	ex2.approx.ftz.f32	%res0, %tmp;
$LDWendi___log2f_198_5:
	.loc	4	267	0
	// ---- lane at +4 ----
	setp.lt.ftz.f32	%lt1, %lane1, 0f00000000;
	@!%lt1 bra	$Lt_21_5890;
	.loc	4	242	0
	neg.ftz.f32	%tmp, %lane1;
	lg2.approx.ftz.f32	%tmp, %tmp;
	mul.ftz.f32	%tmp, %tmp, 0f3ee8ba2e;	// * 0.454545
	ex2.approx.ftz.f32	%tmp, %tmp;
	neg.ftz.f32	%res1, %tmp;
	bra.uni	$LDWendi___log2f_198_3;
$Lt_21_5890:
	.loc	4	244	0
	lg2.approx.ftz.f32	%tmp, %lane1;
	mul.ftz.f32	%tmp, %tmp, 0f3ee8ba2e;	// * 0.454545
	ex2.approx.ftz.f32	%res1, %tmp;
$LDWendi___log2f_198_3:
	.loc	4	268	0
	// ---- lane at +8 ----
	setp.lt.ftz.f32	%lt2, %lane2, 0f00000000;
	@!%lt2 bra	$Lt_21_6402;
	.loc	4	242	0
	neg.ftz.f32	%tmp, %lane2;
	lg2.approx.ftz.f32	%tmp, %tmp;
	mul.ftz.f32	%tmp, %tmp, 0f3ee8ba2e;	// * 0.454545
	ex2.approx.ftz.f32	%tmp, %tmp;
	neg.ftz.f32	%res2, %tmp;
	bra.uni	$LDWendi___log2f_198_1;
$Lt_21_6402:
	.loc	4	244	0
	lg2.approx.ftz.f32	%tmp, %lane2;
	mul.ftz.f32	%tmp, %tmp, 0f3ee8ba2e;	// * 0.454545
	ex2.approx.ftz.f32	%res2, %tmp;
$LDWendi___log2f_198_1:
	.loc	4	285	0
	st.param.f32	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+0], %res0;
	st.param.f32	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+4], %res1;
	st.param.f32	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+8], %res2;
	st.param.f32	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+12], %w;
	ret;
$LDWend__Z24UnpremultiplyUnlinearize6float4:
	} // _Z24UnpremultiplyUnlinearize6float4

	// SwapComponentOrder<float4>(const float4&) -> float4 (signature as
	// encoded in the mangled name).
	// Reads four consecutive f32 values through the 64-bit pointer argument
	// and returns them in reverse order:
	//   out[+0] = in[+12], out[+4] = in[+8], out[+8] = in[+4], out[+12] = in[+0]
	.visible .func (.param .align 16 .b8 __cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_[16]) _Z18SwapComponentOrderI6float4ET_RKS1_ (.param .u64 __cudaparmf1__Z18SwapComponentOrderI6float4ET_RKS1_)
	{
	.reg .u64 %src;
	.reg .f32 %e0, %e1, %e2, %e3;
	.loc	22	264	0
$LDWbegin__Z18SwapComponentOrderI6float4ET_RKS1_:
	ld.param.u64	%src, [__cudaparmf1__Z18SwapComponentOrderI6float4ET_RKS1_];
	.loc	22	270	0
	// Scalar loads, last element first (same load order as before).
	ld.f32	%e3, [%src+12];
	ld.f32	%e2, [%src+8];
	ld.f32	%e1, [%src+4];
	ld.f32	%e0, [%src+0];
	// Write the return struct with the element order reversed.
	st.param.f32	[__cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_+0], %e3;
	st.param.f32	[__cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_+4], %e2;
	st.param.f32	[__cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_+8], %e1;
	st.param.f32	[__cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_+12], %e0;
	ret;
$LDWend__Z18SwapComponentOrderI6float4ET_RKS1_:
	} // _Z18SwapComponentOrderI6float4ET_RKS1_
	// Constant-space table: 36 raw bytes = nine 32-bit words, 4-byte aligned.
	// Interpreted as little-endian IEEE-754 single-precision values the words
	// decode to approximately:
	//      0.299    0.587    0.114
	//     -0.1687  -0.3313   0.5
	//      0.5     -0.4187  -0.0813
	// The function that follows reads the first three entries with
	// ld.const.f32 at byte offsets +0, +4 and +8.
	// NOTE(review): going by the name this is an RGB -> Rec.601 YPbPr matrix,
	// presumably stored row-major as 3x3 -- confirm against the CUDA source.
	.const .align 4 .b8 kRGB32f_To_601YPbPr[36] = {135,22,153,62,162,69,22,63,213,120,233,61,33,201,44,190,111,155,169,190,0,0,0,63,0,0,0,63,70,94,214,190,232,134,166,189};

	.visible .func (.param .align 16 .b8 __cudaretf__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow[16]) _Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow (.param .align 16 .b8 __cudaparmf1__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow[16], .param .f32 __cudaparmf2__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow, .param .u64 __cudaparmf3__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow, .param .u64 __cudaparmf4__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow, .param .u64 __cudaparmf5__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow)
	{
	.reg .u64 %rd<8>;
	.reg .f32 %f<355>;
	.reg .pred %p<7>;
	.loc	23	36	0
$LDWbegin__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow:
	ld.param.f32 	%f1, [__cudaparmf1__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow+12];
	mov.f32 	%f8, %f7;
	ld.param.f32 	%f9, [__cudaparmf2__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow];
	mov.f32 	%f10, %f9;
	ld.param.u64 	%rd1, [__cudaparmf3__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf4__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow];
	mov.s64 	%rd4, %rd3;
	ld.param.u64 	%rd5, [__cudaparmf5__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow];
	mov.s64 	%rd6, %rd5;
	.loc	23	41	0
	mov.f32 	%f11, 0f00000000;    	// 0
	max.ftz.f32 	%f12, %f6, %f11;
	.loc	23	42	0
	mov.f32 	%f13, 0f00000000;    	// 0
	max.ftz.f32 	%f14, %f4, %f13;
	.loc	23	43	0
	mov.f32 	%f15, 0f00000000;    	// 0
	max.ftz.f32 	%f16, %f2, %f15;
	mov.f32 	%f17, 0f3a83126f;    	// 0.001
	setp.gt.ftz.f32 	%p1, %f10, %f17;
	@!%p1 bra 	$Lt_23_4098;
	ld.f32 	%f18, [%rd2+0];
	ld.const.f32 	%f19, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f20, %f19, %f4;
	ld.const.f32 	%f21, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f22, %f21, %f6, %f20;
	ld.const.f32 	%f23, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f24, %f23, %f2, %f22;
	setp.gt.ftz.f32 	%p2, %f18, %f24;
	@!%p2 bra 	$Lt_23_4866;
	.loc	23	50	0
	ld.f32 	%f25, [%rd6+48];
	rcp.approx.ftz.f32 	%f26, %f25;
	.loc	23	51	0
	ld.f32 	%f27, [%rd6+60];
	rcp.approx.ftz.f32 	%f28, %f27;
	.loc	23	53	0
	ld.f32 	%f29, [%rd6+72];
	rcp.approx.ftz.f32 	%f30, %f29;
	.loc	23	55	0
	ld.f32 	%f31, [%rd6+84];
	rcp.approx.ftz.f32 	%f32, %f31;
	.loc	23	58	0
	ld.f32 	%f33, [%rd6+52];
	ld.f32 	%f34, [%rd6+64];
	add.ftz.f32 	%f35, %f34, %f33;
	lg2.approx.ftz.f32 	%f36, %f12;
	mul.ftz.f32 	%f37, %f26, %f36;
	ex2.approx.ftz.f32 	%f38, %f37;
	lg2.approx.ftz.f32 	%f39, %f38;
	mul.ftz.f32 	%f40, %f28, %f39;
	ex2.approx.ftz.f32 	%f41, %f40;
	add.ftz.f32 	%f42, %f35, %f41;
	.loc	23	59	0
	ld.f32 	%f43, [%rd6+76];
	add.ftz.f32 	%f44, %f43, %f33;
	lg2.approx.ftz.f32 	%f45, %f14;
	mul.ftz.f32 	%f46, %f26, %f45;
	ex2.approx.ftz.f32 	%f47, %f46;
	lg2.approx.ftz.f32 	%f48, %f47;
	mul.ftz.f32 	%f49, %f30, %f48;
	ex2.approx.ftz.f32 	%f50, %f49;
	add.ftz.f32 	%f51, %f44, %f50;
	.loc	23	60	0
	ld.f32 	%f52, [%rd6+88];
	add.ftz.f32 	%f53, %f52, %f33;
	lg2.approx.ftz.f32 	%f54, %f16;
	mul.ftz.f32 	%f55, %f26, %f54;
	ex2.approx.ftz.f32 	%f56, %f55;
	lg2.approx.ftz.f32 	%f57, %f56;
	mul.ftz.f32 	%f58, %f32, %f57;
	ex2.approx.ftz.f32 	%f59, %f58;
	add.ftz.f32 	%f60, %f53, %f59;
	.loc	23	63	0
	ld.f32 	%f61, [%rd6+56];
	ld.f32 	%f62, [%rd6+68];
	mul.ftz.f32 	%f63, %f62, %f61;
	mul.ftz.f32 	%f12, %f42, %f63;
	.loc	23	64	0
	ld.f32 	%f64, [%rd6+80];
	mul.ftz.f32 	%f65, %f64, %f61;
	mul.ftz.f32 	%f14, %f51, %f65;
	.loc	23	65	0
	ld.f32 	%f66, [%rd6+92];
	mul.ftz.f32 	%f67, %f66, %f61;
	mul.ftz.f32 	%f16, %f60, %f67;
	bra.uni 	$Lt_23_4610;
$Lt_23_4866:
	ld.f32 	%f68, [%rd2+8];
	setp.lt.ftz.f32 	%p3, %f68, %f24;
	@!%p3 bra 	$Lt_23_5378;
	.loc	23	70	0
	ld.f32 	%f69, [%rd4+48];
	rcp.approx.ftz.f32 	%f70, %f69;
	.loc	23	71	0
	ld.f32 	%f71, [%rd4+60];
	rcp.approx.ftz.f32 	%f72, %f71;
	.loc	23	73	0
	ld.f32 	%f73, [%rd4+72];
	rcp.approx.ftz.f32 	%f74, %f73;
	.loc	23	75	0
	ld.f32 	%f75, [%rd4+84];
	rcp.approx.ftz.f32 	%f76, %f75;
	.loc	23	78	0
	ld.f32 	%f77, [%rd4+52];
	ld.f32 	%f78, [%rd4+64];
	add.ftz.f32 	%f79, %f78, %f77;
	lg2.approx.ftz.f32 	%f80, %f12;
	mul.ftz.f32 	%f81, %f70, %f80;
	ex2.approx.ftz.f32 	%f82, %f81;
	lg2.approx.ftz.f32 	%f83, %f82;
	mul.ftz.f32 	%f84, %f72, %f83;
	ex2.approx.ftz.f32 	%f85, %f84;
	add.ftz.f32 	%f86, %f79, %f85;
	.loc	23	79	0
	ld.f32 	%f87, [%rd4+76];
	add.ftz.f32 	%f88, %f87, %f77;
	lg2.approx.ftz.f32 	%f89, %f14;
	mul.ftz.f32 	%f90, %f70, %f89;
	ex2.approx.ftz.f32 	%f91, %f90;
	lg2.approx.ftz.f32 	%f92, %f91;
	mul.ftz.f32 	%f93, %f74, %f92;
	ex2.approx.ftz.f32 	%f94, %f93;
	add.ftz.f32 	%f95, %f88, %f94;
	.loc	23	80	0
	ld.f32 	%f96, [%rd4+88];
	add.ftz.f32 	%f97, %f96, %f77;
	lg2.approx.ftz.f32 	%f98, %f16;
	mul.ftz.f32 	%f99, %f70, %f98;
	ex2.approx.ftz.f32 	%f100, %f99;
	lg2.approx.ftz.f32 	%f101, %f100;
	mul.ftz.f32 	%f102, %f76, %f101;
	ex2.approx.ftz.f32 	%f103, %f102;
	add.ftz.f32 	%f104, %f97, %f103;
	.loc	23	83	0
	ld.f32 	%f105, [%rd4+56];
	ld.f32 	%f106, [%rd4+68];
	mul.ftz.f32 	%f107, %f106, %f105;
	mul.ftz.f32 	%f12, %f86, %f107;
	.loc	23	84	0
	ld.f32 	%f108, [%rd4+80];
	mul.ftz.f32 	%f109, %f108, %f105;
	mul.ftz.f32 	%f14, %f95, %f109;
	.loc	23	85	0
	ld.f32 	%f110, [%rd4+92];
	mul.ftz.f32 	%f111, %f110, %f105;
	mul.ftz.f32 	%f16, %f104, %f111;
	bra.uni 	$Lt_23_5122;
$Lt_23_5378:
	ld.f32 	%f112, [%rd2+4];
	ld.f32 	%f113, [%rd6+0];
	ld.f32 	%f114, [%rd6+12];
	ld.f32 	%f115, [%rd6+24];
	ld.f32 	%f116, [%rd6+36];
	ld.f32 	%f117, [%rd6+4];
	ld.f32 	%f118, [%rd6+16];
	ld.f32 	%f119, [%rd6+28];
	ld.f32 	%f120, [%rd6+40];
	ld.f32 	%f121, [%rd6+8];
	ld.f32 	%f122, [%rd6+20];
	ld.f32 	%f123, [%rd6+32];
	ld.f32 	%f124, [%rd6+44];
	add.ftz.f32 	%f125, %f18, %f112;
	setp.lt.ftz.f32 	%p4, %f24, %f125;
	@!%p4 bra 	$Lt_23_5890;
	.loc	23	90	0
	sub.ftz.f32 	%f126, %f24, %f18;
	.loc	23	93	0
	div.approx.ftz.f32 	%f127, %f126, %f112;
	mov.f32 	%f128, 0f3f800000;   	// 1
	sub.ftz.f32 	%f129, %f128, %f127;
	ld.f32 	%f130, [%rd6+48];
	mul.ftz.f32 	%f131, %f130, %f127;
	fma.rn.ftz.f32 	%f132, %f113, %f129, %f131;
	.loc	23	94	0
	ld.f32 	%f133, [%rd6+60];
	mul.ftz.f32 	%f134, %f133, %f127;
	fma.rn.ftz.f32 	%f135, %f114, %f129, %f134;
	.loc	23	95	0
	ld.f32 	%f136, [%rd6+72];
	mul.ftz.f32 	%f137, %f136, %f127;
	fma.rn.ftz.f32 	%f138, %f115, %f129, %f137;
	.loc	23	96	0
	ld.f32 	%f139, [%rd6+84];
	mul.ftz.f32 	%f140, %f139, %f127;
	fma.rn.ftz.f32 	%f141, %f116, %f129, %f140;
	.loc	23	105	0
	ld.f32 	%f142, [%rd6+52];
	mul.ftz.f32 	%f143, %f142, %f127;
	fma.rn.ftz.f32 	%f144, %f117, %f129, %f143;
	.loc	23	111	0
	rcp.approx.ftz.f32 	%f145, %f132;
	ld.f32 	%f146, [%rd6+64];
	mul.ftz.f32 	%f147, %f146, %f127;
	fma.rn.ftz.f32 	%f148, %f118, %f129, %f147;
	add.ftz.f32 	%f149, %f144, %f148;
	lg2.approx.ftz.f32 	%f150, %f12;
	mul.ftz.f32 	%f151, %f145, %f150;
	ex2.approx.ftz.f32 	%f152, %f151;
	lg2.approx.ftz.f32 	%f153, %f152;
	rcp.approx.ftz.f32 	%f154, %f135;
	mul.ftz.f32 	%f155, %f153, %f154;
	ex2.approx.ftz.f32 	%f156, %f155;
	add.ftz.f32 	%f157, %f149, %f156;
	.loc	23	112	0
	ld.f32 	%f158, [%rd6+76];
	mul.ftz.f32 	%f159, %f158, %f127;
	fma.rn.ftz.f32 	%f160, %f119, %f129, %f159;
	add.ftz.f32 	%f161, %f144, %f160;
	lg2.approx.ftz.f32 	%f162, %f14;
	mul.ftz.f32 	%f163, %f145, %f162;
	ex2.approx.ftz.f32 	%f164, %f163;
	lg2.approx.ftz.f32 	%f165, %f164;
	rcp.approx.ftz.f32 	%f166, %f138;
	mul.ftz.f32 	%f167, %f165, %f166;
	ex2.approx.ftz.f32 	%f168, %f167;
	add.ftz.f32 	%f169, %f161, %f168;
	.loc	23	113	0
	ld.f32 	%f170, [%rd6+88];
	mul.ftz.f32 	%f171, %f170, %f127;
	fma.rn.ftz.f32 	%f172, %f120, %f129, %f171;
	add.ftz.f32 	%f173, %f144, %f172;
	lg2.approx.ftz.f32 	%f174, %f16;
	mul.ftz.f32 	%f175, %f145, %f174;
	ex2.approx.ftz.f32 	%f176, %f175;
	lg2.approx.ftz.f32 	%f177, %f176;
	rcp.approx.ftz.f32 	%f178, %f141;
	mul.ftz.f32 	%f179, %f177, %f178;
	ex2.approx.ftz.f32 	%f180, %f179;
	add.ftz.f32 	%f181, %f173, %f180;
	.loc	23	115	0
	ld.f32 	%f182, [%rd6+56];
	mul.ftz.f32 	%f183, %f182, %f127;
	fma.rn.ftz.f32 	%f184, %f121, %f129, %f183;
	.loc	23	121	0
	ld.f32 	%f185, [%rd6+68];
	mul.ftz.f32 	%f186, %f185, %f127;
	fma.rn.ftz.f32 	%f187, %f122, %f129, %f186;
	mul.ftz.f32 	%f188, %f184, %f187;
	mul.ftz.f32 	%f12, %f157, %f188;
	.loc	23	122	0
	ld.f32 	%f189, [%rd6+80];
	mul.ftz.f32 	%f190, %f189, %f127;
	fma.rn.ftz.f32 	%f191, %f123, %f129, %f190;
	mul.ftz.f32 	%f192, %f184, %f191;
	mul.ftz.f32 	%f14, %f169, %f192;
	.loc	23	123	0
	ld.f32 	%f193, [%rd6+92];
	mul.ftz.f32 	%f194, %f193, %f127;
	fma.rn.ftz.f32 	%f195, %f124, %f129, %f194;
	mul.ftz.f32 	%f196, %f184, %f195;
	mul.ftz.f32 	%f16, %f181, %f196;
	bra.uni 	$Lt_23_5634;
$Lt_23_5890:
	ld.f32 	%f197, [%rd2+12];
	sub.ftz.f32 	%f198, %f68, %f197;
	setp.lt.ftz.f32 	%p5, %f198, %f24;
	@!%p5 bra 	$Lt_23_6402;
	.loc	23	128	0
	sub.ftz.f32 	%f199, %f24, %f198;
	.loc	23	131	0
	div.approx.ftz.f32 	%f200, %f199, %f197;
	mov.f32 	%f201, 0f3f800000;   	// 1
	sub.ftz.f32 	%f202, %f201, %f200;
	ld.f32 	%f203, [%rd4+48];
	mul.ftz.f32 	%f204, %f203, %f200;
	fma.rn.ftz.f32 	%f205, %f113, %f202, %f204;
	.loc	23	132	0
	ld.f32 	%f206, [%rd4+60];
	mul.ftz.f32 	%f207, %f206, %f200;
	fma.rn.ftz.f32 	%f208, %f114, %f202, %f207;
	.loc	23	133	0
	ld.f32 	%f209, [%rd4+72];
	mul.ftz.f32 	%f210, %f209, %f200;
	fma.rn.ftz.f32 	%f211, %f115, %f202, %f210;
	.loc	23	134	0
	ld.f32 	%f212, [%rd4+84];
	mul.ftz.f32 	%f213, %f212, %f200;
	fma.rn.ftz.f32 	%f214, %f116, %f202, %f213;
	.loc	23	143	0
	ld.f32 	%f215, [%rd4+52];
	mul.ftz.f32 	%f216, %f215, %f200;
	fma.rn.ftz.f32 	%f217, %f117, %f202, %f216;
	.loc	23	149	0
	rcp.approx.ftz.f32 	%f218, %f205;
	ld.f32 	%f219, [%rd4+64];
	mul.ftz.f32 	%f220, %f219, %f200;
	fma.rn.ftz.f32 	%f221, %f118, %f202, %f220;
	add.ftz.f32 	%f222, %f217, %f221;
	lg2.approx.ftz.f32 	%f223, %f12;
	mul.ftz.f32 	%f224, %f218, %f223;
	ex2.approx.ftz.f32 	%f225, %f224;
	lg2.approx.ftz.f32 	%f226, %f225;
	rcp.approx.ftz.f32 	%f227, %f208;
	mul.ftz.f32 	%f228, %f226, %f227;
	ex2.approx.ftz.f32 	%f229, %f228;
	add.ftz.f32 	%f230, %f222, %f229;
	.loc	23	150	0
	ld.f32 	%f231, [%rd4+76];
	mul.ftz.f32 	%f232, %f231, %f200;
	fma.rn.ftz.f32 	%f233, %f119, %f202, %f232;
	add.ftz.f32 	%f234, %f217, %f233;
	lg2.approx.ftz.f32 	%f235, %f14;
	mul.ftz.f32 	%f236, %f218, %f235;
	ex2.approx.ftz.f32 	%f237, %f236;
	lg2.approx.ftz.f32 	%f238, %f237;
	rcp.approx.ftz.f32 	%f239, %f211;
	mul.ftz.f32 	%f240, %f238, %f239;
	ex2.approx.ftz.f32 	%f241, %f240;
	add.ftz.f32 	%f242, %f234, %f241;
	.loc	23	151	0
	ld.f32 	%f243, [%rd4+88];
	mul.ftz.f32 	%f244, %f243, %f200;
	fma.rn.ftz.f32 	%f245, %f120, %f202, %f244;
	add.ftz.f32 	%f246, %f217, %f245;
	lg2.approx.ftz.f32 	%f247, %f16;
	mul.ftz.f32 	%f248, %f218, %f247;
	ex2.approx.ftz.f32 	%f249, %f248;
	lg2.approx.ftz.f32 	%f250, %f249;
	rcp.approx.ftz.f32 	%f251, %f214;
	mul.ftz.f32 	%f252, %f250, %f251;
	ex2.approx.ftz.f32 	%f253, %f252;
	add.ftz.f32 	%f254, %f246, %f253;
	.loc	23	153	0
	ld.f32 	%f255, [%rd4+56];
	mul.ftz.f32 	%f256, %f255, %f200;
	fma.rn.ftz.f32 	%f257, %f121, %f202, %f256;
	.loc	23	159	0
	ld.f32 	%f258, [%rd4+68];
	mul.ftz.f32 	%f259, %f258, %f200;
	fma.rn.ftz.f32 	%f260, %f122, %f202, %f259;
	mul.ftz.f32 	%f261, %f257, %f260;
	mul.ftz.f32 	%f12, %f230, %f261;
	.loc	23	160	0
	ld.f32 	%f262, [%rd4+80];
	mul.ftz.f32 	%f263, %f262, %f200;
	fma.rn.ftz.f32 	%f264, %f123, %f202, %f263;
	mul.ftz.f32 	%f265, %f257, %f264;
	mul.ftz.f32 	%f14, %f242, %f265;
	.loc	23	161	0
	ld.f32 	%f266, [%rd4+92];
	mul.ftz.f32 	%f267, %f266, %f200;
	fma.rn.ftz.f32 	%f268, %f124, %f202, %f267;
	mul.ftz.f32 	%f269, %f257, %f268;
	mul.ftz.f32 	%f16, %f254, %f269;
	bra.uni 	$Lt_23_6146;
$Lt_23_6402:
	.loc	23	166	0
	rcp.approx.ftz.f32 	%f270, %f113;
	.loc	23	167	0
	rcp.approx.ftz.f32 	%f271, %f114;
	.loc	23	169	0
	rcp.approx.ftz.f32 	%f272, %f115;
	.loc	23	171	0
	rcp.approx.ftz.f32 	%f273, %f116;
	.loc	23	174	0
	add.ftz.f32 	%f274, %f117, %f118;
	lg2.approx.ftz.f32 	%f275, %f12;
	mul.ftz.f32 	%f276, %f270, %f275;
	ex2.approx.ftz.f32 	%f277, %f276;
	lg2.approx.ftz.f32 	%f278, %f277;
	mul.ftz.f32 	%f279, %f271, %f278;
	ex2.approx.ftz.f32 	%f280, %f279;
	add.ftz.f32 	%f281, %f274, %f280;
	.loc	23	175	0
	add.ftz.f32 	%f282, %f117, %f119;
	lg2.approx.ftz.f32 	%f283, %f14;
	mul.ftz.f32 	%f284, %f270, %f283;
	ex2.approx.ftz.f32 	%f285, %f284;
	lg2.approx.ftz.f32 	%f286, %f285;
	mul.ftz.f32 	%f287, %f272, %f286;
	ex2.approx.ftz.f32 	%f288, %f287;
	add.ftz.f32 	%f289, %f282, %f288;
	.loc	23	176	0
	add.ftz.f32 	%f290, %f117, %f120;
	lg2.approx.ftz.f32 	%f291, %f16;
	mul.ftz.f32 	%f292, %f270, %f291;
	ex2.approx.ftz.f32 	%f293, %f292;
	lg2.approx.ftz.f32 	%f294, %f293;
	mul.ftz.f32 	%f295, %f273, %f294;
	ex2.approx.ftz.f32 	%f296, %f295;
	add.ftz.f32 	%f297, %f290, %f296;
	.loc	23	179	0
	mul.ftz.f32 	%f298, %f121, %f122;
	mul.ftz.f32 	%f12, %f281, %f298;
	.loc	23	180	0
	mul.ftz.f32 	%f299, %f121, %f123;
	mul.ftz.f32 	%f14, %f289, %f299;
	.loc	23	181	0
	mul.ftz.f32 	%f300, %f121, %f124;
	mul.ftz.f32 	%f16, %f297, %f300;
$Lt_23_6146:
$Lt_23_5634:
$Lt_23_5122:
$Lt_23_4610:
	.loc	23	185	0
	ld.f32 	%f301, [%rd4+0];
	rcp.approx.ftz.f32 	%f302, %f301;
	.loc	23	186	0
	ld.f32 	%f303, [%rd4+12];
	rcp.approx.ftz.f32 	%f304, %f303;
	.loc	23	188	0
	ld.f32 	%f305, [%rd4+24];
	rcp.approx.ftz.f32 	%f306, %f305;
	.loc	23	190	0
	ld.f32 	%f307, [%rd4+36];
	rcp.approx.ftz.f32 	%f308, %f307;
	.loc	23	193	0
	ld.f32 	%f309, [%rd4+4];
	ld.f32 	%f310, [%rd4+16];
	add.ftz.f32 	%f311, %f310, %f309;
	lg2.approx.ftz.f32 	%f312, %f12;
	mul.ftz.f32 	%f313, %f302, %f312;
	ex2.approx.ftz.f32 	%f314, %f313;
	lg2.approx.ftz.f32 	%f315, %f314;
	mul.ftz.f32 	%f316, %f304, %f315;
	ex2.approx.ftz.f32 	%f317, %f316;
	add.ftz.f32 	%f318, %f311, %f317;
	.loc	23	194	0
	ld.f32 	%f319, [%rd4+28];
	add.ftz.f32 	%f320, %f319, %f309;
	lg2.approx.ftz.f32 	%f321, %f14;
	mul.ftz.f32 	%f322, %f302, %f321;
	ex2.approx.ftz.f32 	%f323, %f322;
	lg2.approx.ftz.f32 	%f324, %f323;
	mul.ftz.f32 	%f325, %f306, %f324;
	ex2.approx.ftz.f32 	%f326, %f325;
	add.ftz.f32 	%f327, %f320, %f326;
	.loc	23	195	0
	ld.f32 	%f328, [%rd4+40];
	add.ftz.f32 	%f329, %f328, %f309;
	lg2.approx.ftz.f32 	%f330, %f16;
	mul.ftz.f32 	%f331, %f302, %f330;
	ex2.approx.ftz.f32 	%f332, %f331;
	lg2.approx.ftz.f32 	%f333, %f332;
	mul.ftz.f32 	%f334, %f308, %f333;
	ex2.approx.ftz.f32 	%f335, %f334;
	add.ftz.f32 	%f336, %f329, %f335;
	.loc	23	198	0
	ld.f32 	%f337, [%rd4+8];
	ld.f32 	%f338, [%rd4+20];
	mul.ftz.f32 	%f339, %f338, %f337;
	mul.ftz.f32 	%f340, %f318, %f339;
	.loc	23	199	0
	ld.f32 	%f341, [%rd4+32];
	mul.ftz.f32 	%f342, %f341, %f337;
	mul.ftz.f32 	%f343, %f327, %f342;
	.loc	23	200	0
	ld.f32 	%f344, [%rd4+44];
	mul.ftz.f32 	%f345, %f344, %f337;
	mul.ftz.f32 	%f346, %f336, %f345;
	.loc	23	203	0
	sub.ftz.f32 	%f347, %f340, %f6;
	fma.rn.ftz.f32 	%f12, %f10, %f347, %f6;
	.loc	23	204	0
	sub.ftz.f32 	%f348, %f343, %f4;
	fma.rn.ftz.f32 	%f14, %f10, %f348, %f4;
	.loc	23	205	0
	sub.ftz.f32 	%f349, %f346, %f2;
	fma.rn.ftz.f32 	%f16, %f10, %f349, %f2;
$Lt_23_4098:
	.loc	23	208	0
	mov.f32 	%f350, %f16;
	st.param.f32 	[__cudaretf__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow+0], %f350;
	mov.f32 	%f351, %f14;
	st.param.f32 	[__cudaretf__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow+4], %f351;
	mov.f32 	%f352, %f12;
	st.param.f32 	[__cudaretf__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow+8], %f352;
	mov.f32 	%f353, %f8;
	st.param.f32 	[__cudaretf__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow+12], %f353;
	ret;
$LDWend__Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow:
	} // _Z13CalculateBGRA8PixelRGBfRK38RGBColorCorrectorParameters_TonalRangeRK43RGBColorCorrectorParameters_MasterHighlightRK41RGBColorCorrectorParameters_MidtoneShadow

	// LERP<float>(parm1, parm2, parm3)
	//   returns parm1 + parm3 * (parm2 - parm1), evaluated as one subtract
	//   followed by a fused multiply-add (round-to-nearest, flush-to-zero).
	.visible .func (.param .f32 __cudaretf__Z4LERPIfET_S0_S0_S0_) _Z4LERPIfET_S0_S0_S0_ (.param .f32 __cudaparmf1__Z4LERPIfET_S0_S0_S0_, .param .f32 __cudaparmf2__Z4LERPIfET_S0_S0_S0_, .param .f32 __cudaparmf3__Z4LERPIfET_S0_S0_S0_)
	{
	.reg .f32 %f_a, %f_b, %f_t, %f_span, %f_out;
	.loc	24	78	0
$LDWbegin__Z4LERPIfET_S0_S0_S0_:
	// Fetch the three scalar arguments straight into their working registers.
	ld.param.f32 	%f_a, [__cudaparmf1__Z4LERPIfET_S0_S0_S0_];
	ld.param.f32 	%f_b, [__cudaparmf2__Z4LERPIfET_S0_S0_S0_];
	ld.param.f32 	%f_t, [__cudaparmf3__Z4LERPIfET_S0_S0_S0_];
	.loc	24	79	0
	// span = b - a ; out = t * span + a
	sub.ftz.f32 	%f_span, %f_b, %f_a;
	fma.rn.ftz.f32 	%f_out, %f_t, %f_span, %f_a;
	st.param.f32 	[__cudaretf__Z4LERPIfET_S0_S0_S0_], %f_out;
	ret;
$LDWend__Z4LERPIfET_S0_S0_S0_:
	} // _Z4LERPIfET_S0_S0_S0_

	// Read2D<float>(parm1, parm2, parm3, parm4)
	//   Loads one 32-bit float from parm1 at element index
	//   (parm2 * parm4 + parm3). The index is computed in 32-bit signed
	//   arithmetic and then widened to a 64-bit byte offset (x4).
	//   NOTE(review): parm2/parm4 are presumably pitch and row, parm3 the
	//   column -- confirm against the CUDA source.
	.visible .func (.param .f32 __cudaretf__Z6Read2DIfET_PKS0_iii) _Z6Read2DIfET_PKS0_iii (.param .u64 __cudaparmf1__Z6Read2DIfET_PKS0_iii, .param .s32 __cudaparmf2__Z6Read2DIfET_PKS0_iii, .param .s32 __cudaparmf3__Z6Read2DIfET_PKS0_iii, .param .s32 __cudaparmf4__Z6Read2DIfET_PKS0_iii)
	{
	.reg .u32 %r_p2, %r_p3, %r_p4, %r_idx;
	.reg .u64 %rd_base, %rd_off, %rd_addr;
	.reg .f32 %f_val;
	.loc	21	114	0
$LDWbegin__Z6Read2DIfET_PKS0_iii:
	ld.param.u64 	%rd_base, [__cudaparmf1__Z6Read2DIfET_PKS0_iii];
	ld.param.u32 	%r_p2, [__cudaparmf2__Z6Read2DIfET_PKS0_iii];
	ld.param.u32 	%r_p3, [__cudaparmf3__Z6Read2DIfET_PKS0_iii];
	ld.param.u32 	%r_p4, [__cudaparmf4__Z6Read2DIfET_PKS0_iii];
	.loc	21	115	0
	// idx = parm2 * parm4 + parm3 (low 32 bits, wraps like the mul/add pair)
	mad.lo.s32 	%r_idx, %r_p2, %r_p4, %r_p3;
	// byte offset = sign-extended idx * sizeof(float)
	mul.wide.s32 	%rd_off, %r_idx, 4;
	add.u64 	%rd_addr, %rd_base, %rd_off;
	// Generic-address load: the pointer's state space is not known here.
	ld.f32 	%f_val, [%rd_addr+0];
	st.param.f32 	[__cudaretf__Z6Read2DIfET_PKS0_iii], %f_val;
	ret;
$LDWend__Z6Read2DIfET_PKS0_iii:
	} // _Z6Read2DIfET_PKS0_iii

	// RGBColorCorrector_MaskKernel
	//   One thread per pixel (x = ctaid.x*ntid.x + tid.x, y likewise).
	//   Threads with x >= inWidth or y >= inHeight (signed compare) exit
	//   without touching memory. Every other thread writes
	//   {m, m, m, 1.0} to inImage[inPitch*y + x], where m is
	//   inSecondaryMask[inSecondaryPitch*y + x] (32-bit float) when the mask
	//   pointer is non-null and 1.0 otherwise.
	//   inDeviceFormat == 0 : pixel is 4 x f16 (8 bytes), values converted
	//                         with round-to-nearest.
	//   inDeviceFormat != 0 : pixel is 4 x f32 (16 bytes).
	.entry RGBColorCorrector_MaskKernel (
		.param .u64 __cudaparm_RGBColorCorrector_MaskKernel_inImage,
		.param .s32 __cudaparm_RGBColorCorrector_MaskKernel_inPitch,
		.param .u64 __cudaparm_RGBColorCorrector_MaskKernel_inSecondaryMask,
		.param .s32 __cudaparm_RGBColorCorrector_MaskKernel_inSecondaryPitch,
		.param .u32 __cudaparm_RGBColorCorrector_MaskKernel_inDeviceFormat,
		.param .s32 __cudaparm_RGBColorCorrector_MaskKernel_inWidth,
		.param .s32 __cudaparm_RGBColorCorrector_MaskKernel_inHeight)
	{
	.reg .u32 %r_bx, %r_bdx, %r_tx, %r_by, %r_bdy, %r_ty, %r_x, %r_y;
	.reg .u32 %r_w, %r_h, %r_mpitch, %r_midx, %r_pitch, %r_idx, %r_fmt;
	.reg .u32 %r_hmask, %r_hone;
	.reg .u64 %rd_mask, %rd_moff, %rd_maddr, %rd_idx, %rd_img, %rd_off, %rd_addr;
	.reg .f32 %f_mask, %f_one;
	.reg .pred %p_inx, %p_iny, %p_in, %p_nomask, %p_f32;
	.loc	23	221	0
$LDWbegin_RGBColorCorrector_MaskKernel:
	.loc	23	224	0
	// Global pixel coordinates of this thread.
	mov.u32 	%r_bx, %ctaid.x;
	mov.u32 	%r_bdx, %ntid.x;
	mov.u32 	%r_tx, %tid.x;
	mad.lo.s32 	%r_x, %r_bx, %r_bdx, %r_tx;
	mov.u32 	%r_by, %ctaid.y;
	mov.u32 	%r_bdy, %ntid.y;
	mov.u32 	%r_ty, %tid.y;
	mad.lo.s32 	%r_y, %r_by, %r_bdy, %r_ty;
	// Leave unless (inWidth > x) && (inHeight > y), both compared as signed.
	ld.param.s32 	%r_w, [__cudaparm_RGBColorCorrector_MaskKernel_inWidth];
	ld.param.s32 	%r_h, [__cudaparm_RGBColorCorrector_MaskKernel_inHeight];
	setp.gt.s32 	%p_inx, %r_w, %r_x;
	setp.gt.s32 	%p_iny, %r_h, %r_y;
	and.pred 	%p_in, %p_inx, %p_iny;
	@!%p_in bra 	$Lt_26_3842;
	// Mask value: start from 1.0 and overwrite it when a mask buffer exists.
	ld.param.u64 	%rd_mask, [__cudaparm_RGBColorCorrector_MaskKernel_inSecondaryMask];
	mov.f32 	%f_mask, 0f3f800000;     	// 1
	setp.eq.u64 	%p_nomask, %rd_mask, 0;
	@%p_nomask bra 	$Lt_26_4354;
	.loc	23	229	0
	ld.param.s32 	%r_mpitch, [__cudaparm_RGBColorCorrector_MaskKernel_inSecondaryPitch];
	mad.lo.s32 	%r_midx, %r_mpitch, %r_y, %r_x;
	mul.wide.s32 	%rd_moff, %r_midx, 4;
	add.u64 	%rd_maddr, %rd_mask, %rd_moff;
	ld.global.f32 	%f_mask, [%rd_maddr+0];
$Lt_26_4354:
	// Destination pixel index (32-bit signed, then sign-extended).
	ld.param.s32 	%r_pitch, [__cudaparm_RGBColorCorrector_MaskKernel_inPitch];
	mad.lo.s32 	%r_idx, %r_pitch, %r_y, %r_x;
	cvt.s64.s32 	%rd_idx, %r_idx;
	ld.param.u64 	%rd_img, [__cudaparm_RGBColorCorrector_MaskKernel_inImage];
	ld.param.s32 	%r_fmt, [__cudaparm_RGBColorCorrector_MaskKernel_inDeviceFormat];
	mov.f32 	%f_one, 0f3f800000;     	// 1
	setp.ne.s32 	%p_f32, %r_fmt, 0;
	@%p_f32 bra 	$Lt_26_5122;
	.loc	21	126	0
	// Format 0: 8-byte pixel, four f16 lanes.
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f_mask;
	mov.b32		%r_hmask, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f_one;
	mov.b32		%r_hone, %b1; }
	mul.lo.u64 	%rd_off, %rd_idx, 8;
	add.u64 	%rd_addr, %rd_img, %rd_off;
	st.global.v4.u16 	[%rd_addr+0], {%r_hmask,%r_hmask,%r_hmask,%r_hone};
	.loc	23	233	0
	bra.uni 	$Lt_26_4866;
$Lt_26_5122:
	.loc	21	126	0
	// Any other format: 16-byte pixel, four f32 lanes.
	mul.lo.u64 	%rd_off, %rd_idx, 16;
	add.u64 	%rd_addr, %rd_img, %rd_off;
	st.global.v4.f32 	[%rd_addr+0], {%f_mask,%f_mask,%f_mask,%f_one};
$Lt_26_4866:
$Lt_26_3842:
	.loc	23	235	0
	exit;
$LDWend_RGBColorCorrector_MaskKernel:
	} // RGBColorCorrector_MaskKernel

	// RGBColorCorrector_TonalRangeKernel
	//   One thread per pixel. Each in-bounds thread reads the 4-lane pixel at
	//   inImage[inPitch*y + x], derives a single scalar from lanes 0..2, the
	//   four floats of inTonalRangeParameters and the optional secondary mask,
	//   and writes {v, v, v, lane3} back to the same pixel (lane 3 is passed
	//   through unchanged, apart from the f16 round trip in format 0).
	//   inDeviceFormat == 0 : pixel is 4 x f16 (8 bytes);
	//   inDeviceFormat != 0 : pixel is 4 x f32 (16 bytes).
	//   In the comments below P0, P1, P2, P3 are the floats at byte offsets
	//   0, 4, 8, 12 of inTonalRangeParameters.
	.entry RGBColorCorrector_TonalRangeKernel (
		.param .u64 __cudaparm_RGBColorCorrector_TonalRangeKernel_inImage,
		.param .s32 __cudaparm_RGBColorCorrector_TonalRangeKernel_inPitch,
		.param .u64 __cudaparm_RGBColorCorrector_TonalRangeKernel_inSecondaryMask,
		.param .s32 __cudaparm_RGBColorCorrector_TonalRangeKernel_inSecondaryPitch,
		.param .u32 __cudaparm_RGBColorCorrector_TonalRangeKernel_inDeviceFormat,
		.param .s32 __cudaparm_RGBColorCorrector_TonalRangeKernel_inWidth,
		.param .s32 __cudaparm_RGBColorCorrector_TonalRangeKernel_inHeight,
		.param .align 4 .b8 __cudaparm_RGBColorCorrector_TonalRangeKernel_inTonalRangeParameters[16])
	{
	.reg .u32 %r<34>;
	.reg .u64 %rd<17>;
	.reg .f32 %f<42>;
	.reg .pred %p<13>;
	.loc	23	248	0
$LDWbegin_RGBColorCorrector_TonalRangeKernel:
	.loc	23	251	0
	// %r8 = x = ctaid.x*ntid.x + tid.x ; %r10 = y = ctaid.y*ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: %r17 is non-zero only when inWidth > x and inHeight > y
	// (signed compares); otherwise the thread jumps to the exit label.
	ld.param.s32 	%r11, [__cudaparm_RGBColorCorrector_TonalRangeKernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_RGBColorCorrector_TonalRangeKernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_27_11778;
	// %p2 = (inDeviceFormat == 0); reused for both the load and the store.
	ld.param.s32 	%r19, [__cudaparm_RGBColorCorrector_TonalRangeKernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = pixel index inPitch*y + x (32-bit signed, then sign-extended).
	ld.param.s32 	%r21, [__cudaparm_RGBColorCorrector_TonalRangeKernel_inPitch];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_RGBColorCorrector_TonalRangeKernel_inImage];
	@!%p2 bra 	$Lt_27_12546;
	.loc	21	115	0
	// Format 0: load four 16-bit lanes (8-byte pixel) and widen each f16
	// lane to f32 in %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	23	254	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_27_12290;
$Lt_27_12546:
	// Other formats: load four f32 lanes (16-byte pixel) into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_27_12290:
	.loc	20	35	0
	// %f10 = K[0]*lane2 + K[1]*lane1 + K[2]*lane0, with K the first three
	// floats of the constant table kRGB32f_To_601YPbPr.
	// NOTE(review): presumably a Rec.601 luma of a BGR-ordered pixel, going
	// by the table name -- confirm against the CUDA source.
	ld.const.f32 	%f5, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f6, %f5, %f2;
	ld.const.f32 	%f7, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f8, %f7, %f3, %f6;
	ld.const.f32 	%f9, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f10, %f9, %f1, %f8;
	// Lower-range weight %f13 (%f11 = P1, %f12 = P0):
	//   %f10 <= P0       -> 1
	//   %f10 >= P0 + P1  -> 0
	//   otherwise        -> (%f10 - P0) / P1   (approximate divide)
	// NOTE(review): the middle expression rises from 0 to 1 while the two
	// branches around it yield 1 and 0, so the weight is discontinuous at
	// both thresholds -- confirm this is intended in the CUDA source.
	ld.param.f32 	%f11, [__cudaparm_RGBColorCorrector_TonalRangeKernel_inTonalRangeParameters+4];
	ld.param.f32 	%f12, [__cudaparm_RGBColorCorrector_TonalRangeKernel_inTonalRangeParameters+0];
	setp.le.ftz.f32 	%p3, %f10, %f12;
	@!%p3 bra 	$Lt_27_8450;
	.loc	20	37	0
	mov.f32 	%f13, 0f3f800000;    	// 1
	bra.uni 	$LDWendi_fdividef_204_3;
$Lt_27_8450:
	.loc	20	39	0
	add.ftz.f32 	%f14, %f11, %f12;
	setp.ge.ftz.f32 	%p4, %f10, %f14;
	@!%p4 bra 	$Lt_27_8706;
	.loc	20	41	0
	mov.f32 	%f13, 0f00000000;    	// 0
	bra.uni 	$LDWendi_fdividef_204_3;
$Lt_27_8706:
	.loc	20	45	0
	sub.ftz.f32 	%f15, %f10, %f12;
	div.approx.ftz.f32 	%f13, %f15, %f11;
$LDWendi_fdividef_204_3:
	.loc	20	53	0
	// Upper-range weight %f19 (%f16 = P3, %f17 = P2, %f18 = P2 - P3):
	//   %f10 < P2 - P3   -> 0
	//   %f10 > P2        -> 1
	//   otherwise        -> (%f10 - (P2 - P3)) / P3   (approximate divide)
	ld.param.f32 	%f16, [__cudaparm_RGBColorCorrector_TonalRangeKernel_inTonalRangeParameters+12];
	ld.param.f32 	%f17, [__cudaparm_RGBColorCorrector_TonalRangeKernel_inTonalRangeParameters+8];
	sub.ftz.f32 	%f18, %f17, %f16;
	setp.gt.ftz.f32 	%p5, %f18, %f10;
	@!%p5 bra 	$Lt_27_8962;
	.loc	20	55	0
	mov.f32 	%f19, 0f00000000;    	// 0
	bra.uni 	$LDWendi_fdividef_204_1;
$Lt_27_8962:
	.loc	20	57	0
	setp.gt.ftz.f32 	%p6, %f10, %f17;
	@!%p6 bra 	$Lt_27_9218;
	.loc	20	59	0
	mov.f32 	%f19, 0f3f800000;    	// 1
	bra.uni 	$LDWendi_fdividef_204_1;
$Lt_27_9218:
	.loc	20	63	0
	sub.ftz.f32 	%f20, %f10, %f18;
	div.approx.ftz.f32 	%f19, %f20, %f16;
$LDWendi_fdividef_204_1:
	.loc	23	264	0
	// Mask factor %f21: inSecondaryMask[inSecondaryPitch*y + x] (f32) when
	// the mask pointer is non-null, otherwise 1.0.
	ld.param.u64 	%rd7, [__cudaparm_RGBColorCorrector_TonalRangeKernel_inSecondaryMask];
	mov.u64 	%rd8, 0;
	setp.eq.u64 	%p7, %rd7, %rd8;
	@%p7 bra 	$Lt_27_13058;
	.loc	23	267	0
	ld.param.s32 	%r28, [__cudaparm_RGBColorCorrector_TonalRangeKernel_inSecondaryPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	// %rd9 is not read again in this kernel.
	cvt.s64.s32 	%rd9, %r30;
	mul.wide.s32 	%rd10, %r30, 4;
	add.u64 	%rd11, %rd7, %rd10;
	ld.global.f32 	%f21, [%rd11+0];
	bra.uni 	$Lt_27_12802;
$Lt_27_13058:
	mov.f32 	%f21, 0f3f800000;    	// 1
$Lt_27_12802:
	// Divisor %f24 = (P3 < P1) ? 2.0 : 1.0.
	setp.lt.ftz.f32 	%p8, %f16, %f11;
	mov.f32 	%f22, 0f40000000;    	// 2
	mov.f32 	%f23, 0f3f800000;    	// 1
	selp.f32 	%f24, %f22, %f23, %p8;
	// Output value %f28 is chosen by a cascade of exact float == 1.0 tests:
	//   A = mask * (lower / divisor)                      ; A == 1 -> 0.0
	//   B = mask * ((divisor - lower - upper) / divisor)  ; B == 1 -> 0.5
	//   C = mask * (upper / divisor)                      ; C == 1 -> 1.0
	//   otherwise                                         -> C + 0.5 * B
	// where lower = %f13 and upper = %f19.
	div.approx.ftz.f32 	%f25, %f13, %f24;
	mul.ftz.f32 	%f26, %f21, %f25;
	mov.f32 	%f27, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p9, %f26, %f27;
	@!%p9 bra 	$Lt_27_13570;
	mov.f32 	%f28, 0f00000000;    	// 0
	bra.uni 	$Lt_27_13314;
$Lt_27_13570:
	sub.ftz.f32 	%f29, %f24, %f13;
	sub.ftz.f32 	%f30, %f29, %f19;
	div.approx.ftz.f32 	%f31, %f30, %f24;
	mul.ftz.f32 	%f32, %f21, %f31;
	mov.f32 	%f33, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p10, %f32, %f33;
	@!%p10 bra 	$Lt_27_14082;
	mov.f32 	%f28, 0f3f000000;    	// 0.5
	bra.uni 	$Lt_27_13826;
$Lt_27_14082:
	div.approx.ftz.f32 	%f34, %f19, %f24;
	mul.ftz.f32 	%f35, %f21, %f34;
	mov.f32 	%f36, 0f3f800000;    	// 1
	setp.eq.ftz.f32 	%p11, %f35, %f36;
	@!%p11 bra 	$Lt_27_14594;
	mov.f32 	%f28, 0f3f800000;    	// 1
	bra.uni 	$Lt_27_14338;
$Lt_27_14594:
	.loc	23	292	0
	mov.f32 	%f37, 0f3f000000;    	// 0.5
	mul.ftz.f32 	%f38, %f32, %f37;
	add.ftz.f32 	%f39, %f35, %f38;
	.loc	23	294	0
	mov.f32 	%f28, %f39;
$Lt_27_14338:
$Lt_27_13826:
$Lt_27_13314:
	// Write back {%f28, %f28, %f28, lane3} in the same format that was read.
	@!%p2 bra 	$Lt_27_15106;
	.loc	21	126	0
	// Format 0: narrow to f16 (round-to-nearest) and store four 16-bit lanes.
	mul.lo.u64 	%rd12, %rd1, 8;
	add.u64 	%rd13, %rd2, %rd12;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f28;
	mov.b32		%r31, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r32, %b1; }
	st.global.v4.u16 	[%rd13+0], {%r31,%r31,%r31,%r32};
	.loc	23	297	0
	bra.uni 	$Lt_27_14850;
$Lt_27_15106:
	.loc	21	126	0
	// Other formats: store four f32 lanes.
	mul.lo.u64 	%rd14, %rd1, 16;
	add.u64 	%rd15, %rd2, %rd14;
	mov.f32 	%f40, %f28;
	st.global.v4.f32 	[%rd15+0], {%f40,%f40,%f40,%f4};
$Lt_27_14850:
$Lt_27_11778:
	.loc	23	299	0
	exit;
$LDWend_RGBColorCorrector_TonalRangeKernel:
	} // RGBColorCorrector_TonalRangeKernel

	.entry RGBColorCorrector_LumaKernel (
		.param .u64 __cudaparm_RGBColorCorrector_LumaKernel_inImage,
		.param .s32 __cudaparm_RGBColorCorrector_LumaKernel_inPitch,
		.param .u64 __cudaparm_RGBColorCorrector_LumaKernel_inSecondaryMask,
		.param .s32 __cudaparm_RGBColorCorrector_LumaKernel_inSecondaryPitch,
		.param .u32 __cudaparm_RGBColorCorrector_LumaKernel_inDeviceFormat,
		.param .s32 __cudaparm_RGBColorCorrector_LumaKernel_inWidth,
		.param .s32 __cudaparm_RGBColorCorrector_LumaKernel_inHeight,
		.param .align 4 .b8 __cudaparm_RGBColorCorrector_LumaKernel___val_paraminTonalRangeParameters[16],
		.param .align 4 .b8 __cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters[96],
		.param .align 4 .b8 __cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters[96])
	{
	.reg .u32 %r<35>;
	.reg .u64 %rd<17>;
	.reg .f32 %f<349>;
	.reg .pred %p<10>;
	.loc	23	314	0
$LDWbegin_RGBColorCorrector_LumaKernel:
	.loc	23	317	0
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	ld.param.s32 	%r11, [__cudaparm_RGBColorCorrector_LumaKernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_RGBColorCorrector_LumaKernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_28_7426;
	ld.param.s32 	%r19, [__cudaparm_RGBColorCorrector_LumaKernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	ld.param.s32 	%r21, [__cudaparm_RGBColorCorrector_LumaKernel_inPitch];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_RGBColorCorrector_LumaKernel_inImage];
	@!%p2 bra 	$Lt_28_8194;
	.loc	21	115	0
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	23	320	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_28_7938;
$Lt_28_8194:
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_28_7938:
	ld.param.u64 	%rd7, [__cudaparm_RGBColorCorrector_LumaKernel_inSecondaryMask];
	mov.u64 	%rd8, 0;
	setp.eq.u64 	%p3, %rd7, %rd8;
	@%p3 bra 	$Lt_28_8706;
	.loc	23	321	0
	ld.param.s32 	%r28, [__cudaparm_RGBColorCorrector_LumaKernel_inSecondaryPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd9, %r30;
	mul.wide.s32 	%rd10, %r30, 4;
	add.u64 	%rd11, %rd7, %rd10;
	ld.global.f32 	%f5, [%rd11+0];
	bra.uni 	$Lt_28_8450;
$Lt_28_8706:
	mov.f32 	%f5, 0f3f800000;     	// 1
$Lt_28_8450:
	.loc	23	41	0
	mov.f32 	%f6, 0f00000000;     	// 0
	max.ftz.f32 	%f7, %f3, %f6;
	.loc	23	42	0
	mov.f32 	%f8, 0f00000000;     	// 0
	max.ftz.f32 	%f9, %f2, %f8;
	.loc	23	43	0
	mov.f32 	%f10, 0f00000000;    	// 0
	max.ftz.f32 	%f11, %f1, %f10;
	ld.const.f32 	%f12, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f13, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f14, [kRGB32f_To_601YPbPr+4];
	mov.f32 	%f15, 0f3a83126f;    	// 0.001
	setp.gt.ftz.f32 	%p4, %f5, %f15;
	@!%p4 bra 	$Lt_28_8962;
	mul.ftz.f32 	%f16, %f2, %f14;
	fma.rn.ftz.f32 	%f17, %f13, %f3, %f16;
	fma.rn.ftz.f32 	%f18, %f12, %f1, %f17;
	ld.param.f32 	%f19, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminTonalRangeParameters+0];
	setp.lt.ftz.f32 	%p5, %f18, %f19;
	@!%p5 bra 	$Lt_28_9730;
	.loc	23	58	0
	ld.param.f32 	%f20, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+48];
	rcp.approx.ftz.f32 	%f21, %f20;
	ld.param.f32 	%f22, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+52];
	ld.param.f32 	%f23, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+64];
	add.ftz.f32 	%f24, %f23, %f22;
	lg2.approx.ftz.f32 	%f25, %f7;
	mul.ftz.f32 	%f26, %f21, %f25;
	ex2.approx.ftz.f32 	%f27, %f26;
	lg2.approx.ftz.f32 	%f28, %f27;
	ld.param.f32 	%f29, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+60];
	rcp.approx.ftz.f32 	%f30, %f29;
	mul.ftz.f32 	%f31, %f28, %f30;
	ex2.approx.ftz.f32 	%f32, %f31;
	add.ftz.f32 	%f33, %f24, %f32;
	.loc	23	59	0
	ld.param.f32 	%f34, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+76];
	add.ftz.f32 	%f35, %f34, %f22;
	lg2.approx.ftz.f32 	%f36, %f9;
	mul.ftz.f32 	%f37, %f21, %f36;
	ex2.approx.ftz.f32 	%f38, %f37;
	lg2.approx.ftz.f32 	%f39, %f38;
	ld.param.f32 	%f40, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+72];
	rcp.approx.ftz.f32 	%f41, %f40;
	mul.ftz.f32 	%f42, %f39, %f41;
	ex2.approx.ftz.f32 	%f43, %f42;
	add.ftz.f32 	%f44, %f35, %f43;
	.loc	23	60	0
	ld.param.f32 	%f45, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+88];
	add.ftz.f32 	%f46, %f45, %f22;
	lg2.approx.ftz.f32 	%f47, %f11;
	mul.ftz.f32 	%f48, %f21, %f47;
	ex2.approx.ftz.f32 	%f49, %f48;
	lg2.approx.ftz.f32 	%f50, %f49;
	ld.param.f32 	%f51, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+84];
	rcp.approx.ftz.f32 	%f52, %f51;
	mul.ftz.f32 	%f53, %f50, %f52;
	ex2.approx.ftz.f32 	%f54, %f53;
	add.ftz.f32 	%f55, %f46, %f54;
	.loc	23	63	0
	ld.param.f32 	%f56, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+56];
	ld.param.f32 	%f57, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+68];
	mul.ftz.f32 	%f58, %f57, %f56;
	mul.ftz.f32 	%f7, %f33, %f58;
	.loc	23	64	0
	ld.param.f32 	%f59, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+80];
	mul.ftz.f32 	%f60, %f59, %f56;
	mul.ftz.f32 	%f9, %f44, %f60;
	.loc	23	65	0
	ld.param.f32 	%f61, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+92];
	mul.ftz.f32 	%f62, %f61, %f56;
	mul.ftz.f32 	%f11, %f55, %f62;
	bra.uni 	$Lt_28_9474;
$Lt_28_9730:
	ld.param.f32 	%f63, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminTonalRangeParameters+8];
	setp.gt.ftz.f32 	%p6, %f18, %f63;
	@!%p6 bra 	$Lt_28_10242;
	.loc	23	78	0
	ld.param.f32 	%f64, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+48];
	rcp.approx.ftz.f32 	%f65, %f64;
	ld.param.f32 	%f66, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+52];
	ld.param.f32 	%f67, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+64];
	add.ftz.f32 	%f68, %f67, %f66;
	lg2.approx.ftz.f32 	%f69, %f7;
	mul.ftz.f32 	%f70, %f65, %f69;
	ex2.approx.ftz.f32 	%f71, %f70;
	lg2.approx.ftz.f32 	%f72, %f71;
	ld.param.f32 	%f73, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+60];
	rcp.approx.ftz.f32 	%f74, %f73;
	mul.ftz.f32 	%f75, %f72, %f74;
	ex2.approx.ftz.f32 	%f76, %f75;
	add.ftz.f32 	%f77, %f68, %f76;
	.loc	23	79	0
	ld.param.f32 	%f78, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+76];
	add.ftz.f32 	%f79, %f78, %f66;
	lg2.approx.ftz.f32 	%f80, %f9;
	mul.ftz.f32 	%f81, %f65, %f80;
	ex2.approx.ftz.f32 	%f82, %f81;
	lg2.approx.ftz.f32 	%f83, %f82;
	ld.param.f32 	%f84, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+72];
	rcp.approx.ftz.f32 	%f85, %f84;
	mul.ftz.f32 	%f86, %f83, %f85;
	ex2.approx.ftz.f32 	%f87, %f86;
	add.ftz.f32 	%f88, %f79, %f87;
	.loc	23	80	0
	ld.param.f32 	%f89, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+88];
	add.ftz.f32 	%f90, %f89, %f66;
	lg2.approx.ftz.f32 	%f91, %f11;
	mul.ftz.f32 	%f92, %f65, %f91;
	ex2.approx.ftz.f32 	%f93, %f92;
	lg2.approx.ftz.f32 	%f94, %f93;
	ld.param.f32 	%f95, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+84];
	rcp.approx.ftz.f32 	%f96, %f95;
	mul.ftz.f32 	%f97, %f94, %f96;
	ex2.approx.ftz.f32 	%f98, %f97;
	add.ftz.f32 	%f99, %f90, %f98;
	.loc	23	83	0
	ld.param.f32 	%f100, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+56];
	ld.param.f32 	%f101, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+68];
	mul.ftz.f32 	%f102, %f101, %f100;
	mul.ftz.f32 	%f7, %f77, %f102;
	.loc	23	84	0
	ld.param.f32 	%f103, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+80];
	mul.ftz.f32 	%f104, %f103, %f100;
	mul.ftz.f32 	%f9, %f88, %f104;
	.loc	23	85	0
	ld.param.f32 	%f105, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+92];
	mul.ftz.f32 	%f106, %f105, %f100;
	mul.ftz.f32 	%f11, %f99, %f106;
	bra.uni 	$Lt_28_9986;
$Lt_28_10242:
	ld.param.f32 	%f107, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+44];
	ld.param.f32 	%f108, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+32];
	ld.param.f32 	%f109, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+20];
	ld.param.f32 	%f110, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+8];
	ld.param.f32 	%f111, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+36];
	ld.param.f32 	%f112, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+40];
	ld.param.f32 	%f113, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+24];
	ld.param.f32 	%f114, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+28];
	ld.param.f32 	%f115, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+12];
	ld.param.f32 	%f116, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+16];
	ld.param.f32 	%f117, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+4];
	ld.param.f32 	%f118, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+0];
	ld.param.f32 	%f119, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminTonalRangeParameters+4];
	add.ftz.f32 	%f120, %f119, %f19;
	setp.lt.ftz.f32 	%p7, %f18, %f120;
	@!%p7 bra 	$Lt_28_10754;
	.loc	28	523	0
	sub.ftz.f32 	%f121, %f18, %f19;
	div.approx.ftz.f32 	%f122, %f121, %f119;
	ld.param.f32 	%f123, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+48];
	mul.ftz.f32 	%f124, %f123, %f122;
	mov.f32 	%f125, 0f3f800000;   	// 1
	sub.ftz.f32 	%f126, %f125, %f122;
	fma.rn.ftz.f32 	%f127, %f118, %f126, %f124;
	rcp.approx.ftz.f32 	%f128, %f127;
	lg2.approx.ftz.f32 	%f129, %f7;
	mul.ftz.f32 	%f130, %f128, %f129;
	ex2.approx.ftz.f32 	%f131, %f130;
	lg2.approx.ftz.f32 	%f132, %f131;
	ld.param.f32 	%f133, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+60];
	mul.ftz.f32 	%f134, %f133, %f122;
	fma.rn.ftz.f32 	%f135, %f126, %f115, %f134;
	rcp.approx.ftz.f32 	%f136, %f135;
	mul.ftz.f32 	%f137, %f132, %f136;
	ex2.approx.ftz.f32 	%f138, %f137;
	lg2.approx.ftz.f32 	%f139, %f9;
	mul.ftz.f32 	%f140, %f128, %f139;
	ex2.approx.ftz.f32 	%f141, %f140;
	lg2.approx.ftz.f32 	%f142, %f141;
	ld.param.f32 	%f143, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+72];
	mul.ftz.f32 	%f144, %f143, %f122;
	fma.rn.ftz.f32 	%f145, %f126, %f113, %f144;
	rcp.approx.ftz.f32 	%f146, %f145;
	mul.ftz.f32 	%f147, %f142, %f146;
	ex2.approx.ftz.f32 	%f148, %f147;
	lg2.approx.ftz.f32 	%f149, %f11;
	mul.ftz.f32 	%f150, %f128, %f149;
	ex2.approx.ftz.f32 	%f151, %f150;
	lg2.approx.ftz.f32 	%f152, %f151;
	ld.param.f32 	%f153, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+84];
	mul.ftz.f32 	%f154, %f153, %f122;
	fma.rn.ftz.f32 	%f155, %f126, %f111, %f154;
	rcp.approx.ftz.f32 	%f156, %f155;
	mul.ftz.f32 	%f157, %f152, %f156;
	ex2.approx.ftz.f32 	%f158, %f157;
	.loc	23	121	0
	ld.param.f32 	%f159, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+52];
	mul.ftz.f32 	%f160, %f159, %f122;
	ld.param.f32 	%f161, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+56];
	mul.ftz.f32 	%f162, %f161, %f122;
	fma.rn.ftz.f32 	%f163, %f126, %f117, %f160;
	fma.rn.ftz.f32 	%f164, %f126, %f110, %f162;
	ld.param.f32 	%f165, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+64];
	mul.ftz.f32 	%f166, %f165, %f122;
	fma.rn.ftz.f32 	%f167, %f126, %f116, %f166;
	add.ftz.f32 	%f168, %f163, %f167;
	add.ftz.f32 	%f169, %f138, %f168;
	ld.param.f32 	%f170, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+68];
	mul.ftz.f32 	%f171, %f170, %f122;
	fma.rn.ftz.f32 	%f172, %f126, %f109, %f171;
	mul.ftz.f32 	%f173, %f164, %f172;
	mul.ftz.f32 	%f7, %f169, %f173;
	.loc	23	122	0
	ld.param.f32 	%f174, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+76];
	mul.ftz.f32 	%f175, %f174, %f122;
	fma.rn.ftz.f32 	%f176, %f126, %f114, %f175;
	add.ftz.f32 	%f177, %f163, %f176;
	add.ftz.f32 	%f178, %f148, %f177;
	ld.param.f32 	%f179, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+80];
	mul.ftz.f32 	%f180, %f179, %f122;
	fma.rn.ftz.f32 	%f181, %f126, %f108, %f180;
	mul.ftz.f32 	%f182, %f164, %f181;
	mul.ftz.f32 	%f9, %f178, %f182;
	.loc	23	123	0
	ld.param.f32 	%f183, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+88];
	mul.ftz.f32 	%f184, %f183, %f122;
	fma.rn.ftz.f32 	%f185, %f126, %f112, %f184;
	add.ftz.f32 	%f186, %f163, %f185;
	add.ftz.f32 	%f187, %f158, %f186;
	ld.param.f32 	%f188, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMidtoneShadowParameters+92];
	mul.ftz.f32 	%f189, %f188, %f122;
	fma.rn.ftz.f32 	%f190, %f126, %f107, %f189;
	mul.ftz.f32 	%f191, %f164, %f190;
	mul.ftz.f32 	%f11, %f187, %f191;
	bra.uni 	$Lt_28_10498;
$Lt_28_10754:
	ld.param.f32 	%f192, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminTonalRangeParameters+12];
	sub.ftz.f32 	%f193, %f63, %f192;
	setp.lt.ftz.f32 	%p8, %f193, %f18;
	@!%p8 bra 	$Lt_28_11266;
	.loc	28	523	0
	sub.ftz.f32 	%f194, %f18, %f193;
	div.approx.ftz.f32 	%f195, %f194, %f192;
	ld.param.f32 	%f196, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+48];
	mul.ftz.f32 	%f197, %f196, %f195;
	mov.f32 	%f198, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f198, %f195;
	fma.rn.ftz.f32 	%f200, %f118, %f199, %f197;
	rcp.approx.ftz.f32 	%f201, %f200;
	lg2.approx.ftz.f32 	%f202, %f7;
	mul.ftz.f32 	%f203, %f201, %f202;
	ex2.approx.ftz.f32 	%f204, %f203;
	lg2.approx.ftz.f32 	%f205, %f204;
	ld.param.f32 	%f206, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+60];
	mul.ftz.f32 	%f207, %f206, %f195;
	fma.rn.ftz.f32 	%f208, %f199, %f115, %f207;
	rcp.approx.ftz.f32 	%f209, %f208;
	mul.ftz.f32 	%f210, %f205, %f209;
	ex2.approx.ftz.f32 	%f211, %f210;
	lg2.approx.ftz.f32 	%f212, %f9;
	mul.ftz.f32 	%f213, %f201, %f212;
	ex2.approx.ftz.f32 	%f214, %f213;
	lg2.approx.ftz.f32 	%f215, %f214;
	ld.param.f32 	%f216, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+72];
	mul.ftz.f32 	%f217, %f216, %f195;
	fma.rn.ftz.f32 	%f218, %f199, %f113, %f217;
	rcp.approx.ftz.f32 	%f219, %f218;
	mul.ftz.f32 	%f220, %f215, %f219;
	ex2.approx.ftz.f32 	%f221, %f220;
	lg2.approx.ftz.f32 	%f222, %f11;
	mul.ftz.f32 	%f223, %f201, %f222;
	ex2.approx.ftz.f32 	%f224, %f223;
	lg2.approx.ftz.f32 	%f225, %f224;
	ld.param.f32 	%f226, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+84];
	mul.ftz.f32 	%f227, %f226, %f195;
	fma.rn.ftz.f32 	%f228, %f199, %f111, %f227;
	rcp.approx.ftz.f32 	%f229, %f228;
	mul.ftz.f32 	%f230, %f225, %f229;
	ex2.approx.ftz.f32 	%f231, %f230;
	.loc	23	159	0
	ld.param.f32 	%f232, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+52];
	mul.ftz.f32 	%f233, %f232, %f195;
	ld.param.f32 	%f234, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+56];
	mul.ftz.f32 	%f235, %f234, %f195;
	fma.rn.ftz.f32 	%f236, %f199, %f117, %f233;
	fma.rn.ftz.f32 	%f237, %f199, %f110, %f235;
	ld.param.f32 	%f238, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+64];
	mul.ftz.f32 	%f239, %f238, %f195;
	fma.rn.ftz.f32 	%f240, %f199, %f116, %f239;
	add.ftz.f32 	%f241, %f236, %f240;
	add.ftz.f32 	%f242, %f211, %f241;
	ld.param.f32 	%f243, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+68];
	mul.ftz.f32 	%f244, %f243, %f195;
	fma.rn.ftz.f32 	%f245, %f199, %f109, %f244;
	mul.ftz.f32 	%f246, %f237, %f245;
	mul.ftz.f32 	%f7, %f242, %f246;
	.loc	23	160	0
	ld.param.f32 	%f247, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+76];
	mul.ftz.f32 	%f248, %f247, %f195;
	fma.rn.ftz.f32 	%f249, %f199, %f114, %f248;
	add.ftz.f32 	%f250, %f236, %f249;
	add.ftz.f32 	%f251, %f221, %f250;
	ld.param.f32 	%f252, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+80];
	mul.ftz.f32 	%f253, %f252, %f195;
	fma.rn.ftz.f32 	%f254, %f199, %f108, %f253;
	mul.ftz.f32 	%f255, %f237, %f254;
	mul.ftz.f32 	%f9, %f251, %f255;
	.loc	23	161	0
	ld.param.f32 	%f256, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+88];
	mul.ftz.f32 	%f257, %f256, %f195;
	fma.rn.ftz.f32 	%f258, %f199, %f112, %f257;
	add.ftz.f32 	%f259, %f236, %f258;
	add.ftz.f32 	%f260, %f231, %f259;
	ld.param.f32 	%f261, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+92];
	mul.ftz.f32 	%f262, %f261, %f195;
	fma.rn.ftz.f32 	%f263, %f199, %f107, %f262;
	mul.ftz.f32 	%f264, %f237, %f263;
	mul.ftz.f32 	%f11, %f260, %f264;
	bra.uni 	$Lt_28_11010;
$Lt_28_11266:
	.loc	23	174	0
	rcp.approx.ftz.f32 	%f265, %f118;
	add.ftz.f32 	%f266, %f116, %f117;
	lg2.approx.ftz.f32 	%f267, %f7;
	mul.ftz.f32 	%f268, %f265, %f267;
	ex2.approx.ftz.f32 	%f269, %f268;
	lg2.approx.ftz.f32 	%f270, %f269;
	rcp.approx.ftz.f32 	%f271, %f115;
	mul.ftz.f32 	%f272, %f270, %f271;
	ex2.approx.ftz.f32 	%f273, %f272;
	add.ftz.f32 	%f274, %f266, %f273;
	.loc	23	175	0
	add.ftz.f32 	%f275, %f114, %f117;
	lg2.approx.ftz.f32 	%f276, %f9;
	mul.ftz.f32 	%f277, %f265, %f276;
	ex2.approx.ftz.f32 	%f278, %f277;
	lg2.approx.ftz.f32 	%f279, %f278;
	rcp.approx.ftz.f32 	%f280, %f113;
	mul.ftz.f32 	%f281, %f279, %f280;
	ex2.approx.ftz.f32 	%f282, %f281;
	add.ftz.f32 	%f283, %f275, %f282;
	.loc	23	176	0
	add.ftz.f32 	%f284, %f112, %f117;
	lg2.approx.ftz.f32 	%f285, %f11;
	mul.ftz.f32 	%f286, %f265, %f285;
	ex2.approx.ftz.f32 	%f287, %f286;
	lg2.approx.ftz.f32 	%f288, %f287;
	rcp.approx.ftz.f32 	%f289, %f111;
	mul.ftz.f32 	%f290, %f288, %f289;
	ex2.approx.ftz.f32 	%f291, %f290;
	add.ftz.f32 	%f292, %f284, %f291;
	.loc	23	179	0
	mul.ftz.f32 	%f293, %f109, %f110;
	mul.ftz.f32 	%f7, %f274, %f293;
	.loc	23	180	0
	mul.ftz.f32 	%f294, %f108, %f110;
	mul.ftz.f32 	%f9, %f283, %f294;
	.loc	23	181	0
	mul.ftz.f32 	%f295, %f107, %f110;
	mul.ftz.f32 	%f11, %f292, %f295;
$Lt_28_11010:
$Lt_28_10498:
$Lt_28_9986:
$Lt_28_9474:
	.loc	23	193	0
	ld.param.f32 	%f296, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+0];
	rcp.approx.ftz.f32 	%f297, %f296;
	ld.param.f32 	%f298, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+4];
	ld.param.f32 	%f299, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+16];
	add.ftz.f32 	%f300, %f299, %f298;
	lg2.approx.ftz.f32 	%f301, %f7;
	mul.ftz.f32 	%f302, %f297, %f301;
	ex2.approx.ftz.f32 	%f303, %f302;
	lg2.approx.ftz.f32 	%f304, %f303;
	ld.param.f32 	%f305, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+12];
	rcp.approx.ftz.f32 	%f306, %f305;
	mul.ftz.f32 	%f307, %f304, %f306;
	ex2.approx.ftz.f32 	%f308, %f307;
	add.ftz.f32 	%f309, %f300, %f308;
	.loc	23	194	0
	ld.param.f32 	%f310, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+28];
	add.ftz.f32 	%f311, %f310, %f298;
	lg2.approx.ftz.f32 	%f312, %f9;
	mul.ftz.f32 	%f313, %f297, %f312;
	ex2.approx.ftz.f32 	%f314, %f313;
	lg2.approx.ftz.f32 	%f315, %f314;
	ld.param.f32 	%f316, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+24];
	rcp.approx.ftz.f32 	%f317, %f316;
	mul.ftz.f32 	%f318, %f315, %f317;
	ex2.approx.ftz.f32 	%f319, %f318;
	add.ftz.f32 	%f320, %f311, %f319;
	.loc	23	195	0
	ld.param.f32 	%f321, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+40];
	add.ftz.f32 	%f322, %f321, %f298;
	lg2.approx.ftz.f32 	%f323, %f11;
	mul.ftz.f32 	%f324, %f297, %f323;
	ex2.approx.ftz.f32 	%f325, %f324;
	lg2.approx.ftz.f32 	%f326, %f325;
	ld.param.f32 	%f327, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+36];
	rcp.approx.ftz.f32 	%f328, %f327;
	mul.ftz.f32 	%f329, %f326, %f328;
	ex2.approx.ftz.f32 	%f330, %f329;
	add.ftz.f32 	%f331, %f322, %f330;
	.loc	23	198	0
	ld.param.f32 	%f332, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+8];
	ld.param.f32 	%f333, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+20];
	mul.ftz.f32 	%f334, %f333, %f332;
	mul.ftz.f32 	%f335, %f309, %f334;
	.loc	23	199	0
	ld.param.f32 	%f336, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+32];
	mul.ftz.f32 	%f337, %f336, %f332;
	mul.ftz.f32 	%f338, %f320, %f337;
	.loc	23	200	0
	ld.param.f32 	%f339, [__cudaparm_RGBColorCorrector_LumaKernel___val_paraminMasterHighlightParameters+44];
	mul.ftz.f32 	%f340, %f339, %f332;
	mul.ftz.f32 	%f341, %f331, %f340;
	.loc	23	203	0
	sub.ftz.f32 	%f342, %f335, %f3;
	fma.rn.ftz.f32 	%f7, %f5, %f342, %f3;
	.loc	23	204	0
	sub.ftz.f32 	%f343, %f338, %f2;
	fma.rn.ftz.f32 	%f9, %f5, %f343, %f2;
	.loc	23	205	0
	sub.ftz.f32 	%f344, %f341, %f1;
	fma.rn.ftz.f32 	%f11, %f5, %f344, %f1;
$Lt_28_8962:
	.loc	23	330	0
	mul.ftz.f32 	%f345, %f9, %f14;
	fma.rn.ftz.f32 	%f346, %f13, %f7, %f345;
	fma.rn.ftz.f32 	%f347, %f12, %f11, %f346;
	@!%p2 bra 	$Lt_28_11778;
	.loc	21	126	0
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f347;
	mov.b32		%r31, %b1; }
	mov.s32 	%r32, %r31;
	mul.lo.u64 	%rd12, %rd1, 8;
	add.u64 	%rd13, %rd2, %rd12;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r33, %b1; }
	st.global.v4.u16 	[%rd13+0], {%r32,%r32,%r32,%r33};
	.loc	23	334	0
	bra.uni 	$Lt_28_11522;
$Lt_28_11778:
	.loc	21	126	0
	mul.lo.u64 	%rd14, %rd1, 16;
	add.u64 	%rd15, %rd2, %rd14;
	st.global.v4.f32 	[%rd15+0], {%f347,%f347,%f347,%f4};
$Lt_28_11522:
$Lt_28_7426:
	.loc	23	336	0
	exit;
$LDWend_RGBColorCorrector_LumaKernel:
	} // RGBColorCorrector_LumaKernel

	// ---------------------------------------------------------------------
	// RGBColorCorrector_CompositeKernel
	//
	// One thread handles one pixel at
	//     x = ctaid.x * ntid.x + tid.x,   y = ctaid.y * ntid.y + tid.y
	// and exits without touching memory unless inWidth > x and inHeight > y.
	//
	// Parameters as used by the code below:
	//   inImage          base address; the pixel is read and written in place
	//                    at element index (inPitch * y + x).
	//   inPitch          row pitch, multiplied by y in units of pixels.
	//   inSecondaryMask  optional f32 blend weight per pixel (4-byte elements,
	//                    index inSecondaryPitch * y + x). When the pointer is
	//                    0 the weight is the constant 1.0.
	//   inDeviceFormat   0  -> pixel is 4 x f16 (8-byte stride),
	//                    !0 -> pixel is 4 x f32 (16-byte stride).
	//   inTonalRangeParameters       4 floats T[0], T[4], T[8], T[12]
	//                                (byte offsets) used as luma thresholds
	//                                and blend widths.
	//   inMasterHighlightParameters, inMidtoneShadowParameters
	//                    96 bytes each = two 48-byte groups of 12 floats.
	//                    Within a group (byte offsets relative to the group):
	//                      +0 / +4 / +8          common exponent divisor,
	//                                            offset, scale
	//                      +12,+16,+20           same triple for channel %f7
	//                      +24,+28,+32           same triple for channel %f9
	//                      +36,+40,+44           same triple for channel %f11
	//                    (presumably gamma / pedestal / gain -- confirm
	//                    against the CUDA source.)
	//
	// Per channel c, every correction stage evaluates
	//     out = ((c ^ (1/g[0])) ^ (1/g[c.exp]) + g[4] + g[c.off])
	//           * (g[8] * g[c.scale])
	// using lg2.approx / ex2.approx for the powers.
	//
	// Flow:
	//   1. load pixel {%f1,%f2,%f3,%f4}; %f4 is stored back unchanged.
	//   2. clamp %f3,%f2,%f1 to >= 0 into %f7,%f9,%f11.
	//   3. if weight > 0.001: pick/blend a tonal-range group from luma,
	//      apply it, apply the MasterHighlight +0 group, then mix with the
	//      original (unclamped) pixel: out = orig + weight * (corr - orig).
	//      Otherwise the clamped values are written as-is.
	//   4. store {%f11,%f9,%f7,%f4} in the same format it was read in.
	// ---------------------------------------------------------------------
	.entry RGBColorCorrector_CompositeKernel (
		.param .u64 __cudaparm_RGBColorCorrector_CompositeKernel_inImage,
		.param .s32 __cudaparm_RGBColorCorrector_CompositeKernel_inPitch,
		.param .u64 __cudaparm_RGBColorCorrector_CompositeKernel_inSecondaryMask,
		.param .s32 __cudaparm_RGBColorCorrector_CompositeKernel_inSecondaryPitch,
		.param .u32 __cudaparm_RGBColorCorrector_CompositeKernel_inDeviceFormat,
		.param .s32 __cudaparm_RGBColorCorrector_CompositeKernel_inWidth,
		.param .s32 __cudaparm_RGBColorCorrector_CompositeKernel_inHeight,
		.param .align 4 .b8 __cudaparm_RGBColorCorrector_CompositeKernel___val_paraminTonalRangeParameters[16],
		.param .align 4 .b8 __cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters[96],
		.param .align 4 .b8 __cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters[96])
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<17>;
	.reg .f32 %f<346>;
	.reg .pred %p<10>;
	.loc	23	351	0
$LDWbegin_RGBColorCorrector_CompositeKernel:
	.loc	23	354	0
	// %r8 = x, %r10 = y (global pixel coordinates of this thread).
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check (signed): continue only when inWidth > x AND inHeight > y.
	ld.param.s32 	%r11, [__cudaparm_RGBColorCorrector_CompositeKernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_RGBColorCorrector_CompositeKernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_29_7426;
	// %p2 = (inDeviceFormat == 0): selects the f16 path for load and store.
	ld.param.s32 	%r19, [__cudaparm_RGBColorCorrector_CompositeKernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = inPitch * y + x (sign-extended pixel index), %rd2 = inImage.
	ld.param.s32 	%r21, [__cudaparm_RGBColorCorrector_CompositeKernel_inPitch];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_RGBColorCorrector_CompositeKernel_inImage];
	@!%p2 bra 	$Lt_29_8194;
	.loc	21	115	0
	// f16 path: 8 bytes per pixel, widen each of the four halves to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	23	357	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_29_7938;
$Lt_29_8194:
	// f32 path: 16 bytes per pixel, loaded directly.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_29_7938:
	.loc	23	358	0
	// %f5 = blend weight: mask[inSecondaryPitch * y + x] when a mask pointer
	// is supplied, otherwise 1.0.
	ld.param.u64 	%rd7, [__cudaparm_RGBColorCorrector_CompositeKernel_inSecondaryMask];
	mov.u64 	%rd8, 0;
	setp.eq.u64 	%p3, %rd7, %rd8;
	@%p3 bra 	$Lt_29_8706;
	.loc	23	359	0
	ld.param.s32 	%r28, [__cudaparm_RGBColorCorrector_CompositeKernel_inSecondaryPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	// %rd9 is never read afterwards; the address uses mul.wide below.
	cvt.s64.s32 	%rd9, %r30;
	mul.wide.s32 	%rd10, %r30, 4;
	add.u64 	%rd11, %rd7, %rd10;
	ld.global.f32 	%f5, [%rd11+0];
	bra.uni 	$Lt_29_8450;
$Lt_29_8706:
	mov.f32 	%f5, 0f3f800000;     	// 1
$Lt_29_8450:
	.loc	23	41	0
	// Working channels, clamped to >= 0: %f7 <- %f3, %f9 <- %f2, %f11 <- %f1.
	// The unclamped %f1..%f3 are kept for the final mix.
	mov.f32 	%f6, 0f00000000;     	// 0
	max.ftz.f32 	%f7, %f3, %f6;
	.loc	23	42	0
	mov.f32 	%f8, 0f00000000;     	// 0
	max.ftz.f32 	%f9, %f2, %f8;
	.loc	23	43	0
	mov.f32 	%f10, 0f00000000;    	// 0
	max.ftz.f32 	%f11, %f1, %f10;
	// Skip all correction when the weight is not greater than 0.001; the
	// clamped values are then stored unchanged.
	mov.f32 	%f12, 0f3a83126f;    	// 0.001
	setp.gt.ftz.f32 	%p4, %f5, %f12;
	@!%p4 bra 	$Lt_29_8962;
	// %f18 = luma = K[0]*%f3 + K[1]*%f2 + K[2]*%f1, where K is the first
	// three floats of kRGB32f_To_601YPbPr (computed from the unclamped pixel).
	ld.const.f32 	%f13, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f14, %f13, %f2;
	ld.const.f32 	%f15, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f16, %f15, %f3, %f14;
	ld.const.f32 	%f17, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f18, %f17, %f1, %f16;
	// Case 1: luma < T[0] -> apply the MidtoneShadow +48 group alone.
	ld.param.f32 	%f19, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminTonalRangeParameters+0];
	setp.lt.ftz.f32 	%p5, %f18, %f19;
	@!%p5 bra 	$Lt_29_9730;
	.loc	23	58	0
	// %f21 = 1 / g[0]; each channel: (c^(1/g[0]))^(1/g[exp]) + g[off] + g[4].
	ld.param.f32 	%f20, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+48];
	rcp.approx.ftz.f32 	%f21, %f20;
	ld.param.f32 	%f22, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+52];
	ld.param.f32 	%f23, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+64];
	add.ftz.f32 	%f24, %f23, %f22;
	lg2.approx.ftz.f32 	%f25, %f7;
	mul.ftz.f32 	%f26, %f21, %f25;
	ex2.approx.ftz.f32 	%f27, %f26;
	lg2.approx.ftz.f32 	%f28, %f27;
	ld.param.f32 	%f29, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+60];
	rcp.approx.ftz.f32 	%f30, %f29;
	mul.ftz.f32 	%f31, %f28, %f30;
	ex2.approx.ftz.f32 	%f32, %f31;
	add.ftz.f32 	%f33, %f24, %f32;
	.loc	23	59	0
	ld.param.f32 	%f34, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+76];
	add.ftz.f32 	%f35, %f34, %f22;
	lg2.approx.ftz.f32 	%f36, %f9;
	mul.ftz.f32 	%f37, %f21, %f36;
	ex2.approx.ftz.f32 	%f38, %f37;
	lg2.approx.ftz.f32 	%f39, %f38;
	ld.param.f32 	%f40, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+72];
	rcp.approx.ftz.f32 	%f41, %f40;
	mul.ftz.f32 	%f42, %f39, %f41;
	ex2.approx.ftz.f32 	%f43, %f42;
	add.ftz.f32 	%f44, %f35, %f43;
	.loc	23	60	0
	ld.param.f32 	%f45, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+88];
	add.ftz.f32 	%f46, %f45, %f22;
	lg2.approx.ftz.f32 	%f47, %f11;
	mul.ftz.f32 	%f48, %f21, %f47;
	ex2.approx.ftz.f32 	%f49, %f48;
	lg2.approx.ftz.f32 	%f50, %f49;
	ld.param.f32 	%f51, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+84];
	rcp.approx.ftz.f32 	%f52, %f51;
	mul.ftz.f32 	%f53, %f50, %f52;
	ex2.approx.ftz.f32 	%f54, %f53;
	add.ftz.f32 	%f55, %f46, %f54;
	.loc	23	63	0
	// Multiply each channel by (per-channel scale * common scale g[8]).
	ld.param.f32 	%f56, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+56];
	ld.param.f32 	%f57, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+68];
	mul.ftz.f32 	%f58, %f57, %f56;
	mul.ftz.f32 	%f7, %f33, %f58;
	.loc	23	64	0
	ld.param.f32 	%f59, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+80];
	mul.ftz.f32 	%f60, %f59, %f56;
	mul.ftz.f32 	%f9, %f44, %f60;
	.loc	23	65	0
	ld.param.f32 	%f61, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+92];
	mul.ftz.f32 	%f62, %f61, %f56;
	mul.ftz.f32 	%f11, %f55, %f62;
	bra.uni 	$Lt_29_9474;
$Lt_29_9730:
	// Case 2: luma > T[8] -> apply the MasterHighlight +48 group alone.
	ld.param.f32 	%f63, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminTonalRangeParameters+8];
	setp.gt.ftz.f32 	%p6, %f18, %f63;
	@!%p6 bra 	$Lt_29_10242;
	.loc	23	78	0
	ld.param.f32 	%f64, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+48];
	rcp.approx.ftz.f32 	%f65, %f64;
	ld.param.f32 	%f66, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+52];
	ld.param.f32 	%f67, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+64];
	add.ftz.f32 	%f68, %f67, %f66;
	lg2.approx.ftz.f32 	%f69, %f7;
	mul.ftz.f32 	%f70, %f65, %f69;
	ex2.approx.ftz.f32 	%f71, %f70;
	lg2.approx.ftz.f32 	%f72, %f71;
	ld.param.f32 	%f73, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+60];
	rcp.approx.ftz.f32 	%f74, %f73;
	mul.ftz.f32 	%f75, %f72, %f74;
	ex2.approx.ftz.f32 	%f76, %f75;
	add.ftz.f32 	%f77, %f68, %f76;
	.loc	23	79	0
	ld.param.f32 	%f78, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+76];
	add.ftz.f32 	%f79, %f78, %f66;
	lg2.approx.ftz.f32 	%f80, %f9;
	mul.ftz.f32 	%f81, %f65, %f80;
	ex2.approx.ftz.f32 	%f82, %f81;
	lg2.approx.ftz.f32 	%f83, %f82;
	ld.param.f32 	%f84, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+72];
	rcp.approx.ftz.f32 	%f85, %f84;
	mul.ftz.f32 	%f86, %f83, %f85;
	ex2.approx.ftz.f32 	%f87, %f86;
	add.ftz.f32 	%f88, %f79, %f87;
	.loc	23	80	0
	ld.param.f32 	%f89, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+88];
	add.ftz.f32 	%f90, %f89, %f66;
	lg2.approx.ftz.f32 	%f91, %f11;
	mul.ftz.f32 	%f92, %f65, %f91;
	ex2.approx.ftz.f32 	%f93, %f92;
	lg2.approx.ftz.f32 	%f94, %f93;
	ld.param.f32 	%f95, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+84];
	rcp.approx.ftz.f32 	%f96, %f95;
	mul.ftz.f32 	%f97, %f94, %f96;
	ex2.approx.ftz.f32 	%f98, %f97;
	add.ftz.f32 	%f99, %f90, %f98;
	.loc	23	83	0
	ld.param.f32 	%f100, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+56];
	ld.param.f32 	%f101, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+68];
	mul.ftz.f32 	%f102, %f101, %f100;
	mul.ftz.f32 	%f7, %f77, %f102;
	.loc	23	84	0
	ld.param.f32 	%f103, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+80];
	mul.ftz.f32 	%f104, %f103, %f100;
	mul.ftz.f32 	%f9, %f88, %f104;
	.loc	23	85	0
	ld.param.f32 	%f105, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+92];
	mul.ftz.f32 	%f106, %f105, %f100;
	mul.ftz.f32 	%f11, %f99, %f106;
	bra.uni 	$Lt_29_9986;
$Lt_29_10242:
	// T[0] <= luma <= T[8]: preload the whole MidtoneShadow +0 group
	// (%f107..%f118); the three remaining cases all use it.
	ld.param.f32 	%f107, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+44];
	ld.param.f32 	%f108, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+32];
	ld.param.f32 	%f109, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+20];
	ld.param.f32 	%f110, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+8];
	ld.param.f32 	%f111, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+36];
	ld.param.f32 	%f112, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+40];
	ld.param.f32 	%f113, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+24];
	ld.param.f32 	%f114, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+28];
	ld.param.f32 	%f115, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+12];
	ld.param.f32 	%f116, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+16];
	ld.param.f32 	%f117, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+4];
	ld.param.f32 	%f118, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+0];
	// Case 3: luma < T[0] + T[4] -> blend the MidtoneShadow +0 and +48 groups.
	ld.param.f32 	%f119, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminTonalRangeParameters+4];
	add.ftz.f32 	%f120, %f119, %f19;
	setp.lt.ftz.f32 	%p7, %f18, %f120;
	@!%p7 bra 	$Lt_29_10754;
	.loc	28	523	0
	// t = %f122 = (luma - T[0]) / T[4];  %f126 = 1 - t.
	// Every parameter p is blended as (1 - t) * p[+0 group] + t * p[+48 group].
	// NOTE(review): this gives the pure +0 group at luma == T[0] and the pure
	// +48 group at luma == T[0] + T[4], whereas Case 1 just below T[0] uses the
	// +48 group and Case 5 above the band uses the +0 group -- confirm the
	// blend direction against the CUDA source.
	sub.ftz.f32 	%f121, %f18, %f19;
	div.approx.ftz.f32 	%f122, %f121, %f119;
	ld.param.f32 	%f123, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+48];
	mul.ftz.f32 	%f124, %f123, %f122;
	mov.f32 	%f125, 0f3f800000;   	// 1
	sub.ftz.f32 	%f126, %f125, %f122;
	fma.rn.ftz.f32 	%f127, %f118, %f126, %f124;
	rcp.approx.ftz.f32 	%f128, %f127;
	// Power terms for %f7, %f9, %f11 -> %f138, %f148, %f158.
	lg2.approx.ftz.f32 	%f129, %f7;
	mul.ftz.f32 	%f130, %f128, %f129;
	ex2.approx.ftz.f32 	%f131, %f130;
	lg2.approx.ftz.f32 	%f132, %f131;
	ld.param.f32 	%f133, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+60];
	mul.ftz.f32 	%f134, %f133, %f122;
	fma.rn.ftz.f32 	%f135, %f126, %f115, %f134;
	rcp.approx.ftz.f32 	%f136, %f135;
	mul.ftz.f32 	%f137, %f132, %f136;
	ex2.approx.ftz.f32 	%f138, %f137;
	lg2.approx.ftz.f32 	%f139, %f9;
	mul.ftz.f32 	%f140, %f128, %f139;
	ex2.approx.ftz.f32 	%f141, %f140;
	lg2.approx.ftz.f32 	%f142, %f141;
	ld.param.f32 	%f143, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+72];
	mul.ftz.f32 	%f144, %f143, %f122;
	fma.rn.ftz.f32 	%f145, %f126, %f113, %f144;
	rcp.approx.ftz.f32 	%f146, %f145;
	mul.ftz.f32 	%f147, %f142, %f146;
	ex2.approx.ftz.f32 	%f148, %f147;
	lg2.approx.ftz.f32 	%f149, %f11;
	mul.ftz.f32 	%f150, %f128, %f149;
	ex2.approx.ftz.f32 	%f151, %f150;
	lg2.approx.ftz.f32 	%f152, %f151;
	ld.param.f32 	%f153, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+84];
	mul.ftz.f32 	%f154, %f153, %f122;
	fma.rn.ftz.f32 	%f155, %f126, %f111, %f154;
	rcp.approx.ftz.f32 	%f156, %f155;
	mul.ftz.f32 	%f157, %f152, %f156;
	ex2.approx.ftz.f32 	%f158, %f157;
	.loc	23	121	0
	// %f163 = blended common offset, %f164 = blended common scale; then
	// channel = (power + common offset + channel offset)
	//           * (common scale * channel scale).
	ld.param.f32 	%f159, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+52];
	mul.ftz.f32 	%f160, %f159, %f122;
	ld.param.f32 	%f161, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+56];
	mul.ftz.f32 	%f162, %f161, %f122;
	fma.rn.ftz.f32 	%f163, %f126, %f117, %f160;
	fma.rn.ftz.f32 	%f164, %f126, %f110, %f162;
	ld.param.f32 	%f165, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+64];
	mul.ftz.f32 	%f166, %f165, %f122;
	fma.rn.ftz.f32 	%f167, %f126, %f116, %f166;
	add.ftz.f32 	%f168, %f163, %f167;
	add.ftz.f32 	%f169, %f138, %f168;
	ld.param.f32 	%f170, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+68];
	mul.ftz.f32 	%f171, %f170, %f122;
	fma.rn.ftz.f32 	%f172, %f126, %f109, %f171;
	mul.ftz.f32 	%f173, %f164, %f172;
	mul.ftz.f32 	%f7, %f169, %f173;
	.loc	23	122	0
	ld.param.f32 	%f174, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+76];
	mul.ftz.f32 	%f175, %f174, %f122;
	fma.rn.ftz.f32 	%f176, %f126, %f114, %f175;
	add.ftz.f32 	%f177, %f163, %f176;
	add.ftz.f32 	%f178, %f148, %f177;
	ld.param.f32 	%f179, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+80];
	mul.ftz.f32 	%f180, %f179, %f122;
	fma.rn.ftz.f32 	%f181, %f126, %f108, %f180;
	mul.ftz.f32 	%f182, %f164, %f181;
	mul.ftz.f32 	%f9, %f178, %f182;
	.loc	23	123	0
	ld.param.f32 	%f183, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+88];
	mul.ftz.f32 	%f184, %f183, %f122;
	fma.rn.ftz.f32 	%f185, %f126, %f112, %f184;
	add.ftz.f32 	%f186, %f163, %f185;
	add.ftz.f32 	%f187, %f158, %f186;
	ld.param.f32 	%f188, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMidtoneShadowParameters+92];
	mul.ftz.f32 	%f189, %f188, %f122;
	fma.rn.ftz.f32 	%f190, %f126, %f107, %f189;
	mul.ftz.f32 	%f191, %f164, %f190;
	mul.ftz.f32 	%f11, %f187, %f191;
	bra.uni 	$Lt_29_10498;
$Lt_29_10754:
	// Case 4: luma > T[8] - T[12] -> blend the MidtoneShadow +0 group with
	// the MasterHighlight +48 group.
	ld.param.f32 	%f192, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminTonalRangeParameters+12];
	sub.ftz.f32 	%f193, %f63, %f192;
	setp.lt.ftz.f32 	%p8, %f193, %f18;
	@!%p8 bra 	$Lt_29_11266;
	.loc	28	523	0
	// t = %f195 = (luma - (T[8] - T[12])) / T[12];  %f199 = 1 - t.
	// Every parameter p is blended as
	//     (1 - t) * MidtoneShadow p[+0 group] + t * MasterHighlight p[+48 group].
	sub.ftz.f32 	%f194, %f18, %f193;
	div.approx.ftz.f32 	%f195, %f194, %f192;
	ld.param.f32 	%f196, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+48];
	mul.ftz.f32 	%f197, %f196, %f195;
	mov.f32 	%f198, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f198, %f195;
	fma.rn.ftz.f32 	%f200, %f118, %f199, %f197;
	rcp.approx.ftz.f32 	%f201, %f200;
	// Power terms for %f7, %f9, %f11 -> %f211, %f221, %f231.
	lg2.approx.ftz.f32 	%f202, %f7;
	mul.ftz.f32 	%f203, %f201, %f202;
	ex2.approx.ftz.f32 	%f204, %f203;
	lg2.approx.ftz.f32 	%f205, %f204;
	ld.param.f32 	%f206, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+60];
	mul.ftz.f32 	%f207, %f206, %f195;
	fma.rn.ftz.f32 	%f208, %f199, %f115, %f207;
	rcp.approx.ftz.f32 	%f209, %f208;
	mul.ftz.f32 	%f210, %f205, %f209;
	ex2.approx.ftz.f32 	%f211, %f210;
	lg2.approx.ftz.f32 	%f212, %f9;
	mul.ftz.f32 	%f213, %f201, %f212;
	ex2.approx.ftz.f32 	%f214, %f213;
	lg2.approx.ftz.f32 	%f215, %f214;
	ld.param.f32 	%f216, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+72];
	mul.ftz.f32 	%f217, %f216, %f195;
	fma.rn.ftz.f32 	%f218, %f199, %f113, %f217;
	rcp.approx.ftz.f32 	%f219, %f218;
	mul.ftz.f32 	%f220, %f215, %f219;
	ex2.approx.ftz.f32 	%f221, %f220;
	lg2.approx.ftz.f32 	%f222, %f11;
	mul.ftz.f32 	%f223, %f201, %f222;
	ex2.approx.ftz.f32 	%f224, %f223;
	lg2.approx.ftz.f32 	%f225, %f224;
	ld.param.f32 	%f226, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+84];
	mul.ftz.f32 	%f227, %f226, %f195;
	fma.rn.ftz.f32 	%f228, %f199, %f111, %f227;
	rcp.approx.ftz.f32 	%f229, %f228;
	mul.ftz.f32 	%f230, %f225, %f229;
	ex2.approx.ftz.f32 	%f231, %f230;
	.loc	23	159	0
	// %f236 = blended common offset, %f237 = blended common scale.
	ld.param.f32 	%f232, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+52];
	mul.ftz.f32 	%f233, %f232, %f195;
	ld.param.f32 	%f234, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+56];
	mul.ftz.f32 	%f235, %f234, %f195;
	fma.rn.ftz.f32 	%f236, %f199, %f117, %f233;
	fma.rn.ftz.f32 	%f237, %f199, %f110, %f235;
	ld.param.f32 	%f238, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+64];
	mul.ftz.f32 	%f239, %f238, %f195;
	fma.rn.ftz.f32 	%f240, %f199, %f116, %f239;
	add.ftz.f32 	%f241, %f236, %f240;
	add.ftz.f32 	%f242, %f211, %f241;
	ld.param.f32 	%f243, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+68];
	mul.ftz.f32 	%f244, %f243, %f195;
	fma.rn.ftz.f32 	%f245, %f199, %f109, %f244;
	mul.ftz.f32 	%f246, %f237, %f245;
	mul.ftz.f32 	%f7, %f242, %f246;
	.loc	23	160	0
	ld.param.f32 	%f247, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+76];
	mul.ftz.f32 	%f248, %f247, %f195;
	fma.rn.ftz.f32 	%f249, %f199, %f114, %f248;
	add.ftz.f32 	%f250, %f236, %f249;
	add.ftz.f32 	%f251, %f221, %f250;
	ld.param.f32 	%f252, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+80];
	mul.ftz.f32 	%f253, %f252, %f195;
	fma.rn.ftz.f32 	%f254, %f199, %f108, %f253;
	mul.ftz.f32 	%f255, %f237, %f254;
	mul.ftz.f32 	%f9, %f251, %f255;
	.loc	23	161	0
	ld.param.f32 	%f256, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+88];
	mul.ftz.f32 	%f257, %f256, %f195;
	fma.rn.ftz.f32 	%f258, %f199, %f112, %f257;
	add.ftz.f32 	%f259, %f236, %f258;
	add.ftz.f32 	%f260, %f231, %f259;
	ld.param.f32 	%f261, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+92];
	mul.ftz.f32 	%f262, %f261, %f195;
	fma.rn.ftz.f32 	%f263, %f199, %f107, %f262;
	mul.ftz.f32 	%f264, %f237, %f263;
	mul.ftz.f32 	%f11, %f260, %f264;
	bra.uni 	$Lt_29_11010;
$Lt_29_11266:
	.loc	23	174	0
	// Case 5: none of the above -> apply the MidtoneShadow +0 group alone,
	// using the values preloaded into %f107..%f118.
	rcp.approx.ftz.f32 	%f265, %f118;
	add.ftz.f32 	%f266, %f116, %f117;
	lg2.approx.ftz.f32 	%f267, %f7;
	mul.ftz.f32 	%f268, %f265, %f267;
	ex2.approx.ftz.f32 	%f269, %f268;
	lg2.approx.ftz.f32 	%f270, %f269;
	rcp.approx.ftz.f32 	%f271, %f115;
	mul.ftz.f32 	%f272, %f270, %f271;
	ex2.approx.ftz.f32 	%f273, %f272;
	add.ftz.f32 	%f274, %f266, %f273;
	.loc	23	175	0
	add.ftz.f32 	%f275, %f114, %f117;
	lg2.approx.ftz.f32 	%f276, %f9;
	mul.ftz.f32 	%f277, %f265, %f276;
	ex2.approx.ftz.f32 	%f278, %f277;
	lg2.approx.ftz.f32 	%f279, %f278;
	rcp.approx.ftz.f32 	%f280, %f113;
	mul.ftz.f32 	%f281, %f279, %f280;
	ex2.approx.ftz.f32 	%f282, %f281;
	add.ftz.f32 	%f283, %f275, %f282;
	.loc	23	176	0
	add.ftz.f32 	%f284, %f112, %f117;
	lg2.approx.ftz.f32 	%f285, %f11;
	mul.ftz.f32 	%f286, %f265, %f285;
	ex2.approx.ftz.f32 	%f287, %f286;
	lg2.approx.ftz.f32 	%f288, %f287;
	rcp.approx.ftz.f32 	%f289, %f111;
	mul.ftz.f32 	%f290, %f288, %f289;
	ex2.approx.ftz.f32 	%f291, %f290;
	add.ftz.f32 	%f292, %f284, %f291;
	.loc	23	179	0
	mul.ftz.f32 	%f293, %f109, %f110;
	mul.ftz.f32 	%f7, %f274, %f293;
	.loc	23	180	0
	mul.ftz.f32 	%f294, %f108, %f110;
	mul.ftz.f32 	%f9, %f283, %f294;
	.loc	23	181	0
	mul.ftz.f32 	%f295, %f107, %f110;
	mul.ftz.f32 	%f11, %f292, %f295;
$Lt_29_11010:
$Lt_29_10498:
$Lt_29_9986:
$Lt_29_9474:
	.loc	23	193	0
	// All five tonal cases join here. Second stage: apply the
	// MasterHighlight +0 group to the tonal-corrected %f7, %f9, %f11 with the
	// same formula; results land in %f335, %f338, %f341.
	ld.param.f32 	%f296, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+0];
	rcp.approx.ftz.f32 	%f297, %f296;
	ld.param.f32 	%f298, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+4];
	ld.param.f32 	%f299, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+16];
	add.ftz.f32 	%f300, %f299, %f298;
	lg2.approx.ftz.f32 	%f301, %f7;
	mul.ftz.f32 	%f302, %f297, %f301;
	ex2.approx.ftz.f32 	%f303, %f302;
	lg2.approx.ftz.f32 	%f304, %f303;
	ld.param.f32 	%f305, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+12];
	rcp.approx.ftz.f32 	%f306, %f305;
	mul.ftz.f32 	%f307, %f304, %f306;
	ex2.approx.ftz.f32 	%f308, %f307;
	add.ftz.f32 	%f309, %f300, %f308;
	.loc	23	194	0
	ld.param.f32 	%f310, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+28];
	add.ftz.f32 	%f311, %f310, %f298;
	lg2.approx.ftz.f32 	%f312, %f9;
	mul.ftz.f32 	%f313, %f297, %f312;
	ex2.approx.ftz.f32 	%f314, %f313;
	lg2.approx.ftz.f32 	%f315, %f314;
	ld.param.f32 	%f316, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+24];
	rcp.approx.ftz.f32 	%f317, %f316;
	mul.ftz.f32 	%f318, %f315, %f317;
	ex2.approx.ftz.f32 	%f319, %f318;
	add.ftz.f32 	%f320, %f311, %f319;
	.loc	23	195	0
	ld.param.f32 	%f321, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+40];
	add.ftz.f32 	%f322, %f321, %f298;
	lg2.approx.ftz.f32 	%f323, %f11;
	mul.ftz.f32 	%f324, %f297, %f323;
	ex2.approx.ftz.f32 	%f325, %f324;
	lg2.approx.ftz.f32 	%f326, %f325;
	ld.param.f32 	%f327, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+36];
	rcp.approx.ftz.f32 	%f328, %f327;
	mul.ftz.f32 	%f329, %f326, %f328;
	ex2.approx.ftz.f32 	%f330, %f329;
	add.ftz.f32 	%f331, %f322, %f330;
	.loc	23	198	0
	ld.param.f32 	%f332, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+8];
	ld.param.f32 	%f333, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+20];
	mul.ftz.f32 	%f334, %f333, %f332;
	mul.ftz.f32 	%f335, %f309, %f334;
	.loc	23	199	0
	ld.param.f32 	%f336, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+32];
	mul.ftz.f32 	%f337, %f336, %f332;
	mul.ftz.f32 	%f338, %f320, %f337;
	.loc	23	200	0
	ld.param.f32 	%f339, [__cudaparm_RGBColorCorrector_CompositeKernel___val_paraminMasterHighlightParameters+44];
	mul.ftz.f32 	%f340, %f339, %f332;
	mul.ftz.f32 	%f341, %f331, %f340;
	.loc	23	203	0
	// Mix with the original, unclamped pixel using the mask weight %f5:
	//     out = orig + weight * (corrected - orig).
	sub.ftz.f32 	%f342, %f335, %f3;
	fma.rn.ftz.f32 	%f7, %f5, %f342, %f3;
	.loc	23	204	0
	sub.ftz.f32 	%f343, %f338, %f2;
	fma.rn.ftz.f32 	%f9, %f5, %f343, %f2;
	.loc	23	205	0
	sub.ftz.f32 	%f344, %f341, %f1;
	fma.rn.ftz.f32 	%f11, %f5, %f344, %f1;
$Lt_29_8962:
	.loc	23	361	0
	// Write back {%f11, %f9, %f7, %f4} to the address the pixel was read
	// from, in the same element format (%p2 set -> f16, else f32).
	@!%p2 bra 	$Lt_29_11778;
	.loc	21	126	0
	mul.lo.u64 	%rd12, %rd1, 8;
	add.u64 	%rd13, %rd2, %rd12;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r31, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r32, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f7;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r34, %b1; }
	st.global.v4.u16 	[%rd13+0], {%r31,%r32,%r33,%r34};
	.loc	23	361	0
	bra.uni 	$Lt_29_11522;
$Lt_29_11778:
	.loc	21	126	0
	mul.lo.u64 	%rd14, %rd1, 16;
	add.u64 	%rd15, %rd2, %rd14;
	st.global.v4.f32 	[%rd15+0], {%f11,%f9,%f7,%f4};
$Lt_29_11522:
$Lt_29_7426:
	.loc	23	374	0
	exit;
$LDWend_RGBColorCorrector_CompositeKernel:
	} // RGBColorCorrector_CompositeKernel
	// Constant-memory colour conversion tables. Each is declared as 36 raw
	// bytes; read as little-endian IEEE-754 float32 that is 9 values
	// (presumably a row-major 3x3 matrix -- confirm against the CUDA source).
	// Example: k601YPbPr_To_RGB32f decodes to approximately
	//     1, 0, 1.402,   1, -0.344, -0.714,   1, 1.772, 0.
	// Names suggest the colour standard (601 / 709), the direction of the
	// conversion and the numeric range of the RGB side (8u vs 32f).
	.const .align 4 .b8 k601YPbPr_To_RGB32f[36] = {0,0,128,63,0,0,0,0,188,116,179,63,0,0,128,63,152,50,176,190,158,209,54,191,0,0,128,63,229,208,226,63,0,0,0,0};
	.const .align 4 .b8 kRGB32f_To_601YCbCr[36] = {70,246,130,66,145,141,0,67,94,186,199,65,33,48,23,194,240,103,148,194,0,0,224,66,0,0,224,66,111,146,187,194,70,182,145,193};
	.const .align 4 .b8 k601YCbCr_To_RGB32f[36] = {37,160,149,59,0,0,0,0,182,23,205,59,37,160,149,59,40,15,201,186,156,239,80,187,37,160,149,59,236,155,1,60,0,0,0,0};
	.const .align 4 .b8 kRGB8u_To_601YCbCr[36] = {219,121,131,62,152,14,1,63,18,131,200,61,174,199,23,190,238,252,148,190,197,224,224,62,197,224,224,62,217,78,188,190,174,71,146,189};
	.const .align 4 .b8 k601YCbCr_To_RGB8u[36] = {127,10,149,63,0,0,0,0,160,74,204,63,127,10,149,63,254,148,200,190,184,30,80,191,127,10,149,63,78,26,1,64,0,0,0,0};
	.const .align 4 .b8 kRGB8u_To_601YCbCrFullRange[36] = {135,22,153,62,162,69,22,63,213,120,233,61,166,27,44,190,39,241,168,190,250,254,254,62,250,254,254,62,43,135,213,190,59,223,165,189};
	.const .align 4 .b8 k601YCbCrFullRange_To_RGB8u[36] = {0,0,128,63,0,0,0,0,72,193,178,63,0,0,128,63,143,130,175,190,225,26,54,191,0,0,128,63,20,238,225,63,0,0,0,0};
	.const .align 4 .b8 kRGB32f_To_601YCbCrFullRange[36] = {113,125,152,66,92,175,21,67,92,143,232,65,158,111,43,194,49,72,168,194,0,0,254,66,0,0,254,66,170,177,212,194,88,57,165,193};
	.const .align 4 .b8 k601YCbCrFullRange_To_RGB32f[36] = {129,128,128,59,0,0,0,0,188,116,179,59,129,128,128,59,194,50,176,186,179,209,54,187,129,128,128,59,229,208,226,59,0,0,0,0};
	.const .align 4 .b8 kRGB32f_To_709YPbPr[36] = {208,179,89,62,89,23,55,63,152,221,147,61,186,164,234,189,210,86,197,190,0,0,0,63,0,0,0,63,190,134,232,190,16,202,59,189};
	.const .align 4 .b8 k709YPbPr_To_RGB32f[36] = {0,0,128,63,0,0,0,0,12,147,201,63,0,0,128,63,221,209,63,190,243,173,239,190,0,0,128,63,77,132,237,63,0,0,0,0};
	.const .align 4 .b8 kRGB32f_To_709YCbCr[36] = {106,60,58,66,6,161,28,67,244,253,124,65,223,79,205,193,8,172,172,194,0,0,224,66,0,0,224,66,195,117,203,194,236,81,36,193};
	.const .align 4 .b8 k709YCbCr_To_RGB32f[36] = {37,160,149,59,0,0,0,0,239,94,230,59,37,160,149,59,33,57,91,186,178,245,8,187,37,160,149,59,82,185,7,60,0,0,0,0};
	// 36 raw bytes = 9 little-endian IEEE-754 float32 values, approximately
	//     0.1826,  0.6142,  0.0620,
	//    -0.1006, -0.3386,  0.4392,
	//     0.4392, -0.3989, -0.0403.
	// Fix: the 4th value was encoded as 147,24,206,61 (+0.1006). Its sign bit
	// must be set (147,24,206,189 = -0.1006): the matching entry of
	// kRGB32f_To_709YCbCr is negative (-25.66), and with the positive value
	// the second row no longer sums to ~0, so neutral grey would get a
	// non-zero chroma component.
	.const .align 4 .b8 kRGB8u_To_709YCbCr[36] = {207,247,58,62,53,62,29,63,231,251,125,61,147,24,206,189,23,89,173,190,197,224,224,62,197,224,224,62,12,66,204,190,195,245,36,189};
	// Remaining constant-memory tables, stored as raw bytes. Read as
	// little-endian IEEE-754 float32, the 36-byte tables hold 9 values each
	// (presumably row-major 3x3 matrices -- confirm against the CUDA source)
	// and the 12-byte tables hold 3 values each:
	//     kYCbCrOffset          = 16, 128, 128
	//     kYCbCrFullRangeOffset =  0, 128, 128
	.const .align 4 .b8 k709YCbCr_To_RGB8u[36] = {127,10,149,63,0,0,0,0,147,120,229,63,127,10,149,63,53,94,90,190,205,108,8,191,127,10,149,63,154,49,7,64,0,0,0,0};
	.const .align 4 .b8 k709YCbCr_To_601YCbCr[36] = {0,0,128,63,23,100,203,61,1,77,68,62,0,0,0,0,18,103,125,63,10,158,226,189,0,0,0,0,61,98,148,189,249,191,123,63};
	.const .align 4 .b8 k601YCbCr_To_709YCbCr[36] = {0,0,128,63,122,165,236,189,179,237,84,190,0,0,0,0,204,98,130,63,216,188,234,61,0,0,0,0,74,179,153,61,234,61,131,63};
	.const .align 4 .b8 kYCbCrOffset[12] = {0,0,128,65,0,0,0,67,0,0,0,67};
	.const .align 4 .b8 kYCbCrFullRangeOffset[12] = {0,0,0,0,0,0,0,67,0,0,0,67};
	.const .align 4 .b8 kRGB32f_To_YIQ[36] = {135,22,153,62,162,69,22,63,213,120,233,61,216,128,24,63,27,133,140,190,149,124,164,190,236,135,88,62,134,200,5,191,22,77,159,62};
	.const .align 4 .b8 kYIQ_To_RGB32f[36] = {0,0,128,63,20,208,116,63,219,249,30,63,0,0,128,63,177,80,139,190,2,188,37,191,0,0,128,63,45,178,141,191,85,48,218,63};

