	.version 2.2
	.target sm_20
	// compiled with ../../../External/3rdParty/NVIDIA/CUDA/win/bin/../open64/lib//be.exe
	// nvopencc 3.2 built on 2010-11-04

	.visible .func (.param .s32 __cudaretf__Z15IntegerMultiplyii) _Z15IntegerMultiplyii (.param .s32 __cudaparmf1__Z15IntegerMultiplyii, .param .s32 __cudaparmf2__Z15IntegerMultiplyii)

	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelXv) _Z17Standard2DKernelXv ()

	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelYv) _Z17Standard2DKernelYv ()

	.visible .func (.param .align 16 .b8 __cudaretf__Z13Half4ToFloat47ushort4[16]) _Z13Half4ToFloat47ushort4 (.param .align 8 .b8 __cudaparmf1__Z13Half4ToFloat47ushort4[8])

	.visible .func (.param .align 8 .b8 __cudaretf__Z13Float4ToHalf46float4[8]) _Z13Float4ToHalf46float4 (.param .align 16 .b8 __cudaparmf1__Z13Float4ToHalf46float4[16])

	.visible .func (.param .u32 __cudaretf__Z4Mix3RjS_S_) _Z4Mix3RjS_S_ (.param .u64 __cudaparmf1__Z4Mix3RjS_S_, .param .u64 __cudaparmf2__Z4Mix3RjS_S_, .param .u64 __cudaparmf3__Z4Mix3RjS_S_)

	.visible .func (.param .s32 __cudaretf__Z4Randj) _Z4Randj (.param .u32 __cudaparmf1__Z4Randj)

	.visible .func (.param .s32 __cudaretf__Z6Rand2Djjj) _Z6Rand2Djjj (.param .u32 __cudaparmf1__Z6Rand2Djjj, .param .u32 __cudaparmf2__Z6Rand2Djjj, .param .u32 __cudaparmf3__Z6Rand2Djjj)

	.visible .func (.param .s32 __cudaretf__Z6Rand2Dj) _Z6Rand2Dj (.param .u32 __cudaparmf1__Z6Rand2Dj)

	.visible .func _Z7Write2DI7ushort4EvT_PS1_iii (.param .align 8 .b8 __cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii[8], .param .u64 __cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii)

	.visible .func _Z7Write2DI6float4EvT_PS1_iii (.param .align 16 .b8 __cudaparmf1__Z7Write2DI6float4EvT_PS1_iii[16], .param .u64 __cudaparmf2__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI6float4EvT_PS1_iii)

	.visible .func (.param .align 16 .b8 __cudaretf__Z18UnpremultiplyPixel8PixelRGB[16]) _Z18UnpremultiplyPixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z18UnpremultiplyPixel8PixelRGB[16])

	.visible .func (.param .f32 __cudaretf__Z13ToLinearColorf) _Z13ToLinearColorf (.param .f32 __cudaparmf1__Z13ToLinearColorf)

	.visible .func (.param .f32 __cudaretf__Z15FromLinearColorf) _Z15FromLinearColorf (.param .f32 __cudaparmf1__Z15FromLinearColorf)

	.visible .func (.param .align 16 .b8 __cudaretf__Z25PremultiplyLinearizePixel8PixelRGB[16]) _Z25PremultiplyLinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16]) _Z29UnpremultiplyUnlinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z20PremultiplyLinearize6float4[16]) _Z20PremultiplyLinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z20PremultiplyLinearize6float4[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z24UnpremultiplyUnlinearize6float4[16]) _Z24UnpremultiplyUnlinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z24UnpremultiplyUnlinearize6float4[16])

	.visible .func _Z23computeInverseTransformRK4QuadR10Homography (.param .u64 __cudaparmf1__Z23computeInverseTransformRK4QuadR10Homography, .param .u64 __cudaparmf2__Z23computeInverseTransformRK4QuadR10Homography)

	.visible .func _ZNK4Quad11boundingBoxERfS0_S0_S0_ (.param .u64 __cudaparmf1__ZNK4Quad11boundingBoxERfS0_S0_S0_, .param .u64 __cudaparmf2__ZNK4Quad11boundingBoxERfS0_S0_S0_, .param .u64 __cudaparmf3__ZNK4Quad11boundingBoxERfS0_S0_S0_, .param .u64 __cudaparmf4__ZNK4Quad11boundingBoxERfS0_S0_S0_, .param .u64 __cudaparmf5__ZNK4Quad11boundingBoxERfS0_S0_S0_)

	.visible .func (.param .s32 __cudaretf__ZNK4Quad13containsPointERK6float2) _ZNK4Quad13containsPointERK6float2 (.param .u64 __cudaparmf1__ZNK4Quad13containsPointERK6float2, .param .u64 __cudaparmf2__ZNK4Quad13containsPointERK6float2)

	.visible .func (.param .s32 __cudaretf__ZNK4Quad18countIntersectionsERK6float2S2_S2_) _ZNK4Quad18countIntersectionsERK6float2S2_S2_ (.param .u64 __cudaparmf1__ZNK4Quad18countIntersectionsERK6float2S2_S2_, .param .u64 __cudaparmf2__ZNK4Quad18countIntersectionsERK6float2S2_S2_, .param .u64 __cudaparmf3__ZNK4Quad18countIntersectionsERK6float2S2_S2_, .param .u64 __cudaparmf4__ZNK4Quad18countIntersectionsERK6float2S2_S2_)

	//-----------------------------------------------------------
	// Compiling C:/Users/dvaeng/AppData/Local/Temp/tmpxft_00003eac_00000000-11_StabilizerWarp.cpp3.i (C:/Users/dvaeng/AppData/Local/Temp/ccBI#.a12756)
	//-----------------------------------------------------------

	//-----------------------------------------------------------
	// Options:
	//-----------------------------------------------------------
	//  Target:ptx, ISA:sm_20, Endian:little, Pointer Size:64
	//  -O3	(Optimization level)
	//  -g0	(Debug level)
	//  -m2	(Report advisories)
	//-----------------------------------------------------------

	.file	1	"C:/Users/dvaeng/AppData/Local/Temp/tmpxft_00003eac_00000000-10_StabilizerWarp.cudafe2.gpu"
	.file	2	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/PixelFormat.h"
	.file	3	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/PixelRGB.h"
	.file	4	"c:/Mulder64/shared/adobe/MediaCore/Display/Src/CUDA/Effects/StabilizerWarp.cu"
	.file	5	"C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\include\crtdefs.h"
	.file	6	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\crt/device_runtime.h"
	.file	7	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\host_defines.h"
	.file	8	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\builtin_types.h"
	.file	9	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\device_types.h"
	.file	10	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\driver_types.h"
	.file	11	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\surface_types.h"
	.file	12	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\texture_types.h"
	.file	13	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\vector_types.h"
	.file	14	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\builtin_types.h"
	.file	15	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\host_defines.h"
	.file	16	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\device_launch_parameters.h"
	.file	17	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\crt\storage_class.h"
	.file	18	"C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\include\time.h"
	.file	19	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/Utils.h"
	.file	20	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\common_functions.h"
	.file	21	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_functions.h"
	.file	22	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_constants.h"
	.file	23	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\device_functions.h"
	.file	24	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_11_atomic_functions.h"
	.file	25	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_12_atomic_functions.h"
	.file	26	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_13_double_functions.h"
	.file	27	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_20_atomic_functions.h"
	.file	28	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_20_intrinsics.h"
	.file	29	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\surface_functions.h"
	.file	30	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\texture_fetch_functions.h"
	.file	31	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_functions_dbl_ptx3.h"


	// Demangled: IntegerMultiply(int, int).
	// Returns the low 32 bits of the product of the two 32-bit arguments.
	.visible .func (.param .s32 __cudaretf__Z15IntegerMultiplyii) _Z15IntegerMultiplyii (.param .s32 __cudaparmf1__Z15IntegerMultiplyii, .param .s32 __cudaparmf2__Z15IntegerMultiplyii)
	{
	.reg .u32 %lhs;
	.reg .u32 %rhs;
	.reg .u32 %prod;
	.loc	19	60	0
$LDWbegin__Z15IntegerMultiplyii:
	// Fetch both operands straight into their working registers.
	ld.param.u32 	%lhs, [__cudaparmf1__Z15IntegerMultiplyii];
	ld.param.u32 	%rhs, [__cudaparmf2__Z15IntegerMultiplyii];
	.loc	19	64	0
	// mul.lo keeps only the low 32 bits, i.e. the product wraps on overflow.
	mul.lo.s32 	%prod, %lhs, %rhs;
	st.param.s32 	[__cudaretf__Z15IntegerMultiplyii], %prod;
	ret;
$LDWend__Z15IntegerMultiplyii:
	} // _Z15IntegerMultiplyii

	// Demangled: Standard2DKernelX().
	// Returns %ctaid.x * %ntid.x + %tid.x using 32-bit wrap-around arithmetic.
	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelXv) _Z17Standard2DKernelXv ()
	{
	.reg .u32 %block;
	.reg .u32 %extent;
	.reg .u32 %thread;
	.reg .u32 %index;
	.loc	19	73	0
$LDWbegin__Z17Standard2DKernelXv:
	.loc	19	74	0
	mov.u32 	%block, %ctaid.x;
	mov.u32 	%extent, %ntid.x;
	mov.u32 	%thread, %tid.x;
	// index = block * extent + thread, low 32 bits only.
	mad.lo.s32 	%index, %block, %extent, %thread;
	st.param.s32 	[__cudaretf__Z17Standard2DKernelXv], %index;
	ret;
$LDWend__Z17Standard2DKernelXv:
	} // _Z17Standard2DKernelXv

	// Demangled: Standard2DKernelY().
	// Returns %ctaid.y * %ntid.y + %tid.y using 32-bit wrap-around arithmetic.
	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelYv) _Z17Standard2DKernelYv ()
	{
	.reg .u32 %block;
	.reg .u32 %extent;
	.reg .u32 %thread;
	.reg .u32 %index;
	.loc	19	77	0
$LDWbegin__Z17Standard2DKernelYv:
	.loc	19	78	0
	mov.u32 	%block, %ctaid.y;
	mov.u32 	%extent, %ntid.y;
	mov.u32 	%thread, %tid.y;
	// index = block * extent + thread, low 32 bits only.
	mad.lo.s32 	%index, %block, %extent, %thread;
	st.param.s32 	[__cudaretf__Z17Standard2DKernelYv], %index;
	ret;
$LDWend__Z17Standard2DKernelYv:
	} // _Z17Standard2DKernelYv

	// Demangled: Half4ToFloat4(ushort4).
	// Interprets each of the four 16-bit lanes of the argument as an f16 bit
	// pattern and widens it to f32 (flush-to-zero conversion). Lane i of the
	// 8-byte input becomes lane i of the 16-byte result.
	.visible .func (.param .align 16 .b8 __cudaretf__Z13Half4ToFloat47ushort4[16]) _Z13Half4ToFloat47ushort4 (.param .align 8 .b8 __cudaparmf1__Z13Half4ToFloat47ushort4[8])
	{
	.reg .b16 %h<4>;
	.reg .f32 %w<4>;
	.loc	19	86	0
$LDWbegin__Z13Half4ToFloat47ushort4:
	// Load the raw half-precision bit patterns, one 16-bit lane at a time.
	ld.param.b16 	%h0, [__cudaparmf1__Z13Half4ToFloat47ushort4+0];
	ld.param.b16 	%h1, [__cudaparmf1__Z13Half4ToFloat47ushort4+2];
	ld.param.b16 	%h2, [__cudaparmf1__Z13Half4ToFloat47ushort4+4];
	ld.param.b16 	%h3, [__cudaparmf1__Z13Half4ToFloat47ushort4+6];
	.loc	19	87	0
	// Widen each lane; .ftz flushes subnormal inputs to zero.
	cvt.ftz.f32.f16 	%w0, %h0;
	cvt.ftz.f32.f16 	%w1, %h1;
	cvt.ftz.f32.f16 	%w2, %h2;
	cvt.ftz.f32.f16 	%w3, %h3;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+0], %w0;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+4], %w1;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+8], %w2;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+12], %w3;
	ret;
$LDWend__Z13Half4ToFloat47ushort4:
	} // _Z13Half4ToFloat47ushort4

	// Demangled: Float4ToHalf4(float4).
	// Narrows each of the four f32 lanes of the argument to an f16 bit pattern
	// (round-to-nearest-even, flush-to-zero) and returns the four 16-bit
	// patterns packed in lane order in the 8-byte result.
	.visible .func (.param .align 8 .b8 __cudaretf__Z13Float4ToHalf46float4[8]) _Z13Float4ToHalf46float4 (.param .align 16 .b8 __cudaparmf1__Z13Float4ToHalf46float4[16])
	{
	.reg .f32 %w<4>;
	.reg .b16 %h<4>;
	.loc	19	95	0
$LDWbegin__Z13Float4ToHalf46float4:
	ld.param.f32 	%w0, [__cudaparmf1__Z13Float4ToHalf46float4+0];
	ld.param.f32 	%w1, [__cudaparmf1__Z13Float4ToHalf46float4+4];
	ld.param.f32 	%w2, [__cudaparmf1__Z13Float4ToHalf46float4+8];
	ld.param.f32 	%w3, [__cudaparmf1__Z13Float4ToHalf46float4+12];
	.loc	19	96	0
	// Narrow each lane; only the 16 result bits are kept and stored.
	cvt.rn.ftz.f16.f32 	%h0, %w0;
	cvt.rn.ftz.f16.f32 	%h1, %w1;
	cvt.rn.ftz.f16.f32 	%h2, %w2;
	cvt.rn.ftz.f16.f32 	%h3, %w3;
	st.param.b16 	[__cudaretf__Z13Float4ToHalf46float4+0], %h0;
	st.param.b16 	[__cudaretf__Z13Float4ToHalf46float4+2], %h1;
	st.param.b16 	[__cudaretf__Z13Float4ToHalf46float4+4], %h2;
	st.param.b16 	[__cudaretf__Z13Float4ToHalf46float4+6], %h3;
	ret;
$LDWend__Z13Float4ToHalf46float4:
	} // _Z13Float4ToHalf46float4

	// Demangled: Mix3(unsigned&, unsigned&, unsigned&).
	// Mixes three 32-bit words in place through the three pointer arguments
	// and returns the final value written through the third pointer.
	// In the comments below, a, b and c name the words addressed by the first,
	// second and third argument (%rd2, %rd4, %rd6). All accesses use ld/st
	// without a state-space qualifier, i.e. generic addressing.
	// Every partial result is stored back and its operands are reloaded from
	// memory before the next use.
	// NOTE(review): presumably this is because the three references may alias;
	// do not fold the reloads or reorder the stores without confirming that
	// callers never pass overlapping arguments.
	.visible .func (.param .u32 __cudaretf__Z4Mix3RjS_S_) _Z4Mix3RjS_S_ (.param .u64 __cudaparmf1__Z4Mix3RjS_S_, .param .u64 __cudaparmf2__Z4Mix3RjS_S_, .param .u64 __cudaparmf3__Z4Mix3RjS_S_)
	{
	.reg .u32 %r<75>;
	.reg .u64 %rd<8>;
	.loc	19	138	0
$LDWbegin__Z4Mix3RjS_S_:
	// Fetch the three pointers: %rd2 -> a, %rd4 -> b, %rd6 -> c.
	ld.param.u64 	%rd1, [__cudaparmf1__Z4Mix3RjS_S_];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf2__Z4Mix3RjS_S_];
	mov.s64 	%rd4, %rd3;
	ld.param.u64 	%rd5, [__cudaparmf3__Z4Mix3RjS_S_];
	mov.s64 	%rd6, %rd5;
	.loc	19	139	0
	// a -= b; a -= c; a ^= (c >> 13);
	ld.u32 	%r1, [%rd2+0];
	ld.u32 	%r2, [%rd4+0];
	sub.u32 	%r3, %r1, %r2;
	st.u32 	[%rd2+0], %r3;
	ld.u32 	%r4, [%rd6+0];
	sub.u32 	%r5, %r3, %r4;
	st.u32 	[%rd2+0], %r5;
	ld.u32 	%r6, [%rd6+0];
	shr.u32 	%r7, %r6, 13;
	xor.b32 	%r8, %r5, %r7;
	st.u32 	[%rd2+0], %r8;
	.loc	19	140	0
	// b -= c; b -= a; b ^= (a << 8);
	ld.u32 	%r9, [%rd4+0];
	ld.u32 	%r10, [%rd6+0];
	sub.u32 	%r11, %r9, %r10;
	st.u32 	[%rd4+0], %r11;
	ld.u32 	%r12, [%rd2+0];
	sub.u32 	%r13, %r11, %r12;
	st.u32 	[%rd4+0], %r13;
	ld.u32 	%r14, [%rd2+0];
	shl.b32 	%r15, %r14, 8;
	xor.b32 	%r16, %r13, %r15;
	st.u32 	[%rd4+0], %r16;
	.loc	19	141	0
	// c -= a; c -= b; c ^= (b >> 13);
	ld.u32 	%r17, [%rd6+0];
	ld.u32 	%r18, [%rd2+0];
	sub.u32 	%r19, %r17, %r18;
	st.u32 	[%rd6+0], %r19;
	ld.u32 	%r20, [%rd4+0];
	sub.u32 	%r21, %r19, %r20;
	st.u32 	[%rd6+0], %r21;
	ld.u32 	%r22, [%rd4+0];
	shr.u32 	%r23, %r22, 13;
	xor.b32 	%r24, %r21, %r23;
	st.u32 	[%rd6+0], %r24;
	.loc	19	142	0
	// a -= b; a -= c; a ^= (c >> 12);
	ld.u32 	%r25, [%rd2+0];
	ld.u32 	%r26, [%rd4+0];
	sub.u32 	%r27, %r25, %r26;
	st.u32 	[%rd2+0], %r27;
	ld.u32 	%r28, [%rd6+0];
	sub.u32 	%r29, %r27, %r28;
	st.u32 	[%rd2+0], %r29;
	ld.u32 	%r30, [%rd6+0];
	shr.u32 	%r31, %r30, 12;
	xor.b32 	%r32, %r29, %r31;
	st.u32 	[%rd2+0], %r32;
	.loc	19	143	0
	// b -= c; b -= a; b ^= (a << 16);
	ld.u32 	%r33, [%rd4+0];
	ld.u32 	%r34, [%rd6+0];
	sub.u32 	%r35, %r33, %r34;
	st.u32 	[%rd4+0], %r35;
	ld.u32 	%r36, [%rd2+0];
	sub.u32 	%r37, %r35, %r36;
	st.u32 	[%rd4+0], %r37;
	ld.u32 	%r38, [%rd2+0];
	shl.b32 	%r39, %r38, 16;
	xor.b32 	%r40, %r37, %r39;
	st.u32 	[%rd4+0], %r40;
	.loc	19	144	0
	// c -= a; c -= b; c ^= (b >> 5);
	ld.u32 	%r41, [%rd6+0];
	ld.u32 	%r42, [%rd2+0];
	sub.u32 	%r43, %r41, %r42;
	st.u32 	[%rd6+0], %r43;
	ld.u32 	%r44, [%rd4+0];
	sub.u32 	%r45, %r43, %r44;
	st.u32 	[%rd6+0], %r45;
	ld.u32 	%r46, [%rd4+0];
	shr.u32 	%r47, %r46, 5;
	xor.b32 	%r48, %r45, %r47;
	st.u32 	[%rd6+0], %r48;
	.loc	19	145	0
	// a -= b; a -= c; a ^= (c >> 3);
	ld.u32 	%r49, [%rd2+0];
	ld.u32 	%r50, [%rd4+0];
	sub.u32 	%r51, %r49, %r50;
	st.u32 	[%rd2+0], %r51;
	ld.u32 	%r52, [%rd6+0];
	sub.u32 	%r53, %r51, %r52;
	st.u32 	[%rd2+0], %r53;
	ld.u32 	%r54, [%rd6+0];
	shr.u32 	%r55, %r54, 3;
	xor.b32 	%r56, %r53, %r55;
	st.u32 	[%rd2+0], %r56;
	.loc	19	146	0
	// b -= c; b -= a; b ^= (a << 10);
	ld.u32 	%r57, [%rd4+0];
	ld.u32 	%r58, [%rd6+0];
	sub.u32 	%r59, %r57, %r58;
	st.u32 	[%rd4+0], %r59;
	ld.u32 	%r60, [%rd2+0];
	sub.u32 	%r61, %r59, %r60;
	st.u32 	[%rd4+0], %r61;
	ld.u32 	%r62, [%rd2+0];
	shl.b32 	%r63, %r62, 10;
	xor.b32 	%r64, %r61, %r63;
	st.u32 	[%rd4+0], %r64;
	.loc	19	147	0
	// c -= a; c -= b; c ^= (b >> 15);
	ld.u32 	%r65, [%rd6+0];
	ld.u32 	%r66, [%rd2+0];
	sub.u32 	%r67, %r65, %r66;
	st.u32 	[%rd6+0], %r67;
	ld.u32 	%r68, [%rd4+0];
	sub.u32 	%r69, %r67, %r68;
	st.u32 	[%rd6+0], %r69;
	ld.u32 	%r70, [%rd4+0];
	shr.u32 	%r71, %r70, 15;
	xor.b32 	%r72, %r69, %r71;
	st.u32 	[%rd6+0], %r72;
	.loc	19	148	0
	// Return the value that was just stored to c (taken from the register,
	// not reloaded from memory).
	mov.s32 	%r73, %r72;
	st.param.u32 	[__cudaretf__Z4Mix3RjS_S_], %r73;
	ret;
$LDWend__Z4Mix3RjS_S_:
	} // _Z4Mix3RjS_S_

	// Demangled: Rand(unsigned).
	// Derives two 32-bit states from the argument and combines one byte of each:
	//   step1 = seed * 1103515245 + 12345
	//   step2 = seed * -1029531031 - 740551042   (all modulo 2^32)
	//   result = (((step1 >> 16) & 255) << 7) ^ ((step2 >> 16) & 255)
	// The step2 constants are the step1 multiplier/increment composed with
	// themselves modulo 2^32, so step2 equals the step1 update applied twice.
	.visible .func (.param .s32 __cudaretf__Z4Randj) _Z4Randj (.param .u32 __cudaparmf1__Z4Randj)
	{
	.reg .u32 %seed;
	.reg .u32 %step1;
	.reg .u32 %step2;
	.reg .u32 %hi;
	.reg .u32 %lo;
	.reg .u32 %mixed;
	.loc	19	152	0
$LDWbegin__Z4Randj:
	ld.param.u32 	%seed, [__cudaparmf1__Z4Randj];
	.loc	19	163	0
	// First state and the byte taken from its bits 16..23, shifted left by 7.
	mul.lo.u32 	%step1, %seed, 1103515245;
	add.u32 	%step1, %step1, 12345;
	shr.u32 	%hi, %step1, 16;
	and.b32 	%hi, %hi, 255;
	shl.b32 	%hi, %hi, 7;
	// Second state and the byte taken from its bits 16..23.
	mul.lo.u32 	%step2, %seed, -1029531031;
	sub.u32 	%step2, %step2, 740551042;
	shr.u32 	%lo, %step2, 16;
	and.b32 	%lo, %lo, 255;
	// Combine the two bytes.
	xor.b32 	%mixed, %hi, %lo;
	st.param.s32 	[__cudaretf__Z4Randj], %mixed;
	ret;
$LDWend__Z4Randj:
	} // _Z4Randj

	// Demangled: Rand2D(unsigned, unsigned, unsigned).
	// Runs nine subtract/subtract/shift-xor mixing rounds over the three
	// arguments (held in %a, %b, %c and updated in place), then turns the final
	// %c into the result:
	//   step1 = c * 1103515245 + 12345
	//   step2 = c * -1029531031 - 740551042   (all modulo 2^32)
	//   result = (((step1 >> 16) & 255) << 7) ^ ((step2 >> 16) & 255)
	.visible .func (.param .s32 __cudaretf__Z6Rand2Djjj) _Z6Rand2Djjj (.param .u32 __cudaparmf1__Z6Rand2Djjj, .param .u32 __cudaparmf2__Z6Rand2Djjj, .param .u32 __cudaparmf3__Z6Rand2Djjj)
	{
	.reg .u32 %a;
	.reg .u32 %b;
	.reg .u32 %c;
	.reg .u32 %t;
	.reg .u32 %step1;
	.reg .u32 %step2;
	.reg .u32 %hi;
	.reg .u32 %lo;
	.reg .u32 %mixed;
	.loc	19	169	0
$LDWbegin__Z6Rand2Djjj:
	ld.param.u32 	%a, [__cudaparmf1__Z6Rand2Djjj];
	ld.param.u32 	%b, [__cudaparmf2__Z6Rand2Djjj];
	ld.param.u32 	%c, [__cudaparmf3__Z6Rand2Djjj];
	.loc	19	139	0
	// a -= b; a -= c; a ^= (c >> 13);
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%t, %c, 13;
	xor.b32 	%a, %a, %t;
	.loc	19	140	0
	// b -= c; b -= a; b ^= (a << 8);
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%t, %a, 8;
	xor.b32 	%b, %b, %t;
	.loc	19	141	0
	// c -= a; c -= b; c ^= (b >> 13);
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%t, %b, 13;
	xor.b32 	%c, %c, %t;
	.loc	19	142	0
	// a -= b; a -= c; a ^= (c >> 12);
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%t, %c, 12;
	xor.b32 	%a, %a, %t;
	.loc	19	143	0
	// b -= c; b -= a; b ^= (a << 16);
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%t, %a, 16;
	xor.b32 	%b, %b, %t;
	.loc	19	144	0
	// c -= a; c -= b; c ^= (b >> 5);
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%t, %b, 5;
	xor.b32 	%c, %c, %t;
	.loc	19	145	0
	// a -= b; a -= c; a ^= (c >> 3);
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%t, %c, 3;
	xor.b32 	%a, %a, %t;
	.loc	19	146	0
	// b -= c; b -= a; b ^= (a << 10);
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%t, %a, 10;
	xor.b32 	%b, %b, %t;
	.loc	19	147	0
	// c -= a; c -= b; c ^= (b >> 15);
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%t, %b, 15;
	xor.b32 	%c, %c, %t;
	.loc	19	170	0
	// Byte from the first derived state, shifted left by 7.
	mul.lo.u32 	%step1, %c, 1103515245;
	add.u32 	%step1, %step1, 12345;
	shr.u32 	%hi, %step1, 16;
	and.b32 	%hi, %hi, 255;
	shl.b32 	%hi, %hi, 7;
	// Byte from the second derived state.
	mul.lo.u32 	%step2, %c, -1029531031;
	sub.u32 	%step2, %step2, 740551042;
	shr.u32 	%lo, %step2, 16;
	and.b32 	%lo, %lo, 255;
	xor.b32 	%mixed, %hi, %lo;
	st.param.s32 	[__cudaretf__Z6Rand2Djjj], %mixed;
	ret;
$LDWend__Z6Rand2Djjj:
	} // _Z6Rand2Djjj

	// Demangled: Rand2D(unsigned).
	// Same computation as the three-argument Rand2D, with the three mixed words
	// seeded as:
	//   a = the argument
	//   b = %ctaid.x * %ntid.x + %tid.x
	//   c = %ctaid.y * %ntid.y + %tid.y
	// After nine mixing rounds the final c is turned into the result:
	//   step1 = c * 1103515245 + 12345
	//   step2 = c * -1029531031 - 740551042   (all modulo 2^32)
	//   result = (((step1 >> 16) & 255) << 7) ^ ((step2 >> 16) & 255)
	.visible .func (.param .s32 __cudaretf__Z6Rand2Dj) _Z6Rand2Dj (.param .u32 __cudaparmf1__Z6Rand2Dj)
	{
	.reg .u32 %a;
	.reg .u32 %b;
	.reg .u32 %c;
	.reg .u32 %t;
	.reg .u32 %blk;
	.reg .u32 %dim;
	.reg .u32 %thr;
	.reg .u32 %step1;
	.reg .u32 %step2;
	.reg .u32 %hi;
	.reg .u32 %lo;
	.reg .u32 %mixed;
	.loc	19	175	0
$LDWbegin__Z6Rand2Dj:
	ld.param.u32 	%a, [__cudaparmf1__Z6Rand2Dj];
	.loc	19	143	0
	// c = %ctaid.y * %ntid.y + %tid.y (low 32 bits).
	mov.u32 	%blk, %ctaid.y;
	mov.u32 	%dim, %ntid.y;
	mov.u32 	%thr, %tid.y;
	mad.lo.s32 	%c, %blk, %dim, %thr;
	// b = %ctaid.x * %ntid.x + %tid.x (low 32 bits).
	mov.u32 	%blk, %ctaid.x;
	mov.u32 	%dim, %ntid.x;
	mov.u32 	%thr, %tid.x;
	mad.lo.s32 	%b, %blk, %dim, %thr;
	// a -= b; a -= c; a ^= (c >> 13);
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%t, %c, 13;
	xor.b32 	%a, %a, %t;
	// b -= c; b -= a; b ^= (a << 8);
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%t, %a, 8;
	xor.b32 	%b, %b, %t;
	// c -= a; c -= b; c ^= (b >> 13);
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%t, %b, 13;
	xor.b32 	%c, %c, %t;
	// a -= b; a -= c; a ^= (c >> 12);
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%t, %c, 12;
	xor.b32 	%a, %a, %t;
	// b -= c; b -= a; b ^= (a << 16);
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%t, %a, 16;
	xor.b32 	%b, %b, %t;
	.loc	19	144	0
	// c -= a; c -= b; c ^= (b >> 5);
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%t, %b, 5;
	xor.b32 	%c, %c, %t;
	.loc	19	145	0
	// a -= b; a -= c; a ^= (c >> 3);
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%t, %c, 3;
	xor.b32 	%a, %a, %t;
	.loc	19	146	0
	// b -= c; b -= a; b ^= (a << 10);
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%t, %a, 10;
	xor.b32 	%b, %b, %t;
	.loc	19	147	0
	// c -= a; c -= b; c ^= (b >> 15);
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%t, %b, 15;
	xor.b32 	%c, %c, %t;
	.loc	19	176	0
	// Byte from the first derived state, shifted left by 7.
	mul.lo.u32 	%step1, %c, 1103515245;
	add.u32 	%step1, %step1, 12345;
	shr.u32 	%hi, %step1, 16;
	and.b32 	%hi, %hi, 255;
	shl.b32 	%hi, %hi, 7;
	// Byte from the second derived state.
	mul.lo.u32 	%step2, %c, -1029531031;
	sub.u32 	%step2, %step2, 740551042;
	shr.u32 	%lo, %step2, 16;
	and.b32 	%lo, %lo, 255;
	xor.b32 	%mixed, %hi, %lo;
	st.param.s32 	[__cudaretf__Z6Rand2Dj], %mixed;
	ret;
$LDWend__Z6Rand2Dj:
	} // _Z6Rand2Dj

	// Demangled: void Write2D<ushort4>(ushort4, ushort4*, int, int, int).
	// Stores the 8-byte first argument at element index
	//   (arg3 * arg5 + arg4)
	// of the array addressed by the second argument, i.e. at byte offset
	// 8 * index from that pointer. The index is formed in 32-bit arithmetic and
	// sign-extended when scaled; the store uses generic addressing.
	.visible .func _Z7Write2DI7ushort4EvT_PS1_iii (.param .align 8 .b8 __cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii[8], .param .u64 __cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii)
	{
	.reg .u16 %lane<4>;
	.reg .u32 %arg3;
	.reg .u32 %arg4;
	.reg .u32 %arg5;
	.reg .u32 %index;
	.reg .u64 %base;
	.reg .u64 %offset;
	.reg .u64 %addr;
	.loc	19	125	0
$LDWbegin__Z7Write2DI7ushort4EvT_PS1_iii:
	// The four 16-bit lanes of the value to write.
	ld.param.u16 	%lane0, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+0];
	ld.param.u16 	%lane1, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+2];
	ld.param.u16 	%lane2, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+4];
	ld.param.u16 	%lane3, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+6];
	ld.param.u64 	%base, [__cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii];
	ld.param.u32 	%arg3, [__cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii];
	ld.param.u32 	%arg4, [__cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii];
	ld.param.u32 	%arg5, [__cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii];
	.loc	19	126	0
	// index = arg3 * arg5 + arg4 (low 32 bits).
	mad.lo.s32 	%index, %arg3, %arg5, %arg4;
	// Scale by the 8-byte element size, widening to a signed 64-bit offset.
	mul.wide.s32 	%offset, %index, 8;
	add.u64 	%addr, %base, %offset;
	st.v4.u16 	[%addr+0], {%lane0,%lane1,%lane2,%lane3};
	.loc	19	127	0
	ret;
$LDWend__Z7Write2DI7ushort4EvT_PS1_iii:
	} // _Z7Write2DI7ushort4EvT_PS1_iii

	// Demangled: void Write2D<float4>(float4, float4*, int, int, int).
	// Stores the 16-byte first argument at element index
	//   (arg3 * arg5 + arg4)
	// of the array addressed by the second argument, i.e. at byte offset
	// 16 * index from that pointer. The index is formed in 32-bit arithmetic and
	// sign-extended when scaled; the store uses generic addressing.
	.visible .func _Z7Write2DI6float4EvT_PS1_iii (.param .align 16 .b8 __cudaparmf1__Z7Write2DI6float4EvT_PS1_iii[16], .param .u64 __cudaparmf2__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI6float4EvT_PS1_iii)
	{
	.reg .f32 %lane<4>;
	.reg .u32 %arg3;
	.reg .u32 %arg4;
	.reg .u32 %arg5;
	.reg .u32 %index;
	.reg .u64 %base;
	.reg .u64 %offset;
	.reg .u64 %addr;
	.loc	19	125	0
$LDWbegin__Z7Write2DI6float4EvT_PS1_iii:
	// The four f32 lanes of the value to write.
	ld.param.f32 	%lane0, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+0];
	ld.param.f32 	%lane1, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+4];
	ld.param.f32 	%lane2, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+8];
	ld.param.f32 	%lane3, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+12];
	ld.param.u64 	%base, [__cudaparmf2__Z7Write2DI6float4EvT_PS1_iii];
	ld.param.u32 	%arg3, [__cudaparmf3__Z7Write2DI6float4EvT_PS1_iii];
	ld.param.u32 	%arg4, [__cudaparmf4__Z7Write2DI6float4EvT_PS1_iii];
	ld.param.u32 	%arg5, [__cudaparmf5__Z7Write2DI6float4EvT_PS1_iii];
	.loc	19	126	0
	// index = arg3 * arg5 + arg4 (low 32 bits).
	mad.lo.s32 	%index, %arg3, %arg5, %arg4;
	// Scale by the 16-byte element size, widening to a signed 64-bit offset.
	mul.wide.s32 	%offset, %index, 16;
	add.u64 	%addr, %base, %offset;
	st.v4.f32 	[%addr+0], {%lane0,%lane1,%lane2,%lane3};
	.loc	19	127	0
	ret;
$LDWend__Z7Write2DI6float4EvT_PS1_iii:
	} // _Z7Write2DI6float4EvT_PS1_iii

	// Demangled: UnpremultiplyPixel(PixelRGB).
	// Input and result are four f32 lanes at byte offsets 0, 4, 8, 12.
	//   alpha = saturate(lane3)                      (clamped to [0, 1])
	//   if (alpha - 8e-6 <= 0)  result = {0, 0, 0, 0}
	//   else                    result = {lane0/alpha, lane1/alpha, lane2/alpha, alpha}
	// The division is an approximate reciprocal followed by a multiply; all
	// float operations flush subnormals to zero.
	.visible .func (.param .align 16 .b8 __cudaretf__Z18UnpremultiplyPixel8PixelRGB[16]) _Z18UnpremultiplyPixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z18UnpremultiplyPixel8PixelRGB[16])
	{
	.reg .f32 %in<4>;
	.reg .f32 %out<4>;
	.reg .f32 %alpha;
	.reg .f32 %bias;
	.reg .f32 %margin;
	.reg .f32 %zero;
	.reg .f32 %inv;
	.reg .pred %tiny;
	.loc	3	206	0
$LDWbegin__Z18UnpremultiplyPixel8PixelRGB:
	ld.param.f32 	%in0, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+0];
	ld.param.f32 	%in1, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+4];
	ld.param.f32 	%in2, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+8];
	ld.param.f32 	%in3, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+12];
	.loc	3	219	0
	// Start from the all-zero result; it is kept when alpha is too small.
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%out0, %zero;
	mov.f32 	%out1, %zero;
	mov.f32 	%out2, %zero;
	mov.f32 	%out3, %zero;
	.loc	3	208	0
	cvt.ftz.sat.f32.f32 	%alpha, %in3;
	mov.f32 	%bias, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%margin, %alpha, %bias;
	setp.le.ftz.f32 	%tiny, %margin, %zero;
	// Guard: skip the division entirely when alpha - 8e-6 <= 0.
	@%tiny bra 	$Lt_11_store;
	.loc	3	213	0
	rcp.approx.ftz.f32 	%inv, %alpha;
	mul.ftz.f32 	%out2, %inv, %in2;
	.loc	3	214	0
	mul.ftz.f32 	%out1, %inv, %in1;
	.loc	3	215	0
	mul.ftz.f32 	%out0, %inv, %in0;
	mov.f32 	%out3, %alpha;
$Lt_11_store:
	.loc	3	224	0
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+0], %out0;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+4], %out1;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+8], %out2;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+12], %out3;
	ret;
$LDWend__Z18UnpremultiplyPixel8PixelRGB:
	} // _Z18UnpremultiplyPixel8PixelRGB

	// Demangled: ToLinearColor(float).
	// Returns 2^(2.2 * log2(x)) for inputs that are not below zero, and
	// -(2^(2.2 * log2(-x))) for inputs below zero, using the approximate,
	// flush-to-zero lg2/ex2 instructions.
	// Written branch-free: the "below zero" predicate selects both the operand
	// fed to lg2 and the sign of the final value.
	.visible .func (.param .f32 __cudaretf__Z13ToLinearColorf) _Z13ToLinearColorf (.param .f32 __cudaparmf1__Z13ToLinearColorf)
	{
	.reg .f32 %x;
	.reg .f32 %zero;
	.reg .f32 %gamma;
	.reg .f32 %flip;
	.reg .f32 %mag;
	.reg .f32 %lg;
	.reg .f32 %pw;
	.reg .f32 %npw;
	.reg .f32 %res;
	.reg .pred %neg;
	.loc	3	231	0
$LDWbegin__Z13ToLinearColorf:
	ld.param.f32 	%x, [__cudaparmf1__Z13ToLinearColorf];
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%gamma, 0f400ccccd;  	// 2.2
	setp.lt.ftz.f32 	%neg, %x, %zero;
	.loc	3	234	0
	// Operand for lg2: -x when x is below zero, otherwise x unchanged.
	neg.ftz.f32 	%flip, %x;
	selp.f32 	%mag, %flip, %x, %neg;
	.loc	3	236	0
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%lg, %lg, %gamma;
	ex2.approx.ftz.f32 	%pw, %lg;
	// Restore the sign for the below-zero case.
	neg.ftz.f32 	%npw, %pw;
	selp.f32 	%res, %npw, %pw, %neg;
	st.param.f32 	[__cudaretf__Z13ToLinearColorf], %res;
	ret;
$LDWend__Z13ToLinearColorf:
	} // _Z13ToLinearColorf

	// Demangled: FromLinearColor(float).
	// Returns 2^(0.454545 * log2(x)) for inputs that are not below zero, and
	// -(2^(0.454545 * log2(-x))) for inputs below zero, using the approximate,
	// flush-to-zero lg2/ex2 instructions.
	// Written branch-free: the "below zero" predicate selects both the operand
	// fed to lg2 and the sign of the final value.
	.visible .func (.param .f32 __cudaretf__Z15FromLinearColorf) _Z15FromLinearColorf (.param .f32 __cudaparmf1__Z15FromLinearColorf)
	{
	.reg .f32 %x;
	.reg .f32 %zero;
	.reg .f32 %gamma;
	.reg .f32 %flip;
	.reg .f32 %mag;
	.reg .f32 %lg;
	.reg .f32 %pw;
	.reg .f32 %npw;
	.reg .f32 %res;
	.reg .pred %neg;
	.loc	3	239	0
$LDWbegin__Z15FromLinearColorf:
	ld.param.f32 	%x, [__cudaparmf1__Z15FromLinearColorf];
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%gamma, 0f3ee8ba2e;  	// 0.454545
	setp.lt.ftz.f32 	%neg, %x, %zero;
	.loc	3	242	0
	// Operand for lg2: -x when x is below zero, otherwise x unchanged.
	neg.ftz.f32 	%flip, %x;
	selp.f32 	%mag, %flip, %x, %neg;
	.loc	3	244	0
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%lg, %lg, %gamma;
	ex2.approx.ftz.f32 	%pw, %lg;
	// Restore the sign for the below-zero case.
	neg.ftz.f32 	%npw, %pw;
	selp.f32 	%res, %npw, %pw, %neg;
	st.param.f32 	[__cudaretf__Z15FromLinearColorf], %res;
	ret;
$LDWend__Z15FromLinearColorf:
	} // _Z15FromLinearColorf

	// Demangled: PremultiplyLinearizePixel(PixelRGB).
	// Input and result are four f32 lanes at byte offsets 0, 4, 8, 12.
	//   alpha   = saturate(lane3)                         (clamped to [0, 1])
	//   lin(x)  = x < 0 ? -(2^(2.2 * log2(-x))) : 2^(2.2 * log2(x))
	//   result  = {lin(lane0) * alpha, lin(lane1) * alpha, lin(lane2) * alpha, alpha}
	// lg2/ex2 are the approximate, flush-to-zero forms. Each channel is
	// evaluated branch-free: the "below zero" predicate selects the lg2 operand
	// and the sign of the channel value.
	.visible .func (.param .align 16 .b8 __cudaretf__Z25PremultiplyLinearizePixel8PixelRGB[16]) _Z25PremultiplyLinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB[16])
	{
	.reg .f32 %in<4>;
	.reg .f32 %lin<3>;
	.reg .f32 %out<3>;
	.reg .f32 %alpha;
	.reg .f32 %zero;
	.reg .f32 %gamma;
	.reg .f32 %flip;
	.reg .f32 %mag;
	.reg .f32 %lg;
	.reg .f32 %pw;
	.reg .f32 %npw;
	.reg .pred %neg;
	.loc	3	252	0
$LDWbegin__Z25PremultiplyLinearizePixel8PixelRGB:
	ld.param.f32 	%in0, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+0];
	ld.param.f32 	%in1, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+4];
	ld.param.f32 	%in2, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+8];
	ld.param.f32 	%in3, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+12];
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%gamma, 0f400ccccd;  	// 2.2
	.loc	3	254	0
	cvt.ftz.sat.f32.f32 	%alpha, %in3;
	.loc	3	255	0
	// lin0 = lin(lane0)
	setp.lt.ftz.f32 	%neg, %in0, %zero;
	neg.ftz.f32 	%flip, %in0;
	selp.f32 	%mag, %flip, %in0, %neg;
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%lg, %lg, %gamma;
	ex2.approx.ftz.f32 	%pw, %lg;
	neg.ftz.f32 	%npw, %pw;
	selp.f32 	%lin0, %npw, %pw, %neg;
	.loc	3	256	0
	// lin1 = lin(lane1)
	setp.lt.ftz.f32 	%neg, %in1, %zero;
	neg.ftz.f32 	%flip, %in1;
	selp.f32 	%mag, %flip, %in1, %neg;
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%lg, %lg, %gamma;
	ex2.approx.ftz.f32 	%pw, %lg;
	neg.ftz.f32 	%npw, %pw;
	selp.f32 	%lin1, %npw, %pw, %neg;
	.loc	3	257	0
	// lin2 = lin(lane2)
	setp.lt.ftz.f32 	%neg, %in2, %zero;
	neg.ftz.f32 	%flip, %in2;
	selp.f32 	%mag, %flip, %in2, %neg;
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%lg, %lg, %gamma;
	ex2.approx.ftz.f32 	%pw, %lg;
	neg.ftz.f32 	%npw, %pw;
	selp.f32 	%lin2, %npw, %pw, %neg;
	.loc	3	259	0
	// Scale the three linearized channels by alpha; alpha itself is lane 3.
	mul.ftz.f32 	%out0, %lin0, %alpha;
	mul.ftz.f32 	%out1, %lin1, %alpha;
	mul.ftz.f32 	%out2, %lin2, %alpha;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+0], %out0;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+4], %out1;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+8], %out2;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+12], %alpha;
	ret;
$LDWend__Z25PremultiplyLinearizePixel8PixelRGB:
	} // _Z25PremultiplyLinearizePixel8PixelRGB

	// Demangled: UnpremultiplyUnlinearizePixel(PixelRGB).
	// Input and result are four f32 lanes at byte offsets 0, 4, 8, 12.
	// Step 1 (un-premultiply):
	//   alpha = saturate(lane3)
	//   if (alpha - 8e-6 <= 0)  u = {0, 0, 0}, a = 0
	//   else                    u = {lane0/alpha, lane1/alpha, lane2/alpha}, a = alpha
	//   (division = approximate reciprocal, then multiply)
	// Step 2 (per channel):
	//   g(x) = x < 0 ? -(2^(0.454545 * log2(-x))) : 2^(0.454545 * log2(x))
	//   result = {g(u0), g(u1), g(u2), a}
	// lg2/ex2 are the approximate, flush-to-zero forms. Step 2 is evaluated
	// branch-free with a predicate selecting the lg2 operand and result sign.
	.visible .func (.param .align 16 .b8 __cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16]) _Z29UnpremultiplyUnlinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16])
	{
	.reg .f32 %in<4>;
	.reg .f32 %un<3>;
	.reg .f32 %out<3>;
	.reg .f32 %alpha;
	.reg .f32 %aout;
	.reg .f32 %bias;
	.reg .f32 %margin;
	.reg .f32 %inv;
	.reg .f32 %zero;
	.reg .f32 %gamma;
	.reg .f32 %flip;
	.reg .f32 %mag;
	.reg .f32 %lg;
	.reg .f32 %pw;
	.reg .f32 %npw;
	.reg .pred %tiny;
	.reg .pred %neg;
	.loc	3	263	0
$LDWbegin__Z29UnpremultiplyUnlinearizePixel8PixelRGB:
	ld.param.f32 	%in0, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+0];
	ld.param.f32 	%in1, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+4];
	ld.param.f32 	%in2, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+8];
	ld.param.f32 	%in3, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+12];
	.loc	3	219	0
	// Start from the all-zero un-premultiplied pixel; kept when alpha is too small.
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%un0, %zero;
	mov.f32 	%un1, %zero;
	mov.f32 	%un2, %zero;
	mov.f32 	%aout, %zero;
	.loc	3	208	0
	cvt.ftz.sat.f32.f32 	%alpha, %in3;
	mov.f32 	%bias, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%margin, %alpha, %bias;
	setp.le.ftz.f32 	%tiny, %margin, %zero;
	// Guard: skip the division entirely when alpha - 8e-6 <= 0.
	@%tiny bra 	$Lt_15_unpremul_done;
	.loc	3	213	0
	rcp.approx.ftz.f32 	%inv, %alpha;
	mul.ftz.f32 	%un2, %inv, %in2;
	.loc	3	214	0
	mul.ftz.f32 	%un1, %inv, %in1;
	.loc	3	215	0
	mul.ftz.f32 	%un0, %inv, %in0;
	mov.f32 	%aout, %alpha;
$Lt_15_unpremul_done:
	mov.f32 	%gamma, 0f3ee8ba2e;  	// 0.454545
	.loc	3	266	0
	// out0 = g(un0)
	setp.lt.ftz.f32 	%neg, %un0, %zero;
	neg.ftz.f32 	%flip, %un0;
	selp.f32 	%mag, %flip, %un0, %neg;
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%lg, %lg, %gamma;
	ex2.approx.ftz.f32 	%pw, %lg;
	neg.ftz.f32 	%npw, %pw;
	selp.f32 	%out0, %npw, %pw, %neg;
	.loc	3	267	0
	// out1 = g(un1)
	setp.lt.ftz.f32 	%neg, %un1, %zero;
	neg.ftz.f32 	%flip, %un1;
	selp.f32 	%mag, %flip, %un1, %neg;
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%lg, %lg, %gamma;
	ex2.approx.ftz.f32 	%pw, %lg;
	neg.ftz.f32 	%npw, %pw;
	selp.f32 	%out1, %npw, %pw, %neg;
	.loc	3	268	0
	// out2 = g(un2)
	setp.lt.ftz.f32 	%neg, %un2, %zero;
	neg.ftz.f32 	%flip, %un2;
	selp.f32 	%mag, %flip, %un2, %neg;
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%lg, %lg, %gamma;
	ex2.approx.ftz.f32 	%pw, %lg;
	neg.ftz.f32 	%npw, %pw;
	selp.f32 	%out2, %npw, %pw, %neg;
	.loc	3	269	0
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+0], %out0;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+4], %out1;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+8], %out2;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+12], %aout;
	ret;
$LDWend__Z29UnpremultiplyUnlinearizePixel8PixelRGB:
	} // _Z29UnpremultiplyUnlinearizePixel8PixelRGB

	.visible .func (.param .align 16 .b8 __cudaretf__Z20PremultiplyLinearize6float4[16]) _Z20PremultiplyLinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z20PremultiplyLinearize6float4[16])
	{
	// In : four packed f32 at param bytes +0, +4, +8, +12.
	// Out: four packed f32 at the same offsets of the return param.
	//
	// The +12 component is saturated to [0,1]. Each of the other three goes
	// through a sign-preserving power curve, sign(v) * 2^(2.2 * log2(|v|))
	// (approx lg2/ex2), and is then multiplied by the saturated +12 component.
	// The saturated +12 component itself is returned at +12.
	.reg .f32 %c0, %c1, %c2, %aRaw, %aSat;  // inputs at +0, +4, +8, +12; saturated +12
	.reg .f32 %mag, %lg, %l0, %l1, %l2;     // power-curve scratch and results
	.reg .pred %pNeg;
	.loc	3	277	0
$LDWbegin__Z20PremultiplyLinearize6float4:
	ld.param.f32 	%c0, [__cudaparmf1__Z20PremultiplyLinearize6float4+0];
	ld.param.f32 	%c1, [__cudaparmf1__Z20PremultiplyLinearize6float4+4];
	ld.param.f32 	%c2, [__cudaparmf1__Z20PremultiplyLinearize6float4+8];
	ld.param.f32 	%aRaw, [__cudaparmf1__Z20PremultiplyLinearize6float4+12];
	.loc	3	254	0
	cvt.ftz.sat.f32.f32 	%aSat, %aRaw;
	.loc	3	255	0
	// Component +0: negate before and after the curve only when it is < 0.
	setp.lt.ftz.f32 	%pNeg, %c0, 0f00000000;
	mov.f32 	%mag, %c0;
	@%pNeg neg.ftz.f32 	%mag, %c0;
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%lg, %lg, 0f400ccccd;    	// * 2.2
	ex2.approx.ftz.f32 	%l0, %lg;
	@%pNeg neg.ftz.f32 	%l0, %l0;
	.loc	3	256	0
	// Component +4.
	setp.lt.ftz.f32 	%pNeg, %c1, 0f00000000;
	mov.f32 	%mag, %c1;
	@%pNeg neg.ftz.f32 	%mag, %c1;
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%lg, %lg, 0f400ccccd;    	// * 2.2
	ex2.approx.ftz.f32 	%l1, %lg;
	@%pNeg neg.ftz.f32 	%l1, %l1;
	.loc	3	257	0
	// Component +8.
	setp.lt.ftz.f32 	%pNeg, %c2, 0f00000000;
	mov.f32 	%mag, %c2;
	@%pNeg neg.ftz.f32 	%mag, %c2;
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%lg, %lg, 0f400ccccd;    	// * 2.2
	ex2.approx.ftz.f32 	%l2, %lg;
	@%pNeg neg.ftz.f32 	%l2, %l2;
	.loc	3	259	0
	// Scale the three curved components by the saturated +12 component.
	mul.ftz.f32 	%l2, %l2, %aSat;
	mul.ftz.f32 	%l1, %l1, %aSat;
	.loc	3	278	0
	mul.ftz.f32 	%l0, %l0, %aSat;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+0], %l0;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+4], %l1;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+8], %l2;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+12], %aSat;
	ret;
$LDWend__Z20PremultiplyLinearize6float4:
	} // _Z20PremultiplyLinearize6float4

	.visible .func (.param .align 16 .b8 __cudaretf__Z24UnpremultiplyUnlinearize6float4[16]) _Z24UnpremultiplyUnlinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z24UnpremultiplyUnlinearize6float4[16])
	{
	// In : four packed f32 at param bytes +0, +4, +8, +12.
	// Out: four packed f32 at the same offsets of the return param.
	//
	// Step 1: saturate the +12 component to [0,1]. If (saturated - 8e-006) <= 0
	//         the three other components and the +12 output all become 0;
	//         otherwise the three components are multiplied by the approximate
	//         reciprocal of the saturated value, which is returned at +12.
	// Step 2: each of the three components is mapped through
	//         sign(v) * 2^(0.454545 * log2(|v|)) using approx lg2/ex2.
	.reg .f32 %src0, %src1, %src2, %src3;       // inputs at +0, +4, +8, +12
	.reg .f32 %div, %divBiased, %divRcp, %ret3; // saturated divisor and derived values
	.reg .f32 %q0, %q1, %q2;                    // components after step 1
	.reg .f32 %absv, %expo, %ret0, %ret1, %ret2;
	.reg .pred %pSkipDiv, %pMinus;
	.loc	3	284	0
$LDWbegin__Z24UnpremultiplyUnlinearize6float4:
	ld.param.f32 	%src0, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+0];
	ld.param.f32 	%src1, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+4];
	ld.param.f32 	%src2, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+8];
	ld.param.f32 	%src3, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+12];
	.loc	3	208	0
	cvt.ftz.sat.f32.f32 	%div, %src3;
	add.ftz.f32 	%divBiased, %div, 0fb70637bd;    	// div + (-8e-006)
	setp.le.ftz.f32 	%pSkipDiv, %divBiased, 0f00000000;
	.loc	3	219	0
	// Defaults for the "divisor too small" case; replaced below otherwise.
	mov.f32 	%q2, 0f00000000;
	mov.f32 	%q1, 0f00000000;
	mov.f32 	%q0, 0f00000000;
	mov.f32 	%ret3, 0f00000000;
	@%pSkipDiv bra 	$Lt_17_4866;
	.loc	3	213	0
	rcp.approx.ftz.f32 	%divRcp, %div;
	mul.ftz.f32 	%q2, %divRcp, %src2;
	.loc	3	214	0
	mul.ftz.f32 	%q1, %divRcp, %src1;
	.loc	3	215	0
	mul.ftz.f32 	%q0, %divRcp, %src0;
	mov.f32 	%ret3, %div;
$Lt_17_4866:
	.loc	3	266	0
	// Component +0: negation is applied on both sides of the curve iff q0 < 0.
	setp.lt.ftz.f32 	%pMinus, %q0, 0f00000000;
	mov.f32 	%absv, %q0;
	@%pMinus neg.ftz.f32 	%absv, %q0;
	lg2.approx.ftz.f32 	%expo, %absv;
	mul.ftz.f32 	%expo, %expo, 0f3ee8ba2e;    	// * 0.454545
	ex2.approx.ftz.f32 	%ret0, %expo;
	@%pMinus neg.ftz.f32 	%ret0, %ret0;
	.loc	3	267	0
	// Component +4.
	setp.lt.ftz.f32 	%pMinus, %q1, 0f00000000;
	mov.f32 	%absv, %q1;
	@%pMinus neg.ftz.f32 	%absv, %q1;
	lg2.approx.ftz.f32 	%expo, %absv;
	mul.ftz.f32 	%expo, %expo, 0f3ee8ba2e;    	// * 0.454545
	ex2.approx.ftz.f32 	%ret1, %expo;
	@%pMinus neg.ftz.f32 	%ret1, %ret1;
	.loc	3	268	0
	// Component +8.
	setp.lt.ftz.f32 	%pMinus, %q2, 0f00000000;
	mov.f32 	%absv, %q2;
	@%pMinus neg.ftz.f32 	%absv, %q2;
	lg2.approx.ftz.f32 	%expo, %absv;
	mul.ftz.f32 	%expo, %expo, 0f3ee8ba2e;    	// * 0.454545
	ex2.approx.ftz.f32 	%ret2, %expo;
	@%pMinus neg.ftz.f32 	%ret2, %ret2;
	.loc	3	285	0
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+0], %ret0;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+4], %ret1;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+8], %ret2;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+12], %ret3;
	ret;
$LDWend__Z24UnpremultiplyUnlinearize6float4:
	} // _Z24UnpremultiplyUnlinearize6float4

	// Builds a 3x3 matrix M from four 2-float points and stores the nine
	// cofactors of M (row-major, unnormalised: no division by det M).
	//
	// Param 1: generic pointer; four f32 pairs are read at byte offsets 0, 8,
	//          16 and 24. Below they are called P0, P1, P2, P3 with components
	//          .a (first float) and .b (second float).
	// Param 2: generic pointer; nine f32 are written at byte offsets 0..32.
	//
	// M = | f17  f18  P0.a |
	//     | f19  f20  P0.b |
	//     | f21  f22   1   |
	// where f17..f22 come from one of two branches selected by the size of
	// the sums f11 / f14 computed below.
	.visible .func _Z23computeInverseTransformRK4QuadR10Homography (.param .u64 __cudaparmf1__Z23computeInverseTransformRK4QuadR10Homography, .param .u64 __cudaparmf2__Z23computeInverseTransformRK4QuadR10Homography)
	{
	.reg .u64 %rd<6>;
	.reg .f32 %f<66>;
	.reg .f64 %fd<6>;
	.reg .pred %p<4>;
	.loc	4	145	0
$LDWbegin__Z23computeInverseTransformRK4QuadR10Homography:
	ld.param.u64 	%rd1, [__cudaparmf1__Z23computeInverseTransformRK4QuadR10Homography];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf2__Z23computeInverseTransformRK4QuadR10Homography];
	mov.s64 	%rd4, %rd3;
	// (f1,f2) = P3, (f3,f4) = P0, (f5,f6) = P1, (f7,f8) = P2.
	ld.v2.f32 	{%f1,%f2}, [%rd2+24];
	ld.v2.f32 	{%f3,%f4}, [%rd2+0];
	ld.v2.f32 	{%f5,%f6}, [%rd2+8];
	ld.v2.f32 	{%f7,%f8}, [%rd2+16];
	.loc	4	151	0
	// f11 = P3.a + (P0.a - P1.a) - P2.a
	sub.ftz.f32 	%f9, %f3, %f5;
	add.ftz.f32 	%f10, %f1, %f9;
	sub.ftz.f32 	%f11, %f10, %f7;
	.loc	4	152	0
	// f14 = P3.b + (P0.b - P1.b) - P2.b
	sub.ftz.f32 	%f12, %f4, %f6;
	add.ftz.f32 	%f13, %f2, %f12;
	sub.ftz.f32 	%f14, %f13, %f8;
	.loc	4	145	0
	// Take the simple branch ($L_18_1794) when |f11| < 1e-5 OR |f14| < 1e-5.
	// The comparison is done in f64 against the double constant 1e-5.
	abs.ftz.f32 	%f15, %f11;
	cvt.ftz.f64.f32 	%fd1, %f15;
	mov.f64 	%fd2, 0d3ee4f8b588e368f1;	// 1e-005
	setp.lt.f64 	%p1, %fd1, %fd2;
	@%p1 bra 	$L_18_1794;
	abs.ftz.f32 	%f16, %f14;
	cvt.ftz.f64.f32 	%fd3, %f16;
	mov.f64 	%fd4, 0d3ee4f8b588e368f1;	// 1e-005
	setp.lt.f64 	%p2, %fd3, %fd4;
	@!%p2 bra 	$L_18_1538;
$L_18_1794:
	// Simple branch: bottom row of M is (0, 0, 1).
	.loc	4	156	0
	// f17 = P1.a - P0.a
	sub.ftz.f32 	%f17, %f5, %f3;
	.loc	4	157	0
	// f18 = P3.a - P1.a
	sub.ftz.f32 	%f18, %f1, %f5;
	.loc	4	159	0
	// f19 = P1.b - P0.b
	sub.ftz.f32 	%f19, %f6, %f4;
	.loc	4	160	0
	// f20 = P3.b - P1.b
	sub.ftz.f32 	%f20, %f2, %f6;
	.loc	4	162	0
	mov.f32 	%f21, 0f00000000;    	// 0
	mov.f32 	%f22, 0f00000000;    	// 0
	bra.uni 	$L_18_1282;
$L_18_1538:
	// General branch: solve for the two bottom-row terms f21, f22.
	.loc	4	167	0
	// f23 = P1.a - P3.a
	sub.ftz.f32 	%f23, %f5, %f1;
	.loc	4	168	0
	// f24 = P2.a - P3.a
	sub.ftz.f32 	%f24, %f7, %f1;
	.loc	4	169	0
	// f25 = P1.b - P3.b
	sub.ftz.f32 	%f25, %f6, %f2;
	.loc	4	170	0
	// f26 = P2.b - P3.b
	sub.ftz.f32 	%f26, %f8, %f2;
	.loc	4	172	0
	// f32 = f23*f26 - f24*f25 (shared denominator)
	// f21 = (f11*f26 - f14*f24) / f32
	mul.ftz.f32 	%f27, %f14, %f24;
	mul.ftz.f32 	%f28, %f24, %f25;
	mul.ftz.f32 	%f29, %f11, %f26;
	sub.ftz.f32 	%f30, %f29, %f27;
	mul.ftz.f32 	%f31, %f23, %f26;
	sub.ftz.f32 	%f32, %f31, %f28;
	div.approx.ftz.f32 	%f33, %f30, %f32;
	mov.f32 	%f21, %f33;
	.loc	4	173	0
	// f22 = (f14*f23 - f11*f25) / f32
	mul.ftz.f32 	%f34, %f11, %f25;
	mul.ftz.f32 	%f35, %f14, %f23;
	sub.ftz.f32 	%f36, %f35, %f34;
	div.approx.ftz.f32 	%f37, %f36, %f32;
	mov.f32 	%f22, %f37;
	.loc	4	174	0
	// f17 = P1.a*f21 + (P1.a - P0.a)
	sub.ftz.f32 	%f38, %f5, %f3;
	fma.rn.ftz.f32 	%f17, %f5, %f33, %f38;
	.loc	4	175	0
	// f18 = P2.a*f22 + (P2.a - P0.a)
	sub.ftz.f32 	%f39, %f7, %f3;
	fma.rn.ftz.f32 	%f18, %f7, %f37, %f39;
	.loc	4	177	0
	// f19 = P1.b*f21 + (P1.b - P0.b)
	sub.ftz.f32 	%f40, %f6, %f4;
	fma.rn.ftz.f32 	%f19, %f6, %f33, %f40;
	.loc	4	178	0
	// f20 = P2.b*f22 + (P2.b - P0.b)
	sub.ftz.f32 	%f41, %f8, %f4;
	fma.rn.ftz.f32 	%f20, %f8, %f37, %f41;
$L_18_1282:
	// Store the cofactors of M, one per output slot.
	.loc	4	184	0
	// out[0] = f20 - P0.b*f22
	mul.ftz.f32 	%f42, %f4, %f22;
	sub.ftz.f32 	%f43, %f20, %f42;
	st.f32 	[%rd4+0], %f43;
	.loc	4	185	0
	// out[1] = P0.b*f21 - f19
	mul.ftz.f32 	%f44, %f4, %f21;
	sub.ftz.f32 	%f45, %f44, %f19;
	st.f32 	[%rd4+4], %f45;
	.loc	4	186	0
	// out[2] = f19*f22 - f20*f21
	mul.ftz.f32 	%f46, %f20, %f21;
	mul.ftz.f32 	%f47, %f19, %f22;
	sub.ftz.f32 	%f48, %f47, %f46;
	st.f32 	[%rd4+8], %f48;
	.loc	4	187	0
	// out[3] = P0.a*f22 - f18
	mul.ftz.f32 	%f49, %f3, %f22;
	sub.ftz.f32 	%f50, %f49, %f18;
	st.f32 	[%rd4+12], %f50;
	.loc	4	188	0
	// out[4] = f17 - P0.a*f21
	mul.ftz.f32 	%f51, %f3, %f21;
	sub.ftz.f32 	%f52, %f17, %f51;
	st.f32 	[%rd4+16], %f52;
	.loc	4	189	0
	// out[5] = f18*f21 - f17*f22
	mul.ftz.f32 	%f53, %f17, %f22;
	mul.ftz.f32 	%f54, %f18, %f21;
	sub.ftz.f32 	%f55, %f54, %f53;
	st.f32 	[%rd4+20], %f55;
	.loc	4	190	0
	// out[6] = f18*P0.b - P0.a*f20
	mul.ftz.f32 	%f56, %f3, %f20;
	mul.ftz.f32 	%f57, %f18, %f4;
	sub.ftz.f32 	%f58, %f57, %f56;
	st.f32 	[%rd4+24], %f58;
	.loc	4	191	0
	// out[7] = P0.a*f19 - P0.b*f17
	mul.ftz.f32 	%f59, %f4, %f17;
	mul.ftz.f32 	%f60, %f3, %f19;
	sub.ftz.f32 	%f61, %f60, %f59;
	st.f32 	[%rd4+28], %f61;
	.loc	4	192	0
	// out[8] = f17*f20 - f18*f19
	mul.ftz.f32 	%f62, %f18, %f19;
	mul.ftz.f32 	%f63, %f17, %f20;
	sub.ftz.f32 	%f64, %f63, %f62;
	st.f32 	[%rd4+32], %f64;
	.loc	4	193	0
	ret;
$LDWend__Z23computeInverseTransformRK4QuadR10Homography:
	} // _Z23computeInverseTransformRK4QuadR10Homography
	// 2D texture sampled by StabilizerWarpKernel (tex.2d.v4.f32.f32 below).
	.global .texref inputTex;

	// StabilizerWarpKernel: each thread handles one cell of a grid of 2-float
	// nodes stored in inVarVector. For its cell it
	//   1. loads the four corner nodes of the cell,
	//   2. builds a 3x3 matrix from them and takes its cofactors,
	//   3. walks the integer rectangle that bounds the zoomed corners,
	//   4. for every position that passes the output-window test and an
	//      odd-count edge test against the four cell edges, maps the position
	//      through the cofactors, samples inputTex and writes one 4-component
	//      pixel to inOutput (4 x f16 when inDeviceFormat == 0, else 4 x f32).
	//
	// Register map used throughout (all established by the loads below):
	//   (f1,f2) node(col,row)      (f3,f4) node(col+1,row)
	//   (f5,f6) node(col,row+1)    (f7,f8) node(col+1,row+1)
	//   r11 = cell column, r13 = cell row, f45 = inZoomFactor
	//   r29/r30 = lowest/highest accepted output column
	//   r31/r32 = lowest/highest accepted output row
	.entry StabilizerWarpKernel (
		.param .u32 __cudaparm_StabilizerWarpKernel_inDeviceFormat,
		.param .u64 __cudaparm_StabilizerWarpKernel_inOutput,
		.param .s32 __cudaparm_StabilizerWarpKernel_inOutputWidth,
		.param .s32 __cudaparm_StabilizerWarpKernel_inOutputHeight,
		.param .s32 __cudaparm_StabilizerWarpKernel_inOutputPitch,
		.param .s32 __cudaparm_StabilizerWarpKernel_inInputWidth,
		.param .s32 __cudaparm_StabilizerWarpKernel_inInputHeight,
		.param .u64 __cudaparm_StabilizerWarpKernel_inVarVector,
		.param .s32 __cudaparm_StabilizerWarpKernel_inGridWidth,
		.param .s32 __cudaparm_StabilizerWarpKernel_inGridHeight,
		.param .align 16 .b8 __cudaparm_StabilizerWarpKernel_inBoundingBox[16],
		.param .align 16 .b8 __cudaparm_StabilizerWarpKernel_inCropWindow[16],
		.param .s8 __cudaparm_StabilizerWarpKernel_doCrop,
		.param .s32 __cudaparm_StabilizerWarpKernel_inOutputLeft,
		.param .s32 __cudaparm_StabilizerWarpKernel_inOutputTop,
		.param .s32 __cudaparm_StabilizerWarpKernel_inOutputRight,
		.param .s32 __cudaparm_StabilizerWarpKernel_inOutputBottom,
		.param .f32 __cudaparm_StabilizerWarpKernel_inZoomFactor)
	{
	.reg .u32 %r<142>;
	.reg .u64 %rd<15>;
	.reg .f32 %f<198>;
	.reg .f64 %fd<6>;
	.reg .pred %p<28>;
	.loc	4	214	0
$LDWbegin_StabilizerWarpKernel:
	.loc	4	217	0
	// r11 = ctaid.x * ntid.y + tid.x, r13 = ctaid.y * ntid.y + tid.y.
	// NOTE(review): the x offset is scaled by %ntid.y and %ntid.x is never
	// read, so columns are only contiguous for square thread blocks --
	// confirm against the CUDA source and the launch configuration.
	ld.param.s32 	%r1, [__cudaparm_StabilizerWarpKernel_inGridWidth];
	sub.s32 	%r2, %r1, 1;
	cvt.s32.u32 	%r3, %ntid.y;
	cvt.s32.u32 	%r4, %ctaid.x;
	mul.lo.s32 	%r5, %r4, %r3;
	ld.param.s32 	%r6, [__cudaparm_StabilizerWarpKernel_inGridHeight];
	sub.s32 	%r7, %r6, 1;
	cvt.s32.u32 	%r8, %ctaid.y;
	mul.lo.s32 	%r9, %r8, %r3;
	mov.u32 	%r10, %tid.x;
	add.u32 	%r11, %r5, %r10;
	mov.u32 	%r12, %tid.y;
	add.u32 	%r13, %r9, %r12;
	// Exit unless r11 < inGridWidth-1 and r13 < inGridHeight-1 (signed).
	set.gt.u32.s32 	%r14, %r2, %r11;
	neg.s32 	%r15, %r14;
	set.gt.u32.s32 	%r16, %r7, %r13;
	neg.s32 	%r17, %r16;
	and.b32 	%r18, %r15, %r17;
	mov.u32 	%r19, 0;
	setp.eq.s32 	%p1, %r18, %r19;
	@%p1 bra 	$Lt_19_24578;
	.loc	4	251	0
	// Row r13: four consecutive floats starting at float index
	// 2*(inGridWidth*r13 + r11) -> (f1,f2) and (f3,f4).
	mul.lo.s32 	%r20, %r1, %r13;
	ld.param.u64 	%rd1, [__cudaparm_StabilizerWarpKernel_inVarVector];
	add.s32 	%r21, %r20, %r11;
	mul.lo.s32 	%r22, %r21, 2;
	cvt.s64.s32 	%rd2, %r22;
	mul.wide.s32 	%rd3, %r22, 4;
	add.u64 	%rd4, %rd1, %rd3;
	ld.global.f32 	%f1, [%rd4+0];
	ld.global.f32 	%f2, [%rd4+4];
	ld.global.f32 	%f3, [%rd4+8];
	ld.global.f32 	%f4, [%rd4+12];
	// Row r13+1: same column -> (f5,f6) and (f7,f8).
	add.s32 	%r23, %r13, 1;
	mul.lo.s32 	%r24, %r23, %r1;
	add.s32 	%r25, %r24, %r11;
	mul.lo.s32 	%r26, %r25, 2;
	cvt.s64.s32 	%rd5, %r26;
	mul.wide.s32 	%rd6, %r26, 4;
	add.u64 	%rd7, %rd1, %rd6;
	ld.global.f32 	%f5, [%rd7+0];
	ld.global.f32 	%f6, [%rd7+4];
	ld.global.f32 	%f7, [%rd7+8];
	ld.global.f32 	%f8, [%rd7+12];
	.loc	4	253	0
	// Accepted output window: the inOutput{Left,Right,Top,Bottom} params when
	// doCrop != 0, otherwise [0, inOutputWidth-1] x [0, inOutputHeight-1].
	ld.param.s8 	%r27, [__cudaparm_StabilizerWarpKernel_doCrop];
	mov.u32 	%r28, 0;
	setp.eq.s32 	%p2, %r27, %r28;
	@%p2 bra 	$Lt_19_24322;
	.loc	4	273	0
	ld.param.s32 	%r29, [__cudaparm_StabilizerWarpKernel_inOutputLeft];
	.loc	4	274	0
	ld.param.s32 	%r30, [__cudaparm_StabilizerWarpKernel_inOutputRight];
	.loc	4	275	0
	ld.param.s32 	%r31, [__cudaparm_StabilizerWarpKernel_inOutputTop];
	.loc	4	276	0
	ld.param.s32 	%r32, [__cudaparm_StabilizerWarpKernel_inOutputBottom];
	bra.uni 	$Lt_19_24066;
$Lt_19_24322:
	.loc	4	281	0
	ld.param.s32 	%r33, [__cudaparm_StabilizerWarpKernel_inOutputWidth];
	sub.s32 	%r30, %r33, 1;
	.loc	4	282	0
	ld.param.s32 	%r34, [__cudaparm_StabilizerWarpKernel_inOutputHeight];
	sub.s32 	%r32, %r34, 1;
	mov.s32 	%r29, 0;
	mov.s32 	%r31, 0;
$Lt_19_24066:
	.loc	4	285	0
	// Build matrix M = | f18 f19 f1 |
	//                  | f21 f22 f2 |
	//                  | f23 f24 1  |
	// f11 = f1 - f3 + f7 - f5, f15 = f2 - f4 + f8 - f6. The simple branch
	// ($L_19_20482) is taken when |f11| < 1e-5 OR |f15| < 1e-5 (f64 compare).
	sub.ftz.f32 	%f9, %f1, %f3;
	add.ftz.f32 	%f10, %f9, %f7;
	sub.ftz.f32 	%f11, %f10, %f5;
	abs.ftz.f32 	%f12, %f11;
	cvt.ftz.f64.f32 	%fd1, %f12;
	mov.f64 	%fd2, 0d3ee4f8b588e368f1;	// 1e-005
	setp.lt.f64 	%p3, %fd1, %fd2;
	@%p3 bra 	$L_19_20482;
	sub.ftz.f32 	%f13, %f2, %f4;
	add.ftz.f32 	%f14, %f13, %f8;
	sub.ftz.f32 	%f15, %f14, %f6;
	abs.ftz.f32 	%f16, %f15;
	cvt.ftz.f64.f32 	%fd3, %f16;
	mov.f64 	%fd4, 0d3ee4f8b588e368f1;	// 1e-005
	setp.lt.f64 	%p4, %fd3, %fd4;
	@!%p4 bra 	$L_19_20226;
$L_19_20482:
	// Simple branch: f23 = f24 = 0.
	// f17 and f20 (the raw differences f3-f1 and f4-f2) are also reused by
	// the first edge test inside the inner loop.
	.loc	4	156	0
	sub.ftz.f32 	%f17, %f3, %f1;
	mov.f32 	%f18, %f17;
	.loc	4	157	0
	sub.ftz.f32 	%f19, %f7, %f3;
	.loc	4	159	0
	sub.ftz.f32 	%f20, %f4, %f2;
	mov.f32 	%f21, %f20;
	.loc	4	160	0
	sub.ftz.f32 	%f22, %f8, %f4;
	.loc	4	162	0
	mov.f32 	%f23, 0f00000000;    	// 0
	mov.f32 	%f24, 0f00000000;    	// 0
	bra.uni 	$L_19_19970;
$L_19_20226:
	// General branch.
	//   f31 = (f3-f7)*(f6-f8) - (f4-f8)*(f5-f7)      (shared denominator)
	//   f23 = (f11*(f6-f8) - f15*(f5-f7)) / f31
	//   f24 = (f15*(f3-f7) - f11*(f4-f8)) / f31
	.loc	4	172	0
	sub.ftz.f32 	%f25, %f6, %f8;
	sub.ftz.f32 	%f26, %f5, %f7;
	sub.ftz.f32 	%f27, %f4, %f8;
	sub.ftz.f32 	%f28, %f3, %f7;
	mul.ftz.f32 	%f29, %f27, %f26;
	mul.ftz.f32 	%f30, %f28, %f25;
	sub.ftz.f32 	%f31, %f30, %f29;
	mul.ftz.f32 	%f32, %f15, %f26;
	mul.ftz.f32 	%f33, %f11, %f25;
	sub.ftz.f32 	%f34, %f33, %f32;
	div.approx.ftz.f32 	%f35, %f34, %f31;
	mov.f32 	%f23, %f35;
	.loc	4	173	0
	mul.ftz.f32 	%f36, %f27, %f11;
	mul.ftz.f32 	%f37, %f15, %f28;
	sub.ftz.f32 	%f38, %f37, %f36;
	div.approx.ftz.f32 	%f39, %f38, %f31;
	mov.f32 	%f24, %f39;
	.loc	4	174	0
	// f18 = f23*f3 + (f3 - f1)
	sub.ftz.f32 	%f17, %f3, %f1;
	fma.rn.ftz.f32 	%f18, %f35, %f3, %f17;
	.loc	4	175	0
	// f19 = f24*f5 + (f5 - f1)
	sub.ftz.f32 	%f40, %f5, %f1;
	fma.rn.ftz.f32 	%f19, %f39, %f5, %f40;
	.loc	4	177	0
	// f21 = f23*f4 + (f4 - f2)
	sub.ftz.f32 	%f20, %f4, %f2;
	fma.rn.ftz.f32 	%f21, %f35, %f4, %f20;
	.loc	4	178	0
	// f22 = f24*f6 + (f6 - f2)
	sub.ftz.f32 	%f41, %f6, %f2;
	fma.rn.ftz.f32 	%f22, %f39, %f6, %f41;
$L_19_19970:
	.loc	4	292	0
	// Outer-loop range over the second coordinate:
	//   first = trunc(min(f2,f4,f6,f8) * zoom) - 1                    (r36)
	//   last  = min(trunc(float(trunc(bbox[+12]-bbox[+4])) * zoom),
	//               trunc(max(f2,f4,f6,f8) * zoom) + 1)               (r42)
	// Nothing to do when last < first.
	min.ftz.f32 	%f42, %f4, %f2;
	min.ftz.f32 	%f43, %f42, %f6;
	min.ftz.f32 	%f44, %f43, %f8;
	ld.param.f32 	%f45, [__cudaparm_StabilizerWarpKernel_inZoomFactor];
	mul.ftz.f32 	%f46, %f44, %f45;
	cvt.rzi.ftz.s32.f32 	%r35, %f46;
	sub.s32 	%r36, %r35, 1;
	mov.s32 	%r37, %r36;
	max.ftz.f32 	%f47, %f4, %f2;
	ld.param.f32 	%f48, [__cudaparm_StabilizerWarpKernel_inBoundingBox+4];
	ld.param.f32 	%f49, [__cudaparm_StabilizerWarpKernel_inBoundingBox+12];
	sub.ftz.f32 	%f50, %f49, %f48;
	max.ftz.f32 	%f51, %f47, %f6;
	cvt.rzi.ftz.s32.f32 	%r38, %f50;
	max.ftz.f32 	%f52, %f51, %f8;
	cvt.rn.f32.s32 	%f53, %r38;
	mul.ftz.f32 	%f54, %f52, %f45;
	mul.ftz.f32 	%f55, %f53, %f45;
	cvt.rzi.ftz.s32.f32 	%r39, %f54;
	cvt.rzi.ftz.s32.f32 	%r40, %f55;
	add.s32 	%r41, %r39, 1;
	min.s32 	%r42, %r40, %r41;
	setp.lt.s32 	%p5, %r42, %r36;
	@%p5 bra 	$Lt_19_24578;
	// f64 = ((crop[+4]+bbox[+4])*zoom + (crop[+12]+bbox[+4])*zoom) / 2;
	// r43 = trunc(f64), f66 = f64 - r43 (fractional part).
	ld.param.f32 	%f56, [__cudaparm_StabilizerWarpKernel_inCropWindow+4];
	add.ftz.f32 	%f57, %f56, %f48;
	ld.param.f32 	%f58, [__cudaparm_StabilizerWarpKernel_inCropWindow+12];
	add.ftz.f32 	%f59, %f58, %f48;
	mul.ftz.f32 	%f60, %f57, %f45;
	mul.ftz.f32 	%f61, %f59, %f45;
	add.ftz.f32 	%f62, %f60, %f61;
	mov.f32 	%f63, 0f40000000;    	// 2
	div.approx.ftz.f32 	%f64, %f62, %f63;
	cvt.rzi.ftz.s32.f32 	%r43, %f64;
	cvt.rn.f32.s32 	%f65, %r43;
	sub.ftz.f32 	%f66, %f64, %f65;
	// Inner-loop range over the first coordinate, built the same way from
	// f1,f3,f5,f7 and bbox[+0]/bbox[+8]: first = r45, last = r50.
	min.ftz.f32 	%f67, %f3, %f1;
	min.ftz.f32 	%f68, %f67, %f5;
	min.ftz.f32 	%f69, %f68, %f7;
	mul.ftz.f32 	%f70, %f69, %f45;
	cvt.rzi.ftz.s32.f32 	%r44, %f70;
	sub.s32 	%r45, %r44, 1;
	max.ftz.f32 	%f71, %f3, %f1;
	ld.param.f32 	%f72, [__cudaparm_StabilizerWarpKernel_inBoundingBox+0];
	ld.param.f32 	%f73, [__cudaparm_StabilizerWarpKernel_inBoundingBox+8];
	sub.ftz.f32 	%f74, %f73, %f72;
	max.ftz.f32 	%f75, %f71, %f5;
	cvt.rzi.ftz.s32.f32 	%r46, %f74;
	max.ftz.f32 	%f76, %f75, %f7;
	cvt.rn.f32.s32 	%f77, %r46;
	mul.ftz.f32 	%f78, %f76, %f45;
	mul.ftz.f32 	%f79, %f77, %f45;
	cvt.rzi.ftz.s32.f32 	%r47, %f78;
	cvt.rzi.ftz.s32.f32 	%r48, %f79;
	add.s32 	%r49, %r47, 1;
	min.s32 	%r50, %r48, %r49;
	// r56 = inOutputWidth / 2 (signed, rounds toward zero).
	ld.param.s32 	%r51, [__cudaparm_StabilizerWarpKernel_inOutputWidth];
	shr.s32 	%r52, %r51, 31;
	mov.s32 	%r53, 1;
	and.b32 	%r54, %r52, %r53;
	add.s32 	%r55, %r54, %r51;
	shr.s32 	%r56, %r55, 1;
	// f88 = ((crop[+0]+bbox[+0])*zoom + (crop[+8]+bbox[+0])*zoom) / 2;
	// r57 = trunc(f88), f90 = f88 - r57 (fractional part).
	ld.param.f32 	%f80, [__cudaparm_StabilizerWarpKernel_inCropWindow+0];
	add.ftz.f32 	%f81, %f80, %f72;
	ld.param.f32 	%f82, [__cudaparm_StabilizerWarpKernel_inCropWindow+8];
	add.ftz.f32 	%f83, %f82, %f72;
	mul.ftz.f32 	%f84, %f81, %f45;
	mul.ftz.f32 	%f85, %f83, %f45;
	add.ftz.f32 	%f86, %f84, %f85;
	mov.f32 	%f87, 0f40000000;    	// 2
	div.approx.ftz.f32 	%f88, %f86, %f87;
	cvt.rzi.ftz.s32.f32 	%r57, %f88;
	cvt.rn.f32.s32 	%f89, %r57;
	sub.ftz.f32 	%f90, %f88, %f89;
	// Cofactors of M (loop-invariant), with C[row][col] indexed from 0:
	//   f93  = C22   f96  = C12   f99  = C02   (used for the divisor)
	//   f102 = C20   f104 = C10   f106 = C00   (used for the first coord)
	//   f109 = C21   f111 = C11   f113 = C01   (used for the second coord)
	mul.ftz.f32 	%f91, %f19, %f21;
	mul.ftz.f32 	%f92, %f18, %f22;
	sub.ftz.f32 	%f93, %f92, %f91;
	mul.ftz.f32 	%f94, %f18, %f24;
	mul.ftz.f32 	%f95, %f19, %f23;
	sub.ftz.f32 	%f96, %f95, %f94;
	mul.ftz.f32 	%f97, %f22, %f23;
	mul.ftz.f32 	%f98, %f21, %f24;
	sub.ftz.f32 	%f99, %f98, %f97;
	mul.ftz.f32 	%f100, %f22, %f1;
	mul.ftz.f32 	%f101, %f19, %f2;
	sub.ftz.f32 	%f102, %f101, %f100;
	mul.ftz.f32 	%f103, %f1, %f24;
	sub.ftz.f32 	%f104, %f103, %f19;
	mul.ftz.f32 	%f105, %f2, %f24;
	sub.ftz.f32 	%f106, %f22, %f105;
	mul.ftz.f32 	%f107, %f18, %f2;
	mul.ftz.f32 	%f108, %f1, %f21;
	sub.ftz.f32 	%f109, %f108, %f107;
	mul.ftz.f32 	%f110, %f1, %f23;
	sub.ftz.f32 	%f111, %f18, %f110;
	mul.ftz.f32 	%f112, %f2, %f23;
	sub.ftz.f32 	%f113, %f112, %f21;
	// Grid-to-texture scale factors:
	//   f116 = (inInputWidth-1)  / (inGridWidth-1)
	//   f119 = (inInputHeight-1) / (inGridHeight-1)
	ld.param.s32 	%r58, [__cudaparm_StabilizerWarpKernel_inInputWidth];
	sub.s32 	%r59, %r58, 1;
	cvt.rn.f32.s32 	%f114, %r2;
	cvt.rn.f32.s32 	%f115, %r59;
	div.approx.ftz.f32 	%f116, %f115, %f114;
	ld.param.s32 	%r60, [__cudaparm_StabilizerWarpKernel_inInputHeight];
	sub.s32 	%r61, %r60, 1;
	cvt.rn.f32.s32 	%f117, %r7;
	cvt.rn.f32.s32 	%f118, %r61;
	div.approx.ftz.f32 	%f119, %f118, %f117;
	sub.s32 	%r62, %r42, %r36;
	add.s32 	%r63, %r62, 1;
	// r69 = inOutputHeight / 2 (signed, rounds toward zero).
	ld.param.s32 	%r64, [__cudaparm_StabilizerWarpKernel_inOutputHeight];
	shr.s32 	%r65, %r64, 31;
	mov.s32 	%r66, 1;
	and.b32 	%r67, %r65, %r66;
	add.s32 	%r68, %r67, %r64;
	shr.s32 	%r69, %r68, 1;
	// r70 = last + 1: the outer loop ends when r37 reaches it.
	// r71 (trip count) is written here but not read again in this kernel.
	add.s32 	%r70, %r42, 1;
	mov.s32 	%r71, %r63;
$Lt_19_25090:
 //<loop> Loop body line 292, nesting depth: 1, estimated iterations: unknown
	.loc	4	295	0
	// Output row r73 = r69 + r37 - r43; skip this iteration when it lies
	// outside [r31, r32].
	add.s32 	%r72, %r69, %r37;
	sub.s32 	%r73, %r72, %r43;
	set.gt.u32.s32 	%r74, %r73, %r32;
	neg.s32 	%r75, %r74;
	set.lt.u32.s32 	%r76, %r73, %r31;
	neg.s32 	%r77, %r76;
	or.b32 	%r78, %r75, %r77;
	mov.u32 	%r79, 0;
	setp.ne.s32 	%p6, %r78, %r79;
	@%p6 bra 	$Lt_19_25346;
 //<loop> Part of loop body line 292, head labeled $Lt_19_25090
	.loc	23	529	0
	// f122 = (r37 + f66) / zoom: second coordinate of the test point.
	cvt.rn.f32.s32 	%f120, %r37;
	add.ftz.f32 	%f121, %f120, %f66;
	div.approx.ftz.f32 	%f122, %f121, %f45;
	.loc	4	299	0
	mov.s32 	%r80, %r45;
	setp.lt.s32 	%p7, %r50, %r45;
	@%p7 bra 	$Lt_19_25346;
 //<loop> Part of loop body line 292, head labeled $Lt_19_25090
	sub.s32 	%r81, %r50, %r45;
	add.s32 	%r82, %r81, 1;
	// r83 = last + 1: the inner loop ends when r80 reaches it.
	add.s32 	%r83, %r50, 1;
 //<loop> Part of loop body line 292, head labeled $Lt_19_25090
	mov.s32 	%r84, %r82;
$Lt_19_25858:
 //<loop> Loop body line 299, nesting depth: 2, estimated iterations: unknown
	.loc	4	304	0
	// Output column r86 = r56 + r80 - r57; skip when outside [r29, r30].
	add.s32 	%r85, %r56, %r80;
	sub.s32 	%r86, %r85, %r57;
	set.gt.u32.s32 	%r87, %r86, %r30;
	neg.s32 	%r88, %r87;
	set.lt.u32.s32 	%r89, %r86, %r29;
	neg.s32 	%r90, %r89;
	or.b32 	%r91, %r88, %r90;
	mov.u32 	%r92, 0;
	setp.ne.s32 	%p8, %r91, %r92;
	@%p8 bra 	$Lt_19_32770;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	23	529	0
	// f125 = (r80 + f90) / zoom: first coordinate of the test point.
	cvt.rn.f32.s32 	%f123, %r80;
	add.ftz.f32 	%f124, %f123, %f90;
	div.approx.ftz.f32 	%f125, %f124, %f45;
	.loc	4	80	0
	// Edge test 1, edge (f1,f2) -> (f3,f4), result in r93.
	// An edge scores 1 when its two second coordinates differ, the parameter
	// t = (f122 - start) / (end - start) satisfies 0 <= t < 1, and
	// f125 <= start.first + t * (end.first - start.first); otherwise 0.
	setp.eq.ftz.f32 	%p9, %f4, %f2;
	@!%p9 bra 	$Lt_19_26114;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	91	0
	mov.s32 	%r93, 0;
	bra.uni 	$LDWendi_fdividef_196_9;
$Lt_19_26114:
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	93	0
	sub.ftz.f32 	%f126, %f122, %f2;
	div.approx.ftz.f32 	%f127, %f126, %f20;
	mov.f32 	%f128, 0f00000000;   	// 0
	set.ge.ftz.u32.f32 	%r94, %f127, %f128;
	neg.s32 	%r95, %r94;
	mov.f32 	%f129, 0f3f800000;   	// 1
	set.lt.ftz.u32.f32 	%r96, %f127, %f129;
	neg.s32 	%r97, %r96;
	and.b32 	%r98, %r95, %r97;
	mov.u32 	%r99, 0;
	setp.eq.s32 	%p10, %r98, %r99;
	@%p10 bra 	$Lt_19_30466;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	fma.rn.ftz.f32 	%f130, %f127, %f17, %f1;
	setp.le.ftz.f32 	%p11, %f125, %f130;
	@!%p11 bra 	$Lt_19_30466;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	98	0
	mov.s32 	%r93, 1;
	bra.uni 	$LDWendi_fdividef_196_9;
$Lt_19_30466:
$L_19_20738:
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	101	0
	mov.s32 	%r93, 0;
$LDWendi_fdividef_196_9:
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	81	0
	// Edge test 2, edge (f3,f4) -> (f7,f8), result in r100.
	setp.eq.ftz.f32 	%p12, %f8, %f4;
	@!%p12 bra 	$Lt_19_26626;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	91	0
	mov.s32 	%r100, 0;
	bra.uni 	$LDWendi_fdividef_196_7;
$Lt_19_26626:
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	93	0
	sub.ftz.f32 	%f131, %f8, %f4;
	sub.ftz.f32 	%f132, %f122, %f4;
	div.approx.ftz.f32 	%f133, %f132, %f131;
	mov.f32 	%f134, 0f00000000;   	// 0
	set.ge.ftz.u32.f32 	%r101, %f133, %f134;
	neg.s32 	%r102, %r101;
	mov.f32 	%f135, 0f3f800000;   	// 1
	set.lt.ftz.u32.f32 	%r103, %f133, %f135;
	neg.s32 	%r104, %r103;
	and.b32 	%r105, %r102, %r104;
	mov.u32 	%r106, 0;
	setp.eq.s32 	%p13, %r105, %r106;
	@%p13 bra 	$Lt_19_30978;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	sub.ftz.f32 	%f136, %f7, %f3;
	fma.rn.ftz.f32 	%f137, %f133, %f136, %f3;
	setp.le.ftz.f32 	%p14, %f125, %f137;
	@!%p14 bra 	$Lt_19_30978;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	98	0
	mov.s32 	%r100, 1;
	bra.uni 	$LDWendi_fdividef_196_7;
$Lt_19_30978:
$L_19_21250:
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	101	0
	mov.s32 	%r100, 0;
$LDWendi_fdividef_196_7:
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	82	0
	// Edge test 3, edge (f7,f8) -> (f5,f6), result in r107.
	setp.eq.ftz.f32 	%p15, %f8, %f6;
	@!%p15 bra 	$Lt_19_27138;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	91	0
	mov.s32 	%r107, 0;
	bra.uni 	$LDWendi_fdividef_196_5;
$Lt_19_27138:
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	93	0
	sub.ftz.f32 	%f138, %f122, %f8;
	sub.ftz.f32 	%f139, %f6, %f8;
	div.approx.ftz.f32 	%f140, %f138, %f139;
	mov.f32 	%f141, 0f00000000;   	// 0
	set.ge.ftz.u32.f32 	%r108, %f140, %f141;
	neg.s32 	%r109, %r108;
	mov.f32 	%f142, 0f3f800000;   	// 1
	set.lt.ftz.u32.f32 	%r110, %f140, %f142;
	neg.s32 	%r111, %r110;
	and.b32 	%r112, %r109, %r111;
	mov.u32 	%r113, 0;
	setp.eq.s32 	%p16, %r112, %r113;
	@%p16 bra 	$Lt_19_31490;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	sub.ftz.f32 	%f143, %f5, %f7;
	fma.rn.ftz.f32 	%f144, %f140, %f143, %f7;
	setp.le.ftz.f32 	%p17, %f125, %f144;
	@!%p17 bra 	$Lt_19_31490;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	98	0
	mov.s32 	%r107, 1;
	bra.uni 	$LDWendi_fdividef_196_5;
$Lt_19_31490:
$L_19_21762:
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	101	0
	mov.s32 	%r107, 0;
$LDWendi_fdividef_196_5:
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	83	0
	// Edge test 4, edge (f5,f6) -> (f1,f2), result in r114.
	setp.eq.ftz.f32 	%p18, %f6, %f2;
	@!%p18 bra 	$Lt_19_27650;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	91	0
	mov.s32 	%r114, 0;
	bra.uni 	$LDWendi_fdividef_196_3;
$Lt_19_27650:
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	93	0
	sub.ftz.f32 	%f145, %f122, %f6;
	sub.ftz.f32 	%f146, %f2, %f6;
	div.approx.ftz.f32 	%f147, %f145, %f146;
	mov.f32 	%f148, 0f00000000;   	// 0
	set.ge.ftz.u32.f32 	%r115, %f147, %f148;
	neg.s32 	%r116, %r115;
	mov.f32 	%f149, 0f3f800000;   	// 1
	set.lt.ftz.u32.f32 	%r117, %f147, %f149;
	neg.s32 	%r118, %r117;
	and.b32 	%r119, %r116, %r118;
	mov.u32 	%r120, 0;
	setp.eq.s32 	%p19, %r119, %r120;
	@%p19 bra 	$Lt_19_32002;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	sub.ftz.f32 	%f150, %f1, %f5;
	fma.rn.ftz.f32 	%f151, %f147, %f150, %f5;
	setp.le.ftz.f32 	%p20, %f125, %f151;
	@!%p20 bra 	$Lt_19_32002;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	98	0
	mov.s32 	%r114, 1;
	bra.uni 	$LDWendi_fdividef_196_3;
$Lt_19_32002:
$L_19_22274:
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	101	0
	mov.s32 	%r114, 0;
$LDWendi_fdividef_196_3:
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	83	0
	// r123 = sum of the four edge scores; r130 = r123 % 2 (signed remainder
	// built from a divide-by-2). Continue only when the sum is odd.
	add.s32 	%r121, %r114, %r107;
	add.s32 	%r122, %r100, %r93;
	add.s32 	%r123, %r121, %r122;
	shr.s32 	%r124, %r123, 31;
	mov.s32 	%r125, 1;
	and.b32 	%r126, %r124, %r125;
	add.s32 	%r127, %r126, %r123;
	shr.s32 	%r128, %r127, 1;
	mul.lo.s32 	%r129, %r128, 2;
	sub.s32 	%r130, %r123, %r129;
	mov.u32 	%r131, 1;
	setp.ne.s32 	%p21, %r130, %r131;
	@%p21 bra 	$Lt_19_32770;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	// Also require f125 < inOutputWidth and f122 < inOutputHeight.
	cvt.rn.f32.s32 	%f152, %r51;
	setp.gt.ftz.f32 	%p22, %f152, %f125;
	@!%p22 bra 	$Lt_19_32770;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	cvt.rn.f32.s32 	%f153, %r64;
	setp.gt.ftz.f32 	%p23, %f153, %f122;
	@!%p23 bra 	$Lt_19_32770;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	317	0
	// Map (f125, f122) through the cofactors:
	//   w = f99*f125  + f96*f122  + f93                               (f156)
	//   u = (f106*f125 + f104*f122 + f102) / w                        (f164)
	//   v = (f113*f125 + f111*f122 + f109) / w                        (f178)
	// Texture coordinates:
	//   x = clamp(f116 * (r11 + u), 0.0001, inInputWidth  - 1.0001)   (f170)
	//   y = clamp(f119 * (r13 + v), 0.0001, inInputHeight - 1.0001)   (f184)
	// (max with the lower bound is applied first, then min with the upper.)
	mul.ftz.f32 	%f154, %f122, %f96;
	fma.rn.ftz.f32 	%f155, %f99, %f125, %f154;
	add.ftz.f32 	%f156, %f93, %f155;
	cvt.rn.f32.s32 	%f157, %r58;
	mov.f32 	%f158, 0fbf800347;   	// -1.0001
	add.ftz.f32 	%f159, %f157, %f158;
	cvt.rn.f32.s32 	%f160, %r11;
	mul.ftz.f32 	%f161, %f122, %f104;
	fma.rn.ftz.f32 	%f162, %f106, %f125, %f161;
	add.ftz.f32 	%f163, %f102, %f162;
	div.approx.ftz.f32 	%f164, %f163, %f156;
	add.ftz.f32 	%f165, %f160, %f164;
	mul.ftz.f32 	%f166, %f116, %f165;
	mov.f32 	%f167, 0f38d1b717;   	// 0.0001
	max.ftz.f32 	%f168, %f166, %f167;
	min.ftz.f32 	%f169, %f159, %f168;
	mov.f32 	%f170, %f169;
	cvt.rn.f32.s32 	%f171, %r60;
	mov.f32 	%f172, 0fbf800347;   	// -1.0001
	add.ftz.f32 	%f173, %f171, %f172;
	cvt.rn.f32.s32 	%f174, %r13;
	mul.ftz.f32 	%f175, %f122, %f111;
	fma.rn.ftz.f32 	%f176, %f113, %f125, %f175;
	add.ftz.f32 	%f177, %f109, %f176;
	div.approx.ftz.f32 	%f178, %f177, %f156;
	add.ftz.f32 	%f179, %f174, %f178;
	mul.ftz.f32 	%f180, %f119, %f179;
	mov.f32 	%f181, 0f38d1b717;   	// 0.0001
	max.ftz.f32 	%f182, %f180, %f181;
	min.ftz.f32 	%f183, %f173, %f182;
	mov.f32 	%f184, %f183;
	// The third and fourth coordinate operands of the fetch are zero.
	mov.f32 	%f185, 0f00000000;   	// 0
	mov.f32 	%f186, %f185;
	mov.f32 	%f187, 0f00000000;   	// 0
	mov.f32 	%f188, %f187;
	tex.2d.v4.f32.f32 {%f189,%f190,%f191,%f192},[inputTex,{%f170,%f184,%f186,%f188}];
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	328	0
	mov.f32 	%f193, %f189;
	mov.f32 	%f194, %f190;
	mov.f32 	%f195, %f191;
	mov.f32 	%f196, %f192;
	// Pixel index rd8 = r86 + inOutputPitch * r73. The index is scaled by the
	// pixel size below, so the pitch is applied in pixels, not bytes.
	ld.param.s32 	%r132, [__cudaparm_StabilizerWarpKernel_inOutputPitch];
	mul.lo.s32 	%r133, %r132, %r73;
	add.s32 	%r134, %r86, %r133;
	cvt.s64.s32 	%rd8, %r134;
	ld.param.u64 	%rd9, [__cudaparm_StabilizerWarpKernel_inOutput];
	ld.param.s32 	%r135, [__cudaparm_StabilizerWarpKernel_inDeviceFormat];
	mov.u32 	%r136, 0;
	setp.ne.s32 	%p24, %r135, %r136;
	@%p24 bra 	$Lt_19_28418;
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	19	126	0
	// inDeviceFormat == 0: convert each component to f16 (round-to-nearest)
	// and store four 16-bit values, 8 bytes per pixel.
	mul.lo.u64 	%rd10, %rd8, 8;
	add.u64 	%rd11, %rd9, %rd10;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f193;
	mov.b32		%r137, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f194;
	mov.b32		%r138, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f195;
	mov.b32		%r139, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f196;
	mov.b32		%r140, %b1; }
	st.global.v4.u16 	[%rd11+0], {%r137,%r138,%r139,%r140};
	.loc	4	331	0
	bra.uni 	$Lt_19_32770;
$Lt_19_28418:
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	19	126	0
	// inDeviceFormat != 0: store the four f32 components, 16 bytes per pixel.
	mul.lo.u64 	%rd12, %rd8, 16;
	add.u64 	%rd13, %rd9, %rd12;
	st.global.v4.f32 	[%rd13+0], {%f193,%f194,%f195,%f196};
$Lt_19_32770:
$L_19_22786:
$Lt_19_514:
 //<loop> Part of loop body line 299, head labeled $Lt_19_25858
	.loc	4	340	0
	// Inner loop step: next first-coordinate position.
	add.s32 	%r80, %r80, 1;
	setp.ne.s32 	%p25, %r83, %r80;
	@%p25 bra 	$Lt_19_25858;
$Lt_19_25346:
$Lt_19_258:
 //<loop> Part of loop body line 292, head labeled $Lt_19_25090
	.loc	4	341	0
	// Outer loop step: next second-coordinate position.
	add.s32 	%r37, %r37, 1;
	setp.ne.s32 	%p26, %r70, %r37;
	@%p26 bra 	$Lt_19_25090;
$Lt_19_24578:
$Lt_19_23554:
	.loc	4	343	0
	exit;
$LDWend_StabilizerWarpKernel:
	} // StabilizerWarpKernel

	.visible .func _ZNK4Quad11boundingBoxERfS0_S0_S0_ (.param .u64 __cudaparmf1__ZNK4Quad11boundingBoxERfS0_S0_S0_, .param .u64 __cudaparmf2__ZNK4Quad11boundingBoxERfS0_S0_S0_, .param .u64 __cudaparmf3__ZNK4Quad11boundingBoxERfS0_S0_S0_, .param .u64 __cudaparmf4__ZNK4Quad11boundingBoxERfS0_S0_S0_, .param .u64 __cudaparmf5__ZNK4Quad11boundingBoxERfS0_S0_S0_)
	{
	// Param 1: generic pointer to four f32 pairs (byte offsets 0, 8, 16, 24).
	// Param 2: receives the minimum of the pairs' first floats.
	// Param 3: receives the maximum of the pairs' first floats.
	// Param 4: receives the minimum of the pairs' second floats.
	// Param 5: receives the maximum of the pairs' second floats.
	//
	// Every running value is read back from memory before it is updated and
	// each pair is loaded twice per step, exactly as in the compiler output;
	// the memory-operation order is kept so results are unchanged even if
	// the pointers alias.
	.reg .u64 %quad, %pMin1, %pMax1, %pMin2, %pMax2;
	.reg .f32 %a, %b;           // pair as loaded for the "min of first" step
	.reg .f32 %a2, %b2;         // pair as reloaded for the "max of first" step
	.reg .f32 %cur, %upd;       // running value read back / value written
	.loc	4	64	0
$LDWbegin__ZNK4Quad11boundingBoxERfS0_S0_S0_:
	ld.param.u64 	%quad, [__cudaparmf1__ZNK4Quad11boundingBoxERfS0_S0_S0_];
	ld.param.u64 	%pMin1, [__cudaparmf2__ZNK4Quad11boundingBoxERfS0_S0_S0_];
	ld.param.u64 	%pMax1, [__cudaparmf3__ZNK4Quad11boundingBoxERfS0_S0_S0_];
	ld.param.u64 	%pMin2, [__cudaparmf4__ZNK4Quad11boundingBoxERfS0_S0_S0_];
	ld.param.u64 	%pMax2, [__cudaparmf5__ZNK4Quad11boundingBoxERfS0_S0_S0_];
	// Seed all four outputs from the pair at +0.
	ld.v2.f32 	{%a,%b}, [%quad+0];
	.loc	4	65	0
	st.f32 	[%pMax1+0], %a;
	st.f32 	[%pMin1+0], %a;
	.loc	4	66	0
	st.f32 	[%pMax2+0], %b;
	st.f32 	[%pMin2+0], %b;
	// ---- fold in the pair at +8 ----
	.loc	4	69	0
	ld.f32 	%cur, [%pMin1+0];
	ld.v2.f32 	{%a,%b}, [%quad+8];
	min.ftz.f32 	%upd, %cur, %a;
	st.f32 	[%pMin1+0], %upd;
	.loc	4	70	0
	ld.f32 	%cur, [%pMax1+0];
	ld.v2.f32 	{%a2,%b2}, [%quad+8];
	max.ftz.f32 	%upd, %cur, %a2;
	st.f32 	[%pMax1+0], %upd;
	.loc	4	71	0
	ld.f32 	%cur, [%pMin2+0];
	min.ftz.f32 	%upd, %cur, %b;
	st.f32 	[%pMin2+0], %upd;
	.loc	4	72	0
	ld.f32 	%cur, [%pMax2+0];
	max.ftz.f32 	%upd, %cur, %b2;
	st.f32 	[%pMax2+0], %upd;
	// ---- fold in the pair at +16 ----
	.loc	4	69	0
	ld.f32 	%cur, [%pMin1+0];
	ld.v2.f32 	{%a,%b}, [%quad+16];
	min.ftz.f32 	%upd, %cur, %a;
	st.f32 	[%pMin1+0], %upd;
	.loc	4	70	0
	ld.f32 	%cur, [%pMax1+0];
	ld.v2.f32 	{%a2,%b2}, [%quad+16];
	max.ftz.f32 	%upd, %cur, %a2;
	st.f32 	[%pMax1+0], %upd;
	.loc	4	71	0
	ld.f32 	%cur, [%pMin2+0];
	min.ftz.f32 	%upd, %cur, %b;
	st.f32 	[%pMin2+0], %upd;
	.loc	4	72	0
	ld.f32 	%cur, [%pMax2+0];
	max.ftz.f32 	%upd, %cur, %b2;
	st.f32 	[%pMax2+0], %upd;
	// ---- fold in the pair at +24 ----
	.loc	4	69	0
	ld.f32 	%cur, [%pMin1+0];
	ld.v2.f32 	{%a,%b}, [%quad+24];
	min.ftz.f32 	%upd, %cur, %a;
	st.f32 	[%pMin1+0], %upd;
	.loc	4	70	0
	ld.f32 	%cur, [%pMax1+0];
	ld.v2.f32 	{%a2,%b2}, [%quad+24];
	max.ftz.f32 	%upd, %cur, %a2;
	st.f32 	[%pMax1+0], %upd;
	.loc	4	71	0
	ld.f32 	%cur, [%pMin2+0];
	min.ftz.f32 	%upd, %cur, %b;
	st.f32 	[%pMin2+0], %upd;
	.loc	4	72	0
	ld.f32 	%cur, [%pMax2+0];
	max.ftz.f32 	%upd, %cur, %b2;
	st.f32 	[%pMax2+0], %upd;
	.loc	4	74	0
	ret;
$LDWend__ZNK4Quad11boundingBoxERfS0_S0_S0_:
	} // _ZNK4Quad11boundingBoxERfS0_S0_S0_

	.visible .func (.param .s32 __cudaretf__ZNK4Quad13containsPointERK6float2) _ZNK4Quad13containsPointERK6float2 (.param .u64 __cudaparmf1__ZNK4Quad13containsPointERK6float2, .param .u64 __cudaparmf2__ZNK4Quad13containsPointERK6float2)
	{
	// Returns 1 when an odd number of the four edge tests below succeed and
	// 0 otherwise.
	//
	// Parameter 1 (%quad) addresses four float pairs at byte offsets 0, 8,
	// 16 and 24, called corners 0..3 here (presumably float2 vertices --
	// confirm against the Quad declaration). Parameter 2 (%pt) addresses
	// one float pair: first float at +0, second float at +4.
	//
	// Edges are visited in the order 0->1, 1->3, 3->2, 2->0. For an edge
	// a->b the flag is 1 only when all of the following hold:
	//   * second(a) != second(b)
	//   * t = (second(pt) - second(a)) / (second(b) - second(a)) lies in
	//     [0, 1)                                  (div.approx.ftz)
	//   * first(pt) <= first(a) + t * (first(b) - first(a))
	// Each flag starts at 0 and is raised by a predicated move, so an
	// unordered (NaN) comparison leaves it at 0.
	.reg .u64 %quad, %pt;
	.reg .f32 %y0, %y1, %y2, %y3;	// second floats of corners 0..3, each loaded once
	.reg .f32 %py, %px, %xa, %xb, %num, %den, %t, %dx, %xhit;	// per-edge scratch
	.reg .s32 %hitA, %hitB, %hitC, %hitD, %sum, %part, %inside;
	.reg .pred %flat, %ge0, %lt1, %inrange, %left;
	.loc	4	78	0
$LDWbegin__ZNK4Quad13containsPointERK6float2:
	ld.param.u64 	%quad, [__cudaparmf1__ZNK4Quad13containsPointERK6float2];
	ld.param.u64 	%pt, [__cudaparmf2__ZNK4Quad13containsPointERK6float2];
	// ---- edge corner 0 -> corner 1 ----
	.loc	4	80	0
	ld.f32 	%y0, [%quad+4];
	ld.f32 	%y1, [%quad+12];
	.loc	4	91	0
	mov.s32 	%hitA, 0;
	setp.eq.ftz.f32 	%flat, %y0, %y1;
	@%flat bra 	$LDWendi_fdividef_198_7;
	.loc	4	93	0
	ld.f32 	%py, [%pt+4];
	sub.ftz.f32 	%num, %py, %y0;
	sub.ftz.f32 	%den, %y1, %y0;
	div.approx.ftz.f32 	%t, %num, %den;
	setp.ge.ftz.f32 	%ge0, %t, 0f00000000;	// t >= 0
	setp.lt.ftz.f32 	%lt1, %t, 0f3f800000;	// t < 1
	and.pred 	%inrange, %ge0, %lt1;
	@!%inrange bra 	$LDWendi_fdividef_198_7;
	ld.f32 	%xa, [%quad];
	ld.f32 	%px, [%pt];
	ld.f32 	%xb, [%quad+8];
	sub.ftz.f32 	%dx, %xb, %xa;
	fma.rn.ftz.f32 	%xhit, %t, %dx, %xa;
	setp.le.ftz.f32 	%left, %px, %xhit;
	.loc	4	98	0
	@%left mov.s32 	%hitA, 1;
$LDWendi_fdividef_198_7:
	// ---- edge corner 1 -> corner 3 ----
	.loc	4	81	0
	ld.f32 	%y3, [%quad+28];
	.loc	4	91	0
	mov.s32 	%hitB, 0;
	setp.eq.ftz.f32 	%flat, %y1, %y3;
	@%flat bra 	$LDWendi_fdividef_198_5;
	.loc	4	93	0
	ld.f32 	%py, [%pt+4];
	sub.ftz.f32 	%num, %py, %y1;
	sub.ftz.f32 	%den, %y3, %y1;
	div.approx.ftz.f32 	%t, %num, %den;
	setp.ge.ftz.f32 	%ge0, %t, 0f00000000;	// t >= 0
	setp.lt.ftz.f32 	%lt1, %t, 0f3f800000;	// t < 1
	and.pred 	%inrange, %ge0, %lt1;
	@!%inrange bra 	$LDWendi_fdividef_198_5;
	ld.f32 	%xa, [%quad+8];
	ld.f32 	%px, [%pt];
	ld.f32 	%xb, [%quad+24];
	sub.ftz.f32 	%dx, %xb, %xa;
	fma.rn.ftz.f32 	%xhit, %t, %dx, %xa;
	setp.le.ftz.f32 	%left, %px, %xhit;
	.loc	4	98	0
	@%left mov.s32 	%hitB, 1;
$LDWendi_fdividef_198_5:
	// ---- edge corner 3 -> corner 2 ----
	.loc	4	82	0
	ld.f32 	%y2, [%quad+20];
	.loc	4	91	0
	mov.s32 	%hitC, 0;
	setp.eq.ftz.f32 	%flat, %y3, %y2;
	@%flat bra 	$LDWendi_fdividef_198_3;
	.loc	4	93	0
	ld.f32 	%py, [%pt+4];
	sub.ftz.f32 	%num, %py, %y3;
	sub.ftz.f32 	%den, %y2, %y3;
	div.approx.ftz.f32 	%t, %num, %den;
	setp.ge.ftz.f32 	%ge0, %t, 0f00000000;	// t >= 0
	setp.lt.ftz.f32 	%lt1, %t, 0f3f800000;	// t < 1
	and.pred 	%inrange, %ge0, %lt1;
	@!%inrange bra 	$LDWendi_fdividef_198_3;
	ld.f32 	%xa, [%quad+24];
	ld.f32 	%px, [%pt];
	ld.f32 	%xb, [%quad+16];
	sub.ftz.f32 	%dx, %xb, %xa;
	fma.rn.ftz.f32 	%xhit, %t, %dx, %xa;
	setp.le.ftz.f32 	%left, %px, %xhit;
	.loc	4	98	0
	@%left mov.s32 	%hitC, 1;
$LDWendi_fdividef_198_3:
	// ---- edge corner 2 -> corner 0 (both second floats already loaded) ----
	.loc	4	83	0
	mov.s32 	%hitD, 0;
	setp.eq.ftz.f32 	%flat, %y0, %y2;
	@%flat bra 	$LDWendi_fdividef_198_1;
	.loc	4	93	0
	ld.f32 	%py, [%pt+4];
	sub.ftz.f32 	%num, %py, %y2;
	sub.ftz.f32 	%den, %y0, %y2;
	div.approx.ftz.f32 	%t, %num, %den;
	setp.ge.ftz.f32 	%ge0, %t, 0f00000000;	// t >= 0
	setp.lt.ftz.f32 	%lt1, %t, 0f3f800000;	// t < 1
	and.pred 	%inrange, %ge0, %lt1;
	@!%inrange bra 	$LDWendi_fdividef_198_1;
	ld.f32 	%xa, [%quad+16];
	ld.f32 	%px, [%pt];
	ld.f32 	%xb, [%quad];
	sub.ftz.f32 	%dx, %xb, %xa;
	fma.rn.ftz.f32 	%xhit, %t, %dx, %xa;
	setp.le.ftz.f32 	%left, %px, %xhit;
	.loc	4	98	0
	@%left mov.s32 	%hitD, 1;
$LDWendi_fdividef_198_1:
	.loc	4	85	0
	add.s32 	%sum, %hitD, %hitC;
	add.s32 	%part, %hitB, %hitA;
	add.s32 	%sum, %sum, %part;
	// Each flag is 0 or 1, so %sum is in 0..4 and its low bit is exactly
	// the "sum % 2 == 1" result, already in 0/1 form.
	and.b32 	%inside, %sum, 1;
	st.param.s32 	[__cudaretf__ZNK4Quad13containsPointERK6float2], %inside;
	ret;
$LDWend__ZNK4Quad13containsPointERK6float2:
	} // _ZNK4Quad13containsPointERK6float2

	.visible .func (.param .s32 __cudaretf__ZNK4Quad18countIntersectionsERK6float2S2_S2_) _ZNK4Quad18countIntersectionsERK6float2S2_S2_ (.param .u64 __cudaparmf1__ZNK4Quad18countIntersectionsERK6float2S2_S2_, .param .u64 __cudaparmf2__ZNK4Quad18countIntersectionsERK6float2S2_S2_, .param .u64 __cudaparmf3__ZNK4Quad18countIntersectionsERK6float2S2_S2_, .param .u64 __cudaparmf4__ZNK4Quad18countIntersectionsERK6float2S2_S2_)
	{
	// Single edge test; returns 0 or 1.
	//
	// Parameter 1 is never read. Parameters 2, 3 and 4 each address one
	// float pair (first float at +0, second float at +4): the query point
	// (%pt) and the two edge endpoints (%ea, %eb).
	//
	// The result is 1 only when all of the following hold, else 0:
	//   * second(ea) != second(eb)
	//   * t = (second(pt) - second(ea)) / (second(eb) - second(ea)) lies
	//     in [0, 1)                               (div.approx.ftz)
	//   * first(pt) <= first(ea) + t * (first(eb) - first(ea))
	// The result starts at 0 and is raised by a predicated move, so an
	// unordered (NaN) comparison leaves it at 0.
	.reg .u64 %pt, %ea, %eb;
	.reg .f32 %ya, %yb, %py, %px, %xa, %xb, %num, %den, %t, %dx, %xhit;
	.reg .s32 %hits;
	.reg .pred %flat, %ge0, %lt1, %inrange, %left;
	.loc	4	90	0
$LDWbegin__ZNK4Quad18countIntersectionsERK6float2S2_S2_:
	ld.param.u64 	%pt, [__cudaparmf2__ZNK4Quad18countIntersectionsERK6float2S2_S2_];
	ld.param.u64 	%ea, [__cudaparmf3__ZNK4Quad18countIntersectionsERK6float2S2_S2_];
	ld.param.u64 	%eb, [__cudaparmf4__ZNK4Quad18countIntersectionsERK6float2S2_S2_];
	ld.f32 	%ya, [%ea+4];
	ld.f32 	%yb, [%eb+4];
	.loc	4	91	0
	mov.s32 	%hits, 0;
	setp.eq.ftz.f32 	%flat, %ya, %yb;
	@%flat bra 	$LBB7__ZNK4Quad18countIntersectionsERK6float2S2_S2_;
	.loc	4	93	0
	ld.f32 	%py, [%pt+4];
	sub.ftz.f32 	%num, %py, %ya;
	sub.ftz.f32 	%den, %yb, %ya;
	div.approx.ftz.f32 	%t, %num, %den;
	setp.ge.ftz.f32 	%ge0, %t, 0f00000000;	// t >= 0
	setp.lt.ftz.f32 	%lt1, %t, 0f3f800000;	// t < 1
	and.pred 	%inrange, %ge0, %lt1;
	@!%inrange bra 	$LBB7__ZNK4Quad18countIntersectionsERK6float2S2_S2_;
	ld.f32 	%xa, [%ea];
	ld.f32 	%px, [%pt];
	ld.f32 	%xb, [%eb];
	sub.ftz.f32 	%dx, %xb, %xa;
	fma.rn.ftz.f32 	%xhit, %t, %dx, %xa;
	setp.le.ftz.f32 	%left, %px, %xhit;
	.loc	4	98	0
	@%left mov.s32 	%hits, 1;
$LBB7__ZNK4Quad18countIntersectionsERK6float2S2_S2_:
	st.param.s32 	[__cudaretf__ZNK4Quad18countIntersectionsERK6float2S2_S2_], %hits;
	ret;
$LDWend__ZNK4Quad18countIntersectionsERK6float2S2_S2_:
	} // _ZNK4Quad18countIntersectionsERK6float2S2_S2_

