	.version 2.2
	.target sm_20
	// compiled with ../../../External/3rdParty/NVIDIA/CUDA/win/bin/../open64/lib//be.exe
	// nvopencc 3.2 built on 2010-11-04

	// ---------------------------------------------------------------------
	// Forward declarations (prototypes) of the device functions of this module.
	// Calling convention visible in the signatures below:
	//   * the return value, when there is one, is the parenthesised .param
	//     that precedes the function name (__cudaretf_*);
	//   * arguments are the __cudaparmfN_* .params, in order;
	//   * vector types are passed by value as aligned byte arrays
	//     (8 bytes for ushort4/float2, 12 for float3, 16 for float4);
	//   * pointers and references are passed as 64-bit .u64 values.
	// ---------------------------------------------------------------------

	// IntegerMultiply(int, int) -> .s32
	.visible .func (.param .s32 __cudaretf__Z15IntegerMultiplyii) _Z15IntegerMultiplyii (.param .s32 __cudaparmf1__Z15IntegerMultiplyii, .param .s32 __cudaparmf2__Z15IntegerMultiplyii)

	// Standard2DKernelX() / Standard2DKernelY() -> .s32, no arguments
	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelXv) _Z17Standard2DKernelXv ()

	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelYv) _Z17Standard2DKernelYv ()

	// Half4ToFloat4(ushort4) -> 16-byte result; Float4ToHalf4(float4) -> 8-byte result
	.visible .func (.param .align 16 .b8 __cudaretf__Z13Half4ToFloat47ushort4[16]) _Z13Half4ToFloat47ushort4 (.param .align 8 .b8 __cudaparmf1__Z13Half4ToFloat47ushort4[8])

	.visible .func (.param .align 8 .b8 __cudaretf__Z13Float4ToHalf46float4[8]) _Z13Float4ToHalf46float4 (.param .align 16 .b8 __cudaparmf1__Z13Float4ToHalf46float4[16])

	// Mix3(unsigned&, unsigned&, unsigned&) -> .u32; the three references arrive as 64-bit addresses
	.visible .func (.param .u32 __cudaretf__Z4Mix3RjS_S_) _Z4Mix3RjS_S_ (.param .u64 __cudaparmf1__Z4Mix3RjS_S_, .param .u64 __cudaparmf2__Z4Mix3RjS_S_, .param .u64 __cudaparmf3__Z4Mix3RjS_S_)

	// Rand(unsigned) and the two Rand2D overloads (three unsigned / one unsigned) -> .s32
	.visible .func (.param .s32 __cudaretf__Z4Randj) _Z4Randj (.param .u32 __cudaparmf1__Z4Randj)

	.visible .func (.param .s32 __cudaretf__Z6Rand2Djjj) _Z6Rand2Djjj (.param .u32 __cudaparmf1__Z6Rand2Djjj, .param .u32 __cudaparmf2__Z6Rand2Djjj, .param .u32 __cudaparmf3__Z6Rand2Djjj)

	.visible .func (.param .s32 __cudaretf__Z6Rand2Dj) _Z6Rand2Dj (.param .u32 __cudaparmf1__Z6Rand2Dj)

	// distanceX(float3, float2) -> .f32
	.visible .func (.param .f32 __cudaretf__Z9distanceX6float36float2) _Z9distanceX6float36float2 (.param .align 4 .b8 __cudaparmf1__Z9distanceX6float36float2[12], .param .align 8 .b8 __cudaparmf2__Z9distanceX6float36float2[8])

	// Template instantiations, no return value (per the mangled names):
	//   FillSegment32<N, T>(T*, float2, float3*)    with T = ushort4, N = 16, 8, 4
	//   PointDistance<N>(float2, float3*, float*)   with N = 16, 8, 4
	.visible .func _Z13FillSegment32ILi16E7ushort4EvPT0_6float2P6float3 (.param .u64 __cudaparmf1__Z13FillSegment32ILi16E7ushort4EvPT0_6float2P6float3, .param .align 8 .b8 __cudaparmf2__Z13FillSegment32ILi16E7ushort4EvPT0_6float2P6float3[8], .param .u64 __cudaparmf3__Z13FillSegment32ILi16E7ushort4EvPT0_6float2P6float3)

	.visible .func _Z13PointDistanceILi16EEv6float2P6float3Pf (.param .align 8 .b8 __cudaparmf1__Z13PointDistanceILi16EEv6float2P6float3Pf[8], .param .u64 __cudaparmf2__Z13PointDistanceILi16EEv6float2P6float3Pf, .param .u64 __cudaparmf3__Z13PointDistanceILi16EEv6float2P6float3Pf)

	.visible .func _Z13FillSegment32ILi8E7ushort4EvPT0_6float2P6float3 (.param .u64 __cudaparmf1__Z13FillSegment32ILi8E7ushort4EvPT0_6float2P6float3, .param .align 8 .b8 __cudaparmf2__Z13FillSegment32ILi8E7ushort4EvPT0_6float2P6float3[8], .param .u64 __cudaparmf3__Z13FillSegment32ILi8E7ushort4EvPT0_6float2P6float3)

	.visible .func _Z13PointDistanceILi8EEv6float2P6float3Pf (.param .align 8 .b8 __cudaparmf1__Z13PointDistanceILi8EEv6float2P6float3Pf[8], .param .u64 __cudaparmf2__Z13PointDistanceILi8EEv6float2P6float3Pf, .param .u64 __cudaparmf3__Z13PointDistanceILi8EEv6float2P6float3Pf)

	.visible .func _Z13FillSegment32ILi4E7ushort4EvPT0_6float2P6float3 (.param .u64 __cudaparmf1__Z13FillSegment32ILi4E7ushort4EvPT0_6float2P6float3, .param .align 8 .b8 __cudaparmf2__Z13FillSegment32ILi4E7ushort4EvPT0_6float2P6float3[8], .param .u64 __cudaparmf3__Z13FillSegment32ILi4E7ushort4EvPT0_6float2P6float3)

	.visible .func _Z13PointDistanceILi4EEv6float2P6float3Pf (.param .align 8 .b8 __cudaparmf1__Z13PointDistanceILi4EEv6float2P6float3Pf[8], .param .u64 __cudaparmf2__Z13PointDistanceILi4EEv6float2P6float3Pf, .param .u64 __cudaparmf3__Z13PointDistanceILi4EEv6float2P6float3Pf)

	// FillSegment32<N, T>(T*, float2, float3*) with T = float4, N = 16, 8, 4
	.visible .func _Z13FillSegment32ILi16E6float4EvPT0_6float2P6float3 (.param .u64 __cudaparmf1__Z13FillSegment32ILi16E6float4EvPT0_6float2P6float3, .param .align 8 .b8 __cudaparmf2__Z13FillSegment32ILi16E6float4EvPT0_6float2P6float3[8], .param .u64 __cudaparmf3__Z13FillSegment32ILi16E6float4EvPT0_6float2P6float3)

	.visible .func _Z13FillSegment32ILi8E6float4EvPT0_6float2P6float3 (.param .u64 __cudaparmf1__Z13FillSegment32ILi8E6float4EvPT0_6float2P6float3, .param .align 8 .b8 __cudaparmf2__Z13FillSegment32ILi8E6float4EvPT0_6float2P6float3[8], .param .u64 __cudaparmf3__Z13FillSegment32ILi8E6float4EvPT0_6float2P6float3)

	.visible .func _Z13FillSegment32ILi4E6float4EvPT0_6float2P6float3 (.param .u64 __cudaparmf1__Z13FillSegment32ILi4E6float4EvPT0_6float2P6float3, .param .align 8 .b8 __cudaparmf2__Z13FillSegment32ILi4E6float4EvPT0_6float2P6float3[8], .param .u64 __cudaparmf3__Z13FillSegment32ILi4E6float4EvPT0_6float2P6float3)

	//-----------------------------------------------------------
	// Compiling C:/Users/dvaeng/AppData/Local/Temp/tmpxft_00003f00_00000000-11_GarbageMatte.cpp3.i (C:/Users/dvaeng/AppData/Local/Temp/ccBI#.a15620)
	//-----------------------------------------------------------

	//-----------------------------------------------------------
	// Options:
	//-----------------------------------------------------------
	//  Target:ptx, ISA:sm_20, Endian:little, Pointer Size:64
	//  -O3	(Optimization level)
	//  -g0	(Debug level)
	//  -m2	(Report advisories)
	//-----------------------------------------------------------

	.file	1	"C:/Users/dvaeng/AppData/Local/Temp/tmpxft_00003f00_00000000-10_GarbageMatte.cudafe2.gpu"
	.file	2	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/PixelFormat.h"
	.file	3	"C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\include\crtdefs.h"
	.file	4	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\crt/device_runtime.h"
	.file	5	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\host_defines.h"
	.file	6	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\builtin_types.h"
	.file	7	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\device_types.h"
	.file	8	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\driver_types.h"
	.file	9	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\surface_types.h"
	.file	10	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\texture_types.h"
	.file	11	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\vector_types.h"
	.file	12	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\builtin_types.h"
	.file	13	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\host_defines.h"
	.file	14	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\device_launch_parameters.h"
	.file	15	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\crt\storage_class.h"
	.file	16	"C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\include\time.h"
	.file	17	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/Utils.h"
	.file	18	"c:/Mulder64/shared/adobe/MediaCore/Display/Src/CUDA/Effects/GarbageMatte.cu"
	.file	19	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\common_functions.h"
	.file	20	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_functions.h"
	.file	21	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_constants.h"
	.file	22	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\device_functions.h"
	.file	23	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_11_atomic_functions.h"
	.file	24	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_12_atomic_functions.h"
	.file	25	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_13_double_functions.h"
	.file	26	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_20_atomic_functions.h"
	.file	27	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_20_intrinsics.h"
	.file	28	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\surface_functions.h"
	.file	29	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\texture_fetch_functions.h"
	.file	30	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_functions_dbl_ptx3.h"


	// IntegerMultiply(int, int): returns the low 32 bits of the signed product
	// of its two arguments.
	.visible .func (.param .s32 __cudaretf__Z15IntegerMultiplyii) _Z15IntegerMultiplyii (.param .s32 __cudaparmf1__Z15IntegerMultiplyii, .param .s32 __cudaparmf2__Z15IntegerMultiplyii)
	{
	.reg .s32 %lhs, %rhs, %prod;
	.loc	17	60	0
$LDWbegin__Z15IntegerMultiplyii:
	// Fetch both operands straight from parameter space.
	ld.param.s32 	%lhs, [__cudaparmf1__Z15IntegerMultiplyii];
	ld.param.s32 	%rhs, [__cudaparmf2__Z15IntegerMultiplyii];
	.loc	17	64	0
	// mul.lo keeps only the low 32 bits of the 64-bit product.
	mul.lo.s32 	%prod, %lhs, %rhs;
	st.param.s32 	[__cudaretf__Z15IntegerMultiplyii], %prod;
	ret;
$LDWend__Z15IntegerMultiplyii:
	} // _Z15IntegerMultiplyii

	// Standard2DKernelX(): returns ctaid.x * ntid.x + tid.x, i.e. the calling
	// thread's index along x across the whole grid.
	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelXv) _Z17Standard2DKernelXv ()
	{
	.reg .u32 %block, %width, %lane, %index;
	.loc	17	73	0
$LDWbegin__Z17Standard2DKernelXv:
	.loc	17	74	0
	cvt.s32.u32 	%block, %ctaid.x;	// block index along x
	cvt.s32.u32 	%width, %ntid.x;	// threads per block along x
	mov.u32 	%lane, %tid.x;		// thread index inside the block
	// index = block * width + lane (low 32 bits, same as mul.lo followed by add)
	mad.lo.s32 	%index, %block, %width, %lane;
	st.param.s32 	[__cudaretf__Z17Standard2DKernelXv], %index;
	ret;
$LDWend__Z17Standard2DKernelXv:
	} // _Z17Standard2DKernelXv

	// Standard2DKernelY(): returns ctaid.y * ntid.y + tid.y, i.e. the calling
	// thread's index along y across the whole grid.
	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelYv) _Z17Standard2DKernelYv ()
	{
	.reg .u32 %block, %height, %lane, %index;
	.loc	17	77	0
$LDWbegin__Z17Standard2DKernelYv:
	.loc	17	78	0
	cvt.s32.u32 	%block, %ctaid.y;	// block index along y
	cvt.s32.u32 	%height, %ntid.y;	// threads per block along y
	mov.u32 	%lane, %tid.y;		// thread index inside the block
	// index = block * height + lane (low 32 bits, same as mul.lo followed by add)
	mad.lo.s32 	%index, %block, %height, %lane;
	st.param.s32 	[__cudaretf__Z17Standard2DKernelYv], %index;
	ret;
$LDWend__Z17Standard2DKernelYv:
	} // _Z17Standard2DKernelYv

	// Half4ToFloat4(ushort4): treats each of the four 16-bit lanes of the
	// argument as an f16 bit pattern and widens it to f32 (flush-to-zero mode).
	// Lane i of the input (byte offset 2*i) becomes lane i of the 16-byte
	// result (byte offset 4*i).
	.visible .func (.param .align 16 .b8 __cudaretf__Z13Half4ToFloat47ushort4[16]) _Z13Half4ToFloat47ushort4 (.param .align 8 .b8 __cudaparmf1__Z13Half4ToFloat47ushort4[8])
	{
	.reg .u32 %hx, %hy, %hz, %hw;	// raw 16-bit lanes, zero-extended
	.reg .b32 %bits;		// scratch holding one f16 bit pattern
	.reg .f32 %fx, %fy, %fz, %fw;	// widened lanes
	.loc	17	86	0
$LDWbegin__Z13Half4ToFloat47ushort4:
	ld.param.u16 	%hx, [__cudaparmf1__Z13Half4ToFloat47ushort4+0];
	ld.param.u16 	%hy, [__cudaparmf1__Z13Half4ToFloat47ushort4+2];
	ld.param.u16 	%hz, [__cudaparmf1__Z13Half4ToFloat47ushort4+4];
	ld.param.u16 	%hw, [__cudaparmf1__Z13Half4ToFloat47ushort4+6];
	.loc	17	87	0
	// lane x
	cvt.u16.u32 	%hx, %hx;
	mov.b32		%bits, %hx;
	cvt.ftz.f32.f16	%fx, %bits;
	// lane y
	cvt.u16.u32 	%hy, %hy;
	mov.b32		%bits, %hy;
	cvt.ftz.f32.f16	%fy, %bits;
	// lane z
	cvt.u16.u32 	%hz, %hz;
	mov.b32		%bits, %hz;
	cvt.ftz.f32.f16	%fz, %bits;
	// lane w
	cvt.u16.u32 	%hw, %hw;
	mov.b32		%bits, %hw;
	cvt.ftz.f32.f16	%fw, %bits;
	// Write the four floats back in lane order.
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+0], %fx;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+4], %fy;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+8], %fz;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+12], %fw;
	ret;
$LDWend__Z13Half4ToFloat47ushort4:
	} // _Z13Half4ToFloat47ushort4

	// Float4ToHalf4(float4): narrows each of the four f32 lanes of the argument
	// to an f16 bit pattern (round-to-nearest-even, flush-to-zero) and returns
	// the four 16-bit patterns packed in lane order (byte offsets 0, 2, 4, 6).
	.visible .func (.param .align 8 .b8 __cudaretf__Z13Float4ToHalf46float4[8]) _Z13Float4ToHalf46float4 (.param .align 16 .b8 __cudaparmf1__Z13Float4ToHalf46float4[16])
	{
	.reg .f32 %fx, %fy, %fz, %fw;	// input lanes
	.reg .b32 %bits;		// scratch holding one f16 bit pattern
	.reg .u32 %hx, %hy, %hz, %hw;	// narrowed lanes
	.loc	17	95	0
$LDWbegin__Z13Float4ToHalf46float4:
	ld.param.f32 	%fx, [__cudaparmf1__Z13Float4ToHalf46float4+0];
	ld.param.f32 	%fy, [__cudaparmf1__Z13Float4ToHalf46float4+4];
	ld.param.f32 	%fz, [__cudaparmf1__Z13Float4ToHalf46float4+8];
	ld.param.f32 	%fw, [__cudaparmf1__Z13Float4ToHalf46float4+12];
	.loc	17	96	0
	// lane x
	cvt.rn.ftz.f16.f32	%bits, %fx;
	mov.b32		%hx, %bits;
	cvt.u16.u32 	%hx, %hx;
	// lane y
	cvt.rn.ftz.f16.f32	%bits, %fy;
	mov.b32		%hy, %bits;
	cvt.u16.u32 	%hy, %hy;
	// lane z
	cvt.rn.ftz.f16.f32	%bits, %fz;
	mov.b32		%hz, %bits;
	cvt.u16.u32 	%hz, %hz;
	// lane w
	cvt.rn.ftz.f16.f32	%bits, %fw;
	mov.b32		%hw, %bits;
	cvt.u16.u32 	%hw, %hw;
	// Write the four 16-bit patterns back in lane order.
	st.param.u16 	[__cudaretf__Z13Float4ToHalf46float4+0], %hx;
	st.param.u16 	[__cudaretf__Z13Float4ToHalf46float4+2], %hy;
	st.param.u16 	[__cudaretf__Z13Float4ToHalf46float4+4], %hz;
	st.param.u16 	[__cudaretf__Z13Float4ToHalf46float4+6], %hw;
	ret;
$LDWend__Z13Float4ToHalf46float4:
	} // _Z13Float4ToHalf46float4

	// Mix3(unsigned int&, unsigned int&, unsigned int&) -- per the mangled name.
	// Mixes three 32-bit words in place through the three 64-bit addresses it
	// receives (called a, b, c in the comments below; a = %rd2, b = %rd4,
	// c = %rd6) and returns the final value written to *c.
	//
	// Each of the nine rounds has the shape
	//     x -= y;  x -= z;  x ^= (z shifted by a constant);
	// and x is written back to memory after every step. Operands are re-loaded
	// after each store, presumably because the compiler cannot prove that the
	// three addresses do not alias -- do not reorder the loads and stores.
	// The ld/st instructions carry no state-space qualifier.
	// NOTE(review): the shift schedule (>>13, <<8, >>13, >>12, <<16, >>5, >>3,
	// <<10, >>15) looks like Bob Jenkins' 96-bit mix -- confirm against Utils.h.
	.visible .func (.param .u32 __cudaretf__Z4Mix3RjS_S_) _Z4Mix3RjS_S_ (.param .u64 __cudaparmf1__Z4Mix3RjS_S_, .param .u64 __cudaparmf2__Z4Mix3RjS_S_, .param .u64 __cudaparmf3__Z4Mix3RjS_S_)
	{
	.reg .u32 %r<75>;
	.reg .u64 %rd<8>;
	.loc	17	138	0
$LDWbegin__Z4Mix3RjS_S_:
	// Fetch the three addresses: a -> %rd2, b -> %rd4, c -> %rd6.
	ld.param.u64 	%rd1, [__cudaparmf1__Z4Mix3RjS_S_];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf2__Z4Mix3RjS_S_];
	mov.s64 	%rd4, %rd3;
	ld.param.u64 	%rd5, [__cudaparmf3__Z4Mix3RjS_S_];
	mov.s64 	%rd6, %rd5;
	.loc	17	139	0
	// Round 1: *a -= *b; *a -= *c; *a ^= (*c >> 13)
	ld.u32 	%r1, [%rd2+0];
	ld.u32 	%r2, [%rd4+0];
	sub.u32 	%r3, %r1, %r2;
	st.u32 	[%rd2+0], %r3;
	ld.u32 	%r4, [%rd6+0];
	sub.u32 	%r5, %r3, %r4;
	st.u32 	[%rd2+0], %r5;
	ld.u32 	%r6, [%rd6+0];
	shr.u32 	%r7, %r6, 13;
	xor.b32 	%r8, %r5, %r7;
	st.u32 	[%rd2+0], %r8;
	.loc	17	140	0
	// Round 2: *b -= *c; *b -= *a; *b ^= (*a << 8)
	ld.u32 	%r9, [%rd4+0];
	ld.u32 	%r10, [%rd6+0];
	sub.u32 	%r11, %r9, %r10;
	st.u32 	[%rd4+0], %r11;
	ld.u32 	%r12, [%rd2+0];
	sub.u32 	%r13, %r11, %r12;
	st.u32 	[%rd4+0], %r13;
	ld.u32 	%r14, [%rd2+0];
	shl.b32 	%r15, %r14, 8;
	xor.b32 	%r16, %r13, %r15;
	st.u32 	[%rd4+0], %r16;
	.loc	17	141	0
	// Round 3: *c -= *a; *c -= *b; *c ^= (*b >> 13)
	ld.u32 	%r17, [%rd6+0];
	ld.u32 	%r18, [%rd2+0];
	sub.u32 	%r19, %r17, %r18;
	st.u32 	[%rd6+0], %r19;
	ld.u32 	%r20, [%rd4+0];
	sub.u32 	%r21, %r19, %r20;
	st.u32 	[%rd6+0], %r21;
	ld.u32 	%r22, [%rd4+0];
	shr.u32 	%r23, %r22, 13;
	xor.b32 	%r24, %r21, %r23;
	st.u32 	[%rd6+0], %r24;
	.loc	17	142	0
	// Round 4: *a -= *b; *a -= *c; *a ^= (*c >> 12)
	ld.u32 	%r25, [%rd2+0];
	ld.u32 	%r26, [%rd4+0];
	sub.u32 	%r27, %r25, %r26;
	st.u32 	[%rd2+0], %r27;
	ld.u32 	%r28, [%rd6+0];
	sub.u32 	%r29, %r27, %r28;
	st.u32 	[%rd2+0], %r29;
	ld.u32 	%r30, [%rd6+0];
	shr.u32 	%r31, %r30, 12;
	xor.b32 	%r32, %r29, %r31;
	st.u32 	[%rd2+0], %r32;
	.loc	17	143	0
	// Round 5: *b -= *c; *b -= *a; *b ^= (*a << 16)
	ld.u32 	%r33, [%rd4+0];
	ld.u32 	%r34, [%rd6+0];
	sub.u32 	%r35, %r33, %r34;
	st.u32 	[%rd4+0], %r35;
	ld.u32 	%r36, [%rd2+0];
	sub.u32 	%r37, %r35, %r36;
	st.u32 	[%rd4+0], %r37;
	ld.u32 	%r38, [%rd2+0];
	shl.b32 	%r39, %r38, 16;
	xor.b32 	%r40, %r37, %r39;
	st.u32 	[%rd4+0], %r40;
	.loc	17	144	0
	// Round 6: *c -= *a; *c -= *b; *c ^= (*b >> 5)
	ld.u32 	%r41, [%rd6+0];
	ld.u32 	%r42, [%rd2+0];
	sub.u32 	%r43, %r41, %r42;
	st.u32 	[%rd6+0], %r43;
	ld.u32 	%r44, [%rd4+0];
	sub.u32 	%r45, %r43, %r44;
	st.u32 	[%rd6+0], %r45;
	ld.u32 	%r46, [%rd4+0];
	shr.u32 	%r47, %r46, 5;
	xor.b32 	%r48, %r45, %r47;
	st.u32 	[%rd6+0], %r48;
	.loc	17	145	0
	// Round 7: *a -= *b; *a -= *c; *a ^= (*c >> 3)
	ld.u32 	%r49, [%rd2+0];
	ld.u32 	%r50, [%rd4+0];
	sub.u32 	%r51, %r49, %r50;
	st.u32 	[%rd2+0], %r51;
	ld.u32 	%r52, [%rd6+0];
	sub.u32 	%r53, %r51, %r52;
	st.u32 	[%rd2+0], %r53;
	ld.u32 	%r54, [%rd6+0];
	shr.u32 	%r55, %r54, 3;
	xor.b32 	%r56, %r53, %r55;
	st.u32 	[%rd2+0], %r56;
	.loc	17	146	0
	// Round 8: *b -= *c; *b -= *a; *b ^= (*a << 10)
	ld.u32 	%r57, [%rd4+0];
	ld.u32 	%r58, [%rd6+0];
	sub.u32 	%r59, %r57, %r58;
	st.u32 	[%rd4+0], %r59;
	ld.u32 	%r60, [%rd2+0];
	sub.u32 	%r61, %r59, %r60;
	st.u32 	[%rd4+0], %r61;
	ld.u32 	%r62, [%rd2+0];
	shl.b32 	%r63, %r62, 10;
	xor.b32 	%r64, %r61, %r63;
	st.u32 	[%rd4+0], %r64;
	.loc	17	147	0
	// Round 9: *c -= *a; *c -= *b; *c ^= (*b >> 15)
	ld.u32 	%r65, [%rd6+0];
	ld.u32 	%r66, [%rd2+0];
	sub.u32 	%r67, %r65, %r66;
	st.u32 	[%rd6+0], %r67;
	ld.u32 	%r68, [%rd4+0];
	sub.u32 	%r69, %r67, %r68;
	st.u32 	[%rd6+0], %r69;
	ld.u32 	%r70, [%rd4+0];
	shr.u32 	%r71, %r70, 15;
	xor.b32 	%r72, %r69, %r71;
	st.u32 	[%rd6+0], %r72;
	.loc	17	148	0
	// Return the value just stored to *c (taken from the register, not re-loaded).
	mov.s32 	%r73, %r72;
	st.param.u32 	[__cudaretf__Z4Mix3RjS_S_], %r73;
	ret;
$LDWend__Z4Mix3RjS_S_:
	} // _Z4Mix3RjS_S_

	// Rand(unsigned int seed): returns
	//     ((((seed * 1103515245 + 12345) >> 16) & 255) << 7)
	//   ^  (((seed * -1029531031 - 740551042) >> 16) & 255)
	// with all arithmetic modulo 2^32; the result lies in [0, 32767].
	// The second multiplier/offset pair equals the first pair composed with
	// itself modulo 2^32 (A*A and A*C + C), so the second term is the same
	// linear step applied twice to the seed.
	.visible .func (.param .s32 __cudaretf__Z4Randj) _Z4Randj (.param .u32 __cudaparmf1__Z4Randj)
	{
	.reg .u32 %seed;
	.reg .u32 %s1, %s2;	// byte extracted after one step / after two steps
	.loc	17	152	0
$LDWbegin__Z4Randj:
	ld.param.u32 	%seed, [__cudaparmf1__Z4Randj];
	.loc	17	163	0
	// One step: bits 16..23 of the result, moved up to bit 7.
	mul.lo.u32 	%s1, %seed, 1103515245;
	add.u32 	%s1, %s1, 12345;
	shr.u32 	%s1, %s1, 16;
	and.b32 	%s1, %s1, 255;
	shl.b32 	%s1, %s1, 7;
	// Two steps folded into one multiply/subtract: bits 16..23 of the result.
	mul.lo.u32 	%s2, %seed, -1029531031;
	sub.u32 	%s2, %s2, 740551042;
	shr.u32 	%s2, %s2, 16;
	and.b32 	%s2, %s2, 255;
	// Combine both bytes.
	xor.b32 	%s1, %s1, %s2;
	st.param.s32 	[__cudaretf__Z4Randj], %s1;
	ret;
$LDWend__Z4Randj:
	} // _Z4Randj

	// Rand2D(unsigned int, unsigned int, unsigned int): runs nine subtract /
	// shift / xor mixing rounds over the three arguments (a, b, c below), then
	// derives the return value from the final c with the same two-term
	// multiply/add/shift/mask formula that Rand() uses. All arithmetic is
	// modulo 2^32; the result lies in [0, 32767].
	.visible .func (.param .s32 __cudaretf__Z6Rand2Djjj) _Z6Rand2Djjj (.param .u32 __cudaparmf1__Z6Rand2Djjj, .param .u32 __cudaparmf2__Z6Rand2Djjj, .param .u32 __cudaparmf3__Z6Rand2Djjj)
	{
	.reg .u32 %ma, %mb, %mc;	// the three words being mixed, updated in place
	.reg .u32 %tmp;			// shifted operand of the current round
	.reg .u32 %s1, %s2;		// the two bytes combined into the result
	.loc	17	169	0
$LDWbegin__Z6Rand2Djjj:
	ld.param.u32 	%ma, [__cudaparmf1__Z6Rand2Djjj];
	ld.param.u32 	%mb, [__cudaparmf2__Z6Rand2Djjj];
	ld.param.u32 	%mc, [__cudaparmf3__Z6Rand2Djjj];
	.loc	17	139	0
	// a -= b; a -= c; a ^= (c >> 13)
	sub.u32 	%ma, %ma, %mb;
	sub.u32 	%ma, %ma, %mc;
	shr.u32 	%tmp, %mc, 13;
	xor.b32 	%ma, %ma, %tmp;
	.loc	17	140	0
	// b -= c; b -= a; b ^= (a << 8)
	sub.u32 	%mb, %mb, %mc;
	sub.u32 	%mb, %mb, %ma;
	shl.b32 	%tmp, %ma, 8;
	xor.b32 	%mb, %mb, %tmp;
	.loc	17	141	0
	// c -= a; c -= b; c ^= (b >> 13)
	sub.u32 	%mc, %mc, %ma;
	sub.u32 	%mc, %mc, %mb;
	shr.u32 	%tmp, %mb, 13;
	xor.b32 	%mc, %mc, %tmp;
	.loc	17	142	0
	// a -= b; a -= c; a ^= (c >> 12)
	sub.u32 	%ma, %ma, %mb;
	sub.u32 	%ma, %ma, %mc;
	shr.u32 	%tmp, %mc, 12;
	xor.b32 	%ma, %ma, %tmp;
	.loc	17	143	0
	// b -= c; b -= a; b ^= (a << 16)
	sub.u32 	%mb, %mb, %mc;
	sub.u32 	%mb, %mb, %ma;
	shl.b32 	%tmp, %ma, 16;
	xor.b32 	%mb, %mb, %tmp;
	.loc	17	144	0
	// c -= a; c -= b; c ^= (b >> 5)
	sub.u32 	%mc, %mc, %ma;
	sub.u32 	%mc, %mc, %mb;
	shr.u32 	%tmp, %mb, 5;
	xor.b32 	%mc, %mc, %tmp;
	.loc	17	145	0
	// a -= b; a -= c; a ^= (c >> 3)
	sub.u32 	%ma, %ma, %mb;
	sub.u32 	%ma, %ma, %mc;
	shr.u32 	%tmp, %mc, 3;
	xor.b32 	%ma, %ma, %tmp;
	.loc	17	146	0
	// b -= c; b -= a; b ^= (a << 10)
	sub.u32 	%mb, %mb, %mc;
	sub.u32 	%mb, %mb, %ma;
	shl.b32 	%tmp, %ma, 10;
	xor.b32 	%mb, %mb, %tmp;
	.loc	17	147	0
	// c -= a; c -= b; c ^= (b >> 15)
	sub.u32 	%mc, %mc, %ma;
	sub.u32 	%mc, %mc, %mb;
	shr.u32 	%tmp, %mb, 15;
	xor.b32 	%mc, %mc, %tmp;
	.loc	17	170	0
	// Finaliser on the mixed c: same formula as Rand().
	mul.lo.u32 	%s1, %mc, 1103515245;
	add.u32 	%s1, %s1, 12345;
	shr.u32 	%s1, %s1, 16;
	and.b32 	%s1, %s1, 255;
	shl.b32 	%s1, %s1, 7;
	mul.lo.u32 	%s2, %mc, -1029531031;
	sub.u32 	%s2, %s2, 740551042;
	shr.u32 	%s2, %s2, 16;
	and.b32 	%s2, %s2, 255;
	xor.b32 	%s1, %s1, %s2;
	st.param.s32 	[__cudaretf__Z6Rand2Djjj], %s1;
	ret;
$LDWend__Z6Rand2Djjj:
	} // _Z6Rand2Djjj

	// Rand2D(unsigned int seed): per-thread variant. Mixes
	//     a = seed
	//     b = ctaid.x * ntid.x + tid.x
	//     c = ctaid.y * ntid.y + tid.y
	// through the same nine subtract / shift / xor rounds as the three-argument
	// Rand2D, then derives the return value from the final c with the two-term
	// multiply/add/shift/mask formula. The result lies in [0, 32767].
	.visible .func (.param .s32 __cudaretf__Z6Rand2Dj) _Z6Rand2Dj (.param .u32 __cudaparmf1__Z6Rand2Dj)
	{
	.reg .u32 %ma, %mb, %mc;	// the three words being mixed, updated in place
	.reg .u32 %tmp;			// shifted operand of the current round
	.reg .u32 %blk, %dim, %thr;	// scratch for the thread-index computation
	.reg .u32 %s1, %s2;		// the two bytes combined into the result
	.loc	17	175	0
$LDWbegin__Z6Rand2Dj:
	ld.param.u32 	%ma, [__cudaparmf1__Z6Rand2Dj];
	.loc	17	143	0
	// b = index of this thread along x across the grid
	cvt.s32.u32 	%blk, %ctaid.x;
	cvt.s32.u32 	%dim, %ntid.x;
	mov.u32 	%thr, %tid.x;
	mad.lo.s32 	%mb, %blk, %dim, %thr;
	// c = index of this thread along y across the grid
	cvt.s32.u32 	%blk, %ctaid.y;
	cvt.s32.u32 	%dim, %ntid.y;
	mov.u32 	%thr, %tid.y;
	mad.lo.s32 	%mc, %blk, %dim, %thr;
	// a -= b; a -= c; a ^= (c >> 13)
	sub.u32 	%ma, %ma, %mb;
	sub.u32 	%ma, %ma, %mc;
	shr.u32 	%tmp, %mc, 13;
	xor.b32 	%ma, %ma, %tmp;
	// b -= c; b -= a; b ^= (a << 8)
	sub.u32 	%mb, %mb, %mc;
	sub.u32 	%mb, %mb, %ma;
	shl.b32 	%tmp, %ma, 8;
	xor.b32 	%mb, %mb, %tmp;
	// c -= a; c -= b; c ^= (b >> 13)
	sub.u32 	%mc, %mc, %ma;
	sub.u32 	%mc, %mc, %mb;
	shr.u32 	%tmp, %mb, 13;
	xor.b32 	%mc, %mc, %tmp;
	// a -= b; a -= c; a ^= (c >> 12)
	sub.u32 	%ma, %ma, %mb;
	sub.u32 	%ma, %ma, %mc;
	shr.u32 	%tmp, %mc, 12;
	xor.b32 	%ma, %ma, %tmp;
	// b -= c; b -= a; b ^= (a << 16)
	sub.u32 	%mb, %mb, %mc;
	sub.u32 	%mb, %mb, %ma;
	shl.b32 	%tmp, %ma, 16;
	xor.b32 	%mb, %mb, %tmp;
	.loc	17	144	0
	// c -= a; c -= b; c ^= (b >> 5)
	sub.u32 	%mc, %mc, %ma;
	sub.u32 	%mc, %mc, %mb;
	shr.u32 	%tmp, %mb, 5;
	xor.b32 	%mc, %mc, %tmp;
	.loc	17	145	0
	// a -= b; a -= c; a ^= (c >> 3)
	sub.u32 	%ma, %ma, %mb;
	sub.u32 	%ma, %ma, %mc;
	shr.u32 	%tmp, %mc, 3;
	xor.b32 	%ma, %ma, %tmp;
	.loc	17	146	0
	// b -= c; b -= a; b ^= (a << 10)
	sub.u32 	%mb, %mb, %mc;
	sub.u32 	%mb, %mb, %ma;
	shl.b32 	%tmp, %ma, 10;
	xor.b32 	%mb, %mb, %tmp;
	.loc	17	147	0
	// c -= a; c -= b; c ^= (b >> 15)
	sub.u32 	%mc, %mc, %ma;
	sub.u32 	%mc, %mc, %mb;
	shr.u32 	%tmp, %mb, 15;
	xor.b32 	%mc, %mc, %tmp;
	.loc	17	176	0
	// Finaliser on the mixed c: same formula as Rand().
	mul.lo.u32 	%s1, %mc, 1103515245;
	add.u32 	%s1, %s1, 12345;
	shr.u32 	%s1, %s1, 16;
	and.b32 	%s1, %s1, 255;
	shl.b32 	%s1, %s1, 7;
	mul.lo.u32 	%s2, %mc, -1029531031;
	sub.u32 	%s2, %s2, 740551042;
	shr.u32 	%s2, %s2, 16;
	and.b32 	%s2, %s2, 255;
	xor.b32 	%s1, %s1, %s2;
	st.param.s32 	[__cudaretf__Z6Rand2Dj], %s1;
	ret;
$LDWend__Z6Rand2Dj:
	} // _Z6Rand2Dj

	// distanceX(float3 s, float2 p): returns
	//     s.z * (p.y - s.y) - (p.x - s.x)
	// evaluated in f32 with flush-to-zero, as a separate multiply and subtract.
	// NOTE(review): s.z presumably holds a precomputed dx/dy slope of a segment
	// starting at (s.x, s.y) -- confirm against GarbageMatte.cu.
	.visible .func (.param .f32 __cudaretf__Z9distanceX6float36float2) _Z9distanceX6float36float2 (.param .align 4 .b8 __cudaparmf1__Z9distanceX6float36float2[12], .param .align 8 .b8 __cudaparmf2__Z9distanceX6float36float2[8])
	{
	.reg .f32 %sx, %sy, %sz;	// fields of the float3 argument
	.reg .f32 %px, %py;		// fields of the float2 argument
	.reg .f32 %dx, %dy, %res;
	.loc	18	29	0
$LDWbegin__Z9distanceX6float36float2:
	ld.param.f32 	%sx, [__cudaparmf1__Z9distanceX6float36float2+0];
	ld.param.f32 	%sy, [__cudaparmf1__Z9distanceX6float36float2+4];
	ld.param.f32 	%sz, [__cudaparmf1__Z9distanceX6float36float2+8];
	ld.param.f32 	%px, [__cudaparmf2__Z9distanceX6float36float2+0];
	ld.param.f32 	%py, [__cudaparmf2__Z9distanceX6float36float2+4];
	.loc	18	30	0
	sub.ftz.f32 	%dx, %px, %sx;		// p.x - s.x
	sub.ftz.f32 	%dy, %py, %sy;		// p.y - s.y
	mul.ftz.f32 	%res, %sz, %dy;		// s.z * (p.y - s.y)
	sub.ftz.f32 	%res, %res, %dx;	// ... - (p.x - s.x)
	st.param.f32 	[__cudaretf__Z9distanceX6float36float2], %res;
	ret;
$LDWend__Z9distanceX6float36float2:
	} // _Z9distanceX6float36float2

	.visible .func _Z13FillSegment32ILi16E7ushort4EvPT0_6float2P6float3 (.param .u64 __cudaparmf1__Z13FillSegment32ILi16E7ushort4EvPT0_6float2P6float3, .param .align 8 .b8 __cudaparmf2__Z13FillSegment32ILi16E7ushort4EvPT0_6float2P6float3[8], .param .u64 __cudaparmf3__Z13FillSegment32ILi16E7ushort4EvPT0_6float2P6float3)
	{
	.reg .u32 %r<51>;
	.reg .u64 %rd<9>;
	.reg .f32 %f<188>;
	.reg .pred %p<87>;
	// __cuda_local_var_91085_8_non_const_dist = 0
	.loc	18	46	0
$LDWbegin__Z13FillSegment32ILi16E7ushort4EvPT0_6float2P6float3:
	ld.param.u64 	%rd1, [__cudaparmf1__Z13FillSegment32ILi16E7ushort4EvPT0_6float2P6float3];
	mov.s64 	%rd2, %rd1;
	ld.param.f32 	%f1, [__cudaparmf2__Z13FillSegment32ILi16E7ushort4EvPT0_6float2P6float3+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf2__Z13FillSegment32ILi16E7ushort4EvPT0_6float2P6float3+4];
	mov.f32 	%f4, %f3;
	ld.param.u64 	%rd3, [__cudaparmf3__Z13FillSegment32ILi16E7ushort4EvPT0_6float2P6float3];
	mov.s64 	%rd4, %rd3;
	.loc	18	51	0
	mov.f32 	%f5, 0fbf800000;     	// -1
	mov.f32 	%f6, %f5;
	mov.f32 	%f7, 0fbf800000;     	// -1
	mov.f32 	%f8, %f7;
	mov.f32 	%f9, 0fbf800000;     	// -1
	mov.f32 	%f10, %f9;
	mov.f32 	%f11, 0fbf800000;    	// -1
	mov.f32 	%f12, %f11;
	mov.f32 	%f13, 0fbf800000;    	// -1
	mov.f32 	%f14, %f13;
	mov.f32 	%f15, 0fbf800000;    	// -1
	mov.f32 	%f16, %f15;
	mov.f32 	%f17, 0fbf800000;    	// -1
	mov.f32 	%f18, %f17;
	mov.f32 	%f19, 0fbf800000;    	// -1
	mov.f32 	%f20, %f19;
	mov.f32 	%f21, 0fbf800000;    	// -1
	mov.f32 	%f22, %f21;
	mov.f32 	%f23, 0fbf800000;    	// -1
	mov.f32 	%f24, %f23;
	mov.f32 	%f25, 0fbf800000;    	// -1
	mov.f32 	%f26, %f25;
	mov.f32 	%f27, 0fbf800000;    	// -1
	mov.f32 	%f28, %f27;
	mov.f32 	%f29, 0fbf800000;    	// -1
	mov.f32 	%f30, %f29;
	mov.f32 	%f31, 0fbf800000;    	// -1
	mov.f32 	%f32, %f31;
	mov.f32 	%f33, 0fbf800000;    	// -1
	mov.f32 	%f34, %f33;
	mov.f32 	%f35, 0fbf800000;    	// -1
	mov.f32 	%f36, %f35;
	.loc	18	24	0
	ld.f32 	%f37, [%rd4+4];
	ld.f32 	%f38, [%rd4+16];
	setp.gt.ftz.f32 	%p1, %f38, %f4;
	setp.le.ftz.f32 	%p2, %f37, %f4;
	xor.pred 	%p3, %p1, %p2;
	@%p3 bra 	$Lt_10_14338;
	.loc	18	41	0
	ld.f32 	%f39, [%rd4+0];
	sub.ftz.f32 	%f40, %f2, %f39;
	ld.f32 	%f41, [%rd4+8];
	sub.ftz.f32 	%f42, %f4, %f37;
	mul.ftz.f32 	%f43, %f41, %f42;
	sub.ftz.f32 	%f44, %f43, %f40;
	mov.f32 	%f6, %f44;
$Lt_10_14338:
	ld.f32 	%f45, [%rd4+28];
	setp.gt.ftz.f32 	%p4, %f45, %f4;
	setp.le.ftz.f32 	%p5, %f38, %f4;
	xor.pred 	%p6, %p4, %p5;
	@%p6 bra 	$Lt_10_14850;
	ld.f32 	%f46, [%rd4+12];
	sub.ftz.f32 	%f47, %f2, %f46;
	ld.f32 	%f48, [%rd4+20];
	sub.ftz.f32 	%f49, %f4, %f38;
	mul.ftz.f32 	%f50, %f48, %f49;
	sub.ftz.f32 	%f51, %f50, %f47;
	mov.f32 	%f8, %f51;
$Lt_10_14850:
	ld.f32 	%f52, [%rd4+40];
	setp.gt.ftz.f32 	%p7, %f52, %f4;
	setp.le.ftz.f32 	%p8, %f45, %f4;
	xor.pred 	%p9, %p7, %p8;
	@%p9 bra 	$Lt_10_15362;
	ld.f32 	%f53, [%rd4+24];
	sub.ftz.f32 	%f54, %f2, %f53;
	ld.f32 	%f55, [%rd4+32];
	sub.ftz.f32 	%f56, %f4, %f45;
	mul.ftz.f32 	%f57, %f55, %f56;
	sub.ftz.f32 	%f58, %f57, %f54;
	mov.f32 	%f10, %f58;
$Lt_10_15362:
	ld.f32 	%f59, [%rd4+52];
	setp.gt.ftz.f32 	%p10, %f59, %f4;
	setp.le.ftz.f32 	%p11, %f52, %f4;
	xor.pred 	%p12, %p10, %p11;
	@%p12 bra 	$Lt_10_15874;
	ld.f32 	%f60, [%rd4+36];
	sub.ftz.f32 	%f61, %f2, %f60;
	ld.f32 	%f62, [%rd4+44];
	sub.ftz.f32 	%f63, %f4, %f52;
	mul.ftz.f32 	%f64, %f62, %f63;
	sub.ftz.f32 	%f65, %f64, %f61;
	mov.f32 	%f12, %f65;
$Lt_10_15874:
	ld.f32 	%f66, [%rd4+64];
	setp.gt.ftz.f32 	%p13, %f66, %f4;
	setp.le.ftz.f32 	%p14, %f59, %f4;
	xor.pred 	%p15, %p13, %p14;
	@%p15 bra 	$Lt_10_16386;
	ld.f32 	%f67, [%rd4+48];
	sub.ftz.f32 	%f68, %f2, %f67;
	ld.f32 	%f69, [%rd4+56];
	sub.ftz.f32 	%f70, %f4, %f59;
	mul.ftz.f32 	%f71, %f69, %f70;
	sub.ftz.f32 	%f72, %f71, %f68;
	mov.f32 	%f14, %f72;
$Lt_10_16386:
	ld.f32 	%f73, [%rd4+76];
	setp.gt.ftz.f32 	%p16, %f73, %f4;
	setp.le.ftz.f32 	%p17, %f66, %f4;
	xor.pred 	%p18, %p16, %p17;
	@%p18 bra 	$Lt_10_16898;
	ld.f32 	%f74, [%rd4+60];
	sub.ftz.f32 	%f75, %f2, %f74;
	ld.f32 	%f76, [%rd4+68];
	sub.ftz.f32 	%f77, %f4, %f66;
	mul.ftz.f32 	%f78, %f76, %f77;
	sub.ftz.f32 	%f79, %f78, %f75;
	mov.f32 	%f16, %f79;
$Lt_10_16898:
	ld.f32 	%f80, [%rd4+88];
	setp.gt.ftz.f32 	%p19, %f80, %f4;
	setp.le.ftz.f32 	%p20, %f73, %f4;
	xor.pred 	%p21, %p19, %p20;
	@%p21 bra 	$Lt_10_17410;
	ld.f32 	%f81, [%rd4+72];
	sub.ftz.f32 	%f82, %f2, %f81;
	ld.f32 	%f83, [%rd4+80];
	sub.ftz.f32 	%f84, %f4, %f73;
	mul.ftz.f32 	%f85, %f83, %f84;
	sub.ftz.f32 	%f86, %f85, %f82;
	mov.f32 	%f18, %f86;
$Lt_10_17410:
	ld.f32 	%f87, [%rd4+100];
	setp.gt.ftz.f32 	%p22, %f87, %f4;
	setp.le.ftz.f32 	%p23, %f80, %f4;
	xor.pred 	%p24, %p22, %p23;
	@%p24 bra 	$Lt_10_17922;
	ld.f32 	%f88, [%rd4+84];
	sub.ftz.f32 	%f89, %f2, %f88;
	ld.f32 	%f90, [%rd4+92];
	sub.ftz.f32 	%f91, %f4, %f80;
	mul.ftz.f32 	%f92, %f90, %f91;
	sub.ftz.f32 	%f93, %f92, %f89;
	mov.f32 	%f20, %f93;
$Lt_10_17922:
	ld.f32 	%f94, [%rd4+112];
	setp.gt.ftz.f32 	%p25, %f94, %f4;
	setp.le.ftz.f32 	%p26, %f87, %f4;
	xor.pred 	%p27, %p25, %p26;
	@%p27 bra 	$Lt_10_18434;
	ld.f32 	%f95, [%rd4+96];
	sub.ftz.f32 	%f96, %f2, %f95;
	ld.f32 	%f97, [%rd4+104];
	sub.ftz.f32 	%f98, %f4, %f87;
	mul.ftz.f32 	%f99, %f97, %f98;
	sub.ftz.f32 	%f100, %f99, %f96;
	mov.f32 	%f22, %f100;
$Lt_10_18434:
	ld.f32 	%f101, [%rd4+124];
	setp.gt.ftz.f32 	%p28, %f101, %f4;
	setp.le.ftz.f32 	%p29, %f94, %f4;
	xor.pred 	%p30, %p28, %p29;
	@%p30 bra 	$Lt_10_18946;
	ld.f32 	%f102, [%rd4+108];
	sub.ftz.f32 	%f103, %f2, %f102;
	ld.f32 	%f104, [%rd4+116];
	sub.ftz.f32 	%f105, %f4, %f94;
	mul.ftz.f32 	%f106, %f104, %f105;
	sub.ftz.f32 	%f107, %f106, %f103;
	mov.f32 	%f24, %f107;
$Lt_10_18946:
	ld.f32 	%f108, [%rd4+136];
	setp.gt.ftz.f32 	%p31, %f108, %f4;
	setp.le.ftz.f32 	%p32, %f101, %f4;
	xor.pred 	%p33, %p31, %p32;
	@%p33 bra 	$Lt_10_19458;
	ld.f32 	%f109, [%rd4+120];
	sub.ftz.f32 	%f110, %f2, %f109;
	ld.f32 	%f111, [%rd4+128];
	sub.ftz.f32 	%f112, %f4, %f101;
	mul.ftz.f32 	%f113, %f111, %f112;
	sub.ftz.f32 	%f114, %f113, %f110;
	mov.f32 	%f26, %f114;
$Lt_10_19458:
	ld.f32 	%f115, [%rd4+148];
	setp.gt.ftz.f32 	%p34, %f115, %f4;
	setp.le.ftz.f32 	%p35, %f108, %f4;
	xor.pred 	%p36, %p34, %p35;
	@%p36 bra 	$Lt_10_19970;
	ld.f32 	%f116, [%rd4+132];
	sub.ftz.f32 	%f117, %f2, %f116;
	ld.f32 	%f118, [%rd4+140];
	sub.ftz.f32 	%f119, %f4, %f108;
	mul.ftz.f32 	%f120, %f118, %f119;
	sub.ftz.f32 	%f121, %f120, %f117;
	mov.f32 	%f28, %f121;
$Lt_10_19970:
	ld.f32 	%f122, [%rd4+160];
	setp.gt.ftz.f32 	%p37, %f122, %f4;
	setp.le.ftz.f32 	%p38, %f115, %f4;
	xor.pred 	%p39, %p37, %p38;
	@%p39 bra 	$Lt_10_20482;
	ld.f32 	%f123, [%rd4+144];
	sub.ftz.f32 	%f124, %f2, %f123;
	ld.f32 	%f125, [%rd4+152];
	sub.ftz.f32 	%f126, %f4, %f115;
	mul.ftz.f32 	%f127, %f125, %f126;
	sub.ftz.f32 	%f128, %f127, %f124;
	mov.f32 	%f30, %f128;
$Lt_10_20482:
	ld.f32 	%f129, [%rd4+172];
	setp.gt.ftz.f32 	%p40, %f129, %f4;
	setp.le.ftz.f32 	%p41, %f122, %f4;
	xor.pred 	%p42, %p40, %p41;
	@%p42 bra 	$Lt_10_20994;
	ld.f32 	%f130, [%rd4+156];
	sub.ftz.f32 	%f131, %f2, %f130;
	ld.f32 	%f132, [%rd4+164];
	sub.ftz.f32 	%f133, %f4, %f122;
	mul.ftz.f32 	%f134, %f132, %f133;
	sub.ftz.f32 	%f135, %f134, %f131;
	mov.f32 	%f32, %f135;
$Lt_10_20994:
	ld.f32 	%f136, [%rd4+184];
	setp.gt.ftz.f32 	%p43, %f136, %f4;
	setp.le.ftz.f32 	%p44, %f129, %f4;
	xor.pred 	%p45, %p43, %p44;
	@%p45 bra 	$Lt_10_21506;
	ld.f32 	%f137, [%rd4+168];
	sub.ftz.f32 	%f138, %f2, %f137;
	ld.f32 	%f139, [%rd4+176];
	sub.ftz.f32 	%f140, %f4, %f129;
	mul.ftz.f32 	%f141, %f139, %f140;
	sub.ftz.f32 	%f142, %f141, %f138;
	mov.f32 	%f34, %f142;
$Lt_10_21506:
	ld.f32 	%f143, [%rd4+196];
	setp.gt.ftz.f32 	%p46, %f143, %f4;
	setp.le.ftz.f32 	%p47, %f136, %f4;
	xor.pred 	%p48, %p46, %p47;
	@%p48 bra 	$Lt_10_22018;
	ld.f32 	%f144, [%rd4+180];
	sub.ftz.f32 	%f145, %f2, %f144;
	ld.f32 	%f146, [%rd4+188];
	sub.ftz.f32 	%f147, %f4, %f136;
	mul.ftz.f32 	%f148, %f146, %f147;
	sub.ftz.f32 	%f149, %f148, %f145;
	mov.f32 	%f36, %f149;
$Lt_10_22018:
	.loc	18	52	0
	mov.s32 	%r1, 0;
$Lt_10_23042:
 //<loop> Loop body line 52, nesting depth: 1, iterations: 4
	cvt.rn.f32.s32 	%f150, %r1;
	mov.f32 	%f151, %f6;
	setp.lt.ftz.f32 	%p49, %f150, %f151;
	@!%p49 bra 	$Lt_10_23554;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	62	0
	mov.f32 	%f152, 0f47c35000;   	// 100000
	min.ftz.f32 	%f153, %f151, %f152;
	mov.s32 	%r2, 1;
	bra.uni 	$Lt_10_23298;
$Lt_10_23554:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.s32 	%r2, 0;
	mov.f32 	%f153, 0f47c35000;   	// 100000
$Lt_10_23298:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.f32 	%f154, %f8;
	setp.lt.ftz.f32 	%p50, %f150, %f154;
	@!%p50 bra 	$Lt_10_23810;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f154;
$Lt_10_23810:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.f32 	%f155, %f10;
	setp.lt.ftz.f32 	%p51, %f150, %f155;
	@!%p51 bra 	$Lt_10_24322;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f155;
$Lt_10_24322:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.f32 	%f156, %f12;
	setp.lt.ftz.f32 	%p52, %f150, %f156;
	@!%p52 bra 	$Lt_10_24834;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f156;
$Lt_10_24834:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.f32 	%f157, %f14;
	setp.lt.ftz.f32 	%p53, %f150, %f157;
	@!%p53 bra 	$Lt_10_25346;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f157;
$Lt_10_25346:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.f32 	%f158, %f16;
	setp.lt.ftz.f32 	%p54, %f150, %f158;
	@!%p54 bra 	$Lt_10_25858;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f158;
$Lt_10_25858:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.f32 	%f159, %f18;
	setp.lt.ftz.f32 	%p55, %f150, %f159;
	@!%p55 bra 	$Lt_10_26370;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f159;
$Lt_10_26370:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.f32 	%f160, %f20;
	setp.lt.ftz.f32 	%p56, %f150, %f160;
	@!%p56 bra 	$Lt_10_26882;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f160;
$Lt_10_26882:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.f32 	%f161, %f22;
	setp.lt.ftz.f32 	%p57, %f150, %f161;
	@!%p57 bra 	$Lt_10_27394;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f161;
$Lt_10_27394:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.f32 	%f162, %f24;
	setp.lt.ftz.f32 	%p58, %f150, %f162;
	@!%p58 bra 	$Lt_10_27906;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f162;
$Lt_10_27906:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.f32 	%f163, %f26;
	setp.lt.ftz.f32 	%p59, %f150, %f163;
	@!%p59 bra 	$Lt_10_28418;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f163;
$Lt_10_28418:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.f32 	%f164, %f28;
	setp.lt.ftz.f32 	%p60, %f150, %f164;
	@!%p60 bra 	$Lt_10_28930;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f164;
$Lt_10_28930:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.f32 	%f165, %f30;
	setp.lt.ftz.f32 	%p61, %f150, %f165;
	@!%p61 bra 	$Lt_10_29442;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f165;
$Lt_10_29442:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.f32 	%f166, %f32;
	setp.lt.ftz.f32 	%p62, %f150, %f166;
	@!%p62 bra 	$Lt_10_29954;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f166;
$Lt_10_29954:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.f32 	%f167, %f34;
	setp.lt.ftz.f32 	%p63, %f150, %f167;
	@!%p63 bra 	$Lt_10_30466;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f167;
$Lt_10_30466:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.f32 	%f168, %f36;
	setp.lt.ftz.f32 	%p64, %f150, %f168;
	@!%p64 bra 	$Lt_10_30978;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f168;
$Lt_10_30978:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.s32 	%r3, 0;
	setp.eq.s32 	%p65, %r2, %r3;
	add.s32 	%r4, %r1, 8;
	cvt.rn.f32.s32 	%f169, %r4;
	setp.le.ftz.f32 	%p66, %f169, %f153;
	@!%p66 bra 	$Lt_10_31746;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	@!%p65 bra 	$Lt_10_31490;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	68	0
	mov.u32 	%r5, 0;
	st.u16 	[%rd2+6], %r5;
	mov.u32 	%r6, 0;
	st.u16 	[%rd2+14], %r6;
	mov.u32 	%r7, 0;
	st.u16 	[%rd2+22], %r7;
	mov.u32 	%r8, 0;
	st.u16 	[%rd2+30], %r8;
	mov.u32 	%r9, 0;
	st.u16 	[%rd2+38], %r9;
	mov.u32 	%r10, 0;
	st.u16 	[%rd2+46], %r10;
	mov.u32 	%r11, 0;
	st.u16 	[%rd2+54], %r11;
	mov.u32 	%r12, 0;
	st.u16 	[%rd2+62], %r12;
	bra.uni 	$Lt_10_31490;
$Lt_10_31746:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	@!%p65 bra 	$Lt_10_32514;
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	72	0
	mov.u32 	%r13, 0;
	st.u16 	[%rd2+6], %r13;
$Lt_10_32514:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	mov.s32 	%r14, 1;
$Lt_10_33538:
 //<loop> Loop body line 72, nesting depth: 2, iterations: 7
	.loc	18	75	0
	add.s32 	%r15, %r14, %r1;
	cvt.rn.f32.s32 	%f170, %r15;
	mov.f32 	%f171, %f14;
	setp.gt.ftz.f32 	%p67, %f171, %f170;
	mov.f32 	%f172, %f12;
	setp.gt.ftz.f32 	%p68, %f172, %f170;
	mov.f32 	%f173, %f10;
	setp.gt.ftz.f32 	%p69, %f173, %f170;
	mov.f32 	%f174, %f8;
	setp.gt.ftz.f32 	%p70, %f174, %f170;
	mov.f32 	%f175, %f6;
	setp.gt.ftz.f32 	%p71, %f175, %f170;
	selp.s32 	%r16, 1, 0, %p71;
	xor.b32 	%r17, %r16, 1;
	selp.s32 	%r18, %r17, %r16, %p70;
	xor.b32 	%r19, %r18, 1;
	selp.s32 	%r20, %r19, %r18, %p69;
	xor.b32 	%r21, %r20, 1;
	selp.s32 	%r22, %r21, %r20, %p68;
	xor.b32 	%r23, %r22, 1;
	selp.s32 	%r24, %r23, %r22, %p67;
	xor.b32 	%r25, %r24, 1;
	mov.f32 	%f176, %f16;
	setp.gt.ftz.f32 	%p72, %f176, %f170;
	selp.s32 	%r26, %r25, %r24, %p72;
	xor.b32 	%r27, %r26, 1;
	mov.f32 	%f177, %f18;
	setp.gt.ftz.f32 	%p73, %f177, %f170;
	selp.s32 	%r28, %r27, %r26, %p73;
	xor.b32 	%r29, %r28, 1;
	mov.f32 	%f178, %f20;
	setp.gt.ftz.f32 	%p74, %f178, %f170;
	selp.s32 	%r30, %r29, %r28, %p74;
	xor.b32 	%r31, %r30, 1;
	mov.f32 	%f179, %f22;
	setp.gt.ftz.f32 	%p75, %f179, %f170;
	selp.s32 	%r32, %r31, %r30, %p75;
	xor.b32 	%r33, %r32, 1;
	mov.f32 	%f180, %f24;
	setp.gt.ftz.f32 	%p76, %f180, %f170;
	selp.s32 	%r34, %r33, %r32, %p76;
	xor.b32 	%r35, %r34, 1;
	mov.f32 	%f181, %f26;
	setp.gt.ftz.f32 	%p77, %f181, %f170;
	selp.s32 	%r36, %r35, %r34, %p77;
	xor.b32 	%r37, %r36, 1;
	mov.f32 	%f182, %f28;
	setp.gt.ftz.f32 	%p78, %f182, %f170;
	selp.s32 	%r38, %r37, %r36, %p78;
	xor.b32 	%r39, %r38, 1;
	mov.f32 	%f183, %f30;
	setp.gt.ftz.f32 	%p79, %f183, %f170;
	selp.s32 	%r40, %r39, %r38, %p79;
	xor.b32 	%r41, %r40, 1;
	mov.f32 	%f184, %f32;
	setp.gt.ftz.f32 	%p80, %f184, %f170;
	selp.s32 	%r42, %r41, %r40, %p80;
	xor.b32 	%r43, %r42, 1;
	mov.f32 	%f185, %f34;
	setp.gt.ftz.f32 	%p81, %f185, %f170;
	selp.s32 	%r2, %r43, %r42, %p81;
	xor.b32 	%r44, %r2, 1;
	mov.f32 	%f186, %f36;
	setp.gt.ftz.f32 	%p82, %f186, %f170;
	selp.s32 	%r45, %r44, %r2, %p82;
	mov.u32 	%r46, 0;
	setp.ne.s32 	%p83, %r45, %r46;
	@%p83 bra 	$Lt_10_33794;
 //<loop> Part of loop body line 72, head labeled $Lt_10_33538
	.loc	18	76	0
	mov.u32 	%r47, 0;
	cvt.s64.s32 	%rd5, %r14;
	mul.wide.s32 	%rd6, %r14, 8;
	add.u64 	%rd7, %rd2, %rd6;
	st.u16 	[%rd7+6], %r47;
$Lt_10_33794:
 //<loop> Part of loop body line 72, head labeled $Lt_10_33538
	add.s32 	%r14, %r14, 1;
	mov.u32 	%r48, 8;
	setp.ne.s32 	%p84, %r14, %r48;
	@%p84 bra 	$Lt_10_33538;
$Lt_10_31490:
 //<loop> Part of loop body line 52, head labeled $Lt_10_23042
	.loc	18	80	0
	add.u64 	%rd2, %rd2, 64;
	mov.s32 	%r1, %r4;
	mov.u32 	%r49, 31;
	setp.le.s32 	%p85, %r4, %r49;
	@%p85 bra 	$Lt_10_23042;
	.loc	18	82	0
	ret;
$LDWend__Z13FillSegment32ILi16E7ushort4EvPT0_6float2P6float3:
	} // _Z13FillSegment32ILi16E7ushort4EvPT0_6float2P6float3

	// ---------------------------------------------------------------------
	// _Z13FillSegment32ILi8E7ushort4EvPT0_6float2P6float3
	// Demangles to: void FillSegment32<8, ushort4>(ushort4*, float2, float3*)
	//
	// What the code below does:
	//   1. For each of 8 edges (consecutive pairs of 12-byte records read
	//      through parmf3) compute one f32 "edge value"; edges that do not
	//      straddle y = py keep the sentinel -1.0.
	//   2. Walk 32 integer positions x = 0..31 in 4 groups of 8.  For every x
	//      count (mod 2) how many edge values are greater than x.  Where that
	//      parity is even, store a 16-bit zero at byte offset 6 of the 8-byte
	//      output element for x.
	//   Presumably this is an even-odd inside/outside test along a scanline
	//   that clears the 4th ushort component of outside pixels -- TODO confirm
	//   against the CUDA source (file index 18, lines 46-82 per the .loc info).
	//
	// Parameters:
	//   parmf1 (.u64)        base address of the output elements; element i is
	//                        at +8*i, only bytes [6,8) of an element are written
	//   parmf2 (8 bytes)     two f32: +0 -> px (%f2), +4 -> py (%f4)
	//   parmf3 (.u64)        base address of 12-byte records, read as f32 at
	//                        +0, +4, +8 of each record; records 0..8 are touched
	//                        (highest offset read is +100)
	// Returns: nothing; the only side effects are the st.u16 stores via parmf1.
	//
	// Register roles:
	//   %f6,%f8,...,%f20     edge values for edges 0..7 (sentinel -1.0)
	//   %r1                  x of the first element of the current group (0,8,16,24)
	//   %rd2                 address of the first element of the current group
	//   %r2                  crossing parity for x = %r1
	//   %f81                 smallest edge value greater than %r1, capped at 100000
	//   %r14                 index 1..7 inside the current group (inner loop)
	// ---------------------------------------------------------------------
	.visible .func _Z13FillSegment32ILi8E7ushort4EvPT0_6float2P6float3 (.param .u64 __cudaparmf1__Z13FillSegment32ILi8E7ushort4EvPT0_6float2P6float3, .param .align 8 .b8 __cudaparmf2__Z13FillSegment32ILi8E7ushort4EvPT0_6float2P6float3[8], .param .u64 __cudaparmf3__Z13FillSegment32ILi8E7ushort4EvPT0_6float2P6float3)
	{
	.reg .u32 %r<35>;
	.reg .u64 %rd<9>;
	.reg .f32 %f<100>;
	.reg .pred %p<47>;
	// __cuda_local_var_91085_8_non_const_dist = 0
	.loc	18	46	0
$LDWbegin__Z13FillSegment32ILi8E7ushort4EvPT0_6float2P6float3:
	// Unpack the parameters: %rd2 = output base, %f2 = px, %f4 = py, %rd4 = record base.
	ld.param.u64 	%rd1, [__cudaparmf1__Z13FillSegment32ILi8E7ushort4EvPT0_6float2P6float3];
	mov.s64 	%rd2, %rd1;
	ld.param.f32 	%f1, [__cudaparmf2__Z13FillSegment32ILi8E7ushort4EvPT0_6float2P6float3+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf2__Z13FillSegment32ILi8E7ushort4EvPT0_6float2P6float3+4];
	mov.f32 	%f4, %f3;
	ld.param.u64 	%rd3, [__cudaparmf3__Z13FillSegment32ILi8E7ushort4EvPT0_6float2P6float3];
	mov.s64 	%rd4, %rd3;
	.loc	18	51	0
	// Initialise all 8 edge values to the sentinel -1.0.  The positions tested
	// later are all >= 0, so a sentinel never satisfies "x < value" and a
	// skipped edge never toggles the parity.
	mov.f32 	%f5, 0fbf800000;     	// -1
	mov.f32 	%f6, %f5;
	mov.f32 	%f7, 0fbf800000;     	// -1
	mov.f32 	%f8, %f7;
	mov.f32 	%f9, 0fbf800000;     	// -1
	mov.f32 	%f10, %f9;
	mov.f32 	%f11, 0fbf800000;    	// -1
	mov.f32 	%f12, %f11;
	mov.f32 	%f13, 0fbf800000;    	// -1
	mov.f32 	%f14, %f13;
	mov.f32 	%f15, 0fbf800000;    	// -1
	mov.f32 	%f16, %f15;
	mov.f32 	%f17, 0fbf800000;    	// -1
	mov.f32 	%f18, %f17;
	mov.f32 	%f19, 0fbf800000;    	// -1
	mov.f32 	%f20, %f19;
	.loc	18	24	0
	// ---- Edge 0 (records 0 and 1) -> %f6 --------------------------------
	// %f21 = record0[+4], %f22 = record1[+4] (the two end "y" values).
	// %p3 = (y1 > py) XOR (y0 <= py).  %p3 is false exactly when
	// (y0 <= py < y1) or (y1 <= py < y0), i.e. when the edge straddles py;
	// otherwise the edge is skipped and %f6 keeps the sentinel.
	ld.f32 	%f21, [%rd4+4];
	ld.f32 	%f22, [%rd4+16];
	setp.gt.ftz.f32 	%p1, %f22, %f4;
	setp.le.ftz.f32 	%p2, %f21, %f4;
	xor.pred 	%p3, %p1, %p2;
	@%p3 bra 	$Lt_11_14338;
	.loc	18	41	0
	// Edge value = record0[+8] * (py - y0) - (px - record0[+0]).
	// NOTE(review): record[+8] looks like a precomputed inverse slope, making
	// this the x of the crossing relative to px -- confirm with the producer
	// of the float3 array.
	ld.f32 	%f23, [%rd4+0];
	sub.ftz.f32 	%f24, %f2, %f23;
	ld.f32 	%f25, [%rd4+8];
	sub.ftz.f32 	%f26, %f4, %f21;
	mul.ftz.f32 	%f27, %f25, %f26;
	sub.ftz.f32 	%f28, %f27, %f24;
	mov.f32 	%f6, %f28;
$Lt_11_14338:
	// ---- Edge 1 (records 1 and 2) -> %f8; same test and formula ----------
	ld.f32 	%f29, [%rd4+28];
	setp.gt.ftz.f32 	%p4, %f29, %f4;
	setp.le.ftz.f32 	%p5, %f22, %f4;
	xor.pred 	%p6, %p4, %p5;
	@%p6 bra 	$Lt_11_14850;
	ld.f32 	%f30, [%rd4+12];
	sub.ftz.f32 	%f31, %f2, %f30;
	ld.f32 	%f32, [%rd4+20];
	sub.ftz.f32 	%f33, %f4, %f22;
	mul.ftz.f32 	%f34, %f32, %f33;
	sub.ftz.f32 	%f35, %f34, %f31;
	mov.f32 	%f8, %f35;
$Lt_11_14850:
	// ---- Edge 2 (records 2 and 3) -> %f10 --------------------------------
	ld.f32 	%f36, [%rd4+40];
	setp.gt.ftz.f32 	%p7, %f36, %f4;
	setp.le.ftz.f32 	%p8, %f29, %f4;
	xor.pred 	%p9, %p7, %p8;
	@%p9 bra 	$Lt_11_15362;
	ld.f32 	%f37, [%rd4+24];
	sub.ftz.f32 	%f38, %f2, %f37;
	ld.f32 	%f39, [%rd4+32];
	sub.ftz.f32 	%f40, %f4, %f29;
	mul.ftz.f32 	%f41, %f39, %f40;
	sub.ftz.f32 	%f42, %f41, %f38;
	mov.f32 	%f10, %f42;
$Lt_11_15362:
	// ---- Edge 3 (records 3 and 4) -> %f12 --------------------------------
	ld.f32 	%f43, [%rd4+52];
	setp.gt.ftz.f32 	%p10, %f43, %f4;
	setp.le.ftz.f32 	%p11, %f36, %f4;
	xor.pred 	%p12, %p10, %p11;
	@%p12 bra 	$Lt_11_15874;
	ld.f32 	%f44, [%rd4+36];
	sub.ftz.f32 	%f45, %f2, %f44;
	ld.f32 	%f46, [%rd4+44];
	sub.ftz.f32 	%f47, %f4, %f36;
	mul.ftz.f32 	%f48, %f46, %f47;
	sub.ftz.f32 	%f49, %f48, %f45;
	mov.f32 	%f12, %f49;
$Lt_11_15874:
	// ---- Edge 4 (records 4 and 5) -> %f14 --------------------------------
	ld.f32 	%f50, [%rd4+64];
	setp.gt.ftz.f32 	%p13, %f50, %f4;
	setp.le.ftz.f32 	%p14, %f43, %f4;
	xor.pred 	%p15, %p13, %p14;
	@%p15 bra 	$Lt_11_16386;
	ld.f32 	%f51, [%rd4+48];
	sub.ftz.f32 	%f52, %f2, %f51;
	ld.f32 	%f53, [%rd4+56];
	sub.ftz.f32 	%f54, %f4, %f43;
	mul.ftz.f32 	%f55, %f53, %f54;
	sub.ftz.f32 	%f56, %f55, %f52;
	mov.f32 	%f14, %f56;
$Lt_11_16386:
	// ---- Edge 5 (records 5 and 6) -> %f16 --------------------------------
	ld.f32 	%f57, [%rd4+76];
	setp.gt.ftz.f32 	%p16, %f57, %f4;
	setp.le.ftz.f32 	%p17, %f50, %f4;
	xor.pred 	%p18, %p16, %p17;
	@%p18 bra 	$Lt_11_16898;
	ld.f32 	%f58, [%rd4+60];
	sub.ftz.f32 	%f59, %f2, %f58;
	ld.f32 	%f60, [%rd4+68];
	sub.ftz.f32 	%f61, %f4, %f50;
	mul.ftz.f32 	%f62, %f60, %f61;
	sub.ftz.f32 	%f63, %f62, %f59;
	mov.f32 	%f16, %f63;
$Lt_11_16898:
	// ---- Edge 6 (records 6 and 7) -> %f18 --------------------------------
	ld.f32 	%f64, [%rd4+88];
	setp.gt.ftz.f32 	%p19, %f64, %f4;
	setp.le.ftz.f32 	%p20, %f57, %f4;
	xor.pred 	%p21, %p19, %p20;
	@%p21 bra 	$Lt_11_17410;
	ld.f32 	%f65, [%rd4+72];
	sub.ftz.f32 	%f66, %f2, %f65;
	ld.f32 	%f67, [%rd4+80];
	sub.ftz.f32 	%f68, %f4, %f57;
	mul.ftz.f32 	%f69, %f67, %f68;
	sub.ftz.f32 	%f70, %f69, %f66;
	mov.f32 	%f18, %f70;
$Lt_11_17410:
	// ---- Edge 7 (records 7 and 8) -> %f20 --------------------------------
	ld.f32 	%f71, [%rd4+100];
	setp.gt.ftz.f32 	%p22, %f71, %f4;
	setp.le.ftz.f32 	%p23, %f64, %f4;
	xor.pred 	%p24, %p22, %p23;
	@%p24 bra 	$Lt_11_17922;
	ld.f32 	%f72, [%rd4+84];
	sub.ftz.f32 	%f73, %f2, %f72;
	ld.f32 	%f74, [%rd4+92];
	sub.ftz.f32 	%f75, %f4, %f64;
	mul.ftz.f32 	%f76, %f74, %f75;
	sub.ftz.f32 	%f77, %f76, %f73;
	mov.f32 	%f20, %f77;
$Lt_11_17922:
	.loc	18	52	0
	// ==== Outer loop: %r1 = 0, 8, 16, 24 (one group of 8 elements each) ====
	mov.s32 	%r1, 0;
$Lt_11_18946:
 //<loop> Loop body line 52, nesting depth: 1, iterations: 4
	// %f78 = (float)%r1.  Edge 0 is folded into the initialisation:
	//   %r2  (parity) = 1 if %f78 < edge0 else 0
	//   %f81 (nearest) = min(edge0, 100000) if %f78 < edge0 else 100000
	cvt.rn.f32.s32 	%f78, %r1;
	mov.f32 	%f79, %f6;
	setp.lt.ftz.f32 	%p25, %f78, %f79;
	@!%p25 bra 	$Lt_11_19458;
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	.loc	18	62	0
	mov.f32 	%f80, 0f47c35000;    	// 100000
	min.ftz.f32 	%f81, %f79, %f80;
	mov.s32 	%r2, 1;
	bra.uni 	$Lt_11_19202;
$Lt_11_19458:
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	mov.s32 	%r2, 0;
	mov.f32 	%f81, 0f47c35000;    	// 100000
$Lt_11_19202:
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	// Edges 1..7: whenever %f78 < edge value, flip the parity and keep the
	// running minimum of those edge values in %f81.
	mov.f32 	%f82, %f8;
	setp.lt.ftz.f32 	%p26, %f78, %f82;
	@!%p26 bra 	$Lt_11_19714;
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f81, %f81, %f82;
$Lt_11_19714:
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	mov.f32 	%f83, %f10;
	setp.lt.ftz.f32 	%p27, %f78, %f83;
	@!%p27 bra 	$Lt_11_20226;
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f81, %f81, %f83;
$Lt_11_20226:
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	mov.f32 	%f84, %f12;
	setp.lt.ftz.f32 	%p28, %f78, %f84;
	@!%p28 bra 	$Lt_11_20738;
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f81, %f81, %f84;
$Lt_11_20738:
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	mov.f32 	%f85, %f14;
	setp.lt.ftz.f32 	%p29, %f78, %f85;
	@!%p29 bra 	$Lt_11_21250;
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f81, %f81, %f85;
$Lt_11_21250:
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	mov.f32 	%f86, %f16;
	setp.lt.ftz.f32 	%p30, %f78, %f86;
	@!%p30 bra 	$Lt_11_21762;
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f81, %f81, %f86;
$Lt_11_21762:
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	mov.f32 	%f87, %f18;
	setp.lt.ftz.f32 	%p31, %f78, %f87;
	@!%p31 bra 	$Lt_11_22274;
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f81, %f81, %f87;
$Lt_11_22274:
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	mov.f32 	%f88, %f20;
	setp.lt.ftz.f32 	%p32, %f78, %f88;
	@!%p32 bra 	$Lt_11_22786;
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f81, %f81, %f88;
$Lt_11_22786:
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	// %p33 = parity at x = %r1 is even.
	// %r4  = %r1 + 8 (first x of the next group), %f89 = (float)%r4.
	// %p34 = (%f89 <= %f81): every edge value that exceeds %r1 also reaches
	//        %r1 + 8, so all 8 positions of this group share the parity of
	//        %r1 and the group can be handled without per-element tests.
	mov.s32 	%r3, 0;
	setp.eq.s32 	%p33, %r2, %r3;
	add.s32 	%r4, %r1, 8;
	cvt.rn.f32.s32 	%f89, %r4;
	setp.le.ftz.f32 	%p34, %f89, %f81;
	@!%p34 bra 	$Lt_11_23554;
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	// Uniform group with odd parity: nothing to clear, go to the next group.
	@!%p33 bra 	$Lt_11_23298;
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	.loc	18	68	0
	// Uniform group with even parity: zero the u16 at byte offset 6 of all
	// 8 elements (offsets 6, 14, ..., 62).  st.u16 stores the low 16 bits of
	// the 32-bit source register.
	mov.u32 	%r5, 0;
	st.u16 	[%rd2+6], %r5;
	mov.u32 	%r6, 0;
	st.u16 	[%rd2+14], %r6;
	mov.u32 	%r7, 0;
	st.u16 	[%rd2+22], %r7;
	mov.u32 	%r8, 0;
	st.u16 	[%rd2+30], %r8;
	mov.u32 	%r9, 0;
	st.u16 	[%rd2+38], %r9;
	mov.u32 	%r10, 0;
	st.u16 	[%rd2+46], %r10;
	mov.u32 	%r11, 0;
	st.u16 	[%rd2+54], %r11;
	mov.u32 	%r12, 0;
	st.u16 	[%rd2+62], %r12;
	bra.uni 	$Lt_11_23298;
$Lt_11_23554:
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	// Mixed group: element 0 uses the parity already computed for %r1 ...
	@!%p33 bra 	$Lt_11_24322;
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	.loc	18	72	0
	mov.u32 	%r13, 0;
	st.u16 	[%rd2+6], %r13;
$Lt_11_24322:
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	// ... and elements 1..7 are tested one by one in the inner loop.
	mov.s32 	%r14, 1;
$Lt_11_25346:
 //<loop> Loop body line 72, nesting depth: 2, iterations: 7
	.loc	18	75	0
	// %f90 = (float)(%r1 + %r14) is the position under test.  The parity of
	// "edge value > %f90" over all 8 edges is rebuilt from scratch with a
	// branch-free chain: each selp picks the toggled value (previous ^ 1)
	// when the edge's predicate is true, otherwise the previous value.
	// Chain order is edge 0,1,2,3,4,5,6,7; final parity ends up in %r29.
	add.s32 	%r15, %r14, %r1;
	cvt.rn.f32.s32 	%f90, %r15;
	mov.f32 	%f91, %f14;
	setp.gt.ftz.f32 	%p35, %f91, %f90;
	mov.f32 	%f92, %f12;
	setp.gt.ftz.f32 	%p36, %f92, %f90;
	mov.f32 	%f93, %f10;
	setp.gt.ftz.f32 	%p37, %f93, %f90;
	mov.f32 	%f94, %f8;
	setp.gt.ftz.f32 	%p38, %f94, %f90;
	mov.f32 	%f95, %f6;
	setp.gt.ftz.f32 	%p39, %f95, %f90;
	selp.s32 	%r16, 1, 0, %p39;
	xor.b32 	%r17, %r16, 1;
	selp.s32 	%r18, %r17, %r16, %p38;
	xor.b32 	%r19, %r18, 1;
	selp.s32 	%r20, %r19, %r18, %p37;
	xor.b32 	%r21, %r20, 1;
	selp.s32 	%r22, %r21, %r20, %p36;
	xor.b32 	%r23, %r22, 1;
	selp.s32 	%r24, %r23, %r22, %p35;
	xor.b32 	%r25, %r24, 1;
	mov.f32 	%f96, %f16;
	setp.gt.ftz.f32 	%p40, %f96, %f90;
	selp.s32 	%r26, %r25, %r24, %p40;
	xor.b32 	%r27, %r26, 1;
	mov.f32 	%f97, %f18;
	setp.gt.ftz.f32 	%p41, %f97, %f90;
	// The compiler reuses %r2 as a temporary here; that is harmless because
	// %r2 is re-initialised at the top of every outer-loop iteration.
	selp.s32 	%r2, %r27, %r26, %p41;
	xor.b32 	%r28, %r2, 1;
	mov.f32 	%f98, %f20;
	setp.gt.ftz.f32 	%p42, %f98, %f90;
	selp.s32 	%r29, %r28, %r2, %p42;
	mov.u32 	%r30, 0;
	setp.ne.s32 	%p43, %r29, %r30;
	// Odd parity: leave the element untouched.
	@%p43 bra 	$Lt_11_25602;
 //<loop> Part of loop body line 72, head labeled $Lt_11_25346
	.loc	18	76	0
	// Even parity: zero the u16 at (%rd2 + 8*%r14 + 6).
	// Note: %rd5 is written but never read; mul.wide.s32 already produces
	// the sign-extended 64-bit byte offset.
	mov.u32 	%r31, 0;
	cvt.s64.s32 	%rd5, %r14;
	mul.wide.s32 	%rd6, %r14, 8;
	add.u64 	%rd7, %rd2, %rd6;
	st.u16 	[%rd7+6], %r31;
$Lt_11_25602:
 //<loop> Part of loop body line 72, head labeled $Lt_11_25346
	// Inner loop runs for %r14 = 1..7.
	add.s32 	%r14, %r14, 1;
	mov.u32 	%r32, 8;
	setp.ne.s32 	%p44, %r14, %r32;
	@%p44 bra 	$Lt_11_25346;
$Lt_11_23298:
 //<loop> Part of loop body line 52, head labeled $Lt_11_18946
	.loc	18	80	0
	// Advance to the next group: 8 elements * 8 bytes = 64 bytes, x += 8.
	// Loop while the next group start (%r4) is <= 31, i.e. 4 groups in total.
	add.u64 	%rd2, %rd2, 64;
	mov.s32 	%r1, %r4;
	mov.u32 	%r33, 31;
	setp.le.s32 	%p45, %r4, %r33;
	@%p45 bra 	$Lt_11_18946;
	.loc	18	82	0
	ret;
$LDWend__Z13FillSegment32ILi8E7ushort4EvPT0_6float2P6float3:
	} // _Z13FillSegment32ILi8E7ushort4EvPT0_6float2P6float3

	// ---------------------------------------------------------------------
	// _Z13FillSegment32ILi4E7ushort4EvPT0_6float2P6float3
	// Demangles to: void FillSegment32<4, ushort4>(ushort4*, float2, float3*)
	//
	// What the code below does:
	//   1. For each of 4 edges (consecutive pairs of 12-byte records read
	//      through parmf3) compute one f32 "edge value"; edges that do not
	//      straddle y = py keep the sentinel -1.0.
	//   2. Walk 32 integer positions x = 0..31 in 4 groups of 8.  For every x
	//      count (mod 2) how many edge values are greater than x.  Where that
	//      parity is even, store a 16-bit zero at byte offset 6 of the 8-byte
	//      output element for x.
	//   Presumably this is an even-odd inside/outside test along a scanline
	//   that clears the 4th ushort component of outside pixels -- TODO confirm
	//   against the CUDA source (file index 18, lines 46-82 per the .loc info).
	//
	// Parameters:
	//   parmf1 (.u64)        base address of the output elements; element i is
	//                        at +8*i, only bytes [6,8) of an element are written
	//   parmf2 (8 bytes)     two f32: +0 -> px (%f2), +4 -> py (%f4)
	//   parmf3 (.u64)        base address of 12-byte records, read as f32 at
	//                        +0, +4, +8 of each record; records 0..4 are touched
	//                        (highest offset read is +52)
	// Returns: nothing; the only side effects are the st.u16 stores via parmf1.
	//
	// Register roles:
	//   %f6,%f8,%f10,%f12    edge values for edges 0..3 (sentinel -1.0)
	//   %r1                  x of the first element of the current group (0,8,16,24)
	//   %rd2                 address of the first element of the current group
	//   %r2                  crossing parity for x = %r1
	//   %f45                 smallest edge value greater than %r1, capped at 100000
	//   %r14                 index 1..7 inside the current group (inner loop)
	// ---------------------------------------------------------------------
	.visible .func _Z13FillSegment32ILi4E7ushort4EvPT0_6float2P6float3 (.param .u64 __cudaparmf1__Z13FillSegment32ILi4E7ushort4EvPT0_6float2P6float3, .param .align 8 .b8 __cudaparmf2__Z13FillSegment32ILi4E7ushort4EvPT0_6float2P6float3[8], .param .u64 __cudaparmf3__Z13FillSegment32ILi4E7ushort4EvPT0_6float2P6float3)
	{
	.reg .u32 %r<28>;
	.reg .u64 %rd<9>;
	.reg .f32 %f<56>;
	.reg .pred %p<27>;
	// __cuda_local_var_91085_8_non_const_dist = 0
	.loc	18	46	0
$LDWbegin__Z13FillSegment32ILi4E7ushort4EvPT0_6float2P6float3:
	// Unpack the parameters: %rd2 = output base, %f2 = px, %f4 = py, %rd4 = record base.
	ld.param.u64 	%rd1, [__cudaparmf1__Z13FillSegment32ILi4E7ushort4EvPT0_6float2P6float3];
	mov.s64 	%rd2, %rd1;
	ld.param.f32 	%f1, [__cudaparmf2__Z13FillSegment32ILi4E7ushort4EvPT0_6float2P6float3+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf2__Z13FillSegment32ILi4E7ushort4EvPT0_6float2P6float3+4];
	mov.f32 	%f4, %f3;
	ld.param.u64 	%rd3, [__cudaparmf3__Z13FillSegment32ILi4E7ushort4EvPT0_6float2P6float3];
	mov.s64 	%rd4, %rd3;
	.loc	18	51	0
	// Initialise all 4 edge values to the sentinel -1.0.  The positions tested
	// later are all >= 0, so a sentinel never satisfies "x < value" and a
	// skipped edge never toggles the parity.
	mov.f32 	%f5, 0fbf800000;     	// -1
	mov.f32 	%f6, %f5;
	mov.f32 	%f7, 0fbf800000;     	// -1
	mov.f32 	%f8, %f7;
	mov.f32 	%f9, 0fbf800000;     	// -1
	mov.f32 	%f10, %f9;
	mov.f32 	%f11, 0fbf800000;    	// -1
	mov.f32 	%f12, %f11;
	.loc	18	24	0
	// ---- Edge 0 (records 0 and 1) -> %f6 --------------------------------
	// %f13 = record0[+4], %f14 = record1[+4] (the two end "y" values).
	// %p3 = (y1 > py) XOR (y0 <= py).  %p3 is false exactly when
	// (y0 <= py < y1) or (y1 <= py < y0), i.e. when the edge straddles py;
	// otherwise the edge is skipped and %f6 keeps the sentinel.
	ld.f32 	%f13, [%rd4+4];
	ld.f32 	%f14, [%rd4+16];
	setp.gt.ftz.f32 	%p1, %f14, %f4;
	setp.le.ftz.f32 	%p2, %f13, %f4;
	xor.pred 	%p3, %p1, %p2;
	@%p3 bra 	$Lt_12_14338;
	.loc	18	41	0
	// Edge value = record0[+8] * (py - y0) - (px - record0[+0]).
	// NOTE(review): record[+8] looks like a precomputed inverse slope, making
	// this the x of the crossing relative to px -- confirm with the producer
	// of the float3 array.
	ld.f32 	%f15, [%rd4+0];
	sub.ftz.f32 	%f16, %f2, %f15;
	ld.f32 	%f17, [%rd4+8];
	sub.ftz.f32 	%f18, %f4, %f13;
	mul.ftz.f32 	%f19, %f17, %f18;
	sub.ftz.f32 	%f20, %f19, %f16;
	mov.f32 	%f6, %f20;
$Lt_12_14338:
	// ---- Edge 1 (records 1 and 2) -> %f8; same test and formula ----------
	ld.f32 	%f21, [%rd4+28];
	setp.gt.ftz.f32 	%p4, %f21, %f4;
	setp.le.ftz.f32 	%p5, %f14, %f4;
	xor.pred 	%p6, %p4, %p5;
	@%p6 bra 	$Lt_12_14850;
	ld.f32 	%f22, [%rd4+12];
	sub.ftz.f32 	%f23, %f2, %f22;
	ld.f32 	%f24, [%rd4+20];
	sub.ftz.f32 	%f25, %f4, %f14;
	mul.ftz.f32 	%f26, %f24, %f25;
	sub.ftz.f32 	%f27, %f26, %f23;
	mov.f32 	%f8, %f27;
$Lt_12_14850:
	// ---- Edge 2 (records 2 and 3) -> %f10 --------------------------------
	ld.f32 	%f28, [%rd4+40];
	setp.gt.ftz.f32 	%p7, %f28, %f4;
	setp.le.ftz.f32 	%p8, %f21, %f4;
	xor.pred 	%p9, %p7, %p8;
	@%p9 bra 	$Lt_12_15362;
	ld.f32 	%f29, [%rd4+24];
	sub.ftz.f32 	%f30, %f2, %f29;
	ld.f32 	%f31, [%rd4+32];
	sub.ftz.f32 	%f32, %f4, %f21;
	mul.ftz.f32 	%f33, %f31, %f32;
	sub.ftz.f32 	%f34, %f33, %f30;
	mov.f32 	%f10, %f34;
$Lt_12_15362:
	// ---- Edge 3 (records 3 and 4) -> %f12 --------------------------------
	ld.f32 	%f35, [%rd4+52];
	setp.gt.ftz.f32 	%p10, %f35, %f4;
	setp.le.ftz.f32 	%p11, %f28, %f4;
	xor.pred 	%p12, %p10, %p11;
	@%p12 bra 	$Lt_12_15874;
	ld.f32 	%f36, [%rd4+36];
	sub.ftz.f32 	%f37, %f2, %f36;
	ld.f32 	%f38, [%rd4+44];
	sub.ftz.f32 	%f39, %f4, %f28;
	mul.ftz.f32 	%f40, %f38, %f39;
	sub.ftz.f32 	%f41, %f40, %f37;
	mov.f32 	%f12, %f41;
$Lt_12_15874:
	.loc	18	52	0
	// ==== Outer loop: %r1 = 0, 8, 16, 24 (one group of 8 elements each) ====
	mov.s32 	%r1, 0;
$Lt_12_16898:
 //<loop> Loop body line 52, nesting depth: 1, iterations: 4
	// %f42 = (float)%r1.  Edge 0 is folded into the initialisation:
	//   %r2  (parity) = 1 if %f42 < edge0 else 0
	//   %f45 (nearest) = min(edge0, 100000) if %f42 < edge0 else 100000
	cvt.rn.f32.s32 	%f42, %r1;
	mov.f32 	%f43, %f6;
	setp.lt.ftz.f32 	%p13, %f42, %f43;
	@!%p13 bra 	$Lt_12_17410;
 //<loop> Part of loop body line 52, head labeled $Lt_12_16898
	.loc	18	62	0
	mov.f32 	%f44, 0f47c35000;    	// 100000
	min.ftz.f32 	%f45, %f43, %f44;
	mov.s32 	%r2, 1;
	bra.uni 	$Lt_12_17154;
$Lt_12_17410:
 //<loop> Part of loop body line 52, head labeled $Lt_12_16898
	mov.s32 	%r2, 0;
	mov.f32 	%f45, 0f47c35000;    	// 100000
$Lt_12_17154:
 //<loop> Part of loop body line 52, head labeled $Lt_12_16898
	// Edges 1..3: whenever %f42 < edge value, flip the parity and keep the
	// running minimum of those edge values in %f45.
	mov.f32 	%f46, %f8;
	setp.lt.ftz.f32 	%p14, %f42, %f46;
	@!%p14 bra 	$Lt_12_17666;
 //<loop> Part of loop body line 52, head labeled $Lt_12_16898
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f45, %f45, %f46;
$Lt_12_17666:
 //<loop> Part of loop body line 52, head labeled $Lt_12_16898
	mov.f32 	%f47, %f10;
	setp.lt.ftz.f32 	%p15, %f42, %f47;
	@!%p15 bra 	$Lt_12_18178;
 //<loop> Part of loop body line 52, head labeled $Lt_12_16898
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f45, %f45, %f47;
$Lt_12_18178:
 //<loop> Part of loop body line 52, head labeled $Lt_12_16898
	mov.f32 	%f48, %f12;
	setp.lt.ftz.f32 	%p16, %f42, %f48;
	@!%p16 bra 	$Lt_12_18690;
 //<loop> Part of loop body line 52, head labeled $Lt_12_16898
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f45, %f45, %f48;
$Lt_12_18690:
 //<loop> Part of loop body line 52, head labeled $Lt_12_16898
	// %p17 = parity at x = %r1 is even.
	// %r4  = %r1 + 8 (first x of the next group), %f49 = (float)%r4.
	// %p18 = (%f49 <= %f45): every edge value that exceeds %r1 also reaches
	//        %r1 + 8, so all 8 positions of this group share the parity of
	//        %r1 and the group can be handled without per-element tests.
	mov.s32 	%r3, 0;
	setp.eq.s32 	%p17, %r2, %r3;
	add.s32 	%r4, %r1, 8;
	cvt.rn.f32.s32 	%f49, %r4;
	setp.le.ftz.f32 	%p18, %f49, %f45;
	@!%p18 bra 	$Lt_12_19458;
 //<loop> Part of loop body line 52, head labeled $Lt_12_16898
	// Uniform group with odd parity: nothing to clear, go to the next group.
	@!%p17 bra 	$Lt_12_19202;
 //<loop> Part of loop body line 52, head labeled $Lt_12_16898
	.loc	18	68	0
	// Uniform group with even parity: zero the u16 at byte offset 6 of all
	// 8 elements (offsets 6, 14, ..., 62).  st.u16 stores the low 16 bits of
	// the 32-bit source register.
	mov.u32 	%r5, 0;
	st.u16 	[%rd2+6], %r5;
	mov.u32 	%r6, 0;
	st.u16 	[%rd2+14], %r6;
	mov.u32 	%r7, 0;
	st.u16 	[%rd2+22], %r7;
	mov.u32 	%r8, 0;
	st.u16 	[%rd2+30], %r8;
	mov.u32 	%r9, 0;
	st.u16 	[%rd2+38], %r9;
	mov.u32 	%r10, 0;
	st.u16 	[%rd2+46], %r10;
	mov.u32 	%r11, 0;
	st.u16 	[%rd2+54], %r11;
	mov.u32 	%r12, 0;
	st.u16 	[%rd2+62], %r12;
	bra.uni 	$Lt_12_19202;
$Lt_12_19458:
 //<loop> Part of loop body line 52, head labeled $Lt_12_16898
	// Mixed group: element 0 uses the parity already computed for %r1 ...
	@!%p17 bra 	$Lt_12_20226;
 //<loop> Part of loop body line 52, head labeled $Lt_12_16898
	.loc	18	72	0
	mov.u32 	%r13, 0;
	st.u16 	[%rd2+6], %r13;
$Lt_12_20226:
 //<loop> Part of loop body line 52, head labeled $Lt_12_16898
	// ... and elements 1..7 are tested one by one in the inner loop.
	mov.s32 	%r14, 1;
$Lt_12_21250:
 //<loop> Loop body line 72, nesting depth: 2, iterations: 7
	// %f50 = (float)(%r1 + %r14) is the position under test.  The parity of
	// "edge value > %f50" over all 4 edges is rebuilt from scratch with a
	// branch-free chain: each selp picks the toggled value (previous ^ 1)
	// when the edge's predicate is true, otherwise the previous value.
	// Chain order is edge 0,1,2,3; final parity ends up in %r22.
	add.s32 	%r15, %r14, %r1;
	cvt.rn.f32.s32 	%f50, %r15;
	mov.f32 	%f51, %f10;
	setp.gt.ftz.f32 	%p19, %f51, %f50;
	mov.f32 	%f52, %f8;
	setp.gt.ftz.f32 	%p20, %f52, %f50;
	mov.f32 	%f53, %f6;
	setp.gt.ftz.f32 	%p21, %f53, %f50;
	selp.s32 	%r16, 1, 0, %p21;
	xor.b32 	%r17, %r16, 1;
	selp.s32 	%r18, %r17, %r16, %p20;
	xor.b32 	%r19, %r18, 1;
	selp.s32 	%r20, %r19, %r18, %p19;
	xor.b32 	%r21, %r20, 1;
	mov.f32 	%f54, %f12;
	setp.gt.ftz.f32 	%p22, %f54, %f50;
	selp.s32 	%r22, %r21, %r20, %p22;
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p23, %r22, %r23;
	// Odd parity: leave the element untouched.
	@%p23 bra 	$Lt_12_21506;
 //<loop> Part of loop body line 72, head labeled $Lt_12_21250
	.loc	18	76	0
	// Even parity: zero the u16 at (%rd2 + 8*%r14 + 6).
	// Note: %rd5 is written but never read; mul.wide.s32 already produces
	// the sign-extended 64-bit byte offset.
	mov.u32 	%r24, 0;
	cvt.s64.s32 	%rd5, %r14;
	mul.wide.s32 	%rd6, %r14, 8;
	add.u64 	%rd7, %rd2, %rd6;
	st.u16 	[%rd7+6], %r24;
$Lt_12_21506:
 //<loop> Part of loop body line 72, head labeled $Lt_12_21250
	// Inner loop runs for %r14 = 1..7.
	add.s32 	%r14, %r14, 1;
	mov.u32 	%r25, 8;
	setp.ne.s32 	%p24, %r14, %r25;
	@%p24 bra 	$Lt_12_21250;
$Lt_12_19202:
 //<loop> Part of loop body line 52, head labeled $Lt_12_16898
	.loc	18	80	0
	// Advance to the next group: 8 elements * 8 bytes = 64 bytes, x += 8.
	// Loop while the next group start (%r4) is <= 31, i.e. 4 groups in total.
	add.u64 	%rd2, %rd2, 64;
	mov.s32 	%r1, %r4;
	mov.u32 	%r26, 31;
	setp.le.s32 	%p25, %r4, %r26;
	@%p25 bra 	$Lt_12_16898;
	.loc	18	82	0
	ret;
$LDWend__Z13FillSegment32ILi4E7ushort4EvPT0_6float2P6float3:
	} // _Z13FillSegment32ILi4E7ushort4EvPT0_6float2P6float3

	.visible .func _Z13FillSegment32ILi16E6float4EvPT0_6float2P6float3 (.param .u64 __cudaparmf1__Z13FillSegment32ILi16E6float4EvPT0_6float2P6float3, .param .align 8 .b8 __cudaparmf2__Z13FillSegment32ILi16E6float4EvPT0_6float2P6float3[8], .param .u64 __cudaparmf3__Z13FillSegment32ILi16E6float4EvPT0_6float2P6float3)
	{
	.reg .u32 %r<41>;
	.reg .u64 %rd<9>;
	.reg .f32 %f<198>;
	.reg .pred %p<87>;
	// __cuda_local_var_91085_8_non_const_dist = 0
	.loc	18	46	0
$LDWbegin__Z13FillSegment32ILi16E6float4EvPT0_6float2P6float3:
	ld.param.u64 	%rd1, [__cudaparmf1__Z13FillSegment32ILi16E6float4EvPT0_6float2P6float3];
	mov.s64 	%rd2, %rd1;
	ld.param.f32 	%f1, [__cudaparmf2__Z13FillSegment32ILi16E6float4EvPT0_6float2P6float3+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf2__Z13FillSegment32ILi16E6float4EvPT0_6float2P6float3+4];
	mov.f32 	%f4, %f3;
	ld.param.u64 	%rd3, [__cudaparmf3__Z13FillSegment32ILi16E6float4EvPT0_6float2P6float3];
	mov.s64 	%rd4, %rd3;
	.loc	18	51	0
	mov.f32 	%f5, 0fbf800000;     	// -1
	mov.f32 	%f6, %f5;
	mov.f32 	%f7, 0fbf800000;     	// -1
	mov.f32 	%f8, %f7;
	mov.f32 	%f9, 0fbf800000;     	// -1
	mov.f32 	%f10, %f9;
	mov.f32 	%f11, 0fbf800000;    	// -1
	mov.f32 	%f12, %f11;
	mov.f32 	%f13, 0fbf800000;    	// -1
	mov.f32 	%f14, %f13;
	mov.f32 	%f15, 0fbf800000;    	// -1
	mov.f32 	%f16, %f15;
	mov.f32 	%f17, 0fbf800000;    	// -1
	mov.f32 	%f18, %f17;
	mov.f32 	%f19, 0fbf800000;    	// -1
	mov.f32 	%f20, %f19;
	mov.f32 	%f21, 0fbf800000;    	// -1
	mov.f32 	%f22, %f21;
	mov.f32 	%f23, 0fbf800000;    	// -1
	mov.f32 	%f24, %f23;
	mov.f32 	%f25, 0fbf800000;    	// -1
	mov.f32 	%f26, %f25;
	mov.f32 	%f27, 0fbf800000;    	// -1
	mov.f32 	%f28, %f27;
	mov.f32 	%f29, 0fbf800000;    	// -1
	mov.f32 	%f30, %f29;
	mov.f32 	%f31, 0fbf800000;    	// -1
	mov.f32 	%f32, %f31;
	mov.f32 	%f33, 0fbf800000;    	// -1
	mov.f32 	%f34, %f33;
	mov.f32 	%f35, 0fbf800000;    	// -1
	mov.f32 	%f36, %f35;
	.loc	18	24	0
	ld.f32 	%f37, [%rd4+4];
	ld.f32 	%f38, [%rd4+16];
	setp.gt.ftz.f32 	%p1, %f38, %f4;
	setp.le.ftz.f32 	%p2, %f37, %f4;
	xor.pred 	%p3, %p1, %p2;
	@%p3 bra 	$Lt_13_14338;
	.loc	18	41	0
	ld.f32 	%f39, [%rd4+0];
	sub.ftz.f32 	%f40, %f2, %f39;
	ld.f32 	%f41, [%rd4+8];
	sub.ftz.f32 	%f42, %f4, %f37;
	mul.ftz.f32 	%f43, %f41, %f42;
	sub.ftz.f32 	%f44, %f43, %f40;
	mov.f32 	%f6, %f44;
$Lt_13_14338:
	ld.f32 	%f45, [%rd4+28];
	setp.gt.ftz.f32 	%p4, %f45, %f4;
	setp.le.ftz.f32 	%p5, %f38, %f4;
	xor.pred 	%p6, %p4, %p5;
	@%p6 bra 	$Lt_13_14850;
	ld.f32 	%f46, [%rd4+12];
	sub.ftz.f32 	%f47, %f2, %f46;
	ld.f32 	%f48, [%rd4+20];
	sub.ftz.f32 	%f49, %f4, %f38;
	mul.ftz.f32 	%f50, %f48, %f49;
	sub.ftz.f32 	%f51, %f50, %f47;
	mov.f32 	%f8, %f51;
$Lt_13_14850:
	ld.f32 	%f52, [%rd4+40];
	setp.gt.ftz.f32 	%p7, %f52, %f4;
	setp.le.ftz.f32 	%p8, %f45, %f4;
	xor.pred 	%p9, %p7, %p8;
	@%p9 bra 	$Lt_13_15362;
	ld.f32 	%f53, [%rd4+24];
	sub.ftz.f32 	%f54, %f2, %f53;
	ld.f32 	%f55, [%rd4+32];
	sub.ftz.f32 	%f56, %f4, %f45;
	mul.ftz.f32 	%f57, %f55, %f56;
	sub.ftz.f32 	%f58, %f57, %f54;
	mov.f32 	%f10, %f58;
$Lt_13_15362:
	ld.f32 	%f59, [%rd4+52];
	setp.gt.ftz.f32 	%p10, %f59, %f4;
	setp.le.ftz.f32 	%p11, %f52, %f4;
	xor.pred 	%p12, %p10, %p11;
	@%p12 bra 	$Lt_13_15874;
	ld.f32 	%f60, [%rd4+36];
	sub.ftz.f32 	%f61, %f2, %f60;
	ld.f32 	%f62, [%rd4+44];
	sub.ftz.f32 	%f63, %f4, %f52;
	mul.ftz.f32 	%f64, %f62, %f63;
	sub.ftz.f32 	%f65, %f64, %f61;
	mov.f32 	%f12, %f65;
$Lt_13_15874:
	ld.f32 	%f66, [%rd4+64];
	setp.gt.ftz.f32 	%p13, %f66, %f4;
	setp.le.ftz.f32 	%p14, %f59, %f4;
	xor.pred 	%p15, %p13, %p14;
	@%p15 bra 	$Lt_13_16386;
	ld.f32 	%f67, [%rd4+48];
	sub.ftz.f32 	%f68, %f2, %f67;
	ld.f32 	%f69, [%rd4+56];
	sub.ftz.f32 	%f70, %f4, %f59;
	mul.ftz.f32 	%f71, %f69, %f70;
	sub.ftz.f32 	%f72, %f71, %f68;
	mov.f32 	%f14, %f72;
$Lt_13_16386:
	ld.f32 	%f73, [%rd4+76];
	setp.gt.ftz.f32 	%p16, %f73, %f4;
	setp.le.ftz.f32 	%p17, %f66, %f4;
	xor.pred 	%p18, %p16, %p17;
	@%p18 bra 	$Lt_13_16898;
	ld.f32 	%f74, [%rd4+60];
	sub.ftz.f32 	%f75, %f2, %f74;
	ld.f32 	%f76, [%rd4+68];
	sub.ftz.f32 	%f77, %f4, %f66;
	mul.ftz.f32 	%f78, %f76, %f77;
	sub.ftz.f32 	%f79, %f78, %f75;
	mov.f32 	%f16, %f79;
$Lt_13_16898:
	ld.f32 	%f80, [%rd4+88];
	setp.gt.ftz.f32 	%p19, %f80, %f4;
	setp.le.ftz.f32 	%p20, %f73, %f4;
	xor.pred 	%p21, %p19, %p20;
	@%p21 bra 	$Lt_13_17410;
	ld.f32 	%f81, [%rd4+72];
	sub.ftz.f32 	%f82, %f2, %f81;
	ld.f32 	%f83, [%rd4+80];
	sub.ftz.f32 	%f84, %f4, %f73;
	mul.ftz.f32 	%f85, %f83, %f84;
	sub.ftz.f32 	%f86, %f85, %f82;
	mov.f32 	%f18, %f86;
$Lt_13_17410:
	ld.f32 	%f87, [%rd4+100];
	setp.gt.ftz.f32 	%p22, %f87, %f4;
	setp.le.ftz.f32 	%p23, %f80, %f4;
	xor.pred 	%p24, %p22, %p23;
	@%p24 bra 	$Lt_13_17922;
	ld.f32 	%f88, [%rd4+84];
	sub.ftz.f32 	%f89, %f2, %f88;
	ld.f32 	%f90, [%rd4+92];
	sub.ftz.f32 	%f91, %f4, %f80;
	mul.ftz.f32 	%f92, %f90, %f91;
	sub.ftz.f32 	%f93, %f92, %f89;
	mov.f32 	%f20, %f93;
$Lt_13_17922:
	ld.f32 	%f94, [%rd4+112];
	setp.gt.ftz.f32 	%p25, %f94, %f4;
	setp.le.ftz.f32 	%p26, %f87, %f4;
	xor.pred 	%p27, %p25, %p26;
	@%p27 bra 	$Lt_13_18434;
	ld.f32 	%f95, [%rd4+96];
	sub.ftz.f32 	%f96, %f2, %f95;
	ld.f32 	%f97, [%rd4+104];
	sub.ftz.f32 	%f98, %f4, %f87;
	mul.ftz.f32 	%f99, %f97, %f98;
	sub.ftz.f32 	%f100, %f99, %f96;
	mov.f32 	%f22, %f100;
$Lt_13_18434:
	ld.f32 	%f101, [%rd4+124];
	setp.gt.ftz.f32 	%p28, %f101, %f4;
	setp.le.ftz.f32 	%p29, %f94, %f4;
	xor.pred 	%p30, %p28, %p29;
	@%p30 bra 	$Lt_13_18946;
	ld.f32 	%f102, [%rd4+108];
	sub.ftz.f32 	%f103, %f2, %f102;
	ld.f32 	%f104, [%rd4+116];
	sub.ftz.f32 	%f105, %f4, %f94;
	mul.ftz.f32 	%f106, %f104, %f105;
	sub.ftz.f32 	%f107, %f106, %f103;
	mov.f32 	%f24, %f107;
$Lt_13_18946:
	ld.f32 	%f108, [%rd4+136];
	setp.gt.ftz.f32 	%p31, %f108, %f4;
	setp.le.ftz.f32 	%p32, %f101, %f4;
	xor.pred 	%p33, %p31, %p32;
	@%p33 bra 	$Lt_13_19458;
	ld.f32 	%f109, [%rd4+120];
	sub.ftz.f32 	%f110, %f2, %f109;
	ld.f32 	%f111, [%rd4+128];
	sub.ftz.f32 	%f112, %f4, %f101;
	mul.ftz.f32 	%f113, %f111, %f112;
	sub.ftz.f32 	%f114, %f113, %f110;
	mov.f32 	%f26, %f114;
$Lt_13_19458:
	ld.f32 	%f115, [%rd4+148];
	setp.gt.ftz.f32 	%p34, %f115, %f4;
	setp.le.ftz.f32 	%p35, %f108, %f4;
	xor.pred 	%p36, %p34, %p35;
	@%p36 bra 	$Lt_13_19970;
	ld.f32 	%f116, [%rd4+132];
	sub.ftz.f32 	%f117, %f2, %f116;
	ld.f32 	%f118, [%rd4+140];
	sub.ftz.f32 	%f119, %f4, %f108;
	mul.ftz.f32 	%f120, %f118, %f119;
	sub.ftz.f32 	%f121, %f120, %f117;
	mov.f32 	%f28, %f121;
$Lt_13_19970:
	ld.f32 	%f122, [%rd4+160];
	setp.gt.ftz.f32 	%p37, %f122, %f4;
	setp.le.ftz.f32 	%p38, %f115, %f4;
	xor.pred 	%p39, %p37, %p38;
	@%p39 bra 	$Lt_13_20482;
	ld.f32 	%f123, [%rd4+144];
	sub.ftz.f32 	%f124, %f2, %f123;
	ld.f32 	%f125, [%rd4+152];
	sub.ftz.f32 	%f126, %f4, %f115;
	mul.ftz.f32 	%f127, %f125, %f126;
	sub.ftz.f32 	%f128, %f127, %f124;
	mov.f32 	%f30, %f128;
$Lt_13_20482:
	ld.f32 	%f129, [%rd4+172];
	setp.gt.ftz.f32 	%p40, %f129, %f4;
	setp.le.ftz.f32 	%p41, %f122, %f4;
	xor.pred 	%p42, %p40, %p41;
	@%p42 bra 	$Lt_13_20994;
	ld.f32 	%f130, [%rd4+156];
	sub.ftz.f32 	%f131, %f2, %f130;
	ld.f32 	%f132, [%rd4+164];
	sub.ftz.f32 	%f133, %f4, %f122;
	mul.ftz.f32 	%f134, %f132, %f133;
	sub.ftz.f32 	%f135, %f134, %f131;
	mov.f32 	%f32, %f135;
$Lt_13_20994:
	ld.f32 	%f136, [%rd4+184];
	setp.gt.ftz.f32 	%p43, %f136, %f4;
	setp.le.ftz.f32 	%p44, %f129, %f4;
	xor.pred 	%p45, %p43, %p44;
	@%p45 bra 	$Lt_13_21506;
	ld.f32 	%f137, [%rd4+168];
	sub.ftz.f32 	%f138, %f2, %f137;
	ld.f32 	%f139, [%rd4+176];
	sub.ftz.f32 	%f140, %f4, %f129;
	mul.ftz.f32 	%f141, %f139, %f140;
	sub.ftz.f32 	%f142, %f141, %f138;
	mov.f32 	%f34, %f142;
$Lt_13_21506:
	ld.f32 	%f143, [%rd4+196];
	setp.gt.ftz.f32 	%p46, %f143, %f4;
	setp.le.ftz.f32 	%p47, %f136, %f4;
	xor.pred 	%p48, %p46, %p47;
	@%p48 bra 	$Lt_13_22018;
	ld.f32 	%f144, [%rd4+180];
	sub.ftz.f32 	%f145, %f2, %f144;
	ld.f32 	%f146, [%rd4+188];
	sub.ftz.f32 	%f147, %f4, %f136;
	mul.ftz.f32 	%f148, %f146, %f147;
	sub.ftz.f32 	%f149, %f148, %f145;
	mov.f32 	%f36, %f149;
$Lt_13_22018:
	.loc	18	52	0
	mov.s32 	%r1, 0;
$Lt_13_23042:
 //<loop> Loop body line 52, nesting depth: 1, iterations: 4
	cvt.rn.f32.s32 	%f150, %r1;
	mov.f32 	%f151, %f6;
	setp.lt.ftz.f32 	%p49, %f150, %f151;
	@!%p49 bra 	$Lt_13_23554;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	62	0
	mov.f32 	%f152, 0f47c35000;   	// 100000
	min.ftz.f32 	%f153, %f151, %f152;
	mov.s32 	%r2, 1;
	bra.uni 	$Lt_13_23298;
$Lt_13_23554:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.s32 	%r2, 0;
	mov.f32 	%f153, 0f47c35000;   	// 100000
$Lt_13_23298:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.f32 	%f154, %f8;
	setp.lt.ftz.f32 	%p50, %f150, %f154;
	@!%p50 bra 	$Lt_13_23810;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f154;
$Lt_13_23810:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.f32 	%f155, %f10;
	setp.lt.ftz.f32 	%p51, %f150, %f155;
	@!%p51 bra 	$Lt_13_24322;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f155;
$Lt_13_24322:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.f32 	%f156, %f12;
	setp.lt.ftz.f32 	%p52, %f150, %f156;
	@!%p52 bra 	$Lt_13_24834;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f156;
$Lt_13_24834:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.f32 	%f157, %f14;
	setp.lt.ftz.f32 	%p53, %f150, %f157;
	@!%p53 bra 	$Lt_13_25346;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f157;
$Lt_13_25346:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.f32 	%f158, %f16;
	setp.lt.ftz.f32 	%p54, %f150, %f158;
	@!%p54 bra 	$Lt_13_25858;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f158;
$Lt_13_25858:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.f32 	%f159, %f18;
	setp.lt.ftz.f32 	%p55, %f150, %f159;
	@!%p55 bra 	$Lt_13_26370;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f159;
$Lt_13_26370:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.f32 	%f160, %f20;
	setp.lt.ftz.f32 	%p56, %f150, %f160;
	@!%p56 bra 	$Lt_13_26882;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f160;
$Lt_13_26882:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.f32 	%f161, %f22;
	setp.lt.ftz.f32 	%p57, %f150, %f161;
	@!%p57 bra 	$Lt_13_27394;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f161;
$Lt_13_27394:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.f32 	%f162, %f24;
	setp.lt.ftz.f32 	%p58, %f150, %f162;
	@!%p58 bra 	$Lt_13_27906;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f162;
$Lt_13_27906:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.f32 	%f163, %f26;
	setp.lt.ftz.f32 	%p59, %f150, %f163;
	@!%p59 bra 	$Lt_13_28418;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f163;
$Lt_13_28418:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.f32 	%f164, %f28;
	setp.lt.ftz.f32 	%p60, %f150, %f164;
	@!%p60 bra 	$Lt_13_28930;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f164;
$Lt_13_28930:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.f32 	%f165, %f30;
	setp.lt.ftz.f32 	%p61, %f150, %f165;
	@!%p61 bra 	$Lt_13_29442;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f165;
$Lt_13_29442:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.f32 	%f166, %f32;
	setp.lt.ftz.f32 	%p62, %f150, %f166;
	@!%p62 bra 	$Lt_13_29954;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f166;
$Lt_13_29954:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.f32 	%f167, %f34;
	setp.lt.ftz.f32 	%p63, %f150, %f167;
	@!%p63 bra 	$Lt_13_30466;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f167;
$Lt_13_30466:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.f32 	%f168, %f36;
	setp.lt.ftz.f32 	%p64, %f150, %f168;
	@!%p64 bra 	$Lt_13_30978;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f153, %f153, %f168;
$Lt_13_30978:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.s32 	%r3, 0;
	setp.eq.s32 	%p65, %r2, %r3;
	add.s32 	%r4, %r1, 8;
	cvt.rn.f32.s32 	%f169, %r4;
	setp.le.ftz.f32 	%p66, %f169, %f153;
	@!%p66 bra 	$Lt_13_31746;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	@!%p65 bra 	$Lt_13_31490;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	68	0
	mov.f32 	%f170, 0f00000000;   	// 0
	st.f32 	[%rd2+12], %f170;
	mov.f32 	%f171, 0f00000000;   	// 0
	st.f32 	[%rd2+28], %f171;
	mov.f32 	%f172, 0f00000000;   	// 0
	st.f32 	[%rd2+44], %f172;
	mov.f32 	%f173, 0f00000000;   	// 0
	st.f32 	[%rd2+60], %f173;
	mov.f32 	%f174, 0f00000000;   	// 0
	st.f32 	[%rd2+76], %f174;
	mov.f32 	%f175, 0f00000000;   	// 0
	st.f32 	[%rd2+92], %f175;
	mov.f32 	%f176, 0f00000000;   	// 0
	st.f32 	[%rd2+108], %f176;
	mov.f32 	%f177, 0f00000000;   	// 0
	st.f32 	[%rd2+124], %f177;
	bra.uni 	$Lt_13_31490;
$Lt_13_31746:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	@!%p65 bra 	$Lt_13_32514;
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	72	0
	mov.f32 	%f178, 0f00000000;   	// 0
	st.f32 	[%rd2+12], %f178;
$Lt_13_32514:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	mov.s32 	%r5, 1;
$Lt_13_33538:
 //<loop> Loop body line 72, nesting depth: 2, iterations: 7
	.loc	18	75	0
	add.s32 	%r6, %r5, %r1;
	cvt.rn.f32.s32 	%f179, %r6;
	mov.f32 	%f180, %f14;
	setp.gt.ftz.f32 	%p67, %f180, %f179;
	mov.f32 	%f181, %f12;
	setp.gt.ftz.f32 	%p68, %f181, %f179;
	mov.f32 	%f182, %f10;
	setp.gt.ftz.f32 	%p69, %f182, %f179;
	mov.f32 	%f183, %f8;
	setp.gt.ftz.f32 	%p70, %f183, %f179;
	mov.f32 	%f184, %f6;
	setp.gt.ftz.f32 	%p71, %f184, %f179;
	selp.s32 	%r7, 1, 0, %p71;
	xor.b32 	%r8, %r7, 1;
	selp.s32 	%r9, %r8, %r7, %p70;
	xor.b32 	%r10, %r9, 1;
	selp.s32 	%r11, %r10, %r9, %p69;
	xor.b32 	%r12, %r11, 1;
	selp.s32 	%r13, %r12, %r11, %p68;
	xor.b32 	%r14, %r13, 1;
	selp.s32 	%r15, %r14, %r13, %p67;
	xor.b32 	%r16, %r15, 1;
	mov.f32 	%f185, %f16;
	setp.gt.ftz.f32 	%p72, %f185, %f179;
	selp.s32 	%r17, %r16, %r15, %p72;
	xor.b32 	%r18, %r17, 1;
	mov.f32 	%f186, %f18;
	setp.gt.ftz.f32 	%p73, %f186, %f179;
	selp.s32 	%r19, %r18, %r17, %p73;
	xor.b32 	%r20, %r19, 1;
	mov.f32 	%f187, %f20;
	setp.gt.ftz.f32 	%p74, %f187, %f179;
	selp.s32 	%r21, %r20, %r19, %p74;
	xor.b32 	%r22, %r21, 1;
	mov.f32 	%f188, %f22;
	setp.gt.ftz.f32 	%p75, %f188, %f179;
	selp.s32 	%r23, %r22, %r21, %p75;
	xor.b32 	%r24, %r23, 1;
	mov.f32 	%f189, %f24;
	setp.gt.ftz.f32 	%p76, %f189, %f179;
	selp.s32 	%r25, %r24, %r23, %p76;
	xor.b32 	%r26, %r25, 1;
	mov.f32 	%f190, %f26;
	setp.gt.ftz.f32 	%p77, %f190, %f179;
	selp.s32 	%r27, %r26, %r25, %p77;
	xor.b32 	%r28, %r27, 1;
	mov.f32 	%f191, %f28;
	setp.gt.ftz.f32 	%p78, %f191, %f179;
	selp.s32 	%r29, %r28, %r27, %p78;
	xor.b32 	%r30, %r29, 1;
	mov.f32 	%f192, %f30;
	setp.gt.ftz.f32 	%p79, %f192, %f179;
	selp.s32 	%r31, %r30, %r29, %p79;
	xor.b32 	%r32, %r31, 1;
	mov.f32 	%f193, %f32;
	setp.gt.ftz.f32 	%p80, %f193, %f179;
	selp.s32 	%r33, %r32, %r31, %p80;
	xor.b32 	%r34, %r33, 1;
	mov.f32 	%f194, %f34;
	setp.gt.ftz.f32 	%p81, %f194, %f179;
	selp.s32 	%r2, %r34, %r33, %p81;
	xor.b32 	%r35, %r2, 1;
	mov.f32 	%f195, %f36;
	setp.gt.ftz.f32 	%p82, %f195, %f179;
	selp.s32 	%r36, %r35, %r2, %p82;
	mov.u32 	%r37, 0;
	setp.ne.s32 	%p83, %r36, %r37;
	@%p83 bra 	$Lt_13_33794;
 //<loop> Part of loop body line 72, head labeled $Lt_13_33538
	.loc	18	76	0
	mov.f32 	%f196, 0f00000000;   	// 0
	cvt.s64.s32 	%rd5, %r5;
	mul.wide.s32 	%rd6, %r5, 16;
	add.u64 	%rd7, %rd2, %rd6;
	st.f32 	[%rd7+12], %f196;
$Lt_13_33794:
 //<loop> Part of loop body line 72, head labeled $Lt_13_33538
	add.s32 	%r5, %r5, 1;
	mov.u32 	%r38, 8;
	setp.ne.s32 	%p84, %r5, %r38;
	@%p84 bra 	$Lt_13_33538;
$Lt_13_31490:
 //<loop> Part of loop body line 52, head labeled $Lt_13_23042
	.loc	18	80	0
	add.u64 	%rd2, %rd2, 128;
	mov.s32 	%r1, %r4;
	mov.u32 	%r39, 31;
	setp.le.s32 	%p85, %r4, %r39;
	@%p85 bra 	$Lt_13_23042;
	.loc	18	82	0
	ret;
$LDWend__Z13FillSegment32ILi16E6float4EvPT0_6float2P6float3:
	} // _Z13FillSegment32ILi16E6float4EvPT0_6float2P6float3

	// ---------------------------------------------------------------------------
	// void FillSegment32<8, float4>(float4*, float2, float3*)
	// (signature demangled from the symbol name; compiler-generated PTX)
	//
	// Parameters, as they are read below:
	//   parmf1 (u64)     -> %rd2 : base address of the output. Only the f32 at byte
	//                      offset +12 of each 16-byte element is ever written, and
	//                      only with 0.0 (the 4th component, given the float4
	//                      element type in the symbol name).
	//   parmf2 (8 bytes) -> %f2 (bytes 0..3) and %f4 (bytes 4..7): the query point,
	//                      called (px, py) in the comments below.
	//   parmf3 (u64)     -> %rd4 : base address of 12-byte records with f32 fields
	//                      at +0, +4 and +8 (called x, y, z below). Records 0..7
	//                      are read in full, record 8 only at +4.
	//
	// Outline:
	//   1. Eight slots (%f6, %f8, ... %f20) are preset to -1.0. For each pair of
	//      consecutive records (i, i+1) whose y values straddle py, slot i becomes
	//      rec[i].z * (py - rec[i].y) - (px - rec[i].x).
	//   2. An outer loop visits element indices 0, 8, 16, 24 (%r1). For each group
	//      of eight output elements it zeroes the +12 field of every element whose
	//      index has an even number of slots strictly greater than it.
	// No value is returned; the only side effects are the stores through %rd2.
	// ---------------------------------------------------------------------------
	.visible .func _Z13FillSegment32ILi8E6float4EvPT0_6float2P6float3 (.param .u64 __cudaparmf1__Z13FillSegment32ILi8E6float4EvPT0_6float2P6float3, .param .align 8 .b8 __cudaparmf2__Z13FillSegment32ILi8E6float4EvPT0_6float2P6float3[8], .param .u64 __cudaparmf3__Z13FillSegment32ILi8E6float4EvPT0_6float2P6float3)
	{
	.reg .u32 %r<25>;
	.reg .u64 %rd<9>;
	.reg .f32 %f<110>;
	.reg .pred %p<47>;
	// __cuda_local_var_91085_8_non_const_dist = 0
	.loc	18	46	0
$LDWbegin__Z13FillSegment32ILi8E6float4EvPT0_6float2P6float3:
	// Unpack the parameters: %rd2 = output base, (%f2, %f4) = (px, py),
	// %rd4 = record base.
	ld.param.u64 	%rd1, [__cudaparmf1__Z13FillSegment32ILi8E6float4EvPT0_6float2P6float3];
	mov.s64 	%rd2, %rd1;
	ld.param.f32 	%f1, [__cudaparmf2__Z13FillSegment32ILi8E6float4EvPT0_6float2P6float3+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf2__Z13FillSegment32ILi8E6float4EvPT0_6float2P6float3+4];
	mov.f32 	%f4, %f3;
	ld.param.u64 	%rd3, [__cudaparmf3__Z13FillSegment32ILi8E6float4EvPT0_6float2P6float3];
	mov.s64 	%rd4, %rd3;
	.loc	18	51	0
	// Preset all eight slots to -1.0. The loop bases compared against them later
	// are never negative, so a slot left at -1.0 never satisfies "base < slot".
	mov.f32 	%f5, 0fbf800000;     	// -1
	mov.f32 	%f6, %f5;
	mov.f32 	%f7, 0fbf800000;     	// -1
	mov.f32 	%f8, %f7;
	mov.f32 	%f9, 0fbf800000;     	// -1
	mov.f32 	%f10, %f9;
	mov.f32 	%f11, 0fbf800000;    	// -1
	mov.f32 	%f12, %f11;
	mov.f32 	%f13, 0fbf800000;    	// -1
	mov.f32 	%f14, %f13;
	mov.f32 	%f15, 0fbf800000;    	// -1
	mov.f32 	%f16, %f15;
	mov.f32 	%f17, 0fbf800000;    	// -1
	mov.f32 	%f18, %f17;
	mov.f32 	%f19, 0fbf800000;    	// -1
	mov.f32 	%f20, %f19;
	.loc	18	24	0
	// Slot 0 (%f6), records 0 and 1.
	// %p3 = (rec[1].y > py) XOR (rec[0].y <= py). The slot is computed only when
	// %p3 is false, i.e. when py lies in the half-open interval between the two
	// y values (in either direction).
	ld.f32 	%f21, [%rd4+4];
	ld.f32 	%f22, [%rd4+16];
	setp.gt.ftz.f32 	%p1, %f22, %f4;
	setp.le.ftz.f32 	%p2, %f21, %f4;
	xor.pred 	%p3, %p1, %p2;
	@%p3 bra 	$Lt_14_14338;
	.loc	18	41	0
	// slot = rec[0].z * (py - rec[0].y) - (px - rec[0].x)
	ld.f32 	%f23, [%rd4+0];
	sub.ftz.f32 	%f24, %f2, %f23;
	ld.f32 	%f25, [%rd4+8];
	sub.ftz.f32 	%f26, %f4, %f21;
	mul.ftz.f32 	%f27, %f25, %f26;
	sub.ftz.f32 	%f28, %f27, %f24;
	mov.f32 	%f6, %f28;
$Lt_14_14338:
	// Slot 1 (%f8), records 1 and 2. Same test and formula; the y value already
	// loaded for the previous pair is reused.
	ld.f32 	%f29, [%rd4+28];
	setp.gt.ftz.f32 	%p4, %f29, %f4;
	setp.le.ftz.f32 	%p5, %f22, %f4;
	xor.pred 	%p6, %p4, %p5;
	@%p6 bra 	$Lt_14_14850;
	ld.f32 	%f30, [%rd4+12];
	sub.ftz.f32 	%f31, %f2, %f30;
	ld.f32 	%f32, [%rd4+20];
	sub.ftz.f32 	%f33, %f4, %f22;
	mul.ftz.f32 	%f34, %f32, %f33;
	sub.ftz.f32 	%f35, %f34, %f31;
	mov.f32 	%f8, %f35;
$Lt_14_14850:
	// Slot 2 (%f10), records 2 and 3.
	ld.f32 	%f36, [%rd4+40];
	setp.gt.ftz.f32 	%p7, %f36, %f4;
	setp.le.ftz.f32 	%p8, %f29, %f4;
	xor.pred 	%p9, %p7, %p8;
	@%p9 bra 	$Lt_14_15362;
	ld.f32 	%f37, [%rd4+24];
	sub.ftz.f32 	%f38, %f2, %f37;
	ld.f32 	%f39, [%rd4+32];
	sub.ftz.f32 	%f40, %f4, %f29;
	mul.ftz.f32 	%f41, %f39, %f40;
	sub.ftz.f32 	%f42, %f41, %f38;
	mov.f32 	%f10, %f42;
$Lt_14_15362:
	// Slot 3 (%f12), records 3 and 4.
	ld.f32 	%f43, [%rd4+52];
	setp.gt.ftz.f32 	%p10, %f43, %f4;
	setp.le.ftz.f32 	%p11, %f36, %f4;
	xor.pred 	%p12, %p10, %p11;
	@%p12 bra 	$Lt_14_15874;
	ld.f32 	%f44, [%rd4+36];
	sub.ftz.f32 	%f45, %f2, %f44;
	ld.f32 	%f46, [%rd4+44];
	sub.ftz.f32 	%f47, %f4, %f36;
	mul.ftz.f32 	%f48, %f46, %f47;
	sub.ftz.f32 	%f49, %f48, %f45;
	mov.f32 	%f12, %f49;
$Lt_14_15874:
	// Slot 4 (%f14), records 4 and 5.
	ld.f32 	%f50, [%rd4+64];
	setp.gt.ftz.f32 	%p13, %f50, %f4;
	setp.le.ftz.f32 	%p14, %f43, %f4;
	xor.pred 	%p15, %p13, %p14;
	@%p15 bra 	$Lt_14_16386;
	ld.f32 	%f51, [%rd4+48];
	sub.ftz.f32 	%f52, %f2, %f51;
	ld.f32 	%f53, [%rd4+56];
	sub.ftz.f32 	%f54, %f4, %f43;
	mul.ftz.f32 	%f55, %f53, %f54;
	sub.ftz.f32 	%f56, %f55, %f52;
	mov.f32 	%f14, %f56;
$Lt_14_16386:
	// Slot 5 (%f16), records 5 and 6.
	ld.f32 	%f57, [%rd4+76];
	setp.gt.ftz.f32 	%p16, %f57, %f4;
	setp.le.ftz.f32 	%p17, %f50, %f4;
	xor.pred 	%p18, %p16, %p17;
	@%p18 bra 	$Lt_14_16898;
	ld.f32 	%f58, [%rd4+60];
	sub.ftz.f32 	%f59, %f2, %f58;
	ld.f32 	%f60, [%rd4+68];
	sub.ftz.f32 	%f61, %f4, %f50;
	mul.ftz.f32 	%f62, %f60, %f61;
	sub.ftz.f32 	%f63, %f62, %f59;
	mov.f32 	%f16, %f63;
$Lt_14_16898:
	// Slot 6 (%f18), records 6 and 7.
	ld.f32 	%f64, [%rd4+88];
	setp.gt.ftz.f32 	%p19, %f64, %f4;
	setp.le.ftz.f32 	%p20, %f57, %f4;
	xor.pred 	%p21, %p19, %p20;
	@%p21 bra 	$Lt_14_17410;
	ld.f32 	%f65, [%rd4+72];
	sub.ftz.f32 	%f66, %f2, %f65;
	ld.f32 	%f67, [%rd4+80];
	sub.ftz.f32 	%f68, %f4, %f57;
	mul.ftz.f32 	%f69, %f67, %f68;
	sub.ftz.f32 	%f70, %f69, %f66;
	mov.f32 	%f18, %f70;
$Lt_14_17410:
	// Slot 7 (%f20), records 7 and 8 (only the y field of record 8 is read).
	ld.f32 	%f71, [%rd4+100];
	setp.gt.ftz.f32 	%p22, %f71, %f4;
	setp.le.ftz.f32 	%p23, %f64, %f4;
	xor.pred 	%p24, %p22, %p23;
	@%p24 bra 	$Lt_14_17922;
	ld.f32 	%f72, [%rd4+84];
	sub.ftz.f32 	%f73, %f2, %f72;
	ld.f32 	%f74, [%rd4+92];
	sub.ftz.f32 	%f75, %f4, %f64;
	mul.ftz.f32 	%f76, %f74, %f75;
	sub.ftz.f32 	%f77, %f76, %f73;
	mov.f32 	%f20, %f77;
$Lt_14_17922:
	.loc	18	52	0
	// Outer loop: %r1 = 0, 8, 16, 24 is the index of the first element in the
	// current group of eight; %rd2 advances by 128 bytes (8 x 16) per iteration.
	mov.s32 	%r1, 0;
$Lt_14_18946:
 //<loop> Loop body line 52, nesting depth: 1, iterations: 4
	// %f78 = (float)%r1. For every slot strictly greater than %f78:
	//   %r2  toggles between 0 and 1 (parity of the number of such slots),
	//   %f81 tracks the smallest such slot, starting from 100000.
	// Slot 0 initialises both values; slots 1..7 update them.
	cvt.rn.f32.s32 	%f78, %r1;
	mov.f32 	%f79, %f6;
	setp.lt.ftz.f32 	%p25, %f78, %f79;
	@!%p25 bra 	$Lt_14_19458;
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	.loc	18	62	0
	mov.f32 	%f80, 0f47c35000;    	// 100000
	min.ftz.f32 	%f81, %f79, %f80;
	mov.s32 	%r2, 1;
	bra.uni 	$Lt_14_19202;
$Lt_14_19458:
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	mov.s32 	%r2, 0;
	mov.f32 	%f81, 0f47c35000;    	// 100000
$Lt_14_19202:
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	mov.f32 	%f82, %f8;
	setp.lt.ftz.f32 	%p26, %f78, %f82;
	@!%p26 bra 	$Lt_14_19714;
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f81, %f81, %f82;
$Lt_14_19714:
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	mov.f32 	%f83, %f10;
	setp.lt.ftz.f32 	%p27, %f78, %f83;
	@!%p27 bra 	$Lt_14_20226;
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f81, %f81, %f83;
$Lt_14_20226:
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	mov.f32 	%f84, %f12;
	setp.lt.ftz.f32 	%p28, %f78, %f84;
	@!%p28 bra 	$Lt_14_20738;
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f81, %f81, %f84;
$Lt_14_20738:
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	mov.f32 	%f85, %f14;
	setp.lt.ftz.f32 	%p29, %f78, %f85;
	@!%p29 bra 	$Lt_14_21250;
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f81, %f81, %f85;
$Lt_14_21250:
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	mov.f32 	%f86, %f16;
	setp.lt.ftz.f32 	%p30, %f78, %f86;
	@!%p30 bra 	$Lt_14_21762;
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f81, %f81, %f86;
$Lt_14_21762:
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	mov.f32 	%f87, %f18;
	setp.lt.ftz.f32 	%p31, %f78, %f87;
	@!%p31 bra 	$Lt_14_22274;
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f81, %f81, %f87;
$Lt_14_22274:
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	mov.f32 	%f88, %f20;
	setp.lt.ftz.f32 	%p32, %f78, %f88;
	@!%p32 bra 	$Lt_14_22786;
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f81, %f81, %f88;
$Lt_14_22786:
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	// %p33 = parity at the group's first element is even.
	// %p34 = (float)(%r1 + 8) <= %f81, i.e. no slot falls strictly between this
	//        group's first index and the next group's first index, so the parity
	//        is the same for all eight elements.
	mov.s32 	%r3, 0;
	setp.eq.s32 	%p33, %r2, %r3;
	add.s32 	%r4, %r1, 8;
	cvt.rn.f32.s32 	%f89, %r4;
	setp.le.ftz.f32 	%p34, %f89, %f81;
	@!%p34 bra 	$Lt_14_23554;
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	// Uniform group: with odd parity nothing is written ...
	@!%p33 bra 	$Lt_14_23298;
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	.loc	18	68	0
	// ... with even parity the +12 field of all eight elements is set to 0.0.
	mov.f32 	%f90, 0f00000000;    	// 0
	st.f32 	[%rd2+12], %f90;
	mov.f32 	%f91, 0f00000000;    	// 0
	st.f32 	[%rd2+28], %f91;
	mov.f32 	%f92, 0f00000000;    	// 0
	st.f32 	[%rd2+44], %f92;
	mov.f32 	%f93, 0f00000000;    	// 0
	st.f32 	[%rd2+60], %f93;
	mov.f32 	%f94, 0f00000000;    	// 0
	st.f32 	[%rd2+76], %f94;
	mov.f32 	%f95, 0f00000000;    	// 0
	st.f32 	[%rd2+92], %f95;
	mov.f32 	%f96, 0f00000000;    	// 0
	st.f32 	[%rd2+108], %f96;
	mov.f32 	%f97, 0f00000000;    	// 0
	st.f32 	[%rd2+124], %f97;
	bra.uni 	$Lt_14_23298;
$Lt_14_23554:
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	// Mixed group: element 0 uses the parity already computed above ...
	@!%p33 bra 	$Lt_14_24322;
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	.loc	18	72	0
	mov.f32 	%f98, 0f00000000;    	// 0
	st.f32 	[%rd2+12], %f98;
$Lt_14_24322:
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	// ... and elements 1..7 (%r5) each get their parity recomputed.
	mov.s32 	%r5, 1;
$Lt_14_25346:
 //<loop> Loop body line 72, nesting depth: 2, iterations: 7
	.loc	18	75	0
	// %f99 = (float)(%r5 + %r1) is this element's index. The selp/xor chain
	// starts from (slot0 > index) and flips the bit once for every further slot
	// that is greater than the index; the final parity ends up in %r20.
	// %r2 is reused as a temporary here; it is reinitialised at the top of the
	// next outer iteration before being read again.
	add.s32 	%r6, %r5, %r1;
	cvt.rn.f32.s32 	%f99, %r6;
	mov.f32 	%f100, %f14;
	setp.gt.ftz.f32 	%p35, %f100, %f99;
	mov.f32 	%f101, %f12;
	setp.gt.ftz.f32 	%p36, %f101, %f99;
	mov.f32 	%f102, %f10;
	setp.gt.ftz.f32 	%p37, %f102, %f99;
	mov.f32 	%f103, %f8;
	setp.gt.ftz.f32 	%p38, %f103, %f99;
	mov.f32 	%f104, %f6;
	setp.gt.ftz.f32 	%p39, %f104, %f99;
	selp.s32 	%r7, 1, 0, %p39;
	xor.b32 	%r8, %r7, 1;
	selp.s32 	%r9, %r8, %r7, %p38;
	xor.b32 	%r10, %r9, 1;
	selp.s32 	%r11, %r10, %r9, %p37;
	xor.b32 	%r12, %r11, 1;
	selp.s32 	%r13, %r12, %r11, %p36;
	xor.b32 	%r14, %r13, 1;
	selp.s32 	%r15, %r14, %r13, %p35;
	xor.b32 	%r16, %r15, 1;
	mov.f32 	%f105, %f16;
	setp.gt.ftz.f32 	%p40, %f105, %f99;
	selp.s32 	%r17, %r16, %r15, %p40;
	xor.b32 	%r18, %r17, 1;
	mov.f32 	%f106, %f18;
	setp.gt.ftz.f32 	%p41, %f106, %f99;
	selp.s32 	%r2, %r18, %r17, %p41;
	xor.b32 	%r19, %r2, 1;
	mov.f32 	%f107, %f20;
	setp.gt.ftz.f32 	%p42, %f107, %f99;
	selp.s32 	%r20, %r19, %r2, %p42;
	mov.u32 	%r21, 0;
	setp.ne.s32 	%p43, %r20, %r21;
	@%p43 bra 	$Lt_14_25602;
 //<loop> Part of loop body line 72, head labeled $Lt_14_25346
	.loc	18	76	0
	// Even parity: store 0.0 at %rd2 + 16 * %r5 + 12.
	// %rd5 is computed but never read in this function.
	mov.f32 	%f108, 0f00000000;   	// 0
	cvt.s64.s32 	%rd5, %r5;
	mul.wide.s32 	%rd6, %r5, 16;
	add.u64 	%rd7, %rd2, %rd6;
	st.f32 	[%rd7+12], %f108;
$Lt_14_25602:
 //<loop> Part of loop body line 72, head labeled $Lt_14_25346
	add.s32 	%r5, %r5, 1;
	mov.u32 	%r22, 8;
	setp.ne.s32 	%p44, %r5, %r22;
	@%p44 bra 	$Lt_14_25346;
$Lt_14_23298:
 //<loop> Part of loop body line 52, head labeled $Lt_14_18946
	.loc	18	80	0
	// Advance to the next group of eight elements; repeat while the next group's
	// first index (%r4) is <= 31.
	add.u64 	%rd2, %rd2, 128;
	mov.s32 	%r1, %r4;
	mov.u32 	%r23, 31;
	setp.le.s32 	%p45, %r4, %r23;
	@%p45 bra 	$Lt_14_18946;
	.loc	18	82	0
	ret;
$LDWend__Z13FillSegment32ILi8E6float4EvPT0_6float2P6float3:
	} // _Z13FillSegment32ILi8E6float4EvPT0_6float2P6float3

	// ---------------------------------------------------------------------------
	// void FillSegment32<4, float4>(float4*, float2, float3*)
	// (signature demangled from the symbol name; compiler-generated PTX)
	//
	// Parameters, as they are read below:
	//   parmf1 (u64)     -> %rd2 : base address of the output. Only the f32 at byte
	//                      offset +12 of each 16-byte element is ever written, and
	//                      only with 0.0.
	//   parmf2 (8 bytes) -> %f2 (bytes 0..3) and %f4 (bytes 4..7): the query point,
	//                      called (px, py) in the comments below.
	//   parmf3 (u64)     -> %rd4 : base address of 12-byte records with f32 fields
	//                      at +0, +4 and +8 (called x, y, z below). Records 0..3
	//                      are read in full, record 4 only at +4.
	//
	// Outline:
	//   1. Four slots (%f6, %f8, %f10, %f12) are preset to -1.0. For each pair of
	//      consecutive records (i, i+1) whose y values straddle py, slot i becomes
	//      rec[i].z * (py - rec[i].y) - (px - rec[i].x).
	//   2. An outer loop visits element indices 0, 8, 16, 24 (%r1). For each group
	//      of eight output elements it zeroes the +12 field of every element whose
	//      index has an even number of slots strictly greater than it.
	// No value is returned; the only side effects are the stores through %rd2.
	// ---------------------------------------------------------------------------
	.visible .func _Z13FillSegment32ILi4E6float4EvPT0_6float2P6float3 (.param .u64 __cudaparmf1__Z13FillSegment32ILi4E6float4EvPT0_6float2P6float3, .param .align 8 .b8 __cudaparmf2__Z13FillSegment32ILi4E6float4EvPT0_6float2P6float3[8], .param .u64 __cudaparmf3__Z13FillSegment32ILi4E6float4EvPT0_6float2P6float3)
	{
	.reg .u32 %r<18>;
	.reg .u64 %rd<9>;
	.reg .f32 %f<66>;
	.reg .pred %p<27>;
	// __cuda_local_var_91085_8_non_const_dist = 0
	.loc	18	46	0
$LDWbegin__Z13FillSegment32ILi4E6float4EvPT0_6float2P6float3:
	// Unpack the parameters: %rd2 = output base, (%f2, %f4) = (px, py),
	// %rd4 = record base.
	ld.param.u64 	%rd1, [__cudaparmf1__Z13FillSegment32ILi4E6float4EvPT0_6float2P6float3];
	mov.s64 	%rd2, %rd1;
	ld.param.f32 	%f1, [__cudaparmf2__Z13FillSegment32ILi4E6float4EvPT0_6float2P6float3+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf2__Z13FillSegment32ILi4E6float4EvPT0_6float2P6float3+4];
	mov.f32 	%f4, %f3;
	ld.param.u64 	%rd3, [__cudaparmf3__Z13FillSegment32ILi4E6float4EvPT0_6float2P6float3];
	mov.s64 	%rd4, %rd3;
	.loc	18	51	0
	// Preset all four slots to -1.0. The loop bases compared against them later
	// are never negative, so a slot left at -1.0 never satisfies "base < slot".
	mov.f32 	%f5, 0fbf800000;     	// -1
	mov.f32 	%f6, %f5;
	mov.f32 	%f7, 0fbf800000;     	// -1
	mov.f32 	%f8, %f7;
	mov.f32 	%f9, 0fbf800000;     	// -1
	mov.f32 	%f10, %f9;
	mov.f32 	%f11, 0fbf800000;    	// -1
	mov.f32 	%f12, %f11;
	.loc	18	24	0
	// Slot 0 (%f6), records 0 and 1.
	// %p3 = (rec[1].y > py) XOR (rec[0].y <= py). The slot is computed only when
	// %p3 is false, i.e. when py lies in the half-open interval between the two
	// y values (in either direction).
	ld.f32 	%f13, [%rd4+4];
	ld.f32 	%f14, [%rd4+16];
	setp.gt.ftz.f32 	%p1, %f14, %f4;
	setp.le.ftz.f32 	%p2, %f13, %f4;
	xor.pred 	%p3, %p1, %p2;
	@%p3 bra 	$Lt_15_14338;
	.loc	18	41	0
	// slot = rec[0].z * (py - rec[0].y) - (px - rec[0].x)
	ld.f32 	%f15, [%rd4+0];
	sub.ftz.f32 	%f16, %f2, %f15;
	ld.f32 	%f17, [%rd4+8];
	sub.ftz.f32 	%f18, %f4, %f13;
	mul.ftz.f32 	%f19, %f17, %f18;
	sub.ftz.f32 	%f20, %f19, %f16;
	mov.f32 	%f6, %f20;
$Lt_15_14338:
	// Slot 1 (%f8), records 1 and 2. Same test and formula; the y value already
	// loaded for the previous pair is reused.
	ld.f32 	%f21, [%rd4+28];
	setp.gt.ftz.f32 	%p4, %f21, %f4;
	setp.le.ftz.f32 	%p5, %f14, %f4;
	xor.pred 	%p6, %p4, %p5;
	@%p6 bra 	$Lt_15_14850;
	ld.f32 	%f22, [%rd4+12];
	sub.ftz.f32 	%f23, %f2, %f22;
	ld.f32 	%f24, [%rd4+20];
	sub.ftz.f32 	%f25, %f4, %f14;
	mul.ftz.f32 	%f26, %f24, %f25;
	sub.ftz.f32 	%f27, %f26, %f23;
	mov.f32 	%f8, %f27;
$Lt_15_14850:
	// Slot 2 (%f10), records 2 and 3.
	ld.f32 	%f28, [%rd4+40];
	setp.gt.ftz.f32 	%p7, %f28, %f4;
	setp.le.ftz.f32 	%p8, %f21, %f4;
	xor.pred 	%p9, %p7, %p8;
	@%p9 bra 	$Lt_15_15362;
	ld.f32 	%f29, [%rd4+24];
	sub.ftz.f32 	%f30, %f2, %f29;
	ld.f32 	%f31, [%rd4+32];
	sub.ftz.f32 	%f32, %f4, %f21;
	mul.ftz.f32 	%f33, %f31, %f32;
	sub.ftz.f32 	%f34, %f33, %f30;
	mov.f32 	%f10, %f34;
$Lt_15_15362:
	// Slot 3 (%f12), records 3 and 4 (only the y field of record 4 is read).
	ld.f32 	%f35, [%rd4+52];
	setp.gt.ftz.f32 	%p10, %f35, %f4;
	setp.le.ftz.f32 	%p11, %f28, %f4;
	xor.pred 	%p12, %p10, %p11;
	@%p12 bra 	$Lt_15_15874;
	ld.f32 	%f36, [%rd4+36];
	sub.ftz.f32 	%f37, %f2, %f36;
	ld.f32 	%f38, [%rd4+44];
	sub.ftz.f32 	%f39, %f4, %f28;
	mul.ftz.f32 	%f40, %f38, %f39;
	sub.ftz.f32 	%f41, %f40, %f37;
	mov.f32 	%f12, %f41;
$Lt_15_15874:
	.loc	18	52	0
	// Outer loop: %r1 = 0, 8, 16, 24 is the index of the first element in the
	// current group of eight; %rd2 advances by 128 bytes (8 x 16) per iteration.
	mov.s32 	%r1, 0;
$Lt_15_16898:
 //<loop> Loop body line 52, nesting depth: 1, iterations: 4
	// %f42 = (float)%r1. For every slot strictly greater than %f42:
	//   %r2  toggles between 0 and 1 (parity of the number of such slots),
	//   %f45 tracks the smallest such slot, starting from 100000.
	// Slot 0 initialises both values; slots 1..3 update them.
	cvt.rn.f32.s32 	%f42, %r1;
	mov.f32 	%f43, %f6;
	setp.lt.ftz.f32 	%p13, %f42, %f43;
	@!%p13 bra 	$Lt_15_17410;
 //<loop> Part of loop body line 52, head labeled $Lt_15_16898
	.loc	18	62	0
	mov.f32 	%f44, 0f47c35000;    	// 100000
	min.ftz.f32 	%f45, %f43, %f44;
	mov.s32 	%r2, 1;
	bra.uni 	$Lt_15_17154;
$Lt_15_17410:
 //<loop> Part of loop body line 52, head labeled $Lt_15_16898
	mov.s32 	%r2, 0;
	mov.f32 	%f45, 0f47c35000;    	// 100000
$Lt_15_17154:
 //<loop> Part of loop body line 52, head labeled $Lt_15_16898
	mov.f32 	%f46, %f8;
	setp.lt.ftz.f32 	%p14, %f42, %f46;
	@!%p14 bra 	$Lt_15_17666;
 //<loop> Part of loop body line 52, head labeled $Lt_15_16898
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f45, %f45, %f46;
$Lt_15_17666:
 //<loop> Part of loop body line 52, head labeled $Lt_15_16898
	mov.f32 	%f47, %f10;
	setp.lt.ftz.f32 	%p15, %f42, %f47;
	@!%p15 bra 	$Lt_15_18178;
 //<loop> Part of loop body line 52, head labeled $Lt_15_16898
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f45, %f45, %f47;
$Lt_15_18178:
 //<loop> Part of loop body line 52, head labeled $Lt_15_16898
	mov.f32 	%f48, %f12;
	setp.lt.ftz.f32 	%p16, %f42, %f48;
	@!%p16 bra 	$Lt_15_18690;
 //<loop> Part of loop body line 52, head labeled $Lt_15_16898
	.loc	18	61	0
	xor.b32 	%r2, %r2, 1;
	.loc	18	62	0
	min.ftz.f32 	%f45, %f45, %f48;
$Lt_15_18690:
 //<loop> Part of loop body line 52, head labeled $Lt_15_16898
	// %p17 = parity at the group's first element is even.
	// %p18 = (float)(%r1 + 8) <= %f45, i.e. no slot falls strictly between this
	//        group's first index and the next group's first index, so the parity
	//        is the same for all eight elements.
	mov.s32 	%r3, 0;
	setp.eq.s32 	%p17, %r2, %r3;
	add.s32 	%r4, %r1, 8;
	cvt.rn.f32.s32 	%f49, %r4;
	setp.le.ftz.f32 	%p18, %f49, %f45;
	@!%p18 bra 	$Lt_15_19458;
 //<loop> Part of loop body line 52, head labeled $Lt_15_16898
	// Uniform group: with odd parity nothing is written ...
	@!%p17 bra 	$Lt_15_19202;
 //<loop> Part of loop body line 52, head labeled $Lt_15_16898
	.loc	18	68	0
	// ... with even parity the +12 field of all eight elements is set to 0.0.
	mov.f32 	%f50, 0f00000000;    	// 0
	st.f32 	[%rd2+12], %f50;
	mov.f32 	%f51, 0f00000000;    	// 0
	st.f32 	[%rd2+28], %f51;
	mov.f32 	%f52, 0f00000000;    	// 0
	st.f32 	[%rd2+44], %f52;
	mov.f32 	%f53, 0f00000000;    	// 0
	st.f32 	[%rd2+60], %f53;
	mov.f32 	%f54, 0f00000000;    	// 0
	st.f32 	[%rd2+76], %f54;
	mov.f32 	%f55, 0f00000000;    	// 0
	st.f32 	[%rd2+92], %f55;
	mov.f32 	%f56, 0f00000000;    	// 0
	st.f32 	[%rd2+108], %f56;
	mov.f32 	%f57, 0f00000000;    	// 0
	st.f32 	[%rd2+124], %f57;
	bra.uni 	$Lt_15_19202;
$Lt_15_19458:
 //<loop> Part of loop body line 52, head labeled $Lt_15_16898
	// Mixed group: element 0 uses the parity already computed above ...
	@!%p17 bra 	$Lt_15_20226;
 //<loop> Part of loop body line 52, head labeled $Lt_15_16898
	.loc	18	72	0
	mov.f32 	%f58, 0f00000000;    	// 0
	st.f32 	[%rd2+12], %f58;
$Lt_15_20226:
 //<loop> Part of loop body line 52, head labeled $Lt_15_16898
	// ... and elements 1..7 (%r5) each get their parity recomputed.
	mov.s32 	%r5, 1;
$Lt_15_21250:
 //<loop> Loop body line 72, nesting depth: 2, iterations: 7
	// %f59 = (float)(%r5 + %r1) is this element's index. The selp/xor chain
	// starts from (slot0 > index) and flips the bit once for every further slot
	// that is greater than the index; the final parity ends up in %r13.
	add.s32 	%r6, %r5, %r1;
	cvt.rn.f32.s32 	%f59, %r6;
	mov.f32 	%f60, %f10;
	setp.gt.ftz.f32 	%p19, %f60, %f59;
	mov.f32 	%f61, %f8;
	setp.gt.ftz.f32 	%p20, %f61, %f59;
	mov.f32 	%f62, %f6;
	setp.gt.ftz.f32 	%p21, %f62, %f59;
	selp.s32 	%r7, 1, 0, %p21;
	xor.b32 	%r8, %r7, 1;
	selp.s32 	%r9, %r8, %r7, %p20;
	xor.b32 	%r10, %r9, 1;
	selp.s32 	%r11, %r10, %r9, %p19;
	xor.b32 	%r12, %r11, 1;
	mov.f32 	%f63, %f12;
	setp.gt.ftz.f32 	%p22, %f63, %f59;
	selp.s32 	%r13, %r12, %r11, %p22;
	mov.u32 	%r14, 0;
	setp.ne.s32 	%p23, %r13, %r14;
	@%p23 bra 	$Lt_15_21506;
 //<loop> Part of loop body line 72, head labeled $Lt_15_21250
	.loc	18	76	0
	// Even parity: store 0.0 at %rd2 + 16 * %r5 + 12.
	// %rd5 is computed but never read in this function.
	mov.f32 	%f64, 0f00000000;    	// 0
	cvt.s64.s32 	%rd5, %r5;
	mul.wide.s32 	%rd6, %r5, 16;
	add.u64 	%rd7, %rd2, %rd6;
	st.f32 	[%rd7+12], %f64;
$Lt_15_21506:
 //<loop> Part of loop body line 72, head labeled $Lt_15_21250
	add.s32 	%r5, %r5, 1;
	mov.u32 	%r15, 8;
	setp.ne.s32 	%p24, %r5, %r15;
	@%p24 bra 	$Lt_15_21250;
$Lt_15_19202:
 //<loop> Part of loop body line 52, head labeled $Lt_15_16898
	.loc	18	80	0
	// Advance to the next group of eight elements; repeat while the next group's
	// first index (%r4) is <= 31.
	add.u64 	%rd2, %rd2, 128;
	mov.s32 	%r1, %r4;
	mov.u32 	%r16, 31;
	setp.le.s32 	%p25, %r4, %r16;
	@%p25 bra 	$Lt_15_16898;
	.loc	18	82	0
	ret;
$LDWend__Z13FillSegment32ILi4E6float4EvPT0_6float2P6float3:
	} // _Z13FillSegment32ILi4E6float4EvPT0_6float2P6float3

	// ---------------------------------------------------------------------------
	// void PointDistance<16>(float2, float3*, float*)
	// (signature demangled from the symbol name; compiler-generated PTX)
	//
	// Parameters, as they are read below:
	//   parmf1 (8 bytes) -> %f2 (bytes 0..3) and %f4 (bytes 4..7): the query point,
	//                      called (px, py) in the comments below.
	//   parmf2 (u64)     -> %rd2 : base address of 12-byte records with f32 fields
	//                      at +0, +4 and +8 (called x, y, z below). Records 0..15
	//                      are read in full, record 16 only at +4.
	//   parmf3 (u64)     -> %rd4 : base address of an array of 16 f32 outputs.
	//
	// For each i in 0..15, if py lies in the half-open interval between rec[i].y
	// and rec[i+1].y (in either direction), the function stores
	//     out[i] = rec[i].z * (py - rec[i].y) - (px - rec[i].x)
	// Otherwise out[i] is left untouched. Nothing is returned.
	//
	// After every store except the last, the y value of the next record is loaded
	// again; presumably the compiler could not rule out that %rd4 aliases the
	// record array.
	// ---------------------------------------------------------------------------
	.visible .func _Z13PointDistanceILi16EEv6float2P6float3Pf (.param .align 8 .b8 __cudaparmf1__Z13PointDistanceILi16EEv6float2P6float3Pf[8], .param .u64 __cudaparmf2__Z13PointDistanceILi16EEv6float2P6float3Pf, .param .u64 __cudaparmf3__Z13PointDistanceILi16EEv6float2P6float3Pf)
	{
	.reg .u64 %rd<6>;
	.reg .f32 %f<119>;
	.reg .pred %p<50>;
	.loc	18	34	0
$LDWbegin__Z13PointDistanceILi16EEv6float2P6float3Pf:
	// Unpack the parameters: (%f2, %f4) = (px, py), %rd2 = record base,
	// %rd4 = output base.
	ld.param.f32 	%f1, [__cudaparmf1__Z13PointDistanceILi16EEv6float2P6float3Pf+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z13PointDistanceILi16EEv6float2P6float3Pf+4];
	mov.f32 	%f4, %f3;
	ld.param.u64 	%rd1, [__cudaparmf2__Z13PointDistanceILi16EEv6float2P6float3Pf];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf3__Z13PointDistanceILi16EEv6float2P6float3Pf];
	mov.s64 	%rd4, %rd3;
	.loc	18	24	0
	// Pair 0 (records 0 and 1) -> out[0].
	// %p3 = (rec[1].y > py) XOR (rec[0].y <= py); the store is skipped when %p3
	// is true, i.e. when py is not between the two y values.
	ld.f32 	%f5, [%rd2+4];
	ld.f32 	%f6, [%rd2+16];
	setp.gt.ftz.f32 	%p1, %f6, %f4;
	setp.le.ftz.f32 	%p2, %f5, %f4;
	xor.pred 	%p3, %p1, %p2;
	@%p3 bra 	$Lt_16_3586;
	.loc	18	41	0
	// out[0] = rec[0].z * (py - rec[0].y) - (px - rec[0].x), then reload rec[1].y.
	ld.f32 	%f7, [%rd2+0];
	sub.ftz.f32 	%f8, %f2, %f7;
	ld.f32 	%f9, [%rd2+8];
	sub.ftz.f32 	%f10, %f4, %f5;
	mul.ftz.f32 	%f11, %f9, %f10;
	sub.ftz.f32 	%f12, %f11, %f8;
	st.f32 	[%rd4+0], %f12;
	ld.f32 	%f6, [%rd2+16];
$Lt_16_3586:
	// Pair 1 (records 1 and 2) -> out[1]. Same test and formula as pair 0.
	ld.f32 	%f13, [%rd2+28];
	setp.gt.ftz.f32 	%p4, %f13, %f4;
	setp.le.ftz.f32 	%p5, %f6, %f4;
	xor.pred 	%p6, %p4, %p5;
	@%p6 bra 	$Lt_16_4098;
	ld.f32 	%f14, [%rd2+12];
	sub.ftz.f32 	%f15, %f2, %f14;
	ld.f32 	%f16, [%rd2+20];
	sub.ftz.f32 	%f17, %f4, %f6;
	mul.ftz.f32 	%f18, %f16, %f17;
	sub.ftz.f32 	%f19, %f18, %f15;
	st.f32 	[%rd4+4], %f19;
	ld.f32 	%f13, [%rd2+28];
$Lt_16_4098:
	// Pair 2 (records 2 and 3) -> out[2].
	ld.f32 	%f20, [%rd2+40];
	setp.gt.ftz.f32 	%p7, %f20, %f4;
	setp.le.ftz.f32 	%p8, %f13, %f4;
	xor.pred 	%p9, %p7, %p8;
	@%p9 bra 	$Lt_16_4610;
	ld.f32 	%f21, [%rd2+24];
	sub.ftz.f32 	%f22, %f2, %f21;
	ld.f32 	%f23, [%rd2+32];
	sub.ftz.f32 	%f24, %f4, %f13;
	mul.ftz.f32 	%f25, %f23, %f24;
	sub.ftz.f32 	%f26, %f25, %f22;
	st.f32 	[%rd4+8], %f26;
	ld.f32 	%f20, [%rd2+40];
$Lt_16_4610:
	// Pair 3 (records 3 and 4) -> out[3].
	ld.f32 	%f27, [%rd2+52];
	setp.gt.ftz.f32 	%p10, %f27, %f4;
	setp.le.ftz.f32 	%p11, %f20, %f4;
	xor.pred 	%p12, %p10, %p11;
	@%p12 bra 	$Lt_16_5122;
	ld.f32 	%f28, [%rd2+36];
	sub.ftz.f32 	%f29, %f2, %f28;
	ld.f32 	%f30, [%rd2+44];
	sub.ftz.f32 	%f31, %f4, %f20;
	mul.ftz.f32 	%f32, %f30, %f31;
	sub.ftz.f32 	%f33, %f32, %f29;
	st.f32 	[%rd4+12], %f33;
	ld.f32 	%f27, [%rd2+52];
$Lt_16_5122:
	// Pair 4 (records 4 and 5) -> out[4].
	ld.f32 	%f34, [%rd2+64];
	setp.gt.ftz.f32 	%p13, %f34, %f4;
	setp.le.ftz.f32 	%p14, %f27, %f4;
	xor.pred 	%p15, %p13, %p14;
	@%p15 bra 	$Lt_16_5634;
	ld.f32 	%f35, [%rd2+48];
	sub.ftz.f32 	%f36, %f2, %f35;
	ld.f32 	%f37, [%rd2+56];
	sub.ftz.f32 	%f38, %f4, %f27;
	mul.ftz.f32 	%f39, %f37, %f38;
	sub.ftz.f32 	%f40, %f39, %f36;
	st.f32 	[%rd4+16], %f40;
	ld.f32 	%f34, [%rd2+64];
$Lt_16_5634:
	// Pair 5 (records 5 and 6) -> out[5].
	ld.f32 	%f41, [%rd2+76];
	setp.gt.ftz.f32 	%p16, %f41, %f4;
	setp.le.ftz.f32 	%p17, %f34, %f4;
	xor.pred 	%p18, %p16, %p17;
	@%p18 bra 	$Lt_16_6146;
	ld.f32 	%f42, [%rd2+60];
	sub.ftz.f32 	%f43, %f2, %f42;
	ld.f32 	%f44, [%rd2+68];
	sub.ftz.f32 	%f45, %f4, %f34;
	mul.ftz.f32 	%f46, %f44, %f45;
	sub.ftz.f32 	%f47, %f46, %f43;
	st.f32 	[%rd4+20], %f47;
	ld.f32 	%f41, [%rd2+76];
$Lt_16_6146:
	// Pair 6 (records 6 and 7) -> out[6].
	ld.f32 	%f48, [%rd2+88];
	setp.gt.ftz.f32 	%p19, %f48, %f4;
	setp.le.ftz.f32 	%p20, %f41, %f4;
	xor.pred 	%p21, %p19, %p20;
	@%p21 bra 	$Lt_16_6658;
	ld.f32 	%f49, [%rd2+72];
	sub.ftz.f32 	%f50, %f2, %f49;
	ld.f32 	%f51, [%rd2+80];
	sub.ftz.f32 	%f52, %f4, %f41;
	mul.ftz.f32 	%f53, %f51, %f52;
	sub.ftz.f32 	%f54, %f53, %f50;
	st.f32 	[%rd4+24], %f54;
	ld.f32 	%f48, [%rd2+88];
$Lt_16_6658:
	// Pair 7 (records 7 and 8) -> out[7].
	ld.f32 	%f55, [%rd2+100];
	setp.gt.ftz.f32 	%p22, %f55, %f4;
	setp.le.ftz.f32 	%p23, %f48, %f4;
	xor.pred 	%p24, %p22, %p23;
	@%p24 bra 	$Lt_16_7170;
	ld.f32 	%f56, [%rd2+84];
	sub.ftz.f32 	%f57, %f2, %f56;
	ld.f32 	%f58, [%rd2+92];
	sub.ftz.f32 	%f59, %f4, %f48;
	mul.ftz.f32 	%f60, %f58, %f59;
	sub.ftz.f32 	%f61, %f60, %f57;
	st.f32 	[%rd4+28], %f61;
	ld.f32 	%f55, [%rd2+100];
$Lt_16_7170:
	// Pair 8 (records 8 and 9) -> out[8].
	ld.f32 	%f62, [%rd2+112];
	setp.gt.ftz.f32 	%p25, %f62, %f4;
	setp.le.ftz.f32 	%p26, %f55, %f4;
	xor.pred 	%p27, %p25, %p26;
	@%p27 bra 	$Lt_16_7682;
	ld.f32 	%f63, [%rd2+96];
	sub.ftz.f32 	%f64, %f2, %f63;
	ld.f32 	%f65, [%rd2+104];
	sub.ftz.f32 	%f66, %f4, %f55;
	mul.ftz.f32 	%f67, %f65, %f66;
	sub.ftz.f32 	%f68, %f67, %f64;
	st.f32 	[%rd4+32], %f68;
	ld.f32 	%f62, [%rd2+112];
$Lt_16_7682:
	// Pair 9 (records 9 and 10) -> out[9].
	ld.f32 	%f69, [%rd2+124];
	setp.gt.ftz.f32 	%p28, %f69, %f4;
	setp.le.ftz.f32 	%p29, %f62, %f4;
	xor.pred 	%p30, %p28, %p29;
	@%p30 bra 	$Lt_16_8194;
	ld.f32 	%f70, [%rd2+108];
	sub.ftz.f32 	%f71, %f2, %f70;
	ld.f32 	%f72, [%rd2+116];
	sub.ftz.f32 	%f73, %f4, %f62;
	mul.ftz.f32 	%f74, %f72, %f73;
	sub.ftz.f32 	%f75, %f74, %f71;
	st.f32 	[%rd4+36], %f75;
	ld.f32 	%f69, [%rd2+124];
$Lt_16_8194:
	// Pair 10 (records 10 and 11) -> out[10].
	ld.f32 	%f76, [%rd2+136];
	setp.gt.ftz.f32 	%p31, %f76, %f4;
	setp.le.ftz.f32 	%p32, %f69, %f4;
	xor.pred 	%p33, %p31, %p32;
	@%p33 bra 	$Lt_16_8706;
	ld.f32 	%f77, [%rd2+120];
	sub.ftz.f32 	%f78, %f2, %f77;
	ld.f32 	%f79, [%rd2+128];
	sub.ftz.f32 	%f80, %f4, %f69;
	mul.ftz.f32 	%f81, %f79, %f80;
	sub.ftz.f32 	%f82, %f81, %f78;
	st.f32 	[%rd4+40], %f82;
	ld.f32 	%f76, [%rd2+136];
$Lt_16_8706:
	// Pair 11 (records 11 and 12) -> out[11].
	ld.f32 	%f83, [%rd2+148];
	setp.gt.ftz.f32 	%p34, %f83, %f4;
	setp.le.ftz.f32 	%p35, %f76, %f4;
	xor.pred 	%p36, %p34, %p35;
	@%p36 bra 	$Lt_16_9218;
	ld.f32 	%f84, [%rd2+132];
	sub.ftz.f32 	%f85, %f2, %f84;
	ld.f32 	%f86, [%rd2+140];
	sub.ftz.f32 	%f87, %f4, %f76;
	mul.ftz.f32 	%f88, %f86, %f87;
	sub.ftz.f32 	%f89, %f88, %f85;
	st.f32 	[%rd4+44], %f89;
	ld.f32 	%f83, [%rd2+148];
$Lt_16_9218:
	// Pair 12 (records 12 and 13) -> out[12].
	ld.f32 	%f90, [%rd2+160];
	setp.gt.ftz.f32 	%p37, %f90, %f4;
	setp.le.ftz.f32 	%p38, %f83, %f4;
	xor.pred 	%p39, %p37, %p38;
	@%p39 bra 	$Lt_16_9730;
	ld.f32 	%f91, [%rd2+144];
	sub.ftz.f32 	%f92, %f2, %f91;
	ld.f32 	%f93, [%rd2+152];
	sub.ftz.f32 	%f94, %f4, %f83;
	mul.ftz.f32 	%f95, %f93, %f94;
	sub.ftz.f32 	%f96, %f95, %f92;
	st.f32 	[%rd4+48], %f96;
	ld.f32 	%f90, [%rd2+160];
$Lt_16_9730:
	// Pair 13 (records 13 and 14) -> out[13].
	ld.f32 	%f97, [%rd2+172];
	setp.gt.ftz.f32 	%p40, %f97, %f4;
	setp.le.ftz.f32 	%p41, %f90, %f4;
	xor.pred 	%p42, %p40, %p41;
	@%p42 bra 	$Lt_16_10242;
	ld.f32 	%f98, [%rd2+156];
	sub.ftz.f32 	%f99, %f2, %f98;
	ld.f32 	%f100, [%rd2+164];
	sub.ftz.f32 	%f101, %f4, %f90;
	mul.ftz.f32 	%f102, %f100, %f101;
	sub.ftz.f32 	%f103, %f102, %f99;
	st.f32 	[%rd4+52], %f103;
	ld.f32 	%f97, [%rd2+172];
$Lt_16_10242:
	// Pair 14 (records 14 and 15) -> out[14].
	ld.f32 	%f104, [%rd2+184];
	setp.gt.ftz.f32 	%p43, %f104, %f4;
	setp.le.ftz.f32 	%p44, %f97, %f4;
	xor.pred 	%p45, %p43, %p44;
	@%p45 bra 	$Lt_16_10754;
	ld.f32 	%f105, [%rd2+168];
	sub.ftz.f32 	%f106, %f2, %f105;
	ld.f32 	%f107, [%rd2+176];
	sub.ftz.f32 	%f108, %f4, %f97;
	mul.ftz.f32 	%f109, %f107, %f108;
	sub.ftz.f32 	%f110, %f109, %f106;
	st.f32 	[%rd4+56], %f110;
	ld.f32 	%f104, [%rd2+184];
$Lt_16_10754:
	// Pair 15 (records 15 and 16) -> out[15]. Last pair, so no reload follows
	// the store.
	ld.f32 	%f111, [%rd2+196];
	setp.gt.ftz.f32 	%p46, %f111, %f4;
	setp.le.ftz.f32 	%p47, %f104, %f4;
	xor.pred 	%p48, %p46, %p47;
	@%p48 bra 	$Lt_16_11266;
	ld.f32 	%f112, [%rd2+180];
	sub.ftz.f32 	%f113, %f2, %f112;
	ld.f32 	%f114, [%rd2+188];
	sub.ftz.f32 	%f115, %f4, %f104;
	mul.ftz.f32 	%f116, %f114, %f115;
	sub.ftz.f32 	%f117, %f116, %f113;
	st.f32 	[%rd4+60], %f117;
$Lt_16_11266:
	.loc	18	43	0
	ret;
$LDWend__Z13PointDistanceILi16EEv6float2P6float3Pf:
	} // _Z13PointDistanceILi16EEv6float2P6float3Pf

	// void PointDistance<8>(float2, float3*, float*)   (signature as encoded in the mangled name)
	//
	// Parameters:
	//   parmf1 : 8-byte value read as two f32; +0 is copied to %f2, +4 to %f4.
	//   parmf2 : u64 address (copied to %rd2) of an array of 12-byte records, each read as
	//            f32 fields at +0, +4 and +8.
	//   parmf3 : u64 address (copied to %rd4) of the f32 output array.
	//
	// The body is fully unrolled for i = 0..7. With rec[i] located at %rd2 + 12*i:
	//     a = f32 at rec[i]+4,  b = f32 at rec[i+1]+4
	//     if ((b > %f4) xor (a <= %f4))  -> skip, out[i] is left unwritten
	//     else out[i] = (f32 at rec[i]+8) * (%f4 - a) - (%f2 - (f32 at rec[i]+0))
	// All compares and arithmetic use the .ftz forms.
	// The +4 field of nine records is read (byte offsets 4..100); at most eight f32
	// results are stored (byte offsets 0..28 from %rd4).
	//
	// After every store the next record's +4 field is loaded again instead of being reused.
	// NOTE(review): presumably because ld/st carry no state-space qualifier and the store
	// through %rd4 could alias the record array -- keep the load/store order as is.
	.visible .func _Z13PointDistanceILi8EEv6float2P6float3Pf (.param .align 8 .b8 __cudaparmf1__Z13PointDistanceILi8EEv6float2P6float3Pf[8], .param .u64 __cudaparmf2__Z13PointDistanceILi8EEv6float2P6float3Pf, .param .u64 __cudaparmf3__Z13PointDistanceILi8EEv6float2P6float3Pf)
	{
	.reg .u64 %rd<6>;
	.reg .f32 %f<63>;
	.reg .pred %p<26>;
	.loc	18	34	0
$LDWbegin__Z13PointDistanceILi8EEv6float2P6float3Pf:
	// Unpack the parameters: %f2/%f4 = the two f32 halves of parmf1,
	// %rd2 = record array address, %rd4 = output array address.
	ld.param.f32 	%f1, [__cudaparmf1__Z13PointDistanceILi8EEv6float2P6float3Pf+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z13PointDistanceILi8EEv6float2P6float3Pf+4];
	mov.f32 	%f4, %f3;
	ld.param.u64 	%rd1, [__cudaparmf2__Z13PointDistanceILi8EEv6float2P6float3Pf];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf3__Z13PointDistanceILi8EEv6float2P6float3Pf];
	mov.s64 	%rd4, %rd3;
	.loc	18	24	0
	// i = 0: a = rec[0]+4 (%f5), b = rec[1]+4 (%f6); skip when (b > %f4) xor (a <= %f4).
	ld.f32 	%f5, [%rd2+4];
	ld.f32 	%f6, [%rd2+16];
	setp.gt.ftz.f32 	%p1, %f6, %f4;
	setp.le.ftz.f32 	%p2, %f5, %f4;
	xor.pred 	%p3, %p1, %p2;
	@%p3 bra 	$Lt_17_3586;
	.loc	18	41	0
	// out[0] = rec[0]+8 * (%f4 - a) - (%f2 - rec[0]+0)
	ld.f32 	%f7, [%rd2+0];
	sub.ftz.f32 	%f8, %f2, %f7;
	ld.f32 	%f9, [%rd2+8];
	sub.ftz.f32 	%f10, %f4, %f5;
	mul.ftz.f32 	%f11, %f9, %f10;
	sub.ftz.f32 	%f12, %f11, %f8;
	st.f32 	[%rd4+0], %f12;
	// Re-read rec[1]+4 after the store (see header note).
	ld.f32 	%f6, [%rd2+16];
$Lt_17_3586:
	// i = 1: a = rec[1]+4 (%f6), b = rec[2]+4 (%f13).
	ld.f32 	%f13, [%rd2+28];
	setp.gt.ftz.f32 	%p4, %f13, %f4;
	setp.le.ftz.f32 	%p5, %f6, %f4;
	xor.pred 	%p6, %p4, %p5;
	@%p6 bra 	$Lt_17_4098;
	// out[1] = rec[1]+8 * (%f4 - a) - (%f2 - rec[1]+0)
	ld.f32 	%f14, [%rd2+12];
	sub.ftz.f32 	%f15, %f2, %f14;
	ld.f32 	%f16, [%rd2+20];
	sub.ftz.f32 	%f17, %f4, %f6;
	mul.ftz.f32 	%f18, %f16, %f17;
	sub.ftz.f32 	%f19, %f18, %f15;
	st.f32 	[%rd4+4], %f19;
	ld.f32 	%f13, [%rd2+28];
$Lt_17_4098:
	// i = 2: a = rec[2]+4 (%f13), b = rec[3]+4 (%f20).
	ld.f32 	%f20, [%rd2+40];
	setp.gt.ftz.f32 	%p7, %f20, %f4;
	setp.le.ftz.f32 	%p8, %f13, %f4;
	xor.pred 	%p9, %p7, %p8;
	@%p9 bra 	$Lt_17_4610;
	// out[2] = rec[2]+8 * (%f4 - a) - (%f2 - rec[2]+0)
	ld.f32 	%f21, [%rd2+24];
	sub.ftz.f32 	%f22, %f2, %f21;
	ld.f32 	%f23, [%rd2+32];
	sub.ftz.f32 	%f24, %f4, %f13;
	mul.ftz.f32 	%f25, %f23, %f24;
	sub.ftz.f32 	%f26, %f25, %f22;
	st.f32 	[%rd4+8], %f26;
	ld.f32 	%f20, [%rd2+40];
$Lt_17_4610:
	// i = 3: a = rec[3]+4 (%f20), b = rec[4]+4 (%f27).
	ld.f32 	%f27, [%rd2+52];
	setp.gt.ftz.f32 	%p10, %f27, %f4;
	setp.le.ftz.f32 	%p11, %f20, %f4;
	xor.pred 	%p12, %p10, %p11;
	@%p12 bra 	$Lt_17_5122;
	// out[3] = rec[3]+8 * (%f4 - a) - (%f2 - rec[3]+0)
	ld.f32 	%f28, [%rd2+36];
	sub.ftz.f32 	%f29, %f2, %f28;
	ld.f32 	%f30, [%rd2+44];
	sub.ftz.f32 	%f31, %f4, %f20;
	mul.ftz.f32 	%f32, %f30, %f31;
	sub.ftz.f32 	%f33, %f32, %f29;
	st.f32 	[%rd4+12], %f33;
	ld.f32 	%f27, [%rd2+52];
$Lt_17_5122:
	// i = 4: a = rec[4]+4 (%f27), b = rec[5]+4 (%f34).
	ld.f32 	%f34, [%rd2+64];
	setp.gt.ftz.f32 	%p13, %f34, %f4;
	setp.le.ftz.f32 	%p14, %f27, %f4;
	xor.pred 	%p15, %p13, %p14;
	@%p15 bra 	$Lt_17_5634;
	// out[4] = rec[4]+8 * (%f4 - a) - (%f2 - rec[4]+0)
	ld.f32 	%f35, [%rd2+48];
	sub.ftz.f32 	%f36, %f2, %f35;
	ld.f32 	%f37, [%rd2+56];
	sub.ftz.f32 	%f38, %f4, %f27;
	mul.ftz.f32 	%f39, %f37, %f38;
	sub.ftz.f32 	%f40, %f39, %f36;
	st.f32 	[%rd4+16], %f40;
	ld.f32 	%f34, [%rd2+64];
$Lt_17_5634:
	// i = 5: a = rec[5]+4 (%f34), b = rec[6]+4 (%f41).
	ld.f32 	%f41, [%rd2+76];
	setp.gt.ftz.f32 	%p16, %f41, %f4;
	setp.le.ftz.f32 	%p17, %f34, %f4;
	xor.pred 	%p18, %p16, %p17;
	@%p18 bra 	$Lt_17_6146;
	// out[5] = rec[5]+8 * (%f4 - a) - (%f2 - rec[5]+0)
	ld.f32 	%f42, [%rd2+60];
	sub.ftz.f32 	%f43, %f2, %f42;
	ld.f32 	%f44, [%rd2+68];
	sub.ftz.f32 	%f45, %f4, %f34;
	mul.ftz.f32 	%f46, %f44, %f45;
	sub.ftz.f32 	%f47, %f46, %f43;
	st.f32 	[%rd4+20], %f47;
	ld.f32 	%f41, [%rd2+76];
$Lt_17_6146:
	// i = 6: a = rec[6]+4 (%f41), b = rec[7]+4 (%f48).
	ld.f32 	%f48, [%rd2+88];
	setp.gt.ftz.f32 	%p19, %f48, %f4;
	setp.le.ftz.f32 	%p20, %f41, %f4;
	xor.pred 	%p21, %p19, %p20;
	@%p21 bra 	$Lt_17_6658;
	// out[6] = rec[6]+8 * (%f4 - a) - (%f2 - rec[6]+0)
	ld.f32 	%f49, [%rd2+72];
	sub.ftz.f32 	%f50, %f2, %f49;
	ld.f32 	%f51, [%rd2+80];
	sub.ftz.f32 	%f52, %f4, %f41;
	mul.ftz.f32 	%f53, %f51, %f52;
	sub.ftz.f32 	%f54, %f53, %f50;
	st.f32 	[%rd4+24], %f54;
	ld.f32 	%f48, [%rd2+88];
$Lt_17_6658:
	// i = 7 (last): a = rec[7]+4 (%f48), b = rec[8]+4 (%f55).
	ld.f32 	%f55, [%rd2+100];
	setp.gt.ftz.f32 	%p22, %f55, %f4;
	setp.le.ftz.f32 	%p23, %f48, %f4;
	xor.pred 	%p24, %p22, %p23;
	@%p24 bra 	$Lt_17_7170;
	// out[7] = rec[7]+8 * (%f4 - a) - (%f2 - rec[7]+0); no reload needed after the final store.
	ld.f32 	%f56, [%rd2+84];
	sub.ftz.f32 	%f57, %f2, %f56;
	ld.f32 	%f58, [%rd2+92];
	sub.ftz.f32 	%f59, %f4, %f48;
	mul.ftz.f32 	%f60, %f58, %f59;
	sub.ftz.f32 	%f61, %f60, %f57;
	st.f32 	[%rd4+28], %f61;
$Lt_17_7170:
	.loc	18	43	0
	ret;
$LDWend__Z13PointDistanceILi8EEv6float2P6float3Pf:
	} // _Z13PointDistanceILi8EEv6float2P6float3Pf

	// void PointDistance<4>(float2, float3*, float*)   (signature as encoded in the mangled name)
	//
	// Parameters:
	//   parmf1 : 8-byte value read as two f32; +0 is copied to %f2, +4 to %f4.
	//   parmf2 : u64 address (copied to %rd2) of an array of 12-byte records, each read as
	//            f32 fields at +0, +4 and +8.
	//   parmf3 : u64 address (copied to %rd4) of the f32 output array.
	//
	// The body is fully unrolled for i = 0..3. With rec[i] located at %rd2 + 12*i:
	//     a = f32 at rec[i]+4,  b = f32 at rec[i+1]+4
	//     if ((b > %f4) xor (a <= %f4))  -> skip, out[i] is left unwritten
	//     else out[i] = (f32 at rec[i]+8) * (%f4 - a) - (%f2 - (f32 at rec[i]+0))
	// All compares and arithmetic use the .ftz forms.
	// The +4 field of five records is read (byte offsets 4..52); at most four f32
	// results are stored (byte offsets 0..12 from %rd4).
	//
	// After every store the next record's +4 field is loaded again instead of being reused.
	// NOTE(review): presumably because ld/st carry no state-space qualifier and the store
	// through %rd4 could alias the record array -- keep the load/store order as is.
	.visible .func _Z13PointDistanceILi4EEv6float2P6float3Pf (.param .align 8 .b8 __cudaparmf1__Z13PointDistanceILi4EEv6float2P6float3Pf[8], .param .u64 __cudaparmf2__Z13PointDistanceILi4EEv6float2P6float3Pf, .param .u64 __cudaparmf3__Z13PointDistanceILi4EEv6float2P6float3Pf)
	{
	.reg .u64 %rd<6>;
	.reg .f32 %f<35>;
	.reg .pred %p<14>;
	.loc	18	34	0
$LDWbegin__Z13PointDistanceILi4EEv6float2P6float3Pf:
	// Unpack the parameters: %f2/%f4 = the two f32 halves of parmf1,
	// %rd2 = record array address, %rd4 = output array address.
	ld.param.f32 	%f1, [__cudaparmf1__Z13PointDistanceILi4EEv6float2P6float3Pf+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z13PointDistanceILi4EEv6float2P6float3Pf+4];
	mov.f32 	%f4, %f3;
	ld.param.u64 	%rd1, [__cudaparmf2__Z13PointDistanceILi4EEv6float2P6float3Pf];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf3__Z13PointDistanceILi4EEv6float2P6float3Pf];
	mov.s64 	%rd4, %rd3;
	.loc	18	24	0
	// i = 0: a = rec[0]+4 (%f5), b = rec[1]+4 (%f6); skip when (b > %f4) xor (a <= %f4).
	ld.f32 	%f5, [%rd2+4];
	ld.f32 	%f6, [%rd2+16];
	setp.gt.ftz.f32 	%p1, %f6, %f4;
	setp.le.ftz.f32 	%p2, %f5, %f4;
	xor.pred 	%p3, %p1, %p2;
	@%p3 bra 	$Lt_18_3586;
	.loc	18	41	0
	// out[0] = rec[0]+8 * (%f4 - a) - (%f2 - rec[0]+0)
	ld.f32 	%f7, [%rd2+0];
	sub.ftz.f32 	%f8, %f2, %f7;
	ld.f32 	%f9, [%rd2+8];
	sub.ftz.f32 	%f10, %f4, %f5;
	mul.ftz.f32 	%f11, %f9, %f10;
	sub.ftz.f32 	%f12, %f11, %f8;
	st.f32 	[%rd4+0], %f12;
	// Re-read rec[1]+4 after the store (see header note).
	ld.f32 	%f6, [%rd2+16];
$Lt_18_3586:
	// i = 1: a = rec[1]+4 (%f6), b = rec[2]+4 (%f13).
	ld.f32 	%f13, [%rd2+28];
	setp.gt.ftz.f32 	%p4, %f13, %f4;
	setp.le.ftz.f32 	%p5, %f6, %f4;
	xor.pred 	%p6, %p4, %p5;
	@%p6 bra 	$Lt_18_4098;
	// out[1] = rec[1]+8 * (%f4 - a) - (%f2 - rec[1]+0)
	ld.f32 	%f14, [%rd2+12];
	sub.ftz.f32 	%f15, %f2, %f14;
	ld.f32 	%f16, [%rd2+20];
	sub.ftz.f32 	%f17, %f4, %f6;
	mul.ftz.f32 	%f18, %f16, %f17;
	sub.ftz.f32 	%f19, %f18, %f15;
	st.f32 	[%rd4+4], %f19;
	ld.f32 	%f13, [%rd2+28];
$Lt_18_4098:
	// i = 2: a = rec[2]+4 (%f13), b = rec[3]+4 (%f20).
	ld.f32 	%f20, [%rd2+40];
	setp.gt.ftz.f32 	%p7, %f20, %f4;
	setp.le.ftz.f32 	%p8, %f13, %f4;
	xor.pred 	%p9, %p7, %p8;
	@%p9 bra 	$Lt_18_4610;
	// out[2] = rec[2]+8 * (%f4 - a) - (%f2 - rec[2]+0)
	ld.f32 	%f21, [%rd2+24];
	sub.ftz.f32 	%f22, %f2, %f21;
	ld.f32 	%f23, [%rd2+32];
	sub.ftz.f32 	%f24, %f4, %f13;
	mul.ftz.f32 	%f25, %f23, %f24;
	sub.ftz.f32 	%f26, %f25, %f22;
	st.f32 	[%rd4+8], %f26;
	ld.f32 	%f20, [%rd2+40];
$Lt_18_4610:
	// i = 3 (last): a = rec[3]+4 (%f20), b = rec[4]+4 (%f27).
	ld.f32 	%f27, [%rd2+52];
	setp.gt.ftz.f32 	%p10, %f27, %f4;
	setp.le.ftz.f32 	%p11, %f20, %f4;
	xor.pred 	%p12, %p10, %p11;
	@%p12 bra 	$Lt_18_5122;
	// out[3] = rec[3]+8 * (%f4 - a) - (%f2 - rec[3]+0); no reload needed after the final store.
	ld.f32 	%f28, [%rd2+36];
	sub.ftz.f32 	%f29, %f2, %f28;
	ld.f32 	%f30, [%rd2+44];
	sub.ftz.f32 	%f31, %f4, %f20;
	mul.ftz.f32 	%f32, %f30, %f31;
	sub.ftz.f32 	%f33, %f32, %f29;
	st.f32 	[%rd4+12], %f33;
$Lt_18_5122:
	.loc	18	43	0
	ret;
$LDWend__Z13PointDistanceILi4EEv6float2P6float3Pf:
	} // _Z13PointDistanceILi4EEv6float2P6float3Pf
	// 204-byte, 8-byte-aligned buffer in the .global state space.
	// cuda_kernel_bitmask (below) loads f32 values from it at byte offsets 0..196 using a
	// 12-byte record pattern (+0, +4, +8 per record); 204 bytes holds 17 such records.
	// NOTE(review): presumably an array of 17 float3 filled in by the host -- confirm.
	.global .align 8 .b8 polyPoints[204];

	.entry cuda_kernel_bitmask (
		.param .u64 __cudaparm_cuda_kernel_bitmask_dstFrame,
		.param .s32 __cudaparm_cuda_kernel_bitmask_width,
		.param .s32 __cudaparm_cuda_kernel_bitmask_height,
		.param .s32 __cudaparm_cuda_kernel_bitmask_dstPitch,
		.param .s32 __cudaparm_cuda_kernel_bitmask_count,
		.param .u32 __cudaparm_cuda_kernel_bitmask_inDeviceFormat)
	{
	.reg .u32 %r<235>;
	.reg .u64 %rd<44>;
	.reg .f32 %f<721>;
	.reg .pred %p<320>;
	// __cuda_local_var_91085_8_non_const_dist = 208
	// __cuda_local_var_91085_8_non_const_dist = 176
	// __cuda_local_var_91085_8_non_const_dist = 112
	// __cuda_local_var_91085_8_non_const_dist = 96
	// __cuda_local_var_91085_8_non_const_dist = 64
	// __cuda_local_var_91085_8_non_const_dist = 0
	.loc	18	86	0
$LDWbegin_cuda_kernel_bitmask:
	.loc	18	89	0
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	mul.lo.s32 	%r11, %r8, 32;
	ld.param.s32 	%r12, [__cudaparm_cuda_kernel_bitmask_width];
	set.ge.u32.s32 	%r13, %r11, %r12;
	neg.s32 	%r14, %r13;
	ld.param.s32 	%r15, [__cudaparm_cuda_kernel_bitmask_height];
	set.le.u32.s32 	%r16, %r15, %r10;
	neg.s32 	%r17, %r16;
	or.b32 	%r18, %r14, %r17;
	mov.u32 	%r19, 0;
	setp.eq.s32 	%p1, %r18, %r19;
	@%p1 bra 	$Lt_19_91906;
	bra.uni 	$LBB318_cuda_kernel_bitmask;
$Lt_19_91906:
	.loc	18	90	0
	ld.param.s32 	%r20, [__cudaparm_cuda_kernel_bitmask_count];
	mov.s32 	%r21, 16;
	setp.eq.s32 	%p2, %r20, %r21;
	ld.param.s32 	%r22, [__cudaparm_cuda_kernel_bitmask_inDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p3, %r22, %r23;
	@%p3 bra 	$Lt_19_92674;
	.loc	18	98	0
	@!%p2 bra 	$Lt_19_93186;
	.loc	18	100	0
	sub.s32 	%r24, %r12, 32;
	setp.gt.s32 	%p4, %r11, %r24;
	selp.s32 	%r25, %r24, %r11, %p4;
	ld.param.u64 	%rd1, [__cudaparm_cuda_kernel_bitmask_dstFrame];
	ld.param.s32 	%r26, [__cudaparm_cuda_kernel_bitmask_dstPitch];
	mul.lo.s32 	%r27, %r26, %r10;
	add.s32 	%r28, %r25, %r27;
	cvt.s64.s32 	%rd2, %r28;
	mul.wide.s32 	%rd3, %r28, 8;
	add.u64 	%rd4, %rd1, %rd3;
	.loc	18	51	0
	mov.f32 	%f1, 0fbf800000;     	// -1
	mov.f32 	%f2, %f1;
	mov.f32 	%f3, 0fbf800000;     	// -1
	mov.f32 	%f4, %f3;
	mov.f32 	%f5, 0fbf800000;     	// -1
	mov.f32 	%f6, %f5;
	mov.f32 	%f7, 0fbf800000;     	// -1
	mov.f32 	%f8, %f7;
	mov.f32 	%f9, 0fbf800000;     	// -1
	mov.f32 	%f10, %f9;
	mov.f32 	%f11, 0fbf800000;    	// -1
	mov.f32 	%f12, %f11;
	mov.f32 	%f13, 0fbf800000;    	// -1
	mov.f32 	%f14, %f13;
	mov.f32 	%f15, 0fbf800000;    	// -1
	mov.f32 	%f16, %f15;
	mov.f32 	%f17, 0fbf800000;    	// -1
	mov.f32 	%f18, %f17;
	mov.f32 	%f19, 0fbf800000;    	// -1
	mov.f32 	%f20, %f19;
	mov.f32 	%f21, 0fbf800000;    	// -1
	mov.f32 	%f22, %f21;
	mov.f32 	%f23, 0fbf800000;    	// -1
	mov.f32 	%f24, %f23;
	mov.f32 	%f25, 0fbf800000;    	// -1
	mov.f32 	%f26, %f25;
	mov.f32 	%f27, 0fbf800000;    	// -1
	mov.f32 	%f28, %f27;
	mov.f32 	%f29, 0fbf800000;    	// -1
	mov.f32 	%f30, %f29;
	mov.f32 	%f31, 0fbf800000;    	// -1
	mov.f32 	%f32, %f31;
	.loc	18	24	0
	cvt.rn.f32.s32 	%f33, %r10;
	ld.global.f32 	%f34, [polyPoints+4];
	ld.global.f32 	%f35, [polyPoints+16];
	setp.ge.ftz.f32 	%p5, %f33, %f34;
	setp.lt.ftz.f32 	%p6, %f33, %f35;
	xor.pred 	%p7, %p5, %p6;
	@%p7 bra 	$Lt_19_93442;
	.loc	18	41	0
	cvt.rn.f32.s32 	%f36, %r25;
	ld.global.f32 	%f37, [polyPoints+0];
	sub.ftz.f32 	%f38, %f36, %f37;
	ld.global.f32 	%f39, [polyPoints+8];
	sub.ftz.f32 	%f40, %f33, %f34;
	mul.ftz.f32 	%f41, %f39, %f40;
	sub.ftz.f32 	%f42, %f41, %f38;
	mov.f32 	%f2, %f42;
$Lt_19_93442:
	ld.global.f32 	%f43, [polyPoints+28];
	setp.ge.ftz.f32 	%p8, %f33, %f35;
	setp.lt.ftz.f32 	%p9, %f33, %f43;
	xor.pred 	%p10, %p8, %p9;
	@%p10 bra 	$Lt_19_93954;
	cvt.rn.f32.s32 	%f44, %r25;
	ld.global.f32 	%f45, [polyPoints+12];
	sub.ftz.f32 	%f46, %f44, %f45;
	ld.global.f32 	%f47, [polyPoints+20];
	sub.ftz.f32 	%f48, %f33, %f35;
	mul.ftz.f32 	%f49, %f47, %f48;
	sub.ftz.f32 	%f50, %f49, %f46;
	mov.f32 	%f4, %f50;
$Lt_19_93954:
	ld.global.f32 	%f51, [polyPoints+40];
	setp.ge.ftz.f32 	%p11, %f33, %f43;
	setp.lt.ftz.f32 	%p12, %f33, %f51;
	xor.pred 	%p13, %p11, %p12;
	@%p13 bra 	$Lt_19_94466;
	cvt.rn.f32.s32 	%f52, %r25;
	ld.global.f32 	%f53, [polyPoints+24];
	sub.ftz.f32 	%f54, %f52, %f53;
	ld.global.f32 	%f55, [polyPoints+32];
	sub.ftz.f32 	%f56, %f33, %f43;
	mul.ftz.f32 	%f57, %f55, %f56;
	sub.ftz.f32 	%f58, %f57, %f54;
	mov.f32 	%f6, %f58;
$Lt_19_94466:
	ld.global.f32 	%f59, [polyPoints+52];
	setp.ge.ftz.f32 	%p14, %f33, %f51;
	setp.lt.ftz.f32 	%p15, %f33, %f59;
	xor.pred 	%p16, %p14, %p15;
	@%p16 bra 	$Lt_19_94978;
	cvt.rn.f32.s32 	%f60, %r25;
	ld.global.f32 	%f61, [polyPoints+36];
	sub.ftz.f32 	%f62, %f60, %f61;
	ld.global.f32 	%f63, [polyPoints+44];
	sub.ftz.f32 	%f64, %f33, %f51;
	mul.ftz.f32 	%f65, %f63, %f64;
	sub.ftz.f32 	%f66, %f65, %f62;
	mov.f32 	%f8, %f66;
$Lt_19_94978:
	ld.global.f32 	%f67, [polyPoints+64];
	setp.ge.ftz.f32 	%p17, %f33, %f59;
	setp.lt.ftz.f32 	%p18, %f33, %f67;
	xor.pred 	%p19, %p17, %p18;
	@%p19 bra 	$Lt_19_95490;
	cvt.rn.f32.s32 	%f68, %r25;
	ld.global.f32 	%f69, [polyPoints+48];
	sub.ftz.f32 	%f70, %f68, %f69;
	ld.global.f32 	%f71, [polyPoints+56];
	sub.ftz.f32 	%f72, %f33, %f59;
	mul.ftz.f32 	%f73, %f71, %f72;
	sub.ftz.f32 	%f74, %f73, %f70;
	mov.f32 	%f10, %f74;
$Lt_19_95490:
	ld.global.f32 	%f75, [polyPoints+76];
	setp.ge.ftz.f32 	%p20, %f33, %f67;
	setp.lt.ftz.f32 	%p21, %f33, %f75;
	xor.pred 	%p22, %p20, %p21;
	@%p22 bra 	$Lt_19_96002;
	cvt.rn.f32.s32 	%f76, %r25;
	ld.global.f32 	%f77, [polyPoints+60];
	sub.ftz.f32 	%f78, %f76, %f77;
	ld.global.f32 	%f79, [polyPoints+68];
	sub.ftz.f32 	%f80, %f33, %f67;
	mul.ftz.f32 	%f81, %f79, %f80;
	sub.ftz.f32 	%f82, %f81, %f78;
	mov.f32 	%f12, %f82;
$Lt_19_96002:
	ld.global.f32 	%f83, [polyPoints+88];
	setp.ge.ftz.f32 	%p23, %f33, %f75;
	setp.lt.ftz.f32 	%p24, %f33, %f83;
	xor.pred 	%p25, %p23, %p24;
	@%p25 bra 	$Lt_19_96514;
	cvt.rn.f32.s32 	%f84, %r25;
	ld.global.f32 	%f85, [polyPoints+72];
	sub.ftz.f32 	%f86, %f84, %f85;
	ld.global.f32 	%f87, [polyPoints+80];
	sub.ftz.f32 	%f88, %f33, %f75;
	mul.ftz.f32 	%f89, %f87, %f88;
	sub.ftz.f32 	%f90, %f89, %f86;
	mov.f32 	%f14, %f90;
$Lt_19_96514:
	ld.global.f32 	%f91, [polyPoints+100];
	setp.ge.ftz.f32 	%p26, %f33, %f83;
	setp.lt.ftz.f32 	%p27, %f33, %f91;
	xor.pred 	%p28, %p26, %p27;
	@%p28 bra 	$Lt_19_97026;
	cvt.rn.f32.s32 	%f92, %r25;
	ld.global.f32 	%f93, [polyPoints+84];
	sub.ftz.f32 	%f94, %f92, %f93;
	ld.global.f32 	%f95, [polyPoints+92];
	sub.ftz.f32 	%f96, %f33, %f83;
	mul.ftz.f32 	%f97, %f95, %f96;
	sub.ftz.f32 	%f98, %f97, %f94;
	mov.f32 	%f16, %f98;
$Lt_19_97026:
	ld.global.f32 	%f99, [polyPoints+112];
	setp.ge.ftz.f32 	%p29, %f33, %f91;
	setp.lt.ftz.f32 	%p30, %f33, %f99;
	xor.pred 	%p31, %p29, %p30;
	@%p31 bra 	$Lt_19_97538;
	cvt.rn.f32.s32 	%f100, %r25;
	ld.global.f32 	%f101, [polyPoints+96];
	sub.ftz.f32 	%f102, %f100, %f101;
	ld.global.f32 	%f103, [polyPoints+104];
	sub.ftz.f32 	%f104, %f33, %f91;
	mul.ftz.f32 	%f105, %f103, %f104;
	sub.ftz.f32 	%f106, %f105, %f102;
	mov.f32 	%f18, %f106;
$Lt_19_97538:
	ld.global.f32 	%f107, [polyPoints+124];
	setp.ge.ftz.f32 	%p32, %f33, %f99;
	setp.lt.ftz.f32 	%p33, %f33, %f107;
	xor.pred 	%p34, %p32, %p33;
	@%p34 bra 	$Lt_19_98050;
	cvt.rn.f32.s32 	%f108, %r25;
	ld.global.f32 	%f109, [polyPoints+108];
	sub.ftz.f32 	%f110, %f108, %f109;
	ld.global.f32 	%f111, [polyPoints+116];
	sub.ftz.f32 	%f112, %f33, %f99;
	mul.ftz.f32 	%f113, %f111, %f112;
	sub.ftz.f32 	%f114, %f113, %f110;
	mov.f32 	%f20, %f114;
$Lt_19_98050:
	ld.global.f32 	%f115, [polyPoints+136];
	setp.ge.ftz.f32 	%p35, %f33, %f107;
	setp.lt.ftz.f32 	%p36, %f33, %f115;
	xor.pred 	%p37, %p35, %p36;
	@%p37 bra 	$Lt_19_98562;
	cvt.rn.f32.s32 	%f116, %r25;
	ld.global.f32 	%f117, [polyPoints+120];
	sub.ftz.f32 	%f118, %f116, %f117;
	ld.global.f32 	%f119, [polyPoints+128];
	sub.ftz.f32 	%f120, %f33, %f107;
	mul.ftz.f32 	%f121, %f119, %f120;
	sub.ftz.f32 	%f122, %f121, %f118;
	mov.f32 	%f22, %f122;
$Lt_19_98562:
	ld.global.f32 	%f123, [polyPoints+148];
	setp.ge.ftz.f32 	%p38, %f33, %f115;
	setp.lt.ftz.f32 	%p39, %f33, %f123;
	xor.pred 	%p40, %p38, %p39;
	@%p40 bra 	$Lt_19_99074;
	cvt.rn.f32.s32 	%f124, %r25;
	ld.global.f32 	%f125, [polyPoints+132];
	sub.ftz.f32 	%f126, %f124, %f125;
	ld.global.f32 	%f127, [polyPoints+140];
	sub.ftz.f32 	%f128, %f33, %f115;
	mul.ftz.f32 	%f129, %f127, %f128;
	sub.ftz.f32 	%f130, %f129, %f126;
	mov.f32 	%f24, %f130;
$Lt_19_99074:
	ld.global.f32 	%f131, [polyPoints+160];
	setp.ge.ftz.f32 	%p41, %f33, %f123;
	setp.lt.ftz.f32 	%p42, %f33, %f131;
	xor.pred 	%p43, %p41, %p42;
	@%p43 bra 	$Lt_19_99586;
	cvt.rn.f32.s32 	%f132, %r25;
	ld.global.f32 	%f133, [polyPoints+144];
	sub.ftz.f32 	%f134, %f132, %f133;
	ld.global.f32 	%f135, [polyPoints+152];
	sub.ftz.f32 	%f136, %f33, %f123;
	mul.ftz.f32 	%f137, %f135, %f136;
	sub.ftz.f32 	%f138, %f137, %f134;
	mov.f32 	%f26, %f138;
$Lt_19_99586:
	ld.global.f32 	%f139, [polyPoints+172];
	setp.ge.ftz.f32 	%p44, %f33, %f131;
	setp.lt.ftz.f32 	%p45, %f33, %f139;
	xor.pred 	%p46, %p44, %p45;
	@%p46 bra 	$Lt_19_100098;
	cvt.rn.f32.s32 	%f140, %r25;
	ld.global.f32 	%f141, [polyPoints+156];
	sub.ftz.f32 	%f142, %f140, %f141;
	ld.global.f32 	%f143, [polyPoints+164];
	sub.ftz.f32 	%f144, %f33, %f131;
	mul.ftz.f32 	%f145, %f143, %f144;
	sub.ftz.f32 	%f146, %f145, %f142;
	mov.f32 	%f28, %f146;
$Lt_19_100098:
	ld.global.f32 	%f147, [polyPoints+184];
	setp.ge.ftz.f32 	%p47, %f33, %f139;
	setp.lt.ftz.f32 	%p48, %f33, %f147;
	xor.pred 	%p49, %p47, %p48;
	@%p49 bra 	$Lt_19_100610;
	cvt.rn.f32.s32 	%f148, %r25;
	ld.global.f32 	%f149, [polyPoints+168];
	sub.ftz.f32 	%f150, %f148, %f149;
	ld.global.f32 	%f151, [polyPoints+176];
	sub.ftz.f32 	%f152, %f33, %f139;
	mul.ftz.f32 	%f153, %f151, %f152;
	sub.ftz.f32 	%f154, %f153, %f150;
	mov.f32 	%f30, %f154;
$Lt_19_100610:
	setp.ge.ftz.f32 	%p50, %f33, %f147;
	ld.global.f32 	%f155, [polyPoints+196];
	setp.gt.ftz.f32 	%p51, %f155, %f33;
	xor.pred 	%p52, %p50, %p51;
	@%p52 bra 	$Lt_19_101122;
	cvt.rn.f32.s32 	%f156, %r25;
	ld.global.f32 	%f157, [polyPoints+180];
	sub.ftz.f32 	%f158, %f156, %f157;
	ld.global.f32 	%f159, [polyPoints+188];
	sub.ftz.f32 	%f160, %f33, %f147;
	mul.ftz.f32 	%f161, %f159, %f160;
	sub.ftz.f32 	%f162, %f161, %f158;
	mov.f32 	%f32, %f162;
$Lt_19_101122:
	.loc	18	52	0
	mov.s32 	%r29, 0;
$Lt_19_102146:
 //<loop> Loop body line 52, nesting depth: 1, iterations: 4
	cvt.rn.f32.s32 	%f163, %r29;
	mov.f32 	%f164, %f2;
	setp.lt.ftz.f32 	%p53, %f163, %f164;
	@!%p53 bra 	$Lt_19_102658;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	62	0
	mov.f32 	%f165, 0f47c35000;   	// 100000
	min.ftz.f32 	%f166, %f164, %f165;
	mov.s32 	%r30, 1;
	bra.uni 	$Lt_19_102402;
$Lt_19_102658:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.s32 	%r30, 0;
	mov.f32 	%f166, 0f47c35000;   	// 100000
$Lt_19_102402:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.f32 	%f167, %f4;
	setp.lt.ftz.f32 	%p54, %f163, %f167;
	@!%p54 bra 	$Lt_19_102914;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	61	0
	xor.b32 	%r30, %r30, 1;
	.loc	18	62	0
	min.ftz.f32 	%f166, %f166, %f167;
$Lt_19_102914:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.f32 	%f168, %f6;
	setp.lt.ftz.f32 	%p55, %f163, %f168;
	@!%p55 bra 	$Lt_19_103426;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	61	0
	xor.b32 	%r30, %r30, 1;
	.loc	18	62	0
	min.ftz.f32 	%f166, %f166, %f168;
$Lt_19_103426:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.f32 	%f169, %f8;
	setp.lt.ftz.f32 	%p56, %f163, %f169;
	@!%p56 bra 	$Lt_19_103938;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	61	0
	xor.b32 	%r30, %r30, 1;
	.loc	18	62	0
	min.ftz.f32 	%f166, %f166, %f169;
$Lt_19_103938:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.f32 	%f170, %f10;
	setp.lt.ftz.f32 	%p57, %f163, %f170;
	@!%p57 bra 	$Lt_19_104450;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	61	0
	xor.b32 	%r30, %r30, 1;
	.loc	18	62	0
	min.ftz.f32 	%f166, %f166, %f170;
$Lt_19_104450:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.f32 	%f171, %f12;
	setp.lt.ftz.f32 	%p58, %f163, %f171;
	@!%p58 bra 	$Lt_19_104962;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	61	0
	xor.b32 	%r30, %r30, 1;
	.loc	18	62	0
	min.ftz.f32 	%f166, %f166, %f171;
$Lt_19_104962:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.f32 	%f172, %f14;
	setp.lt.ftz.f32 	%p59, %f163, %f172;
	@!%p59 bra 	$Lt_19_105474;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	61	0
	xor.b32 	%r30, %r30, 1;
	.loc	18	62	0
	min.ftz.f32 	%f166, %f166, %f172;
$Lt_19_105474:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.f32 	%f173, %f16;
	setp.lt.ftz.f32 	%p60, %f163, %f173;
	@!%p60 bra 	$Lt_19_105986;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	61	0
	xor.b32 	%r30, %r30, 1;
	.loc	18	62	0
	min.ftz.f32 	%f166, %f166, %f173;
$Lt_19_105986:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.f32 	%f174, %f18;
	setp.lt.ftz.f32 	%p61, %f163, %f174;
	@!%p61 bra 	$Lt_19_106498;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	61	0
	xor.b32 	%r30, %r30, 1;
	.loc	18	62	0
	min.ftz.f32 	%f166, %f166, %f174;
$Lt_19_106498:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.f32 	%f175, %f20;
	setp.lt.ftz.f32 	%p62, %f163, %f175;
	@!%p62 bra 	$Lt_19_107010;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	61	0
	xor.b32 	%r30, %r30, 1;
	.loc	18	62	0
	min.ftz.f32 	%f166, %f166, %f175;
$Lt_19_107010:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.f32 	%f176, %f22;
	setp.lt.ftz.f32 	%p63, %f163, %f176;
	@!%p63 bra 	$Lt_19_107522;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	61	0
	xor.b32 	%r30, %r30, 1;
	.loc	18	62	0
	min.ftz.f32 	%f166, %f166, %f176;
$Lt_19_107522:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.f32 	%f177, %f24;
	setp.lt.ftz.f32 	%p64, %f163, %f177;
	@!%p64 bra 	$Lt_19_108034;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	61	0
	xor.b32 	%r30, %r30, 1;
	.loc	18	62	0
	min.ftz.f32 	%f166, %f166, %f177;
$Lt_19_108034:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.f32 	%f178, %f26;
	setp.lt.ftz.f32 	%p65, %f163, %f178;
	@!%p65 bra 	$Lt_19_108546;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	61	0
	xor.b32 	%r30, %r30, 1;
	.loc	18	62	0
	min.ftz.f32 	%f166, %f166, %f178;
$Lt_19_108546:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.f32 	%f179, %f28;
	setp.lt.ftz.f32 	%p66, %f163, %f179;
	@!%p66 bra 	$Lt_19_109058;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	61	0
	xor.b32 	%r30, %r30, 1;
	.loc	18	62	0
	min.ftz.f32 	%f166, %f166, %f179;
$Lt_19_109058:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.f32 	%f180, %f30;
	setp.lt.ftz.f32 	%p67, %f163, %f180;
	@!%p67 bra 	$Lt_19_109570;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	61	0
	xor.b32 	%r30, %r30, 1;
	.loc	18	62	0
	min.ftz.f32 	%f166, %f166, %f180;
$Lt_19_109570:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.f32 	%f181, %f32;
	setp.lt.ftz.f32 	%p68, %f163, %f181;
	@!%p68 bra 	$Lt_19_110082;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	61	0
	xor.b32 	%r30, %r30, 1;
	.loc	18	62	0
	min.ftz.f32 	%f166, %f166, %f181;
$Lt_19_110082:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.s32 	%r31, 0;
	setp.eq.s32 	%p69, %r30, %r31;
	add.s32 	%r32, %r29, 8;
	cvt.rn.f32.s32 	%f182, %r32;
	setp.le.ftz.f32 	%p70, %f182, %f166;
	@!%p70 bra 	$Lt_19_110850;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	@!%p69 bra 	$Lt_19_110594;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	68	0
	mov.u32 	%r33, 0;
	st.global.u16 	[%rd4+6], %r33;
	mov.u32 	%r34, 0;
	st.global.u16 	[%rd4+14], %r34;
	mov.u32 	%r35, 0;
	st.global.u16 	[%rd4+22], %r35;
	mov.u32 	%r36, 0;
	st.global.u16 	[%rd4+30], %r36;
	mov.u32 	%r37, 0;
	st.global.u16 	[%rd4+38], %r37;
	mov.u32 	%r38, 0;
	st.global.u16 	[%rd4+46], %r38;
	mov.u32 	%r39, 0;
	st.global.u16 	[%rd4+54], %r39;
	mov.u32 	%r40, 0;
	st.global.u16 	[%rd4+62], %r40;
	bra.uni 	$Lt_19_110594;
$Lt_19_110850:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	@!%p69 bra 	$Lt_19_111618;
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	72	0
	mov.u32 	%r41, 0;
	st.global.u16 	[%rd4+6], %r41;
$Lt_19_111618:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	mov.s32 	%r42, 1;
$Lt_19_112642:
 //<loop> Loop body line 72, nesting depth: 2, iterations: 7
	.loc	18	75	0
	add.s32 	%r43, %r42, %r29;
	cvt.rn.f32.s32 	%f183, %r43;
	mov.f32 	%f184, %f10;
	setp.gt.ftz.f32 	%p71, %f184, %f183;
	mov.f32 	%f185, %f8;
	setp.gt.ftz.f32 	%p72, %f185, %f183;
	mov.f32 	%f186, %f6;
	setp.gt.ftz.f32 	%p73, %f186, %f183;
	mov.f32 	%f187, %f4;
	setp.gt.ftz.f32 	%p74, %f187, %f183;
	mov.f32 	%f188, %f2;
	setp.gt.ftz.f32 	%p75, %f188, %f183;
	selp.s32 	%r44, 1, 0, %p75;
	xor.b32 	%r45, %r44, 1;
	selp.s32 	%r46, %r45, %r44, %p74;
	xor.b32 	%r47, %r46, 1;
	selp.s32 	%r48, %r47, %r46, %p73;
	xor.b32 	%r49, %r48, 1;
	selp.s32 	%r50, %r49, %r48, %p72;
	xor.b32 	%r51, %r50, 1;
	selp.s32 	%r52, %r51, %r50, %p71;
	xor.b32 	%r53, %r52, 1;
	mov.f32 	%f189, %f12;
	setp.gt.ftz.f32 	%p76, %f189, %f183;
	selp.s32 	%r54, %r53, %r52, %p76;
	xor.b32 	%r55, %r54, 1;
	mov.f32 	%f190, %f14;
	setp.gt.ftz.f32 	%p77, %f190, %f183;
	selp.s32 	%r56, %r55, %r54, %p77;
	xor.b32 	%r57, %r56, 1;
	mov.f32 	%f191, %f16;
	setp.gt.ftz.f32 	%p78, %f191, %f183;
	selp.s32 	%r58, %r57, %r56, %p78;
	xor.b32 	%r59, %r58, 1;
	mov.f32 	%f192, %f18;
	setp.gt.ftz.f32 	%p79, %f192, %f183;
	selp.s32 	%r60, %r59, %r58, %p79;
	xor.b32 	%r61, %r60, 1;
	mov.f32 	%f193, %f20;
	setp.gt.ftz.f32 	%p80, %f193, %f183;
	selp.s32 	%r62, %r61, %r60, %p80;
	xor.b32 	%r63, %r62, 1;
	mov.f32 	%f194, %f22;
	setp.gt.ftz.f32 	%p81, %f194, %f183;
	selp.s32 	%r64, %r63, %r62, %p81;
	xor.b32 	%r65, %r64, 1;
	mov.f32 	%f195, %f24;
	setp.gt.ftz.f32 	%p82, %f195, %f183;
	selp.s32 	%r66, %r65, %r64, %p82;
	xor.b32 	%r67, %r66, 1;
	mov.f32 	%f196, %f26;
	setp.gt.ftz.f32 	%p83, %f196, %f183;
	selp.s32 	%r68, %r67, %r66, %p83;
	xor.b32 	%r69, %r68, 1;
	mov.f32 	%f197, %f28;
	setp.gt.ftz.f32 	%p84, %f197, %f183;
	selp.s32 	%r70, %r69, %r68, %p84;
	xor.b32 	%r71, %r70, 1;
	mov.f32 	%f198, %f30;
	setp.gt.ftz.f32 	%p85, %f198, %f183;
	selp.s32 	%r30, %r71, %r70, %p85;
	xor.b32 	%r72, %r30, 1;
	mov.f32 	%f199, %f32;
	setp.gt.ftz.f32 	%p86, %f199, %f183;
	selp.s32 	%r73, %r72, %r30, %p86;
	mov.u32 	%r74, 0;
	setp.ne.s32 	%p87, %r73, %r74;
	@%p87 bra 	$Lt_19_112898;
 //<loop> Part of loop body line 72, head labeled $Lt_19_112642
	.loc	18	76	0
	mov.u32 	%r75, 0;
	cvt.s64.s32 	%rd5, %r42;
	mul.wide.s32 	%rd6, %r42, 8;
	add.u64 	%rd7, %rd4, %rd6;
	st.global.u16 	[%rd7+6], %r75;
$Lt_19_112898:
 //<loop> Part of loop body line 72, head labeled $Lt_19_112642
	add.s32 	%r42, %r42, 1;
	mov.u32 	%r76, 8;
	setp.ne.s32 	%p88, %r42, %r76;
	@%p88 bra 	$Lt_19_112642;
$Lt_19_110594:
 //<loop> Part of loop body line 52, head labeled $Lt_19_102146
	.loc	18	80	0
	add.u64 	%rd4, %rd4, 64;
	mov.s32 	%r29, %r32;
	mov.u32 	%r77, 31;
	setp.le.s32 	%p89, %r32, %r77;
	@%p89 bra 	$Lt_19_102146;
	bra.uni 	$LBB318_cuda_kernel_bitmask;
$Lt_19_93186:
	mov.u32 	%r78, 8;
	setp.ne.s32 	%p90, %r20, %r78;
	@%p90 bra 	$Lt_19_114178;
	.loc	18	101	0
	sub.s32 	%r24, %r12, 32;
	setp.gt.s32 	%p4, %r11, %r24;
	selp.s32 	%r25, %r24, %r11, %p4;
	ld.param.u64 	%rd8, [__cudaparm_cuda_kernel_bitmask_dstFrame];
	ld.param.s32 	%r79, [__cudaparm_cuda_kernel_bitmask_dstPitch];
	mul.lo.s32 	%r80, %r79, %r10;
	add.s32 	%r81, %r25, %r80;
	cvt.s64.s32 	%rd9, %r81;
	mul.wide.s32 	%rd10, %r81, 8;
	add.u64 	%rd11, %rd8, %rd10;
	.loc	18	51	0
	mov.f32 	%f200, 0fbf800000;   	// -1
	mov.f32 	%f201, %f200;
	mov.f32 	%f202, 0fbf800000;   	// -1
	mov.f32 	%f203, %f202;
	mov.f32 	%f204, 0fbf800000;   	// -1
	mov.f32 	%f205, %f204;
	mov.f32 	%f206, 0fbf800000;   	// -1
	mov.f32 	%f207, %f206;
	mov.f32 	%f208, 0fbf800000;   	// -1
	mov.f32 	%f209, %f208;
	mov.f32 	%f210, 0fbf800000;   	// -1
	mov.f32 	%f211, %f210;
	mov.f32 	%f212, 0fbf800000;   	// -1
	mov.f32 	%f213, %f212;
	mov.f32 	%f214, 0fbf800000;   	// -1
	mov.f32 	%f215, %f214;
	.loc	18	24	0
	cvt.rn.f32.s32 	%f33, %r10;
	ld.global.f32 	%f34, [polyPoints+4];
	ld.global.f32 	%f35, [polyPoints+16];
	setp.ge.ftz.f32 	%p91, %f33, %f34;
	setp.lt.ftz.f32 	%p92, %f33, %f35;
	xor.pred 	%p93, %p91, %p92;
	@%p93 bra 	$Lt_19_114434;
	.loc	18	41	0
	cvt.rn.f32.s32 	%f216, %r25;
	ld.global.f32 	%f217, [polyPoints+0];
	sub.ftz.f32 	%f218, %f216, %f217;
	ld.global.f32 	%f219, [polyPoints+8];
	sub.ftz.f32 	%f220, %f33, %f34;
	mul.ftz.f32 	%f221, %f219, %f220;
	sub.ftz.f32 	%f222, %f221, %f218;
	mov.f32 	%f201, %f222;
$Lt_19_114434:
	ld.global.f32 	%f43, [polyPoints+28];
	setp.ge.ftz.f32 	%p94, %f33, %f35;
	setp.lt.ftz.f32 	%p95, %f33, %f43;
	xor.pred 	%p96, %p94, %p95;
	@%p96 bra 	$Lt_19_114946;
	cvt.rn.f32.s32 	%f223, %r25;
	ld.global.f32 	%f224, [polyPoints+12];
	sub.ftz.f32 	%f225, %f223, %f224;
	ld.global.f32 	%f226, [polyPoints+20];
	sub.ftz.f32 	%f227, %f33, %f35;
	mul.ftz.f32 	%f228, %f226, %f227;
	sub.ftz.f32 	%f229, %f228, %f225;
	mov.f32 	%f203, %f229;
$Lt_19_114946:
	ld.global.f32 	%f51, [polyPoints+40];
	setp.ge.ftz.f32 	%p97, %f33, %f43;
	setp.lt.ftz.f32 	%p98, %f33, %f51;
	xor.pred 	%p99, %p97, %p98;
	@%p99 bra 	$Lt_19_115458;
	cvt.rn.f32.s32 	%f230, %r25;
	ld.global.f32 	%f231, [polyPoints+24];
	sub.ftz.f32 	%f232, %f230, %f231;
	ld.global.f32 	%f233, [polyPoints+32];
	sub.ftz.f32 	%f234, %f33, %f43;
	mul.ftz.f32 	%f235, %f233, %f234;
	sub.ftz.f32 	%f236, %f235, %f232;
	mov.f32 	%f205, %f236;
$Lt_19_115458:
	ld.global.f32 	%f237, [polyPoints+52];
	setp.ge.ftz.f32 	%p100, %f33, %f51;
	setp.lt.ftz.f32 	%p101, %f33, %f237;
	xor.pred 	%p102, %p100, %p101;
	@%p102 bra 	$Lt_19_115970;
	cvt.rn.f32.s32 	%f238, %r25;
	ld.global.f32 	%f239, [polyPoints+36];
	sub.ftz.f32 	%f240, %f238, %f239;
	ld.global.f32 	%f241, [polyPoints+44];
	sub.ftz.f32 	%f242, %f33, %f51;
	mul.ftz.f32 	%f243, %f241, %f242;
	sub.ftz.f32 	%f244, %f243, %f240;
	mov.f32 	%f207, %f244;
$Lt_19_115970:
	ld.global.f32 	%f245, [polyPoints+64];
	setp.ge.ftz.f32 	%p103, %f33, %f237;
	setp.lt.ftz.f32 	%p104, %f33, %f245;
	xor.pred 	%p105, %p103, %p104;
	@%p105 bra 	$Lt_19_116482;
	cvt.rn.f32.s32 	%f246, %r25;
	ld.global.f32 	%f247, [polyPoints+48];
	sub.ftz.f32 	%f248, %f246, %f247;
	ld.global.f32 	%f249, [polyPoints+56];
	sub.ftz.f32 	%f250, %f33, %f237;
	mul.ftz.f32 	%f251, %f249, %f250;
	sub.ftz.f32 	%f252, %f251, %f248;
	mov.f32 	%f209, %f252;
$Lt_19_116482:
	ld.global.f32 	%f253, [polyPoints+76];
	setp.ge.ftz.f32 	%p106, %f33, %f245;
	setp.lt.ftz.f32 	%p107, %f33, %f253;
	xor.pred 	%p108, %p106, %p107;
	@%p108 bra 	$Lt_19_116994;
	cvt.rn.f32.s32 	%f254, %r25;
	ld.global.f32 	%f255, [polyPoints+60];
	sub.ftz.f32 	%f256, %f254, %f255;
	ld.global.f32 	%f257, [polyPoints+68];
	sub.ftz.f32 	%f258, %f33, %f245;
	mul.ftz.f32 	%f259, %f257, %f258;
	sub.ftz.f32 	%f260, %f259, %f256;
	mov.f32 	%f211, %f260;
$Lt_19_116994:
	ld.global.f32 	%f261, [polyPoints+88];
	setp.ge.ftz.f32 	%p109, %f33, %f253;
	setp.lt.ftz.f32 	%p110, %f33, %f261;
	xor.pred 	%p111, %p109, %p110;
	@%p111 bra 	$Lt_19_117506;
	cvt.rn.f32.s32 	%f262, %r25;
	ld.global.f32 	%f263, [polyPoints+72];
	sub.ftz.f32 	%f264, %f262, %f263;
	ld.global.f32 	%f265, [polyPoints+80];
	sub.ftz.f32 	%f266, %f33, %f253;
	mul.ftz.f32 	%f267, %f265, %f266;
	sub.ftz.f32 	%f268, %f267, %f264;
	mov.f32 	%f213, %f268;
$Lt_19_117506:
	setp.ge.ftz.f32 	%p112, %f33, %f261;
	ld.global.f32 	%f269, [polyPoints+100];
	setp.gt.ftz.f32 	%p113, %f269, %f33;
	xor.pred 	%p114, %p112, %p113;
	@%p114 bra 	$Lt_19_118018;
	cvt.rn.f32.s32 	%f270, %r25;
	ld.global.f32 	%f271, [polyPoints+84];
	sub.ftz.f32 	%f272, %f270, %f271;
	ld.global.f32 	%f273, [polyPoints+92];
	sub.ftz.f32 	%f274, %f33, %f261;
	mul.ftz.f32 	%f275, %f273, %f274;
	sub.ftz.f32 	%f276, %f275, %f272;
	mov.f32 	%f215, %f276;
$Lt_19_118018:
	.loc	18	52	0
	// Scan loop over 32 consecutive destination elements, 8 per outer pass.
	//   %r82  : base position of the current group (0, 8, 16, 24)
	//   %rd11 : address of the group's first element; advanced by 64 bytes per
	//           pass, i.e. 8 bytes per element (set up before this chunk)
	//   %f201, %f203, ..., %f215 : eight per-edge values, only read in this loop
	//   %r83  : parity flag (0/1) toggled once per per-edge value that lies
	//           beyond the tested position
	// NOTE(review): the per-edge values look like polygon-edge crossings relative
	// to the first element's column, making this an even-odd fill test - confirm
	// against the CUDA source referenced by the .loc directives.
	mov.s32 	%r82, 0;
$Lt_19_119042:
 //<loop> Loop body line 52, nesting depth: 1, iterations: 4
	// Phase 1: evaluate parity at the group base and track the smallest
	// per-edge value that is strictly greater than the base (%f280, seeded
	// with 100000).
	cvt.rn.f32.s32 	%f277, %r82;
	mov.f32 	%f278, %f201;
	setp.lt.ftz.f32 	%p115, %f277, %f278;
	@!%p115 bra 	$Lt_19_119554;
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	.loc	18	62	0
	mov.f32 	%f279, 0f47c35000;   	// 100000
	min.ftz.f32 	%f280, %f278, %f279;
	mov.s32 	%r83, 1;
	bra.uni 	$Lt_19_119298;
$Lt_19_119554:
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	mov.s32 	%r83, 0;
	mov.f32 	%f280, 0f47c35000;   	// 100000
$Lt_19_119298:
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	// Values 2..8 follow the same pattern: if base < value, flip %r83 and
	// fold the value into the running minimum.
	mov.f32 	%f281, %f203;
	setp.lt.ftz.f32 	%p116, %f277, %f281;
	@!%p116 bra 	$Lt_19_119810;
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	.loc	18	61	0
	xor.b32 	%r83, %r83, 1;
	.loc	18	62	0
	min.ftz.f32 	%f280, %f280, %f281;
$Lt_19_119810:
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	mov.f32 	%f282, %f205;
	setp.lt.ftz.f32 	%p117, %f277, %f282;
	@!%p117 bra 	$Lt_19_120322;
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	.loc	18	61	0
	xor.b32 	%r83, %r83, 1;
	.loc	18	62	0
	min.ftz.f32 	%f280, %f280, %f282;
$Lt_19_120322:
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	mov.f32 	%f283, %f207;
	setp.lt.ftz.f32 	%p118, %f277, %f283;
	@!%p118 bra 	$Lt_19_120834;
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	.loc	18	61	0
	xor.b32 	%r83, %r83, 1;
	.loc	18	62	0
	min.ftz.f32 	%f280, %f280, %f283;
$Lt_19_120834:
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	mov.f32 	%f284, %f209;
	setp.lt.ftz.f32 	%p119, %f277, %f284;
	@!%p119 bra 	$Lt_19_121346;
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	.loc	18	61	0
	xor.b32 	%r83, %r83, 1;
	.loc	18	62	0
	min.ftz.f32 	%f280, %f280, %f284;
$Lt_19_121346:
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	mov.f32 	%f285, %f211;
	setp.lt.ftz.f32 	%p120, %f277, %f285;
	@!%p120 bra 	$Lt_19_121858;
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	.loc	18	61	0
	xor.b32 	%r83, %r83, 1;
	.loc	18	62	0
	min.ftz.f32 	%f280, %f280, %f285;
$Lt_19_121858:
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	mov.f32 	%f286, %f213;
	setp.lt.ftz.f32 	%p121, %f277, %f286;
	@!%p121 bra 	$Lt_19_122370;
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	.loc	18	61	0
	xor.b32 	%r83, %r83, 1;
	.loc	18	62	0
	min.ftz.f32 	%f280, %f280, %f286;
$Lt_19_122370:
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	mov.f32 	%f287, %f215;
	setp.lt.ftz.f32 	%p122, %f277, %f287;
	@!%p122 bra 	$Lt_19_122882;
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	.loc	18	61	0
	xor.b32 	%r83, %r83, 1;
	.loc	18	62	0
	min.ftz.f32 	%f280, %f280, %f287;
$Lt_19_122882:
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	// %p123 = parity at the base is 0.
	// %p124 = group end (base + 8) <= smallest value beyond the base, i.e.
	// every value that exceeds the base also exceeds positions base+1..base+7,
	// so the base parity holds for the whole group.
	mov.s32 	%r84, 0;
	setp.eq.s32 	%p123, %r83, %r84;
	add.s32 	%r85, %r82, 8;
	cvt.rn.f32.s32 	%f288, %r85;
	setp.le.ftz.f32 	%p124, %f288, %f280;
	@!%p124 bra 	$Lt_19_123650;
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	@!%p123 bra 	$Lt_19_123394;
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	.loc	18	68	0
	// Uniform group with parity 0: store a 16-bit zero at byte offset 6 of
	// each of the 8 elements (element stride 8 bytes).
	// NOTE(review): offset 6 in an 8-byte element is presumably the 4th
	// component of a ushort4 pixel - confirm.
	mov.u32 	%r86, 0;
	st.global.u16 	[%rd11+6], %r86;
	mov.u32 	%r87, 0;
	st.global.u16 	[%rd11+14], %r87;
	mov.u32 	%r88, 0;
	st.global.u16 	[%rd11+22], %r88;
	mov.u32 	%r89, 0;
	st.global.u16 	[%rd11+30], %r89;
	mov.u32 	%r90, 0;
	st.global.u16 	[%rd11+38], %r90;
	mov.u32 	%r91, 0;
	st.global.u16 	[%rd11+46], %r91;
	mov.u32 	%r92, 0;
	st.global.u16 	[%rd11+54], %r92;
	mov.u32 	%r93, 0;
	st.global.u16 	[%rd11+62], %r93;
	bra.uni 	$Lt_19_123394;
$Lt_19_123650:
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	// Mixed group: element 0 reuses the base parity; elements 1..7 are
	// re-evaluated one by one in the inner loop below.
	@!%p123 bra 	$Lt_19_124418;
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	.loc	18	72	0
	mov.u32 	%r94, 0;
	st.global.u16 	[%rd11+6], %r94;
$Lt_19_124418:
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	mov.s32 	%r95, 1;
$Lt_19_125442:
 //<loop> Loop body line 72, nesting depth: 2, iterations: 7
	.loc	18	75	0
	// %f289 = float(base + %r95). The selp/xor chain computes the parity of
	// the number of per-edge values that are greater than %f289: each step
	// keeps the previous parity or its complement depending on one compare.
	// %r83 is reused as a temporary at one step; it is re-initialised at the
	// top of every outer pass.
	add.s32 	%r96, %r95, %r82;
	cvt.rn.f32.s32 	%f289, %r96;
	mov.f32 	%f290, %f209;
	setp.gt.ftz.f32 	%p125, %f290, %f289;
	mov.f32 	%f291, %f207;
	setp.gt.ftz.f32 	%p126, %f291, %f289;
	mov.f32 	%f292, %f205;
	setp.gt.ftz.f32 	%p127, %f292, %f289;
	mov.f32 	%f293, %f203;
	setp.gt.ftz.f32 	%p128, %f293, %f289;
	mov.f32 	%f294, %f201;
	setp.gt.ftz.f32 	%p129, %f294, %f289;
	selp.s32 	%r97, 1, 0, %p129;
	xor.b32 	%r98, %r97, 1;
	selp.s32 	%r99, %r98, %r97, %p128;
	xor.b32 	%r100, %r99, 1;
	selp.s32 	%r101, %r100, %r99, %p127;
	xor.b32 	%r102, %r101, 1;
	selp.s32 	%r103, %r102, %r101, %p126;
	xor.b32 	%r104, %r103, 1;
	selp.s32 	%r105, %r104, %r103, %p125;
	xor.b32 	%r106, %r105, 1;
	mov.f32 	%f295, %f211;
	setp.gt.ftz.f32 	%p130, %f295, %f289;
	selp.s32 	%r107, %r106, %r105, %p130;
	xor.b32 	%r108, %r107, 1;
	mov.f32 	%f296, %f213;
	setp.gt.ftz.f32 	%p131, %f296, %f289;
	selp.s32 	%r83, %r108, %r107, %p131;
	xor.b32 	%r109, %r83, 1;
	mov.f32 	%f297, %f215;
	setp.gt.ftz.f32 	%p132, %f297, %f289;
	selp.s32 	%r110, %r109, %r83, %p132;
	mov.u32 	%r111, 0;
	setp.ne.s32 	%p133, %r110, %r111;
	@%p133 bra 	$Lt_19_125698;
 //<loop> Part of loop body line 72, head labeled $Lt_19_125442
	.loc	18	76	0
	// Parity 0: zero the 16-bit field of element %r95 at
	// %rd11 + %r95 * 8 + 6. (%rd12 is not used in the visible code.)
	mov.u32 	%r112, 0;
	cvt.s64.s32 	%rd12, %r95;
	mul.wide.s32 	%rd13, %r95, 8;
	add.u64 	%rd14, %rd11, %rd13;
	st.global.u16 	[%rd14+6], %r112;
$Lt_19_125698:
 //<loop> Part of loop body line 72, head labeled $Lt_19_125442
	add.s32 	%r95, %r95, 1;
	mov.u32 	%r113, 8;
	setp.ne.s32 	%p134, %r95, %r113;
	@%p134 bra 	$Lt_19_125442;
$Lt_19_123394:
 //<loop> Part of loop body line 52, head labeled $Lt_19_119042
	.loc	18	80	0
	// Next group: advance the pointer by 8 elements (64 bytes) and repeat
	// while the new base is <= 31.
	add.u64 	%rd11, %rd11, 64;
	mov.s32 	%r82, %r85;
	mov.u32 	%r114, 31;
	setp.le.s32 	%p135, %r85, %r114;
	@%p135 bra 	$Lt_19_119042;
	// $LBB318_cuda_kernel_bitmask is defined outside this chunk
	// (presumably the kernel's common exit - confirm).
	bra.uni 	$LBB318_cuda_kernel_bitmask;
$Lt_19_114178:
	// Case taken only when %r20 == 4; any other value branches to
	// $LBB318_cuda_kernel_bitmask (label outside this chunk).
	// Computes the destination address for 8-byte elements and four per-edge
	// values (%f299, %f301, %f303, %f305) from polyPoints entries 0..4.
	// NOTE(review): %r20 is set outside this chunk; it appears to be the
	// polygon's edge count - confirm.
	mov.u32 	%r115, 4;
	setp.ne.s32 	%p136, %r20, %r115;
	@%p136 bra 	$LBB318_cuda_kernel_bitmask;
	.loc	18	102	0
	// %r25 = min(%r11, %r12 - 32).
	// NOTE(review): presumably clamps the start column so that the 32
	// elements handled below stay inside a row of width %r12 - confirm.
	sub.s32 	%r24, %r12, 32;
	setp.gt.s32 	%p4, %r11, %r24;
	selp.s32 	%r25, %r24, %r11, %p4;
	// %rd18 = dstFrame + (dstPitch * %r10 + %r25) * 8
	// (%rd16 is not used in the visible code).
	ld.param.u64 	%rd15, [__cudaparm_cuda_kernel_bitmask_dstFrame];
	ld.param.s32 	%r116, [__cudaparm_cuda_kernel_bitmask_dstPitch];
	mul.lo.s32 	%r117, %r116, %r10;
	add.s32 	%r118, %r25, %r117;
	cvt.s64.s32 	%rd16, %r118;
	mul.wide.s32 	%rd17, %r118, 8;
	add.u64 	%rd18, %rd15, %rd17;
	.loc	18	51	0
	// Default every per-edge value to -1.0. The scan loop that follows only
	// counts values greater than a position in 0..31, so an edge left at -1.0
	// never contributes.
	mov.f32 	%f298, 0fbf800000;   	// -1
	mov.f32 	%f299, %f298;
	mov.f32 	%f300, 0fbf800000;   	// -1
	mov.f32 	%f301, %f300;
	mov.f32 	%f302, 0fbf800000;   	// -1
	mov.f32 	%f303, %f302;
	mov.f32 	%f304, 0fbf800000;   	// -1
	mov.f32 	%f305, %f304;
	.loc	18	24	0
	// Per-edge pattern (edge k uses polyPoints entries k and k+1, 12 bytes
	// each, with floats at +0, +4 and +8):
	//   the edge is evaluated when %f33 = float(%r10) satisfies
	//   entry[k].f4 <= %f33 < entry[k+1].f4, or the same with the two entries
	//   swapped; otherwise exactly one compare is true, the xor is set and the
	//   edge is skipped.
	//   value = entry[k].f8 * (%f33 - entry[k].f4) - (float(%r25) - entry[k].f0)
	// NOTE(review): this reads as the x position where the edge crosses row
	// %r10, relative to column %r25, with +0/+4 = vertex x/y and +8 = dx/dy -
	// confirm against the host code that fills polyPoints.
	cvt.rn.f32.s32 	%f33, %r10;
	ld.global.f32 	%f34, [polyPoints+4];
	ld.global.f32 	%f35, [polyPoints+16];
	setp.ge.ftz.f32 	%p137, %f33, %f34;
	setp.lt.ftz.f32 	%p138, %f33, %f35;
	xor.pred 	%p139, %p137, %p138;
	@%p139 bra 	$Lt_19_127234;
	.loc	18	41	0
	cvt.rn.f32.s32 	%f306, %r25;
	ld.global.f32 	%f307, [polyPoints+0];
	sub.ftz.f32 	%f308, %f306, %f307;
	ld.global.f32 	%f309, [polyPoints+8];
	sub.ftz.f32 	%f310, %f33, %f34;
	mul.ftz.f32 	%f311, %f309, %f310;
	sub.ftz.f32 	%f312, %f311, %f308;
	mov.f32 	%f299, %f312;
$Lt_19_127234:
	// Edge 1: entries 1 and 2.
	ld.global.f32 	%f43, [polyPoints+28];
	setp.ge.ftz.f32 	%p140, %f33, %f35;
	setp.lt.ftz.f32 	%p141, %f33, %f43;
	xor.pred 	%p142, %p140, %p141;
	@%p142 bra 	$Lt_19_127746;
	cvt.rn.f32.s32 	%f313, %r25;
	ld.global.f32 	%f314, [polyPoints+12];
	sub.ftz.f32 	%f315, %f313, %f314;
	ld.global.f32 	%f316, [polyPoints+20];
	sub.ftz.f32 	%f317, %f33, %f35;
	mul.ftz.f32 	%f318, %f316, %f317;
	sub.ftz.f32 	%f319, %f318, %f315;
	mov.f32 	%f301, %f319;
$Lt_19_127746:
	// Edge 2: entries 2 and 3.
	ld.global.f32 	%f51, [polyPoints+40];
	setp.ge.ftz.f32 	%p143, %f33, %f43;
	setp.lt.ftz.f32 	%p144, %f33, %f51;
	xor.pred 	%p145, %p143, %p144;
	@%p145 bra 	$Lt_19_128258;
	cvt.rn.f32.s32 	%f320, %r25;
	ld.global.f32 	%f321, [polyPoints+24];
	sub.ftz.f32 	%f322, %f320, %f321;
	ld.global.f32 	%f323, [polyPoints+32];
	sub.ftz.f32 	%f324, %f33, %f43;
	mul.ftz.f32 	%f325, %f323, %f324;
	sub.ftz.f32 	%f326, %f325, %f322;
	mov.f32 	%f303, %f326;
$Lt_19_128258:
	// Edge 3: entries 3 and 4. Same test as above; the upper-bound compare is
	// emitted as setp.gt with the operands swapped.
	setp.ge.ftz.f32 	%p146, %f33, %f51;
	ld.global.f32 	%f327, [polyPoints+52];
	setp.gt.ftz.f32 	%p147, %f327, %f33;
	xor.pred 	%p148, %p146, %p147;
	@%p148 bra 	$Lt_19_128770;
	cvt.rn.f32.s32 	%f328, %r25;
	ld.global.f32 	%f329, [polyPoints+36];
	sub.ftz.f32 	%f330, %f328, %f329;
	ld.global.f32 	%f331, [polyPoints+44];
	sub.ftz.f32 	%f332, %f33, %f51;
	mul.ftz.f32 	%f333, %f331, %f332;
	sub.ftz.f32 	%f334, %f333, %f330;
	mov.f32 	%f305, %f334;
$Lt_19_128770:
	.loc	18	52	0
	// Scan loop over 32 consecutive 8-byte destination elements, 8 per pass.
	//   %r119 : base position of the current group (0, 8, 16, 24)
	//   %rd18 : address of the group's first element (+64 bytes per pass)
	//   %f299, %f301, %f303, %f305 : the four per-edge values, only read here
	//   %r120 : parity flag (0/1)
	mov.s32 	%r119, 0;
$Lt_19_129794:
 //<loop> Loop body line 52, nesting depth: 1, iterations: 4
	// Phase 1: parity at the group base, plus the smallest per-edge value
	// strictly greater than the base (%f338, seeded with 100000).
	cvt.rn.f32.s32 	%f335, %r119;
	mov.f32 	%f336, %f299;
	setp.lt.ftz.f32 	%p149, %f335, %f336;
	@!%p149 bra 	$Lt_19_130306;
 //<loop> Part of loop body line 52, head labeled $Lt_19_129794
	.loc	18	62	0
	mov.f32 	%f337, 0f47c35000;   	// 100000
	min.ftz.f32 	%f338, %f336, %f337;
	mov.s32 	%r120, 1;
	bra.uni 	$Lt_19_130050;
$Lt_19_130306:
 //<loop> Part of loop body line 52, head labeled $Lt_19_129794
	mov.s32 	%r120, 0;
	mov.f32 	%f338, 0f47c35000;   	// 100000
$Lt_19_130050:
 //<loop> Part of loop body line 52, head labeled $Lt_19_129794
	// Values 2..4: if base < value, flip %r120 and fold the value into the
	// running minimum.
	mov.f32 	%f339, %f301;
	setp.lt.ftz.f32 	%p150, %f335, %f339;
	@!%p150 bra 	$Lt_19_130562;
 //<loop> Part of loop body line 52, head labeled $Lt_19_129794
	.loc	18	61	0
	xor.b32 	%r120, %r120, 1;
	.loc	18	62	0
	min.ftz.f32 	%f338, %f338, %f339;
$Lt_19_130562:
 //<loop> Part of loop body line 52, head labeled $Lt_19_129794
	mov.f32 	%f340, %f303;
	setp.lt.ftz.f32 	%p151, %f335, %f340;
	@!%p151 bra 	$Lt_19_131074;
 //<loop> Part of loop body line 52, head labeled $Lt_19_129794
	.loc	18	61	0
	xor.b32 	%r120, %r120, 1;
	.loc	18	62	0
	min.ftz.f32 	%f338, %f338, %f340;
$Lt_19_131074:
 //<loop> Part of loop body line 52, head labeled $Lt_19_129794
	mov.f32 	%f341, %f305;
	setp.lt.ftz.f32 	%p152, %f335, %f341;
	@!%p152 bra 	$Lt_19_131586;
 //<loop> Part of loop body line 52, head labeled $Lt_19_129794
	.loc	18	61	0
	xor.b32 	%r120, %r120, 1;
	.loc	18	62	0
	min.ftz.f32 	%f338, %f338, %f341;
$Lt_19_131586:
 //<loop> Part of loop body line 52, head labeled $Lt_19_129794
	// %p153 = parity at the base is 0.
	// %p154 = group end (base + 8) <= smallest value beyond the base, so the
	// base parity holds for all 8 elements of the group.
	mov.s32 	%r121, 0;
	setp.eq.s32 	%p153, %r120, %r121;
	add.s32 	%r122, %r119, 8;
	cvt.rn.f32.s32 	%f342, %r122;
	setp.le.ftz.f32 	%p154, %f342, %f338;
	@!%p154 bra 	$Lt_19_132354;
 //<loop> Part of loop body line 52, head labeled $Lt_19_129794
	@!%p153 bra 	$Lt_19_132098;
 //<loop> Part of loop body line 52, head labeled $Lt_19_129794
	.loc	18	68	0
	// Uniform group with parity 0: store a 16-bit zero at byte offset 6 of
	// each of the 8 elements (element stride 8 bytes).
	// NOTE(review): presumably the 4th component of a ushort4 pixel - confirm.
	mov.u32 	%r123, 0;
	st.global.u16 	[%rd18+6], %r123;
	mov.u32 	%r124, 0;
	st.global.u16 	[%rd18+14], %r124;
	mov.u32 	%r125, 0;
	st.global.u16 	[%rd18+22], %r125;
	mov.u32 	%r126, 0;
	st.global.u16 	[%rd18+30], %r126;
	mov.u32 	%r127, 0;
	st.global.u16 	[%rd18+38], %r127;
	mov.u32 	%r128, 0;
	st.global.u16 	[%rd18+46], %r128;
	mov.u32 	%r129, 0;
	st.global.u16 	[%rd18+54], %r129;
	mov.u32 	%r130, 0;
	st.global.u16 	[%rd18+62], %r130;
	bra.uni 	$Lt_19_132098;
$Lt_19_132354:
 //<loop> Part of loop body line 52, head labeled $Lt_19_129794
	// Mixed group: element 0 reuses the base parity; elements 1..7 are
	// re-evaluated individually in the inner loop.
	@!%p153 bra 	$Lt_19_133122;
 //<loop> Part of loop body line 52, head labeled $Lt_19_129794
	.loc	18	72	0
	mov.u32 	%r131, 0;
	st.global.u16 	[%rd18+6], %r131;
$Lt_19_133122:
 //<loop> Part of loop body line 52, head labeled $Lt_19_129794
	mov.s32 	%r132, 1;
$Lt_19_134146:
 //<loop> Loop body line 72, nesting depth: 2, iterations: 7
	// %f343 = float(base + %r132). The selp/xor chain yields the parity of
	// the number of per-edge values greater than %f343.
	add.s32 	%r133, %r132, %r119;
	cvt.rn.f32.s32 	%f343, %r133;
	mov.f32 	%f344, %f303;
	setp.gt.ftz.f32 	%p155, %f344, %f343;
	mov.f32 	%f345, %f301;
	setp.gt.ftz.f32 	%p156, %f345, %f343;
	mov.f32 	%f346, %f299;
	setp.gt.ftz.f32 	%p157, %f346, %f343;
	selp.s32 	%r134, 1, 0, %p157;
	xor.b32 	%r135, %r134, 1;
	selp.s32 	%r136, %r135, %r134, %p156;
	xor.b32 	%r137, %r136, 1;
	selp.s32 	%r138, %r137, %r136, %p155;
	xor.b32 	%r139, %r138, 1;
	mov.f32 	%f347, %f305;
	setp.gt.ftz.f32 	%p158, %f347, %f343;
	selp.s32 	%r140, %r139, %r138, %p158;
	mov.u32 	%r141, 0;
	setp.ne.s32 	%p159, %r140, %r141;
	@%p159 bra 	$Lt_19_134402;
 //<loop> Part of loop body line 72, head labeled $Lt_19_134146
	.loc	18	76	0
	// Parity 0: zero the 16-bit field of element %r132 at
	// %rd18 + %r132 * 8 + 6. (%rd19 is not used in the visible code.)
	mov.u32 	%r142, 0;
	cvt.s64.s32 	%rd19, %r132;
	mul.wide.s32 	%rd20, %r132, 8;
	add.u64 	%rd21, %rd18, %rd20;
	st.global.u16 	[%rd21+6], %r142;
$Lt_19_134402:
 //<loop> Part of loop body line 72, head labeled $Lt_19_134146
	add.s32 	%r132, %r132, 1;
	mov.u32 	%r143, 8;
	setp.ne.s32 	%p160, %r132, %r143;
	@%p160 bra 	$Lt_19_134146;
$Lt_19_132098:
 //<loop> Part of loop body line 52, head labeled $Lt_19_129794
	.loc	18	80	0
	// Next group: advance by 8 elements (64 bytes); repeat while the new base
	// is <= 31.
	add.u64 	%rd18, %rd18, 64;
	mov.s32 	%r119, %r122;
	mov.u32 	%r144, 31;
	setp.le.s32 	%p161, %r122, %r144;
	@%p161 bra 	$Lt_19_129794;
	// Target label is defined outside this chunk (presumably the kernel's
	// common exit - confirm).
	bra.uni 	$LBB318_cuda_kernel_bitmask;
$Lt_19_92674:
	.loc	18	106	0
	// Case taken when %p2 is true (otherwise control continues at
	// $Lt_19_135682). Computes the destination address for 16-byte elements
	// and sixteen per-edge values (%f349, %f351, ..., %f379) from polyPoints
	// entries 0..16.
	// NOTE(review): %p2 is computed outside this chunk; it appears to select
	// the 16-edge variant - confirm.
	@!%p2 bra 	$Lt_19_135682;
	.loc	18	108	0
	// %r25 = min(%r11, %r12 - 32).
	// NOTE(review): presumably clamps the start column so that the 32
	// elements handled below stay inside a row of width %r12 - confirm.
	sub.s32 	%r24, %r12, 32;
	setp.gt.s32 	%p4, %r11, %r24;
	selp.s32 	%r25, %r24, %r11, %p4;
	// %rd25 = dstFrame + (dstPitch * %r10 + %r25) * 16
	// (%rd23 is not used in the visible code).
	ld.param.u64 	%rd22, [__cudaparm_cuda_kernel_bitmask_dstFrame];
	ld.param.s32 	%r145, [__cudaparm_cuda_kernel_bitmask_dstPitch];
	mul.lo.s32 	%r146, %r145, %r10;
	add.s32 	%r147, %r25, %r146;
	cvt.s64.s32 	%rd23, %r147;
	mul.wide.s32 	%rd24, %r147, 16;
	add.u64 	%rd25, %rd22, %rd24;
	.loc	18	51	0
	// Default all sixteen per-edge values to -1.0. The scan loop that follows
	// only counts values greater than a position in 0..31, so an edge left at
	// -1.0 never contributes.
	mov.f32 	%f348, 0fbf800000;   	// -1
	mov.f32 	%f349, %f348;
	mov.f32 	%f350, 0fbf800000;   	// -1
	mov.f32 	%f351, %f350;
	mov.f32 	%f352, 0fbf800000;   	// -1
	mov.f32 	%f353, %f352;
	mov.f32 	%f354, 0fbf800000;   	// -1
	mov.f32 	%f355, %f354;
	mov.f32 	%f356, 0fbf800000;   	// -1
	mov.f32 	%f357, %f356;
	mov.f32 	%f358, 0fbf800000;   	// -1
	mov.f32 	%f359, %f358;
	mov.f32 	%f360, 0fbf800000;   	// -1
	mov.f32 	%f361, %f360;
	mov.f32 	%f362, 0fbf800000;   	// -1
	mov.f32 	%f363, %f362;
	mov.f32 	%f364, 0fbf800000;   	// -1
	mov.f32 	%f365, %f364;
	mov.f32 	%f366, 0fbf800000;   	// -1
	mov.f32 	%f367, %f366;
	mov.f32 	%f368, 0fbf800000;   	// -1
	mov.f32 	%f369, %f368;
	mov.f32 	%f370, 0fbf800000;   	// -1
	mov.f32 	%f371, %f370;
	mov.f32 	%f372, 0fbf800000;   	// -1
	mov.f32 	%f373, %f372;
	mov.f32 	%f374, 0fbf800000;   	// -1
	mov.f32 	%f375, %f374;
	mov.f32 	%f376, 0fbf800000;   	// -1
	mov.f32 	%f377, %f376;
	mov.f32 	%f378, 0fbf800000;   	// -1
	mov.f32 	%f379, %f378;
	.loc	18	24	0
	// Per-edge pattern, repeated for k = 0..15 (edge k uses polyPoints
	// entries k and k+1, 12 bytes each, with floats at +0, +4 and +8):
	//   the edge is evaluated when %f33 = float(%r10) satisfies
	//   entry[k].f4 <= %f33 < entry[k+1].f4, or the same with the two entries
	//   swapped; otherwise exactly one compare is true, the xor is set and the
	//   edge is skipped.
	//   value = entry[k].f8 * (%f33 - entry[k].f4) - (float(%r25) - entry[k].f0)
	// NOTE(review): this reads as the x position where the edge crosses row
	// %r10, relative to column %r25, with +0/+4 = vertex x/y and +8 = dx/dy -
	// confirm against the host code that fills polyPoints.
	cvt.rn.f32.s32 	%f33, %r10;
	ld.global.f32 	%f34, [polyPoints+4];
	ld.global.f32 	%f35, [polyPoints+16];
	setp.ge.ftz.f32 	%p162, %f33, %f34;
	setp.lt.ftz.f32 	%p163, %f33, %f35;
	xor.pred 	%p164, %p162, %p163;
	@%p164 bra 	$Lt_19_135938;
	.loc	18	41	0
	cvt.rn.f32.s32 	%f380, %r25;
	ld.global.f32 	%f381, [polyPoints+0];
	sub.ftz.f32 	%f382, %f380, %f381;
	ld.global.f32 	%f383, [polyPoints+8];
	sub.ftz.f32 	%f384, %f33, %f34;
	mul.ftz.f32 	%f385, %f383, %f384;
	sub.ftz.f32 	%f386, %f385, %f382;
	mov.f32 	%f349, %f386;
$Lt_19_135938:
	// Edge 1: entries 1 and 2.
	ld.global.f32 	%f43, [polyPoints+28];
	setp.ge.ftz.f32 	%p165, %f33, %f35;
	setp.lt.ftz.f32 	%p166, %f33, %f43;
	xor.pred 	%p167, %p165, %p166;
	@%p167 bra 	$Lt_19_136450;
	cvt.rn.f32.s32 	%f387, %r25;
	ld.global.f32 	%f388, [polyPoints+12];
	sub.ftz.f32 	%f389, %f387, %f388;
	ld.global.f32 	%f390, [polyPoints+20];
	sub.ftz.f32 	%f391, %f33, %f35;
	mul.ftz.f32 	%f392, %f390, %f391;
	sub.ftz.f32 	%f393, %f392, %f389;
	mov.f32 	%f351, %f393;
$Lt_19_136450:
	// Edge 2: entries 2 and 3.
	ld.global.f32 	%f51, [polyPoints+40];
	setp.ge.ftz.f32 	%p168, %f33, %f43;
	setp.lt.ftz.f32 	%p169, %f33, %f51;
	xor.pred 	%p170, %p168, %p169;
	@%p170 bra 	$Lt_19_136962;
	cvt.rn.f32.s32 	%f394, %r25;
	ld.global.f32 	%f395, [polyPoints+24];
	sub.ftz.f32 	%f396, %f394, %f395;
	ld.global.f32 	%f397, [polyPoints+32];
	sub.ftz.f32 	%f398, %f33, %f43;
	mul.ftz.f32 	%f399, %f397, %f398;
	sub.ftz.f32 	%f400, %f399, %f396;
	mov.f32 	%f353, %f400;
$Lt_19_136962:
	// Edge 3: entries 3 and 4.
	ld.global.f32 	%f401, [polyPoints+52];
	setp.ge.ftz.f32 	%p171, %f33, %f51;
	setp.lt.ftz.f32 	%p172, %f33, %f401;
	xor.pred 	%p173, %p171, %p172;
	@%p173 bra 	$Lt_19_137474;
	cvt.rn.f32.s32 	%f402, %r25;
	ld.global.f32 	%f403, [polyPoints+36];
	sub.ftz.f32 	%f404, %f402, %f403;
	ld.global.f32 	%f405, [polyPoints+44];
	sub.ftz.f32 	%f406, %f33, %f51;
	mul.ftz.f32 	%f407, %f405, %f406;
	sub.ftz.f32 	%f408, %f407, %f404;
	mov.f32 	%f355, %f408;
$Lt_19_137474:
	// Edge 4: entries 4 and 5.
	ld.global.f32 	%f409, [polyPoints+64];
	setp.ge.ftz.f32 	%p174, %f33, %f401;
	setp.lt.ftz.f32 	%p175, %f33, %f409;
	xor.pred 	%p176, %p174, %p175;
	@%p176 bra 	$Lt_19_137986;
	cvt.rn.f32.s32 	%f410, %r25;
	ld.global.f32 	%f411, [polyPoints+48];
	sub.ftz.f32 	%f412, %f410, %f411;
	ld.global.f32 	%f413, [polyPoints+56];
	sub.ftz.f32 	%f414, %f33, %f401;
	mul.ftz.f32 	%f415, %f413, %f414;
	sub.ftz.f32 	%f416, %f415, %f412;
	mov.f32 	%f357, %f416;
$Lt_19_137986:
	// Edge 5: entries 5 and 6.
	ld.global.f32 	%f417, [polyPoints+76];
	setp.ge.ftz.f32 	%p177, %f33, %f409;
	setp.lt.ftz.f32 	%p178, %f33, %f417;
	xor.pred 	%p179, %p177, %p178;
	@%p179 bra 	$Lt_19_138498;
	cvt.rn.f32.s32 	%f418, %r25;
	ld.global.f32 	%f419, [polyPoints+60];
	sub.ftz.f32 	%f420, %f418, %f419;
	ld.global.f32 	%f421, [polyPoints+68];
	sub.ftz.f32 	%f422, %f33, %f409;
	mul.ftz.f32 	%f423, %f421, %f422;
	sub.ftz.f32 	%f424, %f423, %f420;
	mov.f32 	%f359, %f424;
$Lt_19_138498:
	// Edge 6: entries 6 and 7.
	ld.global.f32 	%f425, [polyPoints+88];
	setp.ge.ftz.f32 	%p180, %f33, %f417;
	setp.lt.ftz.f32 	%p181, %f33, %f425;
	xor.pred 	%p182, %p180, %p181;
	@%p182 bra 	$Lt_19_139010;
	cvt.rn.f32.s32 	%f426, %r25;
	ld.global.f32 	%f427, [polyPoints+72];
	sub.ftz.f32 	%f428, %f426, %f427;
	ld.global.f32 	%f429, [polyPoints+80];
	sub.ftz.f32 	%f430, %f33, %f417;
	mul.ftz.f32 	%f431, %f429, %f430;
	sub.ftz.f32 	%f432, %f431, %f428;
	mov.f32 	%f361, %f432;
$Lt_19_139010:
	// Edge 7: entries 7 and 8.
	ld.global.f32 	%f433, [polyPoints+100];
	setp.ge.ftz.f32 	%p183, %f33, %f425;
	setp.lt.ftz.f32 	%p184, %f33, %f433;
	xor.pred 	%p185, %p183, %p184;
	@%p185 bra 	$Lt_19_139522;
	cvt.rn.f32.s32 	%f434, %r25;
	ld.global.f32 	%f435, [polyPoints+84];
	sub.ftz.f32 	%f436, %f434, %f435;
	ld.global.f32 	%f437, [polyPoints+92];
	sub.ftz.f32 	%f438, %f33, %f425;
	mul.ftz.f32 	%f439, %f437, %f438;
	sub.ftz.f32 	%f440, %f439, %f436;
	mov.f32 	%f363, %f440;
$Lt_19_139522:
	// Edge 8: entries 8 and 9.
	ld.global.f32 	%f441, [polyPoints+112];
	setp.ge.ftz.f32 	%p186, %f33, %f433;
	setp.lt.ftz.f32 	%p187, %f33, %f441;
	xor.pred 	%p188, %p186, %p187;
	@%p188 bra 	$Lt_19_140034;
	cvt.rn.f32.s32 	%f442, %r25;
	ld.global.f32 	%f443, [polyPoints+96];
	sub.ftz.f32 	%f444, %f442, %f443;
	ld.global.f32 	%f445, [polyPoints+104];
	sub.ftz.f32 	%f446, %f33, %f433;
	mul.ftz.f32 	%f447, %f445, %f446;
	sub.ftz.f32 	%f448, %f447, %f444;
	mov.f32 	%f365, %f448;
$Lt_19_140034:
	// Edge 9: entries 9 and 10.
	ld.global.f32 	%f449, [polyPoints+124];
	setp.ge.ftz.f32 	%p189, %f33, %f441;
	setp.lt.ftz.f32 	%p190, %f33, %f449;
	xor.pred 	%p191, %p189, %p190;
	@%p191 bra 	$Lt_19_140546;
	cvt.rn.f32.s32 	%f450, %r25;
	ld.global.f32 	%f451, [polyPoints+108];
	sub.ftz.f32 	%f452, %f450, %f451;
	ld.global.f32 	%f453, [polyPoints+116];
	sub.ftz.f32 	%f454, %f33, %f441;
	mul.ftz.f32 	%f455, %f453, %f454;
	sub.ftz.f32 	%f456, %f455, %f452;
	mov.f32 	%f367, %f456;
$Lt_19_140546:
	// Edge 10: entries 10 and 11.
	ld.global.f32 	%f457, [polyPoints+136];
	setp.ge.ftz.f32 	%p192, %f33, %f449;
	setp.lt.ftz.f32 	%p193, %f33, %f457;
	xor.pred 	%p194, %p192, %p193;
	@%p194 bra 	$Lt_19_141058;
	cvt.rn.f32.s32 	%f458, %r25;
	ld.global.f32 	%f459, [polyPoints+120];
	sub.ftz.f32 	%f460, %f458, %f459;
	ld.global.f32 	%f461, [polyPoints+128];
	sub.ftz.f32 	%f462, %f33, %f449;
	mul.ftz.f32 	%f463, %f461, %f462;
	sub.ftz.f32 	%f464, %f463, %f460;
	mov.f32 	%f369, %f464;
$Lt_19_141058:
	// Edge 11: entries 11 and 12.
	ld.global.f32 	%f465, [polyPoints+148];
	setp.ge.ftz.f32 	%p195, %f33, %f457;
	setp.lt.ftz.f32 	%p196, %f33, %f465;
	xor.pred 	%p197, %p195, %p196;
	@%p197 bra 	$Lt_19_141570;
	cvt.rn.f32.s32 	%f466, %r25;
	ld.global.f32 	%f467, [polyPoints+132];
	sub.ftz.f32 	%f468, %f466, %f467;
	ld.global.f32 	%f469, [polyPoints+140];
	sub.ftz.f32 	%f470, %f33, %f457;
	mul.ftz.f32 	%f471, %f469, %f470;
	sub.ftz.f32 	%f472, %f471, %f468;
	mov.f32 	%f371, %f472;
$Lt_19_141570:
	// Edge 12: entries 12 and 13.
	ld.global.f32 	%f473, [polyPoints+160];
	setp.ge.ftz.f32 	%p198, %f33, %f465;
	setp.lt.ftz.f32 	%p199, %f33, %f473;
	xor.pred 	%p200, %p198, %p199;
	@%p200 bra 	$Lt_19_142082;
	cvt.rn.f32.s32 	%f474, %r25;
	ld.global.f32 	%f475, [polyPoints+144];
	sub.ftz.f32 	%f476, %f474, %f475;
	ld.global.f32 	%f477, [polyPoints+152];
	sub.ftz.f32 	%f478, %f33, %f465;
	mul.ftz.f32 	%f479, %f477, %f478;
	sub.ftz.f32 	%f480, %f479, %f476;
	mov.f32 	%f373, %f480;
$Lt_19_142082:
	// Edge 13: entries 13 and 14.
	ld.global.f32 	%f481, [polyPoints+172];
	setp.ge.ftz.f32 	%p201, %f33, %f473;
	setp.lt.ftz.f32 	%p202, %f33, %f481;
	xor.pred 	%p203, %p201, %p202;
	@%p203 bra 	$Lt_19_142594;
	cvt.rn.f32.s32 	%f482, %r25;
	ld.global.f32 	%f483, [polyPoints+156];
	sub.ftz.f32 	%f484, %f482, %f483;
	ld.global.f32 	%f485, [polyPoints+164];
	sub.ftz.f32 	%f486, %f33, %f473;
	mul.ftz.f32 	%f487, %f485, %f486;
	sub.ftz.f32 	%f488, %f487, %f484;
	mov.f32 	%f375, %f488;
$Lt_19_142594:
	// Edge 14: entries 14 and 15.
	ld.global.f32 	%f489, [polyPoints+184];
	setp.ge.ftz.f32 	%p204, %f33, %f481;
	setp.lt.ftz.f32 	%p205, %f33, %f489;
	xor.pred 	%p206, %p204, %p205;
	@%p206 bra 	$Lt_19_143106;
	cvt.rn.f32.s32 	%f490, %r25;
	ld.global.f32 	%f491, [polyPoints+168];
	sub.ftz.f32 	%f492, %f490, %f491;
	ld.global.f32 	%f493, [polyPoints+176];
	sub.ftz.f32 	%f494, %f33, %f481;
	mul.ftz.f32 	%f495, %f493, %f494;
	sub.ftz.f32 	%f496, %f495, %f492;
	mov.f32 	%f377, %f496;
$Lt_19_143106:
	// Edge 15: entries 15 and 16. Same test as above; the upper-bound compare
	// is emitted as setp.gt with the operands swapped.
	setp.ge.ftz.f32 	%p207, %f33, %f489;
	ld.global.f32 	%f497, [polyPoints+196];
	setp.gt.ftz.f32 	%p208, %f497, %f33;
	xor.pred 	%p209, %p207, %p208;
	@%p209 bra 	$Lt_19_143618;
	cvt.rn.f32.s32 	%f498, %r25;
	ld.global.f32 	%f499, [polyPoints+180];
	sub.ftz.f32 	%f500, %f498, %f499;
	ld.global.f32 	%f501, [polyPoints+188];
	sub.ftz.f32 	%f502, %f33, %f489;
	mul.ftz.f32 	%f503, %f501, %f502;
	sub.ftz.f32 	%f504, %f503, %f500;
	mov.f32 	%f379, %f504;
$Lt_19_143618:
	.loc	18	52	0
	// Scan loop over 32 consecutive 16-byte destination elements, 8 per pass.
	//   %r148 : base position of the current group (0, 8, 16, 24)
	//   %rd25 : address of the group's first element (+128 bytes per pass)
	//   %f349, %f351, ..., %f379 : the sixteen per-edge values, only read here
	//   %r149 : parity flag (0/1)
	mov.s32 	%r148, 0;
$Lt_19_144642:
 //<loop> Loop body line 52, nesting depth: 1, iterations: 4
	// Phase 1: parity at the group base, plus the smallest per-edge value
	// strictly greater than the base (%f508, seeded with 100000).
	cvt.rn.f32.s32 	%f505, %r148;
	mov.f32 	%f506, %f349;
	setp.lt.ftz.f32 	%p210, %f505, %f506;
	@!%p210 bra 	$Lt_19_145154;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	62	0
	mov.f32 	%f507, 0f47c35000;   	// 100000
	min.ftz.f32 	%f508, %f506, %f507;
	mov.s32 	%r149, 1;
	bra.uni 	$Lt_19_144898;
$Lt_19_145154:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	mov.s32 	%r149, 0;
	mov.f32 	%f508, 0f47c35000;   	// 100000
$Lt_19_144898:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	// Values 2..16 follow the same pattern: if base < value, flip %r149 and
	// fold the value into the running minimum.
	mov.f32 	%f509, %f351;
	setp.lt.ftz.f32 	%p211, %f505, %f509;
	@!%p211 bra 	$Lt_19_145410;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	61	0
	xor.b32 	%r149, %r149, 1;
	.loc	18	62	0
	min.ftz.f32 	%f508, %f508, %f509;
$Lt_19_145410:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	mov.f32 	%f510, %f353;
	setp.lt.ftz.f32 	%p212, %f505, %f510;
	@!%p212 bra 	$Lt_19_145922;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	61	0
	xor.b32 	%r149, %r149, 1;
	.loc	18	62	0
	min.ftz.f32 	%f508, %f508, %f510;
$Lt_19_145922:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	mov.f32 	%f511, %f355;
	setp.lt.ftz.f32 	%p213, %f505, %f511;
	@!%p213 bra 	$Lt_19_146434;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	61	0
	xor.b32 	%r149, %r149, 1;
	.loc	18	62	0
	min.ftz.f32 	%f508, %f508, %f511;
$Lt_19_146434:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	mov.f32 	%f512, %f357;
	setp.lt.ftz.f32 	%p214, %f505, %f512;
	@!%p214 bra 	$Lt_19_146946;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	61	0
	xor.b32 	%r149, %r149, 1;
	.loc	18	62	0
	min.ftz.f32 	%f508, %f508, %f512;
$Lt_19_146946:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	mov.f32 	%f513, %f359;
	setp.lt.ftz.f32 	%p215, %f505, %f513;
	@!%p215 bra 	$Lt_19_147458;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	61	0
	xor.b32 	%r149, %r149, 1;
	.loc	18	62	0
	min.ftz.f32 	%f508, %f508, %f513;
$Lt_19_147458:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	mov.f32 	%f514, %f361;
	setp.lt.ftz.f32 	%p216, %f505, %f514;
	@!%p216 bra 	$Lt_19_147970;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	61	0
	xor.b32 	%r149, %r149, 1;
	.loc	18	62	0
	min.ftz.f32 	%f508, %f508, %f514;
$Lt_19_147970:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	mov.f32 	%f515, %f363;
	setp.lt.ftz.f32 	%p217, %f505, %f515;
	@!%p217 bra 	$Lt_19_148482;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	61	0
	xor.b32 	%r149, %r149, 1;
	.loc	18	62	0
	min.ftz.f32 	%f508, %f508, %f515;
$Lt_19_148482:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	mov.f32 	%f516, %f365;
	setp.lt.ftz.f32 	%p218, %f505, %f516;
	@!%p218 bra 	$Lt_19_148994;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	61	0
	xor.b32 	%r149, %r149, 1;
	.loc	18	62	0
	min.ftz.f32 	%f508, %f508, %f516;
$Lt_19_148994:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	mov.f32 	%f517, %f367;
	setp.lt.ftz.f32 	%p219, %f505, %f517;
	@!%p219 bra 	$Lt_19_149506;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	61	0
	xor.b32 	%r149, %r149, 1;
	.loc	18	62	0
	min.ftz.f32 	%f508, %f508, %f517;
$Lt_19_149506:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	mov.f32 	%f518, %f369;
	setp.lt.ftz.f32 	%p220, %f505, %f518;
	@!%p220 bra 	$Lt_19_150018;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	61	0
	xor.b32 	%r149, %r149, 1;
	.loc	18	62	0
	min.ftz.f32 	%f508, %f508, %f518;
$Lt_19_150018:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	mov.f32 	%f519, %f371;
	setp.lt.ftz.f32 	%p221, %f505, %f519;
	@!%p221 bra 	$Lt_19_150530;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	61	0
	xor.b32 	%r149, %r149, 1;
	.loc	18	62	0
	min.ftz.f32 	%f508, %f508, %f519;
$Lt_19_150530:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	mov.f32 	%f520, %f373;
	setp.lt.ftz.f32 	%p222, %f505, %f520;
	@!%p222 bra 	$Lt_19_151042;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	61	0
	xor.b32 	%r149, %r149, 1;
	.loc	18	62	0
	min.ftz.f32 	%f508, %f508, %f520;
$Lt_19_151042:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	mov.f32 	%f521, %f375;
	setp.lt.ftz.f32 	%p223, %f505, %f521;
	@!%p223 bra 	$Lt_19_151554;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	61	0
	xor.b32 	%r149, %r149, 1;
	.loc	18	62	0
	min.ftz.f32 	%f508, %f508, %f521;
$Lt_19_151554:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	mov.f32 	%f522, %f377;
	setp.lt.ftz.f32 	%p224, %f505, %f522;
	@!%p224 bra 	$Lt_19_152066;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	61	0
	xor.b32 	%r149, %r149, 1;
	.loc	18	62	0
	min.ftz.f32 	%f508, %f508, %f522;
$Lt_19_152066:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	mov.f32 	%f523, %f379;
	setp.lt.ftz.f32 	%p225, %f505, %f523;
	@!%p225 bra 	$Lt_19_152578;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	61	0
	xor.b32 	%r149, %r149, 1;
	.loc	18	62	0
	min.ftz.f32 	%f508, %f508, %f523;
$Lt_19_152578:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	// %p226 = parity at the base is 0.
	// %p227 = group end (base + 8) <= smallest value beyond the base, so the
	// base parity holds for all 8 elements of the group.
	mov.s32 	%r150, 0;
	setp.eq.s32 	%p226, %r149, %r150;
	add.s32 	%r151, %r148, 8;
	cvt.rn.f32.s32 	%f524, %r151;
	setp.le.ftz.f32 	%p227, %f524, %f508;
	@!%p227 bra 	$Lt_19_153346;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	@!%p226 bra 	$Lt_19_153090;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	68	0
	// Uniform group with parity 0: store 0.0f at byte offset 12 of each of
	// the 8 elements (element stride 16 bytes).
	// NOTE(review): presumably the 4th component of a float4 pixel - confirm.
	mov.f32 	%f525, 0f00000000;   	// 0
	st.global.f32 	[%rd25+12], %f525;
	mov.f32 	%f526, 0f00000000;   	// 0
	st.global.f32 	[%rd25+28], %f526;
	mov.f32 	%f527, 0f00000000;   	// 0
	st.global.f32 	[%rd25+44], %f527;
	mov.f32 	%f528, 0f00000000;   	// 0
	st.global.f32 	[%rd25+60], %f528;
	mov.f32 	%f529, 0f00000000;   	// 0
	st.global.f32 	[%rd25+76], %f529;
	mov.f32 	%f530, 0f00000000;   	// 0
	st.global.f32 	[%rd25+92], %f530;
	mov.f32 	%f531, 0f00000000;   	// 0
	st.global.f32 	[%rd25+108], %f531;
	mov.f32 	%f532, 0f00000000;   	// 0
	st.global.f32 	[%rd25+124], %f532;
	bra.uni 	$Lt_19_153090;
$Lt_19_153346:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	// Mixed group: element 0 reuses the base parity; elements 1..7 are
	// re-evaluated individually in the inner loop.
	@!%p226 bra 	$Lt_19_154114;
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	72	0
	mov.f32 	%f533, 0f00000000;   	// 0
	st.global.f32 	[%rd25+12], %f533;
$Lt_19_154114:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	mov.s32 	%r152, 1;
$Lt_19_155138:
 //<loop> Loop body line 72, nesting depth: 2, iterations: 7
	.loc	18	75	0
	// %f534 = float(base + %r152). The selp/xor chain yields the parity of
	// the number of per-edge values greater than %f534: each step keeps the
	// previous parity or its complement depending on one compare.
	// %r149 is reused as a temporary at one step; it is re-initialised at the
	// top of every outer pass.
	add.s32 	%r153, %r152, %r148;
	cvt.rn.f32.s32 	%f534, %r153;
	mov.f32 	%f535, %f357;
	setp.gt.ftz.f32 	%p228, %f535, %f534;
	mov.f32 	%f536, %f355;
	setp.gt.ftz.f32 	%p229, %f536, %f534;
	mov.f32 	%f537, %f353;
	setp.gt.ftz.f32 	%p230, %f537, %f534;
	mov.f32 	%f538, %f351;
	setp.gt.ftz.f32 	%p231, %f538, %f534;
	mov.f32 	%f539, %f349;
	setp.gt.ftz.f32 	%p232, %f539, %f534;
	selp.s32 	%r154, 1, 0, %p232;
	xor.b32 	%r155, %r154, 1;
	selp.s32 	%r156, %r155, %r154, %p231;
	xor.b32 	%r157, %r156, 1;
	selp.s32 	%r158, %r157, %r156, %p230;
	xor.b32 	%r159, %r158, 1;
	selp.s32 	%r160, %r159, %r158, %p229;
	xor.b32 	%r161, %r160, 1;
	selp.s32 	%r162, %r161, %r160, %p228;
	xor.b32 	%r163, %r162, 1;
	mov.f32 	%f540, %f359;
	setp.gt.ftz.f32 	%p233, %f540, %f534;
	selp.s32 	%r164, %r163, %r162, %p233;
	xor.b32 	%r165, %r164, 1;
	mov.f32 	%f541, %f361;
	setp.gt.ftz.f32 	%p234, %f541, %f534;
	selp.s32 	%r166, %r165, %r164, %p234;
	xor.b32 	%r167, %r166, 1;
	mov.f32 	%f542, %f363;
	setp.gt.ftz.f32 	%p235, %f542, %f534;
	selp.s32 	%r168, %r167, %r166, %p235;
	xor.b32 	%r169, %r168, 1;
	mov.f32 	%f543, %f365;
	setp.gt.ftz.f32 	%p236, %f543, %f534;
	selp.s32 	%r170, %r169, %r168, %p236;
	xor.b32 	%r171, %r170, 1;
	mov.f32 	%f544, %f367;
	setp.gt.ftz.f32 	%p237, %f544, %f534;
	selp.s32 	%r172, %r171, %r170, %p237;
	xor.b32 	%r173, %r172, 1;
	mov.f32 	%f545, %f369;
	setp.gt.ftz.f32 	%p238, %f545, %f534;
	selp.s32 	%r174, %r173, %r172, %p238;
	xor.b32 	%r175, %r174, 1;
	mov.f32 	%f546, %f371;
	setp.gt.ftz.f32 	%p239, %f546, %f534;
	selp.s32 	%r176, %r175, %r174, %p239;
	xor.b32 	%r177, %r176, 1;
	mov.f32 	%f547, %f373;
	setp.gt.ftz.f32 	%p240, %f547, %f534;
	selp.s32 	%r178, %r177, %r176, %p240;
	xor.b32 	%r179, %r178, 1;
	mov.f32 	%f548, %f375;
	setp.gt.ftz.f32 	%p241, %f548, %f534;
	selp.s32 	%r180, %r179, %r178, %p241;
	xor.b32 	%r181, %r180, 1;
	mov.f32 	%f549, %f377;
	setp.gt.ftz.f32 	%p242, %f549, %f534;
	selp.s32 	%r149, %r181, %r180, %p242;
	xor.b32 	%r182, %r149, 1;
	mov.f32 	%f550, %f379;
	setp.gt.ftz.f32 	%p243, %f550, %f534;
	selp.s32 	%r183, %r182, %r149, %p243;
	mov.u32 	%r184, 0;
	setp.ne.s32 	%p244, %r183, %r184;
	@%p244 bra 	$Lt_19_155394;
 //<loop> Part of loop body line 72, head labeled $Lt_19_155138
	.loc	18	76	0
	// Parity 0: store 0.0f into element %r152 at %rd25 + %r152 * 16 + 12.
	// (%rd26 is not used in the visible code.)
	mov.f32 	%f551, 0f00000000;   	// 0
	cvt.s64.s32 	%rd26, %r152;
	mul.wide.s32 	%rd27, %r152, 16;
	add.u64 	%rd28, %rd25, %rd27;
	st.global.f32 	[%rd28+12], %f551;
$Lt_19_155394:
 //<loop> Part of loop body line 72, head labeled $Lt_19_155138
	add.s32 	%r152, %r152, 1;
	mov.u32 	%r185, 8;
	setp.ne.s32 	%p245, %r152, %r185;
	@%p245 bra 	$Lt_19_155138;
$Lt_19_153090:
 //<loop> Part of loop body line 52, head labeled $Lt_19_144642
	.loc	18	80	0
	// Next group: advance by 8 elements (128 bytes); repeat while the new
	// base is <= 31.
	add.u64 	%rd25, %rd25, 128;
	mov.s32 	%r148, %r151;
	mov.u32 	%r186, 31;
	setp.le.s32 	%p246, %r151, %r186;
	@%p246 bra 	$Lt_19_144642;
	// Target label is defined outside this chunk (presumably the kernel's
	// common exit - confirm).
	bra.uni 	$LBB318_cuda_kernel_bitmask;
// Dispatch case %r20 == 8: any other value jumps on to $Lt_19_156674.
// This part computes the destination address in %rd32 and eight per-edge
// values (%f553, %f555, ..., %f567) that the scan loop after $Lt_19_160514 reads.
// %r10, %r11, %r12 and %r20 are written before this point.
// NOTE(review): presumably %r10 = row, %r11 = start column, %r12 = row width and
// %r20 = number of polygon edges -- confirm against the kernel prologue.
$Lt_19_135682:
	mov.u32 	%r187, 8;
	setp.ne.s32 	%p247, %r20, %r187;
	@%p247 bra 	$Lt_19_156674;
	.loc	18	109	0
	// %r25 = min(%r11, %r12 - 32); the scan loop below walks 32 elements from %r25.
	sub.s32 	%r24, %r12, 32;
	setp.gt.s32 	%p4, %r11, %r24;
	selp.s32 	%r25, %r24, %r11, %p4;
	// %rd32 = dstFrame + 16 * (%r25 + dstPitch * %r10): elements are 16 bytes wide
	// and dstPitch is used as an element count, not a byte count.
	ld.param.u64 	%rd29, [__cudaparm_cuda_kernel_bitmask_dstFrame];
	ld.param.s32 	%r188, [__cudaparm_cuda_kernel_bitmask_dstPitch];
	mul.lo.s32 	%r189, %r188, %r10;
	add.s32 	%r190, %r25, %r189;
	// %rd30 is not read anywhere in the visible code; mul.wide does the widening itself.
	cvt.s64.s32 	%rd30, %r190;
	mul.wide.s32 	%rd31, %r190, 16;
	add.u64 	%rd32, %rd29, %rd31;
	.loc	18	51	0
	// All eight per-edge values start at -1.0, which is below every offset (>= 0)
	// tested by the scan loop, so an edge left at -1.0 never counts there.
	mov.f32 	%f552, 0fbf800000;   	// -1
	mov.f32 	%f553, %f552;
	mov.f32 	%f554, 0fbf800000;   	// -1
	mov.f32 	%f555, %f554;
	mov.f32 	%f556, 0fbf800000;   	// -1
	mov.f32 	%f557, %f556;
	mov.f32 	%f558, 0fbf800000;   	// -1
	mov.f32 	%f559, %f558;
	mov.f32 	%f560, 0fbf800000;   	// -1
	mov.f32 	%f561, %f560;
	mov.f32 	%f562, 0fbf800000;   	// -1
	mov.f32 	%f563, %f562;
	mov.f32 	%f564, 0fbf800000;   	// -1
	mov.f32 	%f565, %f564;
	mov.f32 	%f566, 0fbf800000;   	// -1
	mov.f32 	%f567, %f566;
	.loc	18	24	0
	// polyPoints is read as 12-byte records (three floats at +0, +4, +8).
	// Edge k uses the +4 field of records k and k+1 for its range test, and the
	// +0 and +8 fields of record k for its value.
	// NOTE(review): presumably +0 = x, +4 = y and +8 = dx/dy of the edge -- confirm.
	//
	// Edge 0. The value is computed only when (%f33 >= a) == (%f33 < b), i.e. when
	// %f33 lies in the half-open range between a = [polyPoints+4] and
	// b = [polyPoints+16], in either order; the xor being true skips the edge.
	cvt.rn.f32.s32 	%f33, %r10;
	ld.global.f32 	%f34, [polyPoints+4];
	ld.global.f32 	%f35, [polyPoints+16];
	setp.ge.ftz.f32 	%p248, %f33, %f34;
	setp.lt.ftz.f32 	%p249, %f33, %f35;
	xor.pred 	%p250, %p248, %p249;
	@%p250 bra 	$Lt_19_156930;
	.loc	18	41	0
	// %f553 = [polyPoints+8] * (%f33 - [polyPoints+4]) - (float(%r25) - [polyPoints+0])
	cvt.rn.f32.s32 	%f568, %r25;
	ld.global.f32 	%f569, [polyPoints+0];
	sub.ftz.f32 	%f570, %f568, %f569;
	ld.global.f32 	%f571, [polyPoints+8];
	sub.ftz.f32 	%f572, %f33, %f34;
	mul.ftz.f32 	%f573, %f571, %f572;
	sub.ftz.f32 	%f574, %f573, %f570;
	mov.f32 	%f553, %f574;
$Lt_19_156930:
	// Edge 1: range test on +16 / +28, value from +12 and +20, result in %f555.
	ld.global.f32 	%f43, [polyPoints+28];
	setp.ge.ftz.f32 	%p251, %f33, %f35;
	setp.lt.ftz.f32 	%p252, %f33, %f43;
	xor.pred 	%p253, %p251, %p252;
	@%p253 bra 	$Lt_19_157442;
	cvt.rn.f32.s32 	%f575, %r25;
	ld.global.f32 	%f576, [polyPoints+12];
	sub.ftz.f32 	%f577, %f575, %f576;
	ld.global.f32 	%f578, [polyPoints+20];
	sub.ftz.f32 	%f579, %f33, %f35;
	mul.ftz.f32 	%f580, %f578, %f579;
	sub.ftz.f32 	%f581, %f580, %f577;
	mov.f32 	%f555, %f581;
$Lt_19_157442:
	// Edge 2: range test on +28 / +40, value from +24 and +32, result in %f557.
	ld.global.f32 	%f51, [polyPoints+40];
	setp.ge.ftz.f32 	%p254, %f33, %f43;
	setp.lt.ftz.f32 	%p255, %f33, %f51;
	xor.pred 	%p256, %p254, %p255;
	@%p256 bra 	$Lt_19_157954;
	cvt.rn.f32.s32 	%f582, %r25;
	ld.global.f32 	%f583, [polyPoints+24];
	sub.ftz.f32 	%f584, %f582, %f583;
	ld.global.f32 	%f585, [polyPoints+32];
	sub.ftz.f32 	%f586, %f33, %f43;
	mul.ftz.f32 	%f587, %f585, %f586;
	sub.ftz.f32 	%f588, %f587, %f584;
	mov.f32 	%f557, %f588;
$Lt_19_157954:
	// Edge 3: range test on +40 / +52, value from +36 and +44, result in %f559.
	ld.global.f32 	%f589, [polyPoints+52];
	setp.ge.ftz.f32 	%p257, %f33, %f51;
	setp.lt.ftz.f32 	%p258, %f33, %f589;
	xor.pred 	%p259, %p257, %p258;
	@%p259 bra 	$Lt_19_158466;
	cvt.rn.f32.s32 	%f590, %r25;
	ld.global.f32 	%f591, [polyPoints+36];
	sub.ftz.f32 	%f592, %f590, %f591;
	ld.global.f32 	%f593, [polyPoints+44];
	sub.ftz.f32 	%f594, %f33, %f51;
	mul.ftz.f32 	%f595, %f593, %f594;
	sub.ftz.f32 	%f596, %f595, %f592;
	mov.f32 	%f559, %f596;
$Lt_19_158466:
	// Edge 4: range test on +52 / +64, value from +48 and +56, result in %f561.
	ld.global.f32 	%f597, [polyPoints+64];
	setp.ge.ftz.f32 	%p260, %f33, %f589;
	setp.lt.ftz.f32 	%p261, %f33, %f597;
	xor.pred 	%p262, %p260, %p261;
	@%p262 bra 	$Lt_19_158978;
	cvt.rn.f32.s32 	%f598, %r25;
	ld.global.f32 	%f599, [polyPoints+48];
	sub.ftz.f32 	%f600, %f598, %f599;
	ld.global.f32 	%f601, [polyPoints+56];
	sub.ftz.f32 	%f602, %f33, %f589;
	mul.ftz.f32 	%f603, %f601, %f602;
	sub.ftz.f32 	%f604, %f603, %f600;
	mov.f32 	%f561, %f604;
$Lt_19_158978:
	// Edge 5: range test on +64 / +76, value from +60 and +68, result in %f563.
	ld.global.f32 	%f605, [polyPoints+76];
	setp.ge.ftz.f32 	%p263, %f33, %f597;
	setp.lt.ftz.f32 	%p264, %f33, %f605;
	xor.pred 	%p265, %p263, %p264;
	@%p265 bra 	$Lt_19_159490;
	cvt.rn.f32.s32 	%f606, %r25;
	ld.global.f32 	%f607, [polyPoints+60];
	sub.ftz.f32 	%f608, %f606, %f607;
	ld.global.f32 	%f609, [polyPoints+68];
	sub.ftz.f32 	%f610, %f33, %f597;
	mul.ftz.f32 	%f611, %f609, %f610;
	sub.ftz.f32 	%f612, %f611, %f608;
	mov.f32 	%f563, %f612;
$Lt_19_159490:
	// Edge 6: range test on +76 / +88, value from +72 and +80, result in %f565.
	ld.global.f32 	%f613, [polyPoints+88];
	setp.ge.ftz.f32 	%p266, %f33, %f605;
	setp.lt.ftz.f32 	%p267, %f33, %f613;
	xor.pred 	%p268, %p266, %p267;
	@%p268 bra 	$Lt_19_160002;
	cvt.rn.f32.s32 	%f614, %r25;
	ld.global.f32 	%f615, [polyPoints+72];
	sub.ftz.f32 	%f616, %f614, %f615;
	ld.global.f32 	%f617, [polyPoints+80];
	sub.ftz.f32 	%f618, %f33, %f605;
	mul.ftz.f32 	%f619, %f617, %f618;
	sub.ftz.f32 	%f620, %f619, %f616;
	mov.f32 	%f565, %f620;
$Lt_19_160002:
	// Edge 7: range test on +88 / +100, value from +84 and +92, result in %f567.
	// The upper bound is written as [polyPoints+100] > %f33, equivalent to the
	// "%f33 < bound" form used by the other edges. The +100 read is the +4 field
	// of a ninth record, so polyPoints must hold at least nine records here.
	setp.ge.ftz.f32 	%p269, %f33, %f613;
	ld.global.f32 	%f621, [polyPoints+100];
	setp.gt.ftz.f32 	%p270, %f621, %f33;
	xor.pred 	%p271, %p269, %p270;
	@%p271 bra 	$Lt_19_160514;
	cvt.rn.f32.s32 	%f622, %r25;
	ld.global.f32 	%f623, [polyPoints+84];
	sub.ftz.f32 	%f624, %f622, %f623;
	ld.global.f32 	%f625, [polyPoints+92];
	sub.ftz.f32 	%f626, %f33, %f613;
	mul.ftz.f32 	%f627, %f625, %f626;
	sub.ftz.f32 	%f628, %f627, %f624;
	mov.f32 	%f567, %f628;
$Lt_19_160514:
	// Scan loop for the eight-value case. It walks 32 consecutive 16-byte elements
	// starting at %rd32 in four groups of eight (%r191 = 0, 8, 16, 24) and
	// conditionally stores 0.0 to the float at byte offset 12 of each element.
	// Inputs: %rd32 (group base address) and the eight per-edge values
	// %f553, %f555, %f557, %f559, %f561, %f563, %f565, %f567.
	// NOTE(review): presumably offset 12 is the alpha/.w channel of a float4 pixel
	// and an even count means "outside the polygon" -- confirm against the source.
	.loc	18	52	0
	mov.s32 	%r191, 0;
$Lt_19_161538:
 //<loop> Loop body line 52, nesting depth: 1, iterations: 4
	// Per group: %r192 = parity of the number of per-edge values greater than
	// float(%r191); %f632 = smallest such value, capped at 100000.0.
	cvt.rn.f32.s32 	%f629, %r191;
	mov.f32 	%f630, %f553;
	setp.lt.ftz.f32 	%p272, %f629, %f630;
	@!%p272 bra 	$Lt_19_162050;
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	.loc	18	62	0
	mov.f32 	%f631, 0f47c35000;   	// 100000
	min.ftz.f32 	%f632, %f630, %f631;
	mov.s32 	%r192, 1;
	bra.uni 	$Lt_19_161794;
$Lt_19_162050:
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	mov.s32 	%r192, 0;
	mov.f32 	%f632, 0f47c35000;   	// 100000
$Lt_19_161794:
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	// Remaining seven values: each one above float(%r191) flips the parity and
	// may lower the running minimum.
	mov.f32 	%f633, %f555;
	setp.lt.ftz.f32 	%p273, %f629, %f633;
	@!%p273 bra 	$Lt_19_162306;
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	.loc	18	61	0
	xor.b32 	%r192, %r192, 1;
	.loc	18	62	0
	min.ftz.f32 	%f632, %f632, %f633;
$Lt_19_162306:
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	mov.f32 	%f634, %f557;
	setp.lt.ftz.f32 	%p274, %f629, %f634;
	@!%p274 bra 	$Lt_19_162818;
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	.loc	18	61	0
	xor.b32 	%r192, %r192, 1;
	.loc	18	62	0
	min.ftz.f32 	%f632, %f632, %f634;
$Lt_19_162818:
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	mov.f32 	%f635, %f559;
	setp.lt.ftz.f32 	%p275, %f629, %f635;
	@!%p275 bra 	$Lt_19_163330;
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	.loc	18	61	0
	xor.b32 	%r192, %r192, 1;
	.loc	18	62	0
	min.ftz.f32 	%f632, %f632, %f635;
$Lt_19_163330:
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	mov.f32 	%f636, %f561;
	setp.lt.ftz.f32 	%p276, %f629, %f636;
	@!%p276 bra 	$Lt_19_163842;
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	.loc	18	61	0
	xor.b32 	%r192, %r192, 1;
	.loc	18	62	0
	min.ftz.f32 	%f632, %f632, %f636;
$Lt_19_163842:
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	mov.f32 	%f637, %f563;
	setp.lt.ftz.f32 	%p277, %f629, %f637;
	@!%p277 bra 	$Lt_19_164354;
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	.loc	18	61	0
	xor.b32 	%r192, %r192, 1;
	.loc	18	62	0
	min.ftz.f32 	%f632, %f632, %f637;
$Lt_19_164354:
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	mov.f32 	%f638, %f565;
	setp.lt.ftz.f32 	%p278, %f629, %f638;
	@!%p278 bra 	$Lt_19_164866;
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	.loc	18	61	0
	xor.b32 	%r192, %r192, 1;
	.loc	18	62	0
	min.ftz.f32 	%f632, %f632, %f638;
$Lt_19_164866:
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	mov.f32 	%f639, %f567;
	setp.lt.ftz.f32 	%p279, %f629, %f639;
	@!%p279 bra 	$Lt_19_165378;
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	.loc	18	61	0
	xor.b32 	%r192, %r192, 1;
	.loc	18	62	0
	min.ftz.f32 	%f632, %f632, %f639;
$Lt_19_165378:
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	// %p280 = parity is even. %p281 = float(%r191 + 8) <= %f632, i.e. no per-edge
	// value lies strictly between this group's first offset and the next group's
	// first offset, so one decision covers all eight elements.
	mov.s32 	%r193, 0;
	setp.eq.s32 	%p280, %r192, %r193;
	add.s32 	%r194, %r191, 8;
	cvt.rn.f32.s32 	%f640, %r194;
	setp.le.ftz.f32 	%p281, %f640, %f632;
	@!%p281 bra 	$Lt_19_166146;
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	// Whole-group path: odd parity leaves the group untouched.
	@!%p280 bra 	$Lt_19_165890;
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	.loc	18	68	0
	// Even parity: zero the offset-12 float of all eight elements (16-byte stride).
	mov.f32 	%f641, 0f00000000;   	// 0
	st.global.f32 	[%rd32+12], %f641;
	mov.f32 	%f642, 0f00000000;   	// 0
	st.global.f32 	[%rd32+28], %f642;
	mov.f32 	%f643, 0f00000000;   	// 0
	st.global.f32 	[%rd32+44], %f643;
	mov.f32 	%f644, 0f00000000;   	// 0
	st.global.f32 	[%rd32+60], %f644;
	mov.f32 	%f645, 0f00000000;   	// 0
	st.global.f32 	[%rd32+76], %f645;
	mov.f32 	%f646, 0f00000000;   	// 0
	st.global.f32 	[%rd32+92], %f646;
	mov.f32 	%f647, 0f00000000;   	// 0
	st.global.f32 	[%rd32+108], %f647;
	mov.f32 	%f648, 0f00000000;   	// 0
	st.global.f32 	[%rd32+124], %f648;
	bra.uni 	$Lt_19_165890;
$Lt_19_166146:
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	// Per-element path: element 0 reuses the group parity already in %p280.
	@!%p280 bra 	$Lt_19_166914;
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	.loc	18	72	0
	mov.f32 	%f649, 0f00000000;   	// 0
	st.global.f32 	[%rd32+12], %f649;
$Lt_19_166914:
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	// Elements 1..7 each recompute the parity at their own offset %r195 + %r191.
	mov.s32 	%r195, 1;
$Lt_19_167938:
 //<loop> Loop body line 72, nesting depth: 2, iterations: 7
	.loc	18	75	0
	add.s32 	%r196, %r195, %r191;
	cvt.rn.f32.s32 	%f650, %r196;
	mov.f32 	%f651, %f561;
	setp.gt.ftz.f32 	%p282, %f651, %f650;
	mov.f32 	%f652, %f559;
	setp.gt.ftz.f32 	%p283, %f652, %f650;
	mov.f32 	%f653, %f557;
	setp.gt.ftz.f32 	%p284, %f653, %f650;
	mov.f32 	%f654, %f555;
	setp.gt.ftz.f32 	%p285, %f654, %f650;
	mov.f32 	%f655, %f553;
	setp.gt.ftz.f32 	%p286, %f655, %f650;
	// Branch-free parity: start from the first comparison as 0/1, then each
	// xor/selp pair flips the running bit when the next value exceeds %f650.
	selp.s32 	%r197, 1, 0, %p286;
	xor.b32 	%r198, %r197, 1;
	selp.s32 	%r199, %r198, %r197, %p285;
	xor.b32 	%r200, %r199, 1;
	selp.s32 	%r201, %r200, %r199, %p284;
	xor.b32 	%r202, %r201, 1;
	selp.s32 	%r203, %r202, %r201, %p283;
	xor.b32 	%r204, %r203, 1;
	selp.s32 	%r205, %r204, %r203, %p282;
	xor.b32 	%r206, %r205, 1;
	mov.f32 	%f656, %f563;
	setp.gt.ftz.f32 	%p287, %f656, %f650;
	selp.s32 	%r207, %r206, %r205, %p287;
	xor.b32 	%r208, %r207, 1;
	mov.f32 	%f657, %f565;
	setp.gt.ftz.f32 	%p288, %f657, %f650;
	// %r192 is reused here as a scratch intermediate; the outer loop sets it
	// again at its head before the next read, so the group parity is not needed.
	selp.s32 	%r192, %r208, %r207, %p288;
	xor.b32 	%r209, %r192, 1;
	mov.f32 	%f658, %f567;
	setp.gt.ftz.f32 	%p289, %f658, %f650;
	selp.s32 	%r210, %r209, %r192, %p289;
	mov.u32 	%r211, 0;
	setp.ne.s32 	%p290, %r210, %r211;
	@%p290 bra 	$Lt_19_168194;
 //<loop> Part of loop body line 72, head labeled $Lt_19_167938
	.loc	18	76	0
	// Even parity: zero the offset-12 float of element %r195 in this group.
	mov.f32 	%f659, 0f00000000;   	// 0
	// %rd33 is not read anywhere in the visible code.
	cvt.s64.s32 	%rd33, %r195;
	mul.wide.s32 	%rd34, %r195, 16;
	add.u64 	%rd35, %rd32, %rd34;
	st.global.f32 	[%rd35+12], %f659;
$Lt_19_168194:
 //<loop> Part of loop body line 72, head labeled $Lt_19_167938
	add.s32 	%r195, %r195, 1;
	mov.u32 	%r212, 8;
	setp.ne.s32 	%p291, %r195, %r212;
	@%p291 bra 	$Lt_19_167938;
$Lt_19_165890:
 //<loop> Part of loop body line 52, head labeled $Lt_19_161538
	.loc	18	80	0
	// Next group: advance by 8 elements * 16 bytes; repeat while the new offset is <= 31.
	add.u64 	%rd32, %rd32, 128;
	mov.s32 	%r191, %r194;
	mov.u32 	%r213, 31;
	setp.le.s32 	%p292, %r194, %r213;
	@%p292 bra 	$Lt_19_161538;
	bra.uni 	$LBB318_cuda_kernel_bitmask;
// Dispatch case %r20 == 4: any other value reaching this point exits the kernel.
// This part computes the destination address in %rd39 and four per-edge values
// (%f661, %f663, %f665, %f667) that the scan loop after $Lt_19_171266 reads.
// %r10, %r11, %r12 and %r20 are written before this point.
// NOTE(review): presumably %r10 = row, %r11 = start column, %r12 = row width and
// %r20 = number of polygon edges -- confirm against the kernel prologue.
$Lt_19_156674:
	mov.u32 	%r214, 4;
	setp.ne.s32 	%p293, %r20, %r214;
	@%p293 bra 	$LBB318_cuda_kernel_bitmask;
	.loc	18	110	0
	// %r25 = min(%r11, %r12 - 32); the scan loop below walks 32 elements from %r25.
	sub.s32 	%r24, %r12, 32;
	setp.gt.s32 	%p4, %r11, %r24;
	selp.s32 	%r25, %r24, %r11, %p4;
	// %rd39 = dstFrame + 16 * (%r25 + dstPitch * %r10): elements are 16 bytes wide
	// and dstPitch is used as an element count, not a byte count.
	ld.param.u64 	%rd36, [__cudaparm_cuda_kernel_bitmask_dstFrame];
	ld.param.s32 	%r215, [__cudaparm_cuda_kernel_bitmask_dstPitch];
	mul.lo.s32 	%r216, %r215, %r10;
	add.s32 	%r217, %r25, %r216;
	// %rd37 is not read anywhere in the visible code; mul.wide does the widening itself.
	cvt.s64.s32 	%rd37, %r217;
	mul.wide.s32 	%rd38, %r217, 16;
	add.u64 	%rd39, %rd36, %rd38;
	.loc	18	51	0
	// All four per-edge values start at -1.0, which is below every offset (>= 0)
	// tested by the scan loop, so an edge left at -1.0 never counts there.
	mov.f32 	%f660, 0fbf800000;   	// -1
	mov.f32 	%f661, %f660;
	mov.f32 	%f662, 0fbf800000;   	// -1
	mov.f32 	%f663, %f662;
	mov.f32 	%f664, 0fbf800000;   	// -1
	mov.f32 	%f665, %f664;
	mov.f32 	%f666, 0fbf800000;   	// -1
	mov.f32 	%f667, %f666;
	.loc	18	24	0
	// polyPoints is read as 12-byte records (three floats at +0, +4, +8).
	// Edge k uses the +4 field of records k and k+1 for its range test, and the
	// +0 and +8 fields of record k for its value.
	// NOTE(review): presumably +0 = x, +4 = y and +8 = dx/dy of the edge -- confirm.
	//
	// Edge 0. The value is computed only when (%f33 >= a) == (%f33 < b), i.e. when
	// %f33 lies in the half-open range between a = [polyPoints+4] and
	// b = [polyPoints+16], in either order; the xor being true skips the edge.
	cvt.rn.f32.s32 	%f33, %r10;
	ld.global.f32 	%f34, [polyPoints+4];
	ld.global.f32 	%f35, [polyPoints+16];
	setp.ge.ftz.f32 	%p294, %f33, %f34;
	setp.lt.ftz.f32 	%p295, %f33, %f35;
	xor.pred 	%p296, %p294, %p295;
	@%p296 bra 	$Lt_19_169730;
	.loc	18	41	0
	// %f661 = [polyPoints+8] * (%f33 - [polyPoints+4]) - (float(%r25) - [polyPoints+0])
	cvt.rn.f32.s32 	%f668, %r25;
	ld.global.f32 	%f669, [polyPoints+0];
	sub.ftz.f32 	%f670, %f668, %f669;
	ld.global.f32 	%f671, [polyPoints+8];
	sub.ftz.f32 	%f672, %f33, %f34;
	mul.ftz.f32 	%f673, %f671, %f672;
	sub.ftz.f32 	%f674, %f673, %f670;
	mov.f32 	%f661, %f674;
$Lt_19_169730:
	// Edge 1: range test on +16 / +28, value from +12 and +20, result in %f663.
	ld.global.f32 	%f43, [polyPoints+28];
	setp.ge.ftz.f32 	%p297, %f33, %f35;
	setp.lt.ftz.f32 	%p298, %f33, %f43;
	xor.pred 	%p299, %p297, %p298;
	@%p299 bra 	$Lt_19_170242;
	cvt.rn.f32.s32 	%f675, %r25;
	ld.global.f32 	%f676, [polyPoints+12];
	sub.ftz.f32 	%f677, %f675, %f676;
	ld.global.f32 	%f678, [polyPoints+20];
	sub.ftz.f32 	%f679, %f33, %f35;
	mul.ftz.f32 	%f680, %f678, %f679;
	sub.ftz.f32 	%f681, %f680, %f677;
	mov.f32 	%f663, %f681;
$Lt_19_170242:
	// Edge 2: range test on +28 / +40, value from +24 and +32, result in %f665.
	ld.global.f32 	%f51, [polyPoints+40];
	setp.ge.ftz.f32 	%p300, %f33, %f43;
	setp.lt.ftz.f32 	%p301, %f33, %f51;
	xor.pred 	%p302, %p300, %p301;
	@%p302 bra 	$Lt_19_170754;
	cvt.rn.f32.s32 	%f682, %r25;
	ld.global.f32 	%f683, [polyPoints+24];
	sub.ftz.f32 	%f684, %f682, %f683;
	ld.global.f32 	%f685, [polyPoints+32];
	sub.ftz.f32 	%f686, %f33, %f43;
	mul.ftz.f32 	%f687, %f685, %f686;
	sub.ftz.f32 	%f688, %f687, %f684;
	mov.f32 	%f665, %f688;
$Lt_19_170754:
	// Edge 3: range test on +40 / +52, value from +36 and +44, result in %f667.
	// The upper bound is written as [polyPoints+52] > %f33, equivalent to the
	// "%f33 < bound" form used by the other edges. The +52 read is the +4 field
	// of a fifth record, so polyPoints must hold at least five records here.
	setp.ge.ftz.f32 	%p303, %f33, %f51;
	ld.global.f32 	%f689, [polyPoints+52];
	setp.gt.ftz.f32 	%p304, %f689, %f33;
	xor.pred 	%p305, %p303, %p304;
	@%p305 bra 	$Lt_19_171266;
	cvt.rn.f32.s32 	%f690, %r25;
	ld.global.f32 	%f691, [polyPoints+36];
	sub.ftz.f32 	%f692, %f690, %f691;
	ld.global.f32 	%f693, [polyPoints+44];
	sub.ftz.f32 	%f694, %f33, %f51;
	mul.ftz.f32 	%f695, %f693, %f694;
	sub.ftz.f32 	%f696, %f695, %f692;
	mov.f32 	%f667, %f696;
$Lt_19_171266:
	// Scan loop for the four-value case. It walks 32 consecutive 16-byte elements
	// starting at %rd39 in four groups of eight (%r218 = 0, 8, 16, 24) and
	// conditionally stores 0.0 to the float at byte offset 12 of each element.
	// Inputs: %rd39 (group base address) and the four per-edge values
	// %f661, %f663, %f665, %f667.
	// NOTE(review): presumably offset 12 is the alpha/.w channel of a float4 pixel
	// and an even count means "outside the polygon" -- confirm against the source.
	.loc	18	52	0
	mov.s32 	%r218, 0;
$Lt_19_172290:
 //<loop> Loop body line 52, nesting depth: 1, iterations: 4
	// Per group: %r219 = parity of the number of per-edge values greater than
	// float(%r218); %f700 = smallest such value, capped at 100000.0.
	cvt.rn.f32.s32 	%f697, %r218;
	mov.f32 	%f698, %f661;
	setp.lt.ftz.f32 	%p306, %f697, %f698;
	@!%p306 bra 	$Lt_19_172802;
 //<loop> Part of loop body line 52, head labeled $Lt_19_172290
	.loc	18	62	0
	mov.f32 	%f699, 0f47c35000;   	// 100000
	min.ftz.f32 	%f700, %f698, %f699;
	mov.s32 	%r219, 1;
	bra.uni 	$Lt_19_172546;
$Lt_19_172802:
 //<loop> Part of loop body line 52, head labeled $Lt_19_172290
	mov.s32 	%r219, 0;
	mov.f32 	%f700, 0f47c35000;   	// 100000
$Lt_19_172546:
 //<loop> Part of loop body line 52, head labeled $Lt_19_172290
	// Remaining three values: each one above float(%r218) flips the parity and
	// may lower the running minimum.
	mov.f32 	%f701, %f663;
	setp.lt.ftz.f32 	%p307, %f697, %f701;
	@!%p307 bra 	$Lt_19_173058;
 //<loop> Part of loop body line 52, head labeled $Lt_19_172290
	.loc	18	61	0
	xor.b32 	%r219, %r219, 1;
	.loc	18	62	0
	min.ftz.f32 	%f700, %f700, %f701;
$Lt_19_173058:
 //<loop> Part of loop body line 52, head labeled $Lt_19_172290
	mov.f32 	%f702, %f665;
	setp.lt.ftz.f32 	%p308, %f697, %f702;
	@!%p308 bra 	$Lt_19_173570;
 //<loop> Part of loop body line 52, head labeled $Lt_19_172290
	.loc	18	61	0
	xor.b32 	%r219, %r219, 1;
	.loc	18	62	0
	min.ftz.f32 	%f700, %f700, %f702;
$Lt_19_173570:
 //<loop> Part of loop body line 52, head labeled $Lt_19_172290
	mov.f32 	%f703, %f667;
	setp.lt.ftz.f32 	%p309, %f697, %f703;
	@!%p309 bra 	$Lt_19_174082;
 //<loop> Part of loop body line 52, head labeled $Lt_19_172290
	.loc	18	61	0
	xor.b32 	%r219, %r219, 1;
	.loc	18	62	0
	min.ftz.f32 	%f700, %f700, %f703;
$Lt_19_174082:
 //<loop> Part of loop body line 52, head labeled $Lt_19_172290
	// %p310 = parity is even. %p311 = float(%r218 + 8) <= %f700, i.e. no per-edge
	// value lies strictly between this group's first offset and the next group's
	// first offset, so one decision covers all eight elements.
	mov.s32 	%r220, 0;
	setp.eq.s32 	%p310, %r219, %r220;
	add.s32 	%r221, %r218, 8;
	cvt.rn.f32.s32 	%f704, %r221;
	setp.le.ftz.f32 	%p311, %f704, %f700;
	@!%p311 bra 	$Lt_19_174850;
 //<loop> Part of loop body line 52, head labeled $Lt_19_172290
	// Whole-group path: odd parity leaves the group untouched.
	@!%p310 bra 	$Lt_19_174594;
 //<loop> Part of loop body line 52, head labeled $Lt_19_172290
	.loc	18	68	0
	// Even parity: zero the offset-12 float of all eight elements (16-byte stride).
	mov.f32 	%f705, 0f00000000;   	// 0
	st.global.f32 	[%rd39+12], %f705;
	mov.f32 	%f706, 0f00000000;   	// 0
	st.global.f32 	[%rd39+28], %f706;
	mov.f32 	%f707, 0f00000000;   	// 0
	st.global.f32 	[%rd39+44], %f707;
	mov.f32 	%f708, 0f00000000;   	// 0
	st.global.f32 	[%rd39+60], %f708;
	mov.f32 	%f709, 0f00000000;   	// 0
	st.global.f32 	[%rd39+76], %f709;
	mov.f32 	%f710, 0f00000000;   	// 0
	st.global.f32 	[%rd39+92], %f710;
	mov.f32 	%f711, 0f00000000;   	// 0
	st.global.f32 	[%rd39+108], %f711;
	mov.f32 	%f712, 0f00000000;   	// 0
	st.global.f32 	[%rd39+124], %f712;
	bra.uni 	$Lt_19_174594;
$Lt_19_174850:
 //<loop> Part of loop body line 52, head labeled $Lt_19_172290
	// Per-element path: element 0 reuses the group parity already in %p310.
	@!%p310 bra 	$Lt_19_175618;
 //<loop> Part of loop body line 52, head labeled $Lt_19_172290
	.loc	18	72	0
	mov.f32 	%f713, 0f00000000;   	// 0
	st.global.f32 	[%rd39+12], %f713;
$Lt_19_175618:
 //<loop> Part of loop body line 52, head labeled $Lt_19_172290
	// Elements 1..7 each recompute the parity at their own offset %r222 + %r218.
	mov.s32 	%r222, 1;
$Lt_19_176642:
 //<loop> Loop body line 72, nesting depth: 2, iterations: 7
	add.s32 	%r223, %r222, %r218;
	cvt.rn.f32.s32 	%f714, %r223;
	mov.f32 	%f715, %f665;
	setp.gt.ftz.f32 	%p312, %f715, %f714;
	mov.f32 	%f716, %f663;
	setp.gt.ftz.f32 	%p313, %f716, %f714;
	mov.f32 	%f717, %f661;
	setp.gt.ftz.f32 	%p314, %f717, %f714;
	// Branch-free parity: start from the first comparison as 0/1, then each
	// xor/selp pair flips the running bit when the next value exceeds %f714.
	selp.s32 	%r224, 1, 0, %p314;
	xor.b32 	%r225, %r224, 1;
	selp.s32 	%r226, %r225, %r224, %p313;
	xor.b32 	%r227, %r226, 1;
	selp.s32 	%r228, %r227, %r226, %p312;
	xor.b32 	%r229, %r228, 1;
	mov.f32 	%f718, %f667;
	setp.gt.ftz.f32 	%p315, %f718, %f714;
	selp.s32 	%r230, %r229, %r228, %p315;
	mov.u32 	%r231, 0;
	setp.ne.s32 	%p316, %r230, %r231;
	@%p316 bra 	$Lt_19_176898;
 //<loop> Part of loop body line 72, head labeled $Lt_19_176642
	.loc	18	76	0
	// Even parity: zero the offset-12 float of element %r222 in this group.
	mov.f32 	%f719, 0f00000000;   	// 0
	// %rd40 is not read anywhere in the visible code.
	cvt.s64.s32 	%rd40, %r222;
	mul.wide.s32 	%rd41, %r222, 16;
	add.u64 	%rd42, %rd39, %rd41;
	st.global.f32 	[%rd42+12], %f719;
$Lt_19_176898:
 //<loop> Part of loop body line 72, head labeled $Lt_19_176642
	add.s32 	%r222, %r222, 1;
	mov.u32 	%r232, 8;
	setp.ne.s32 	%p317, %r222, %r232;
	@%p317 bra 	$Lt_19_176642;
$Lt_19_174594:
 //<loop> Part of loop body line 52, head labeled $Lt_19_172290
	.loc	18	80	0
	// Next group: advance by 8 elements * 16 bytes; repeat while the new offset is <= 31.
	// When the loop finishes, control falls through to the kernel exit label.
	add.u64 	%rd39, %rd39, 128;
	mov.s32 	%r218, %r221;
	mov.u32 	%r233, 31;
	setp.le.s32 	%p318, %r221, %r233;
	@%p318 bra 	$Lt_19_172290;
// Common kernel exit. The visible dispatch cases reach it by an explicit branch
// after their scan loop, by the "%r20 != 4" branch, or by falling through from
// the four-value scan loop directly above.
$LBB318_cuda_kernel_bitmask:
	.loc	18	113	0
	exit;
// Compiler-emitted end-of-kernel label; nothing in the visible code branches to it.
$LDWend_cuda_kernel_bitmask:
	} // cuda_kernel_bitmask

