	.version 2.2
	.target sm_20
	// compiled with ../../../External/3rdParty/NVIDIA/CUDA/win/bin/../open64/lib//be.exe
	// nvopencc 3.2 built on 2010-11-04

	.visible .func (.param .s32 __cudaretf__Z15IntegerMultiplyii) _Z15IntegerMultiplyii (.param .s32 __cudaparmf1__Z15IntegerMultiplyii, .param .s32 __cudaparmf2__Z15IntegerMultiplyii)

	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelXv) _Z17Standard2DKernelXv ()

	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelYv) _Z17Standard2DKernelYv ()

	.visible .func (.param .align 16 .b8 __cudaretf__Z13Half4ToFloat47ushort4[16]) _Z13Half4ToFloat47ushort4 (.param .align 8 .b8 __cudaparmf1__Z13Half4ToFloat47ushort4[8])

	.visible .func (.param .align 8 .b8 __cudaretf__Z13Float4ToHalf46float4[8]) _Z13Float4ToHalf46float4 (.param .align 16 .b8 __cudaparmf1__Z13Float4ToHalf46float4[16])

	.visible .func (.param .u32 __cudaretf__Z4Mix3RjS_S_) _Z4Mix3RjS_S_ (.param .u64 __cudaparmf1__Z4Mix3RjS_S_, .param .u64 __cudaparmf2__Z4Mix3RjS_S_, .param .u64 __cudaparmf3__Z4Mix3RjS_S_)

	.visible .func (.param .s32 __cudaretf__Z4Randj) _Z4Randj (.param .u32 __cudaparmf1__Z4Randj)

	.visible .func (.param .s32 __cudaretf__Z6Rand2Djjj) _Z6Rand2Djjj (.param .u32 __cudaparmf1__Z6Rand2Djjj, .param .u32 __cudaparmf2__Z6Rand2Djjj, .param .u32 __cudaparmf3__Z6Rand2Djjj)

	.visible .func (.param .s32 __cudaretf__Z6Rand2Dj) _Z6Rand2Dj (.param .u32 __cudaparmf1__Z6Rand2Dj)

	.visible .func (.param .s32 __cudaretf__Z19MaxUnsignedBitValuei) _Z19MaxUnsignedBitValuei (.param .s32 __cudaparmf1__Z19MaxUnsignedBitValuei)

	.visible .func (.param .f32 __cudaretf__Z13MaxDepthValue14IR_PixelFormat) _Z13MaxDepthValue14IR_PixelFormat (.param .s32 __cudaparmf1__Z13MaxDepthValue14IR_PixelFormat)

	.visible .func (.param .f32 __cudaretf__Z15DepthScaleValue14IR_PixelFormatS_) _Z15DepthScaleValue14IR_PixelFormatS_ (.param .s32 __cudaparmf1__Z15DepthScaleValue14IR_PixelFormatS_, .param .s32 __cudaparmf2__Z15DepthScaleValue14IR_PixelFormatS_)

	.visible .func (.param .s32 __cudaretf__Z7IsYCbCr14IR_PixelFormat) _Z7IsYCbCr14IR_PixelFormat (.param .s32 __cudaparmf1__Z7IsYCbCr14IR_PixelFormat)

	.visible .func (.param .f32 __cudaretf__Z11YCbCrOffseti14IR_PixelFormat) _Z11YCbCrOffseti14IR_PixelFormat (.param .s32 __cudaparmf1__Z11YCbCrOffseti14IR_PixelFormat, .param .s32 __cudaparmf2__Z11YCbCrOffseti14IR_PixelFormat)

	.visible .func (.param .align 16 .b8 __cudaretf__Z15AddAYCbCrOffset6float414IR_PixelFormat[16]) _Z15AddAYCbCrOffset6float414IR_PixelFormat (.param .align 16 .b8 __cudaparmf1__Z15AddAYCbCrOffset6float414IR_PixelFormat[16], .param .s32 __cudaparmf2__Z15AddAYCbCrOffset6float414IR_PixelFormat)

	.visible .func (.param .align 16 .b8 __cudaretf__Z20SubtractAYCbCrOffset6float414IR_PixelFormat[16]) _Z20SubtractAYCbCrOffset6float414IR_PixelFormat (.param .align 16 .b8 __cudaparmf1__Z20SubtractAYCbCrOffset6float414IR_PixelFormat[16], .param .s32 __cudaparmf2__Z20SubtractAYCbCrOffset6float414IR_PixelFormat)

	.visible .func (.param .u64 __cudaretf__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_) _Z23ColorSpaceConvertMatrix14IR_PixelFormatS_ (.param .s32 __cudaparmf1__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_, .param .s32 __cudaparmf2__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_)

	.visible .func (.param .s32 __cudaretf__Z13IsGammaLinear14IR_PixelFormat) _Z13IsGammaLinear14IR_PixelFormat (.param .s32 __cudaparmf1__Z13IsGammaLinear14IR_PixelFormat)

	.visible .func (.param .f32 __cudaretf__Z18ApplyGammaFunctionff) _Z18ApplyGammaFunctionff (.param .f32 __cudaparmf1__Z18ApplyGammaFunctionff, .param .f32 __cudaparmf2__Z18ApplyGammaFunctionff)

	.visible .func (.param .align 16 .b8 __cudaretf__Z16OpaqueComponents6float414IR_PixelFormat[16]) _Z16OpaqueComponents6float414IR_PixelFormat (.param .align 16 .b8 __cudaparmf1__Z16OpaqueComponents6float414IR_PixelFormat[16], .param .s32 __cudaparmf2__Z16OpaqueComponents6float414IR_PixelFormat)

	.visible .func (.param .align 16 .b8 __cudaretf__Z21PremultiplyComponents6float414IR_PixelFormat[16]) _Z21PremultiplyComponents6float414IR_PixelFormat (.param .align 16 .b8 __cudaparmf1__Z21PremultiplyComponents6float414IR_PixelFormat[16], .param .s32 __cudaparmf2__Z21PremultiplyComponents6float414IR_PixelFormat)

	.visible .func (.param .align 16 .b8 __cudaretf__Z23UnpremultiplyComponents6float414IR_PixelFormat[16]) _Z23UnpremultiplyComponents6float414IR_PixelFormat (.param .align 16 .b8 __cudaparmf1__Z23UnpremultiplyComponents6float414IR_PixelFormat[16], .param .s32 __cudaparmf2__Z23UnpremultiplyComponents6float414IR_PixelFormat)

	.visible .func (.param .align 16 .b8 __cudaretf__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_[16]) _Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_ (.param .align 16 .b8 __cudaparmf1__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_[16], .param .s32 __cudaparmf2__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_, .param .s32 __cudaparmf3__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_)

	.visible .func (.param .align 16 .b8 __cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_[16]) _Z18SwapComponentOrderI6float4ET_RKS1_ (.param .u64 __cudaparmf1__Z18SwapComponentOrderI6float4ET_RKS1_)

	.visible .func (.param .align 16 .b8 __cudaretf__Z15ClampComponentsI6float4ET_RKS1_ff[16]) _Z15ClampComponentsI6float4ET_RKS1_ff (.param .u64 __cudaparmf1__Z15ClampComponentsI6float4ET_RKS1_ff, .param .f32 __cudaparmf2__Z15ClampComponentsI6float4ET_RKS1_ff, .param .f32 __cudaparmf3__Z15ClampComponentsI6float4ET_RKS1_ff)

	.visible .func (.param .align 16 .b8 __cudaretf__ZplI6float4ET_RKS1_f[16]) _ZplI6float4ET_RKS1_f (.param .u64 __cudaparmf1__ZplI6float4ET_RKS1_f, .param .f32 __cudaparmf2__ZplI6float4ET_RKS1_f)

	.visible .func (.param .u64 __cudaretf__ZpLI6float4ERT_S2_f) _ZpLI6float4ERT_S2_f (.param .u64 __cudaparmf1__ZpLI6float4ERT_S2_f, .param .f32 __cudaparmf2__ZpLI6float4ERT_S2_f)

	.visible .func (.param .align 8 .b8 __cudaretf__Z6Read2DI7ushort4ET_PKS1_iii[8]) _Z6Read2DI7ushort4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI7ushort4ET_PKS1_iii)

	.visible .func (.param .align 16 .b8 __cudaretf__Z6Read2DI6float4ET_PKS1_iii[16]) _Z6Read2DI6float4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI6float4ET_PKS1_iii)

	.visible .func _Z7Write2DI7ushort4EvT_PS1_iii (.param .align 8 .b8 __cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii[8], .param .u64 __cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii)

	.visible .func _Z7Write2DI6float4EvT_PS1_iii (.param .align 16 .b8 __cudaparmf1__Z7Write2DI6float4EvT_PS1_iii[16], .param .u64 __cudaparmf2__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI6float4EvT_PS1_iii)

	.visible .func (.param .align 16 .b8 __cudaretf__Z18UnpremultiplyPixel8PixelRGB[16]) _Z18UnpremultiplyPixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z18UnpremultiplyPixel8PixelRGB[16])

	.visible .func (.param .f32 __cudaretf__Z13ToLinearColorf) _Z13ToLinearColorf (.param .f32 __cudaparmf1__Z13ToLinearColorf)

	.visible .func (.param .f32 __cudaretf__Z15FromLinearColorf) _Z15FromLinearColorf (.param .f32 __cudaparmf1__Z15FromLinearColorf)

	.visible .func (.param .align 16 .b8 __cudaretf__Z25PremultiplyLinearizePixel8PixelRGB[16]) _Z25PremultiplyLinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16]) _Z29UnpremultiplyUnlinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z20PremultiplyLinearize6float4[16]) _Z20PremultiplyLinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z20PremultiplyLinearize6float4[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z24UnpremultiplyUnlinearize6float4[16]) _Z24UnpremultiplyUnlinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z24UnpremultiplyUnlinearize6float4[16])

	.visible .func (.param .align 16 .b8 __cudaretf__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_[16]) _Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_ (.param .align 4 .b8 __cudaparmf1__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_[4], .param .s32 __cudaparmf2__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_, .param .s32 __cudaparmf3__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_)

	.visible .func (.param .align 16 .b8 __cudaretf__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_[16]) _Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_ (.param .align 8 .b8 __cudaparmf1__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_[8], .param .s32 __cudaparmf2__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_, .param .s32 __cudaparmf3__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_)

	.visible .func (.param .align 4 .b8 __cudaretf__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_[4]) _Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_ (.param .align 16 .b8 __cudaparmf1__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_[16], .param .s32 __cudaparmf2__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_, .param .s32 __cudaparmf3__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_)

	.visible .func (.param .align 8 .b8 __cudaretf__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_[8]) _Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_ (.param .align 16 .b8 __cudaparmf1__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_[16], .param .s32 __cudaparmf2__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_, .param .s32 __cudaparmf3__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_)

	.visible .func (.param .align 4 .b8 __cudaretf__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_[4]) _Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_ (.param .align 4 .b8 __cudaparmf1__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_[4], .param .s32 __cudaparmf2__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_, .param .s32 __cudaparmf3__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_)

	.visible .func (.param .align 4 .b8 __cudaretf__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii[4]) _Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii (.param .u64 __cudaparmf1__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii, .param .s32 __cudaparmf2__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii, .param .u32 __cudaparmf3__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii, .param .s32 __cudaparmf4__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii, .param .s32 __cudaparmf5__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii)

	.visible .func (.param .align 4 .b8 __cudaretf__Z6Read2DI6uchar4ET_PKS1_iii[4]) _Z6Read2DI6uchar4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI6uchar4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI6uchar4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI6uchar4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI6uchar4ET_PKS1_iii)

	.visible .func (.param .align 8 .b8 __cudaretf__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii[8]) _Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii (.param .u64 __cudaparmf1__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii, .param .s32 __cudaparmf2__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii, .param .u32 __cudaparmf3__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii, .param .s32 __cudaparmf4__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii, .param .s32 __cudaparmf5__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii)

	.visible .func _Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii (.param .align 4 .b8 __cudaparmf1__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii[4], .param .u64 __cudaparmf2__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii, .param .s32 __cudaparmf3__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii, .param .u32 __cudaparmf4__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii, .param .s32 __cudaparmf5__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii, .param .s32 __cudaparmf6__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii)

	.visible .func _Z7Write2DI6uchar4EvT_PS1_iii (.param .align 4 .b8 __cudaparmf1__Z7Write2DI6uchar4EvT_PS1_iii[4], .param .u64 __cudaparmf2__Z7Write2DI6uchar4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI6uchar4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI6uchar4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI6uchar4EvT_PS1_iii)

	.visible .func _Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii (.param .align 8 .b8 __cudaparmf1__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii[8], .param .u64 __cudaparmf2__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii, .param .s32 __cudaparmf3__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii, .param .u32 __cudaparmf4__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii, .param .s32 __cudaparmf5__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii, .param .s32 __cudaparmf6__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii)

	//-----------------------------------------------------------
	// Compiling C:/Users/dvaeng/AppData/Local/Temp/tmpxft_00002f40_00000000-11_PixelFormatConvert_444.cpp3.i (C:/Users/dvaeng/AppData/Local/Temp/ccBI#.a14604)
	//-----------------------------------------------------------

	//-----------------------------------------------------------
	// Options:
	//-----------------------------------------------------------
	//  Target:ptx, ISA:sm_20, Endian:little, Pointer Size:64
	//  -O3	(Optimization level)
	//  -g0	(Debug level)
	//  -m2	(Report advisories)
	//-----------------------------------------------------------

	.file	1	"C:/Users/dvaeng/AppData/Local/Temp/tmpxft_00002f40_00000000-10_PixelFormatConvert_444.cudafe2.gpu"
	.file	2	"c:\Mulder64\shared\adobe\MediaCore\ImageRenderer\API\Inc\ImageRenderer.h"
	.file	3	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/PixelFormat.h"
	.file	4	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/PixelRGB.h"
	.file	5	"C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\include\crtdefs.h"
	.file	6	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\crt/device_runtime.h"
	.file	7	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\host_defines.h"
	.file	8	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\builtin_types.h"
	.file	9	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\device_types.h"
	.file	10	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\driver_types.h"
	.file	11	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\surface_types.h"
	.file	12	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\texture_types.h"
	.file	13	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\vector_types.h"
	.file	14	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\builtin_types.h"
	.file	15	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\host_defines.h"
	.file	16	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\device_launch_parameters.h"
	.file	17	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\crt\storage_class.h"
	.file	18	"C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\include\time.h"
	.file	19	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/Utils.h"
	.file	20	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\Inc\ImageProcessing/PixelFormatConvert_Common.h"
	.file	21	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/VectorUtils.h"
	.file	22	"c:/Mulder64/shared/adobe/MediaCore/GPUFoundation/Src/ImageProcessing/PixelFormatConvert_444.cu"
	.file	23	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\common_functions.h"
	.file	24	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_functions.h"
	.file	25	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_constants.h"
	.file	26	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\device_functions.h"
	.file	27	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_11_atomic_functions.h"
	.file	28	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_12_atomic_functions.h"
	.file	29	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_13_double_functions.h"
	.file	30	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_20_atomic_functions.h"
	.file	31	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_20_intrinsics.h"
	.file	32	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\surface_functions.h"
	.file	33	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\texture_fetch_functions.h"
	.file	34	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_functions_dbl_ptx3.h"
	.file	35	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/ColorSpaceConvert.h"


	.visible .func (.param .s32 __cudaretf__Z15IntegerMultiplyii) _Z15IntegerMultiplyii (.param .s32 __cudaparmf1__Z15IntegerMultiplyii, .param .s32 __cudaparmf2__Z15IntegerMultiplyii)
	{
	// IntegerMultiply(int, int)
	//   In : parameter 1 and parameter 2, both 32-bit integers.
	//   Out: low 32 bits of their signed product.
	.reg .u32 %lhs, %rhs, %product;
	.loc	19	60	0
$LDWbegin__Z15IntegerMultiplyii:
	ld.param.u32 	%lhs, [__cudaparmf1__Z15IntegerMultiplyii];
	ld.param.u32 	%rhs, [__cudaparmf2__Z15IntegerMultiplyii];
	.loc	19	64	0
	mul.lo.s32 	%product, %lhs, %rhs;	// product = lhs * rhs (mod 2^32)
	st.param.s32 	[__cudaretf__Z15IntegerMultiplyii], %product;
	ret;
$LDWend__Z15IntegerMultiplyii:
	} // _Z15IntegerMultiplyii

	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelXv) _Z17Standard2DKernelXv ()
	{
	// Standard2DKernelX()
	//   Out: %ctaid.x * %ntid.x + %tid.x, truncated to 32 bits.
	.reg .u32 %block, %extent, %thread, %column;
	.loc	19	73	0
$LDWbegin__Z17Standard2DKernelXv:
	.loc	19	74	0
	mov.u32 	%block, %ctaid.x;
	mov.u32 	%extent, %ntid.x;
	mov.u32 	%thread, %tid.x;
	// Fused multiply-add; same low 32 bits as a separate mul.lo + add.
	mad.lo.s32 	%column, %block, %extent, %thread;
	st.param.s32 	[__cudaretf__Z17Standard2DKernelXv], %column;
	ret;
$LDWend__Z17Standard2DKernelXv:
	} // _Z17Standard2DKernelXv

	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelYv) _Z17Standard2DKernelYv ()
	{
	// Standard2DKernelY()
	//   Out: %ctaid.y * %ntid.y + %tid.y, truncated to 32 bits.
	.reg .u32 %block, %extent, %thread, %row;
	.loc	19	77	0
$LDWbegin__Z17Standard2DKernelYv:
	.loc	19	78	0
	mov.u32 	%block, %ctaid.y;
	mov.u32 	%extent, %ntid.y;
	mov.u32 	%thread, %tid.y;
	// Fused multiply-add; same low 32 bits as a separate mul.lo + add.
	mad.lo.s32 	%row, %block, %extent, %thread;
	st.param.s32 	[__cudaretf__Z17Standard2DKernelYv], %row;
	ret;
$LDWend__Z17Standard2DKernelYv:
	} // _Z17Standard2DKernelYv

	.visible .func (.param .align 16 .b8 __cudaretf__Z13Half4ToFloat47ushort4[16]) _Z13Half4ToFloat47ushort4 (.param .align 8 .b8 __cudaparmf1__Z13Half4ToFloat47ushort4[8])
	{
	// Half4ToFloat4(ushort4)
	//   In : four 16-bit values at byte offsets 0, 2, 4, 6 of parameter 1.
	//   Out: four .f32 values at byte offsets 0, 4, 8, 12 of the return
	//        buffer, each produced by cvt.ftz.f32.f16 from the input at the
	//        same component position.
	.reg .u32 %hx, %hy, %hz, %hw;	// 16-bit inputs, zero-extended by ld.u16
	.reg .b32 %bits;		// untyped scratch fed to the f16 -> f32 cvt
	.reg .f32 %fx, %fy, %fz, %fw;
	.loc	19	86	0
$LDWbegin__Z13Half4ToFloat47ushort4:
	ld.param.u16 	%hx, [__cudaparmf1__Z13Half4ToFloat47ushort4+0];
	ld.param.u16 	%hy, [__cudaparmf1__Z13Half4ToFloat47ushort4+2];
	ld.param.u16 	%hz, [__cudaparmf1__Z13Half4ToFloat47ushort4+4];
	ld.param.u16 	%hw, [__cudaparmf1__Z13Half4ToFloat47ushort4+6];
	.loc	19	87	0
	// Component 0
	cvt.u16.u32 	%hx, %hx;
	mov.b32 	%bits, %hx;
	cvt.ftz.f32.f16 	%fx, %bits;
	// Component 1
	cvt.u16.u32 	%hy, %hy;
	mov.b32 	%bits, %hy;
	cvt.ftz.f32.f16 	%fy, %bits;
	// Component 2
	cvt.u16.u32 	%hz, %hz;
	mov.b32 	%bits, %hz;
	cvt.ftz.f32.f16 	%fz, %bits;
	// Component 3
	cvt.u16.u32 	%hw, %hw;
	mov.b32 	%bits, %hw;
	cvt.ftz.f32.f16 	%fw, %bits;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+0], %fx;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+4], %fy;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+8], %fz;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+12], %fw;
	ret;
$LDWend__Z13Half4ToFloat47ushort4:
	} // _Z13Half4ToFloat47ushort4

	.visible .func (.param .align 8 .b8 __cudaretf__Z13Float4ToHalf46float4[8]) _Z13Float4ToHalf46float4 (.param .align 16 .b8 __cudaparmf1__Z13Float4ToHalf46float4[16])
	{
	// Float4ToHalf4(float4)
	//   In : four .f32 values at byte offsets 0, 4, 8, 12 of parameter 1.
	//   Out: four 16-bit values at byte offsets 0, 2, 4, 6 of the return
	//        buffer, each produced by cvt.rn.ftz.f16.f32 (round-to-nearest)
	//        from the input at the same component position.
	.reg .f32 %fx, %fy, %fz, %fw;
	.reg .b32 %bits;		// untyped scratch receiving the f16 result
	.reg .u32 %hx, %hy, %hz, %hw;
	.loc	19	95	0
$LDWbegin__Z13Float4ToHalf46float4:
	ld.param.f32 	%fx, [__cudaparmf1__Z13Float4ToHalf46float4+0];
	ld.param.f32 	%fy, [__cudaparmf1__Z13Float4ToHalf46float4+4];
	ld.param.f32 	%fz, [__cudaparmf1__Z13Float4ToHalf46float4+8];
	ld.param.f32 	%fw, [__cudaparmf1__Z13Float4ToHalf46float4+12];
	.loc	19	96	0
	// Component 0
	cvt.rn.ftz.f16.f32 	%bits, %fx;
	mov.b32 	%hx, %bits;
	cvt.u16.u32 	%hx, %hx;
	// Component 1
	cvt.rn.ftz.f16.f32 	%bits, %fy;
	mov.b32 	%hy, %bits;
	cvt.u16.u32 	%hy, %hy;
	// Component 2
	cvt.rn.ftz.f16.f32 	%bits, %fz;
	mov.b32 	%hz, %bits;
	cvt.u16.u32 	%hz, %hz;
	// Component 3
	cvt.rn.ftz.f16.f32 	%bits, %fw;
	mov.b32 	%hw, %bits;
	cvt.u16.u32 	%hw, %hw;
	st.param.u16 	[__cudaretf__Z13Float4ToHalf46float4+0], %hx;
	st.param.u16 	[__cudaretf__Z13Float4ToHalf46float4+2], %hy;
	st.param.u16 	[__cudaretf__Z13Float4ToHalf46float4+4], %hz;
	st.param.u16 	[__cudaretf__Z13Float4ToHalf46float4+6], %hw;
	ret;
$LDWend__Z13Float4ToHalf46float4:
	} // _Z13Float4ToHalf46float4

	.visible .func (.param .u32 __cudaretf__Z4Mix3RjS_S_) _Z4Mix3RjS_S_ (.param .u64 __cudaparmf1__Z4Mix3RjS_S_, .param .u64 __cudaparmf2__Z4Mix3RjS_S_, .param .u64 __cudaparmf3__Z4Mix3RjS_S_)
	{
	// Mix3(a, b, c): in-place mixing of three 32-bit words.
	//   In : three 64-bit addresses (parameters 1..3), called a, b and c in
	//        the comments below. Each is dereferenced as a .u32 with
	//        ld/st that carry no state-space qualifier.
	//   Out: *a, *b and *c are overwritten; the return value is the final
	//        word stored through c.
	// The body is nine steps of the form  x -= y; x -= z; x ^= (shifted word).
	// NOTE(review): the shift schedule (13, 8, 13, 12, 16, 5, 3, 10, 15)
	// looks like Bob Jenkins' 96-bit "mix" - confirm against Utils.h.
	// Every intermediate is stored and then re-read through the pointers
	// rather than kept in a register; presumably this is because the three
	// pointers may alias, so the load/store order must not be changed.
	.reg .u32 %r<75>;
	.reg .u64 %rd<8>;
	.loc	19	138	0
$LDWbegin__Z4Mix3RjS_S_:
	// %rd2 = a, %rd4 = b, %rd6 = c
	ld.param.u64 	%rd1, [__cudaparmf1__Z4Mix3RjS_S_];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf2__Z4Mix3RjS_S_];
	mov.s64 	%rd4, %rd3;
	ld.param.u64 	%rd5, [__cudaparmf3__Z4Mix3RjS_S_];
	mov.s64 	%rd6, %rd5;
	.loc	19	139	0
	// *a -= *b;  *a -= *c;  *a ^= (*c >> 13);
	ld.u32 	%r1, [%rd2+0];
	ld.u32 	%r2, [%rd4+0];
	sub.u32 	%r3, %r1, %r2;
	st.u32 	[%rd2+0], %r3;
	ld.u32 	%r4, [%rd6+0];
	sub.u32 	%r5, %r3, %r4;
	st.u32 	[%rd2+0], %r5;
	ld.u32 	%r6, [%rd6+0];
	shr.u32 	%r7, %r6, 13;
	xor.b32 	%r8, %r5, %r7;
	st.u32 	[%rd2+0], %r8;
	.loc	19	140	0
	// *b -= *c;  *b -= *a;  *b ^= (*a << 8);
	ld.u32 	%r9, [%rd4+0];
	ld.u32 	%r10, [%rd6+0];
	sub.u32 	%r11, %r9, %r10;
	st.u32 	[%rd4+0], %r11;
	ld.u32 	%r12, [%rd2+0];
	sub.u32 	%r13, %r11, %r12;
	st.u32 	[%rd4+0], %r13;
	ld.u32 	%r14, [%rd2+0];
	shl.b32 	%r15, %r14, 8;
	xor.b32 	%r16, %r13, %r15;
	st.u32 	[%rd4+0], %r16;
	.loc	19	141	0
	// *c -= *a;  *c -= *b;  *c ^= (*b >> 13);
	ld.u32 	%r17, [%rd6+0];
	ld.u32 	%r18, [%rd2+0];
	sub.u32 	%r19, %r17, %r18;
	st.u32 	[%rd6+0], %r19;
	ld.u32 	%r20, [%rd4+0];
	sub.u32 	%r21, %r19, %r20;
	st.u32 	[%rd6+0], %r21;
	ld.u32 	%r22, [%rd4+0];
	shr.u32 	%r23, %r22, 13;
	xor.b32 	%r24, %r21, %r23;
	st.u32 	[%rd6+0], %r24;
	.loc	19	142	0
	// *a -= *b;  *a -= *c;  *a ^= (*c >> 12);
	ld.u32 	%r25, [%rd2+0];
	ld.u32 	%r26, [%rd4+0];
	sub.u32 	%r27, %r25, %r26;
	st.u32 	[%rd2+0], %r27;
	ld.u32 	%r28, [%rd6+0];
	sub.u32 	%r29, %r27, %r28;
	st.u32 	[%rd2+0], %r29;
	ld.u32 	%r30, [%rd6+0];
	shr.u32 	%r31, %r30, 12;
	xor.b32 	%r32, %r29, %r31;
	st.u32 	[%rd2+0], %r32;
	.loc	19	143	0
	// *b -= *c;  *b -= *a;  *b ^= (*a << 16);
	ld.u32 	%r33, [%rd4+0];
	ld.u32 	%r34, [%rd6+0];
	sub.u32 	%r35, %r33, %r34;
	st.u32 	[%rd4+0], %r35;
	ld.u32 	%r36, [%rd2+0];
	sub.u32 	%r37, %r35, %r36;
	st.u32 	[%rd4+0], %r37;
	ld.u32 	%r38, [%rd2+0];
	shl.b32 	%r39, %r38, 16;
	xor.b32 	%r40, %r37, %r39;
	st.u32 	[%rd4+0], %r40;
	.loc	19	144	0
	// *c -= *a;  *c -= *b;  *c ^= (*b >> 5);
	ld.u32 	%r41, [%rd6+0];
	ld.u32 	%r42, [%rd2+0];
	sub.u32 	%r43, %r41, %r42;
	st.u32 	[%rd6+0], %r43;
	ld.u32 	%r44, [%rd4+0];
	sub.u32 	%r45, %r43, %r44;
	st.u32 	[%rd6+0], %r45;
	ld.u32 	%r46, [%rd4+0];
	shr.u32 	%r47, %r46, 5;
	xor.b32 	%r48, %r45, %r47;
	st.u32 	[%rd6+0], %r48;
	.loc	19	145	0
	// *a -= *b;  *a -= *c;  *a ^= (*c >> 3);
	ld.u32 	%r49, [%rd2+0];
	ld.u32 	%r50, [%rd4+0];
	sub.u32 	%r51, %r49, %r50;
	st.u32 	[%rd2+0], %r51;
	ld.u32 	%r52, [%rd6+0];
	sub.u32 	%r53, %r51, %r52;
	st.u32 	[%rd2+0], %r53;
	ld.u32 	%r54, [%rd6+0];
	shr.u32 	%r55, %r54, 3;
	xor.b32 	%r56, %r53, %r55;
	st.u32 	[%rd2+0], %r56;
	.loc	19	146	0
	// *b -= *c;  *b -= *a;  *b ^= (*a << 10);
	ld.u32 	%r57, [%rd4+0];
	ld.u32 	%r58, [%rd6+0];
	sub.u32 	%r59, %r57, %r58;
	st.u32 	[%rd4+0], %r59;
	ld.u32 	%r60, [%rd2+0];
	sub.u32 	%r61, %r59, %r60;
	st.u32 	[%rd4+0], %r61;
	ld.u32 	%r62, [%rd2+0];
	shl.b32 	%r63, %r62, 10;
	xor.b32 	%r64, %r61, %r63;
	st.u32 	[%rd4+0], %r64;
	.loc	19	147	0
	// *c -= *a;  *c -= *b;  *c ^= (*b >> 15);
	ld.u32 	%r65, [%rd6+0];
	ld.u32 	%r66, [%rd2+0];
	sub.u32 	%r67, %r65, %r66;
	st.u32 	[%rd6+0], %r67;
	ld.u32 	%r68, [%rd4+0];
	sub.u32 	%r69, %r67, %r68;
	st.u32 	[%rd6+0], %r69;
	ld.u32 	%r70, [%rd4+0];
	shr.u32 	%r71, %r70, 15;
	xor.b32 	%r72, %r69, %r71;
	st.u32 	[%rd6+0], %r72;
	.loc	19	148	0
	// Return the value just stored through c (taken from the register, not re-read).
	mov.s32 	%r73, %r72;
	st.param.u32 	[__cudaretf__Z4Mix3RjS_S_], %r73;
	ret;
$LDWend__Z4Mix3RjS_S_:
	} // _Z4Mix3RjS_S_

	.visible .func (.param .s32 __cudaretf__Z4Randj) _Z4Randj (.param .u32 __cudaparmf1__Z4Randj)
	{
	// Rand(seed)
	//   In : 32-bit seed (parameter 1).
	//   Out: (((seed * 1103515245 + 12345) >> 16) & 255) << 7
	//        XOR
	//         ((seed * -1029531031 - 740551042) >> 16) & 255
	//        with all arithmetic modulo 2^32.
	// NOTE(review): the second constant pair appears to be the first
	// multiply/add pair applied twice (mod 2^32) - confirm against Utils.h.
	.reg .u32 %seed, %upper, %lower, %result;
	.loc	19	152	0
$LDWbegin__Z4Randj:
	ld.param.u32 	%seed, [__cudaparmf1__Z4Randj];
	.loc	19	163	0
	// Upper contribution: one byte shifted left by 7.
	mul.lo.u32 	%upper, %seed, 1103515245;
	add.u32 	%upper, %upper, 12345;
	shr.u32 	%upper, %upper, 16;
	and.b32 	%upper, %upper, 255;
	shl.b32 	%upper, %upper, 7;
	// Lower contribution: one byte, unshifted.
	mul.lo.u32 	%lower, %seed, -1029531031;
	sub.u32 	%lower, %lower, 740551042;
	shr.u32 	%lower, %lower, 16;
	and.b32 	%lower, %lower, 255;
	xor.b32 	%result, %upper, %lower;
	st.param.s32 	[__cudaretf__Z4Randj], %result;
	ret;
$LDWend__Z4Randj:
	} // _Z4Randj

	.visible .func (.param .s32 __cudaretf__Z6Rand2Djjj) _Z6Rand2Djjj (.param .u32 __cudaparmf1__Z6Rand2Djjj, .param .u32 __cudaparmf2__Z6Rand2Djjj, .param .u32 __cudaparmf3__Z6Rand2Djjj)
	{
	// Rand2D(a, b, c)
	//   In : three 32-bit words (parameters 1..3).
	//   Out: the words are scrambled by nine subtract/subtract/xor-shift
	//        steps; the final value of c is then reduced to the return value
	//        by the two multiply/add/shift/mask chains at the end.
	// %a, %b and %c are updated in place; %t holds the shifted word.
	.reg .u32 %a, %b, %c, %t, %upper, %lower, %result;
	.loc	19	169	0
$LDWbegin__Z6Rand2Djjj:
	ld.param.u32 	%a, [__cudaparmf1__Z6Rand2Djjj];
	ld.param.u32 	%b, [__cudaparmf2__Z6Rand2Djjj];
	ld.param.u32 	%c, [__cudaparmf3__Z6Rand2Djjj];
	.loc	19	139	0
	// a -= b; a -= c; a ^= c >> 13
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%t, %c, 13;
	xor.b32 	%a, %a, %t;
	.loc	19	140	0
	// b -= c; b -= a; b ^= a << 8
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%t, %a, 8;
	xor.b32 	%b, %b, %t;
	.loc	19	141	0
	// c -= a; c -= b; c ^= b >> 13
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%t, %b, 13;
	xor.b32 	%c, %c, %t;
	.loc	19	142	0
	// a -= b; a -= c; a ^= c >> 12
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%t, %c, 12;
	xor.b32 	%a, %a, %t;
	.loc	19	143	0
	// b -= c; b -= a; b ^= a << 16
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%t, %a, 16;
	xor.b32 	%b, %b, %t;
	.loc	19	144	0
	// c -= a; c -= b; c ^= b >> 5
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%t, %b, 5;
	xor.b32 	%c, %c, %t;
	.loc	19	145	0
	// a -= b; a -= c; a ^= c >> 3
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%t, %c, 3;
	xor.b32 	%a, %a, %t;
	.loc	19	146	0
	// b -= c; b -= a; b ^= a << 10
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%t, %a, 10;
	xor.b32 	%b, %b, %t;
	.loc	19	147	0
	// c -= a; c -= b; c ^= b >> 15
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%t, %b, 15;
	xor.b32 	%c, %c, %t;
	.loc	19	170	0
	// Reduce the mixed word c to the returned value.
	mul.lo.u32 	%upper, %c, 1103515245;
	add.u32 	%upper, %upper, 12345;
	shr.u32 	%upper, %upper, 16;
	and.b32 	%upper, %upper, 255;
	shl.b32 	%upper, %upper, 7;
	mul.lo.u32 	%lower, %c, -1029531031;
	sub.u32 	%lower, %lower, 740551042;
	shr.u32 	%lower, %lower, 16;
	and.b32 	%lower, %lower, 255;
	xor.b32 	%result, %upper, %lower;
	st.param.s32 	[__cudaretf__Z6Rand2Djjj], %result;
	ret;
$LDWend__Z6Rand2Djjj:
	} // _Z6Rand2Djjj

	.visible .func (.param .s32 __cudaretf__Z6Rand2Dj) _Z6Rand2Dj (.param .u32 __cudaparmf1__Z6Rand2Dj)
	{
	// Rand2D(seed)
	//   In : 32-bit seed (parameter 1).
	//   Out: the seed is combined with two per-thread words,
	//          b = %ctaid.x * %ntid.x + %tid.x
	//          c = %ctaid.y * %ntid.y + %tid.y
	//        through nine subtract/subtract/xor-shift steps; the final value
	//        of c is then reduced to the return value by the two
	//        multiply/add/shift/mask chains at the end.
	// %a starts as the seed. %a, %b and %c are updated in place; %t holds
	// the shifted word.
	.reg .u32 %a, %b, %c, %t, %block, %extent, %upper, %lower, %result;
	.loc	19	175	0
$LDWbegin__Z6Rand2Dj:
	ld.param.u32 	%a, [__cudaparmf1__Z6Rand2Dj];
	.loc	19	143	0
	// c = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%block, %ctaid.y;
	cvt.s32.u32 	%extent, %ntid.y;
	mul.lo.s32 	%c, %block, %extent;
	mov.u32 	%t, %tid.y;
	add.u32 	%c, %c, %t;
	// b = ctaid.x * ntid.x + tid.x
	cvt.s32.u32 	%block, %ctaid.x;
	cvt.s32.u32 	%extent, %ntid.x;
	mul.lo.s32 	%b, %block, %extent;
	mov.u32 	%t, %tid.x;
	add.u32 	%b, %b, %t;
	// a -= b; a -= c; a ^= c >> 13
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%t, %c, 13;
	xor.b32 	%a, %a, %t;
	// b -= c; b -= a; b ^= a << 8
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%t, %a, 8;
	xor.b32 	%b, %b, %t;
	// c -= a; c -= b; c ^= b >> 13
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%t, %b, 13;
	xor.b32 	%c, %c, %t;
	// a -= b; a -= c; a ^= c >> 12
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%t, %c, 12;
	xor.b32 	%a, %a, %t;
	// b -= c; b -= a; b ^= a << 16
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%t, %a, 16;
	xor.b32 	%b, %b, %t;
	.loc	19	144	0
	// c -= a; c -= b; c ^= b >> 5
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%t, %b, 5;
	xor.b32 	%c, %c, %t;
	.loc	19	145	0
	// a -= b; a -= c; a ^= c >> 3
	sub.u32 	%a, %a, %b;
	sub.u32 	%a, %a, %c;
	shr.u32 	%t, %c, 3;
	xor.b32 	%a, %a, %t;
	.loc	19	146	0
	// b -= c; b -= a; b ^= a << 10
	sub.u32 	%b, %b, %c;
	sub.u32 	%b, %b, %a;
	shl.b32 	%t, %a, 10;
	xor.b32 	%b, %b, %t;
	.loc	19	147	0
	// c -= a; c -= b; c ^= b >> 15
	sub.u32 	%c, %c, %a;
	sub.u32 	%c, %c, %b;
	shr.u32 	%t, %b, 15;
	xor.b32 	%c, %c, %t;
	.loc	19	176	0
	// Reduce the mixed word c to the returned value.
	mul.lo.u32 	%upper, %c, 1103515245;
	add.u32 	%upper, %upper, 12345;
	shr.u32 	%upper, %upper, 16;
	and.b32 	%upper, %upper, 255;
	shl.b32 	%upper, %upper, 7;
	mul.lo.u32 	%lower, %c, -1029531031;
	sub.u32 	%lower, %lower, 740551042;
	shr.u32 	%lower, %lower, 16;
	and.b32 	%lower, %lower, 255;
	xor.b32 	%result, %upper, %lower;
	st.param.s32 	[__cudaretf__Z6Rand2Dj], %result;
	ret;
$LDWend__Z6Rand2Dj:
	} // _Z6Rand2Dj

	.visible .func (.param .s32 __cudaretf__Z19MaxUnsignedBitValuei) _Z19MaxUnsignedBitValuei (.param .s32 __cudaparmf1__Z19MaxUnsignedBitValuei)
	{
	// MaxUnsignedBitValue(bits)
	//   In : 32-bit integer (parameter 1).
	//   Out: (2 << (bits - 1)) - 1, computed in 32-bit arithmetic.
	.reg .u32 %shift, %value;
	.loc	20	51	0
$LDWbegin__Z19MaxUnsignedBitValuei:
	ld.param.u32 	%shift, [__cudaparmf1__Z19MaxUnsignedBitValuei];
	.loc	20	52	0
	sub.s32 	%shift, %shift, 1;	// shift = bits - 1
	shl.b32 	%value, 2, %shift;	// value = 2 << shift
	sub.s32 	%value, %value, 1;	// value -= 1
	st.param.s32 	[__cudaretf__Z19MaxUnsignedBitValuei], %value;
	ret;
$LDWend__Z19MaxUnsignedBitValuei:
	} // _Z19MaxUnsignedBitValuei

	.visible .func (.param .f32 __cudaretf__Z13MaxDepthValue14IR_PixelFormat) _Z13MaxDepthValue14IR_PixelFormat (.param .s32 __cudaparmf1__Z13MaxDepthValue14IR_PixelFormat)
	{
	// MaxDepthValue(format)
	//   In : 32-bit pixel-format word (parameter 1).
	//   Out: an .f32 selected by the masked field (format & 448):
	//            0   -> 255
	//            64  -> 1023
	//            128 -> 32768
	//            192 -> -1
	//            any other value -> 1
	// Written as a select chain: the four compared values are mutually
	// exclusive, so at most one selp replaces the default.
	.reg .u32 %field;
	.reg .f32 %limit;
	.reg .pred %match;
	.loc	20	56	0
$LDWbegin__Z13MaxDepthValue14IR_PixelFormat:
	ld.param.u32 	%field, [__cudaparmf1__Z13MaxDepthValue14IR_PixelFormat];
	.loc	20	57	0
	and.b32 	%field, %field, 448;
	.loc	20	76	0
	mov.f32 	%limit, 0f3f800000;     	// default: 1
	.loc	20	59	0
	setp.eq.s32 	%match, %field, 0;
	selp.f32 	%limit, 0f437f0000, %limit, %match;	// 255
	.loc	20	63	0
	setp.eq.s32 	%match, %field, 64;
	selp.f32 	%limit, 0f447fc000, %limit, %match;	// 1023
	.loc	20	68	0
	setp.eq.s32 	%match, %field, 128;
	selp.f32 	%limit, 0f47000000, %limit, %match;	// 32768
	.loc	20	72	0
	setp.eq.s32 	%match, %field, 192;
	selp.f32 	%limit, 0fbf800000, %limit, %match;	// -1
	st.param.f32 	[__cudaretf__Z13MaxDepthValue14IR_PixelFormat], %limit;
	ret;
$LDWend__Z13MaxDepthValue14IR_PixelFormat:
	} // _Z13MaxDepthValue14IR_PixelFormat

	.visible .func (.param .f32 __cudaretf__Z15DepthScaleValue14IR_PixelFormatS_) _Z15DepthScaleValue14IR_PixelFormatS_ (.param .s32 __cudaparmf1__Z15DepthScaleValue14IR_PixelFormatS_, .param .s32 __cudaparmf2__Z15DepthScaleValue14IR_PixelFormatS_)
	{
	// Returns T(second argument) / T(first argument), where T maps bits 0x1C0
	// (mask 448) of a pixel-format word to:
	//     0 -> 255, 64 -> 1023, 128 -> 32768, 192 -> -1, otherwise -> 1.
	// The quotient uses the approximate, flush-to-zero divide.
	.reg .u32 %fmtA, %fmtB, %depthA, %depthB;
	.reg .f32 %num, %den, %ratio;
	.reg .pred %q<5>;
	.loc	20	82	0
$LDWbegin__Z15DepthScaleValue14IR_PixelFormatS_:
	ld.param.u32 	%fmtA, [__cudaparmf1__Z15DepthScaleValue14IR_PixelFormatS_];
	ld.param.u32 	%fmtB, [__cudaparmf2__Z15DepthScaleValue14IR_PixelFormatS_];
	.loc	20	57	0
	// Numerator: table value for the second argument.
	and.b32 	%depthB, %fmtB, 448;
	setp.eq.s32 	%q1, %depthB, 0;
	setp.eq.s32 	%q2, %depthB, 64;
	setp.eq.s32 	%q3, %depthB, 128;
	setp.eq.s32 	%q4, %depthB, 192;
	mov.f32 	%num, 0f3f800000;              	// 1 (fallback)
	selp.f32 	%num, 0fbf800000, %num, %q4;   	// -1
	selp.f32 	%num, 0f47000000, %num, %q3;   	// 32768
	selp.f32 	%num, 0f447fc000, %num, %q2;   	// 1023
	selp.f32 	%num, 0f437f0000, %num, %q1;   	// 255
$LDWendi__Z19MaxUnsignedBitValuei_188_3:
	.loc	20	57	0
	// Denominator: table value for the first argument.
	and.b32 	%depthA, %fmtA, 448;
	setp.eq.s32 	%q1, %depthA, 0;
	setp.eq.s32 	%q2, %depthA, 64;
	setp.eq.s32 	%q3, %depthA, 128;
	setp.eq.s32 	%q4, %depthA, 192;
	mov.f32 	%den, 0f3f800000;              	// 1 (fallback)
	selp.f32 	%den, 0fbf800000, %den, %q4;   	// -1
	selp.f32 	%den, 0f47000000, %den, %q3;   	// 32768
	selp.f32 	%den, 0f447fc000, %den, %q2;   	// 1023
	selp.f32 	%den, 0f437f0000, %den, %q1;   	// 255
$LDWendi__Z19MaxUnsignedBitValuei_188_1:
	.loc	20	83	0
	div.approx.ftz.f32 	%ratio, %num, %den;
	st.param.f32 	[__cudaretf__Z15DepthScaleValue14IR_PixelFormatS_], %ratio;
	ret;
$LDWend__Z15DepthScaleValue14IR_PixelFormatS_:
	} // _Z15DepthScaleValue14IR_PixelFormatS_

	.visible .func (.param .s32 __cudaretf__Z7IsYCbCr14IR_PixelFormat) _Z7IsYCbCr14IR_PixelFormat (.param .s32 __cudaparmf1__Z7IsYCbCr14IR_PixelFormat)
	{
	// Returns 1 when bit 0 of the format word is set AND its 0x1C0 field
	// (mask 448) is not equal to 256; returns 0 otherwise.
	.reg .u32 %fmt, %depth, %lowBit, %result;
	.reg .pred %not256;
	.loc	20	92	0
$LDWbegin__Z7IsYCbCr14IR_PixelFormat:
	ld.param.u32 	%fmt, [__cudaparmf1__Z7IsYCbCr14IR_PixelFormat];
	.loc	20	93	0
	and.b32 	%lowBit, %fmt, 1;
	and.b32 	%depth, %fmt, 448;
	setp.ne.s32 	%not256, %depth, 256;
	// result = not256 ? (fmt & 1) : 0
	selp.s32 	%result, %lowBit, 0, %not256;
	st.param.s32 	[__cudaretf__Z7IsYCbCr14IR_PixelFormat], %result;
	ret;
$LDWend__Z7IsYCbCr14IR_PixelFormat:
	} // _Z7IsYCbCr14IR_PixelFormat
	// Two 12-byte constant tables, each read as three f32 values by the
	// ld.const.f32 accesses at byte offsets +0, +4 and +8.
	// Decoded as little-endian IEEE-754 single precision:
	//     kYCbCrOffset          = { 16.0, 128.0, 128.0 }
	//     kYCbCrFullRangeOffset = {  0.0, 128.0, 128.0 }
	.const .align 4 .b8 kYCbCrOffset[12] = {0,0,128,65,0,0,0,67,0,0,0,67};
	.const .align 4 .b8 kYCbCrFullRangeOffset[12] = {0,0,0,0,0,0,0,67,0,0,0,67};

	.visible .func (.param .f32 __cudaretf__Z11YCbCrOffseti14IR_PixelFormat) _Z11YCbCrOffseti14IR_PixelFormat (.param .s32 __cudaparmf1__Z11YCbCrOffseti14IR_PixelFormat, .param .s32 __cudaparmf2__Z11YCbCrOffseti14IR_PixelFormat)
	{
	// Returns one f32 entry from a 3-entry constant offset table.
	//   parameter 1: signed element index (scaled by 4 to a byte offset);
	//                it is NOT range-checked here, callers must pass 0..2.
	//   parameter 2: pixel-format word; bit 0x800 (2048) selects the table:
	//                set   -> kYCbCrOffset
	//                clear -> kYCbCrFullRangeOffset
	// The dead sign-extension of the index (unused 64-bit temporary) that the
	// compiler emitted has been dropped, and the address add / load that was
	// duplicated in both branch arms is now done once after table selection.
	.reg .u32 %idx, %fmt, %rangeBit;
	.reg .u64 %byteOff, %tbl, %addr;
	.reg .f32 %offs;
	.reg .pred %fullRange;
	.loc	20	99	0
$LDWbegin__Z11YCbCrOffseti14IR_PixelFormat:
	ld.param.u32 	%idx, [__cudaparmf1__Z11YCbCrOffseti14IR_PixelFormat];
	ld.param.u32 	%fmt, [__cudaparmf2__Z11YCbCrOffseti14IR_PixelFormat];
	// byteOff = (s64)idx * 4  (widening multiply sign-extends the index)
	mul.wide.s32 	%byteOff, %idx, 4;
	and.b32 	%rangeBit, %fmt, 2048;
	setp.eq.s32 	%fullRange, %rangeBit, 0;
	.loc	20	100	0
	mov.u64 	%tbl, kYCbCrOffset;
	@%fullRange mov.u64 	%tbl, kYCbCrFullRangeOffset;
	add.u64 	%addr, %byteOff, %tbl;
	ld.const.f32 	%offs, [%addr+0];
	st.param.f32 	[__cudaretf__Z11YCbCrOffseti14IR_PixelFormat], %offs;
	ret;
$LDWend__Z11YCbCrOffseti14IR_PixelFormat:
	} // _Z11YCbCrOffseti14IR_PixelFormat

	.visible .func (.param .align 16 .b8 __cudaretf__Z15AddAYCbCrOffset6float414IR_PixelFormat[16]) _Z15AddAYCbCrOffset6float414IR_PixelFormat (.param .align 16 .b8 __cudaparmf1__Z15AddAYCbCrOffset6float414IR_PixelFormat[16], .param .s32 __cudaparmf2__Z15AddAYCbCrOffset6float414IR_PixelFormat)
	{
	// Input: four packed f32 components (bytes 0..15 of parameter 1) and a
	// pixel-format word (parameter 2).
	// Output: component 0 unchanged; components 1..3 each become
	//     fma(scale, offset[i], component)   with
	//     scale  = T(format) / 255   (approximate divide)
	//     offset = kYCbCrOffset when format bit 0x800 is set,
	//              kYCbCrFullRangeOffset otherwise
	// T maps format bits 0x1C0 to 0->255, 64->1023, 128->32768, 192->-1,
	// anything else -> 1.
	.reg .u32 %fmt, %depth, %rangeBit;
	.reg .f32 %c0, %c1, %c2, %c3;
	.reg .f32 %maxv, %full8, %scale, %o0, %o1, %o2;
	.reg .pred %q<5>;
	.reg .pred %std;
	.loc	20	105	0
$LDWbegin__Z15AddAYCbCrOffset6float414IR_PixelFormat:
	ld.param.f32 	%c0, [__cudaparmf1__Z15AddAYCbCrOffset6float414IR_PixelFormat+0];
	ld.param.f32 	%c1, [__cudaparmf1__Z15AddAYCbCrOffset6float414IR_PixelFormat+4];
	ld.param.f32 	%c2, [__cudaparmf1__Z15AddAYCbCrOffset6float414IR_PixelFormat+8];
	ld.param.f32 	%c3, [__cudaparmf1__Z15AddAYCbCrOffset6float414IR_PixelFormat+12];
	ld.param.u32 	%fmt, [__cudaparmf2__Z15AddAYCbCrOffset6float414IR_PixelFormat];
	.loc	20	57	0
	and.b32 	%depth, %fmt, 448;
	setp.eq.s32 	%q1, %depth, 0;
	setp.eq.s32 	%q2, %depth, 64;
	setp.eq.s32 	%q3, %depth, 128;
	setp.eq.s32 	%q4, %depth, 192;
	mov.f32 	%maxv, 0f3f800000;               	// 1 (fallback)
	selp.f32 	%maxv, 0fbf800000, %maxv, %q4;   	// -1
	selp.f32 	%maxv, 0f47000000, %maxv, %q3;   	// 32768
	selp.f32 	%maxv, 0f447fc000, %maxv, %q2;   	// 1023
	selp.f32 	%maxv, 0f437f0000, %maxv, %q1;   	// 255
$LDWendi__Z19MaxUnsignedBitValuei_191_3:
	.loc	20	107	0
	and.b32 	%rangeBit, %fmt, 2048;
	setp.ne.s32 	%std, %rangeBit, 0;
	.loc	20	100	0
	// Exactly one load of each pair executes, as in the branching form.
	@%std ld.const.f32 	%o0, [kYCbCrOffset+0];
	@!%std ld.const.f32 	%o0, [kYCbCrFullRangeOffset+0];
	@%std ld.const.f32 	%o1, [kYCbCrOffset+4];
	@!%std ld.const.f32 	%o1, [kYCbCrFullRangeOffset+4];
	@%std ld.const.f32 	%o2, [kYCbCrOffset+8];
	@!%std ld.const.f32 	%o2, [kYCbCrFullRangeOffset+8];
	.loc	20	107	0
	mov.f32 	%full8, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%scale, %maxv, %full8;
	fma.rn.ftz.f32 	%c1, %scale, %o0, %c1;
	fma.rn.ftz.f32 	%c2, %scale, %o1, %c2;
	fma.rn.ftz.f32 	%c3, %scale, %o2, %c3;
	st.param.f32 	[__cudaretf__Z15AddAYCbCrOffset6float414IR_PixelFormat+0], %c0;
	st.param.f32 	[__cudaretf__Z15AddAYCbCrOffset6float414IR_PixelFormat+4], %c1;
	st.param.f32 	[__cudaretf__Z15AddAYCbCrOffset6float414IR_PixelFormat+8], %c2;
	st.param.f32 	[__cudaretf__Z15AddAYCbCrOffset6float414IR_PixelFormat+12], %c3;
	ret;
$LDWend__Z15AddAYCbCrOffset6float414IR_PixelFormat:
	} // _Z15AddAYCbCrOffset6float414IR_PixelFormat

	.visible .func (.param .align 16 .b8 __cudaretf__Z20SubtractAYCbCrOffset6float414IR_PixelFormat[16]) _Z20SubtractAYCbCrOffset6float414IR_PixelFormat (.param .align 16 .b8 __cudaparmf1__Z20SubtractAYCbCrOffset6float414IR_PixelFormat[16], .param .s32 __cudaparmf2__Z20SubtractAYCbCrOffset6float414IR_PixelFormat)
	{
	// Input: four packed f32 components (bytes 0..15 of parameter 1) and a
	// pixel-format word (parameter 2).
	// Output: component 0 unchanged; components 1..3 each become
	//     component - (scale * offset[i])
	// computed as a separate multiply and subtract (not fused), with
	//     scale  = T(format) / 255   (approximate divide)
	//     offset = kYCbCrOffset when format bit 0x800 is set,
	//              kYCbCrFullRangeOffset otherwise
	// T maps format bits 0x1C0 to 0->255, 64->1023, 128->32768, 192->-1,
	// anything else -> 1.
	.reg .u32 %fmt, %depth, %rangeBit;
	.reg .f32 %c0, %c1, %c2, %c3;
	.reg .f32 %maxv, %full8, %scale, %o0, %o1, %o2, %t0, %t1, %t2;
	.reg .pred %q<5>;
	.reg .pred %std;
	.loc	20	116	0
$LDWbegin__Z20SubtractAYCbCrOffset6float414IR_PixelFormat:
	ld.param.f32 	%c0, [__cudaparmf1__Z20SubtractAYCbCrOffset6float414IR_PixelFormat+0];
	ld.param.f32 	%c1, [__cudaparmf1__Z20SubtractAYCbCrOffset6float414IR_PixelFormat+4];
	ld.param.f32 	%c2, [__cudaparmf1__Z20SubtractAYCbCrOffset6float414IR_PixelFormat+8];
	ld.param.f32 	%c3, [__cudaparmf1__Z20SubtractAYCbCrOffset6float414IR_PixelFormat+12];
	ld.param.u32 	%fmt, [__cudaparmf2__Z20SubtractAYCbCrOffset6float414IR_PixelFormat];
	.loc	20	57	0
	and.b32 	%depth, %fmt, 448;
	setp.eq.s32 	%q1, %depth, 0;
	setp.eq.s32 	%q2, %depth, 64;
	setp.eq.s32 	%q3, %depth, 128;
	setp.eq.s32 	%q4, %depth, 192;
	mov.f32 	%maxv, 0f3f800000;               	// 1 (fallback)
	selp.f32 	%maxv, 0fbf800000, %maxv, %q4;   	// -1
	selp.f32 	%maxv, 0f47000000, %maxv, %q3;   	// 32768
	selp.f32 	%maxv, 0f447fc000, %maxv, %q2;   	// 1023
	selp.f32 	%maxv, 0f437f0000, %maxv, %q1;   	// 255
$LDWendi__Z19MaxUnsignedBitValuei_192_3:
	.loc	20	118	0
	and.b32 	%rangeBit, %fmt, 2048;
	setp.ne.s32 	%std, %rangeBit, 0;
	.loc	20	100	0
	// Exactly one load of each pair executes, as in the branching form.
	@%std ld.const.f32 	%o0, [kYCbCrOffset+0];
	@!%std ld.const.f32 	%o0, [kYCbCrFullRangeOffset+0];
	@%std ld.const.f32 	%o1, [kYCbCrOffset+4];
	@!%std ld.const.f32 	%o1, [kYCbCrFullRangeOffset+4];
	@%std ld.const.f32 	%o2, [kYCbCrOffset+8];
	@!%std ld.const.f32 	%o2, [kYCbCrFullRangeOffset+8];
	.loc	20	118	0
	mov.f32 	%full8, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%scale, %maxv, %full8;
	mul.ftz.f32 	%t0, %scale, %o0;
	sub.ftz.f32 	%c1, %c1, %t0;
	mul.ftz.f32 	%t1, %scale, %o1;
	sub.ftz.f32 	%c2, %c2, %t1;
	mul.ftz.f32 	%t2, %scale, %o2;
	sub.ftz.f32 	%c3, %c3, %t2;
	st.param.f32 	[__cudaretf__Z20SubtractAYCbCrOffset6float414IR_PixelFormat+0], %c0;
	st.param.f32 	[__cudaretf__Z20SubtractAYCbCrOffset6float414IR_PixelFormat+4], %c1;
	st.param.f32 	[__cudaretf__Z20SubtractAYCbCrOffset6float414IR_PixelFormat+8], %c2;
	st.param.f32 	[__cudaretf__Z20SubtractAYCbCrOffset6float414IR_PixelFormat+12], %c3;
	ret;
$LDWend__Z20SubtractAYCbCrOffset6float414IR_PixelFormat:
	} // _Z20SubtractAYCbCrOffset6float414IR_PixelFormat
	// Colour-conversion tables. Each is 36 bytes = nine little-endian IEEE-754
	// f32 values (presumably a 3x3 matrix stored row by row -- confirm against
	// the code that consumes the returned pointer). The decoded values shown
	// above each table are rounded.
	// ~ { 0.299, 0.587, 0.114 | -0.1687, -0.3313, 0.5 | 0.5, -0.4187, -0.0813 }
	.global .align 4 .b8 kRGB32f_To_601YPbPr[36] = {135,22,153,62,162,69,22,63,213,120,233,61,33,201,44,190,111,155,169,190,0,0,0,63,0,0,0,63,70,94,214,190,232,134,166,189};
	// ~ { 1, 0, 1.402 | 1, -0.3441, -0.7141 | 1, 1.772, 0 }
	.global .align 4 .b8 k601YPbPr_To_RGB32f[36] = {0,0,128,63,0,0,0,0,188,116,179,63,0,0,128,63,152,50,176,190,158,209,54,191,0,0,128,63,229,208,226,63,0,0,0,0};
	// ~ { 65.481, 128.553, 24.966 | -37.797, -74.203, 112 | 112, -93.786, -18.214 }
	.global .align 4 .b8 kRGB32f_To_601YCbCr[36] = {70,246,130,66,145,141,0,67,94,186,199,65,33,48,23,194,240,103,148,194,0,0,224,66,0,0,224,66,111,146,187,194,70,182,145,193};
	// ~ { 0.004566, 0, 0.006259 | 0.004566, -0.001534, -0.003188 | 0.004566, 0.007911, 0 }
	.global .align 4 .b8 k601YCbCr_To_RGB32f[36] = {37,160,149,59,0,0,0,0,182,23,205,59,37,160,149,59,40,15,201,186,156,239,80,187,37,160,149,59,236,155,1,60,0,0,0,0};
	// ~ { 0.2568, 0.5041, 0.0979 | -0.1482, -0.2910, 0.4392 | 0.4392, -0.3678, -0.0714 }
	.global .align 4 .b8 kRGB8u_To_601YCbCr[36] = {219,121,131,62,152,14,1,63,18,131,200,61,174,199,23,190,238,252,148,190,197,224,224,62,197,224,224,62,217,78,188,190,174,71,146,189};
	// ~ { 1.1644, 0, 1.5960 | 1.1644, -0.3918, -0.8130 | 1.1644, 2.0172, 0 }
	.global .align 4 .b8 k601YCbCr_To_RGB8u[36] = {127,10,149,63,0,0,0,0,160,74,204,63,127,10,149,63,254,148,200,190,184,30,80,191,127,10,149,63,78,26,1,64,0,0,0,0};
	// ~ { 0.299, 0.587, 0.114 | -0.1681, -0.3300, 0.4980 | 0.4980, -0.4170, -0.0810 }
	.global .align 4 .b8 kRGB8u_To_601YCbCrFullRange[36] = {135,22,153,62,162,69,22,63,213,120,233,61,166,27,44,190,39,241,168,190,250,254,254,62,250,254,254,62,43,135,213,190,59,223,165,189};
	// ~ { 1, 0, 1.3965 | 1, -0.3428, -0.7113 | 1, 1.7651, 0 }
	.global .align 4 .b8 k601YCbCrFullRange_To_RGB8u[36] = {0,0,128,63,0,0,0,0,72,193,178,63,0,0,128,63,143,130,175,190,225,26,54,191,0,0,128,63,20,238,225,63,0,0,0,0};
	// ~ { 76.245, 149.685, 29.070 | -42.859, -84.141, 127 | 127, -106.347, -20.653 }
	.global .align 4 .b8 kRGB32f_To_601YCbCrFullRange[36] = {113,125,152,66,92,175,21,67,92,143,232,65,158,111,43,194,49,72,168,194,0,0,254,66,0,0,254,66,170,177,212,194,88,57,165,193};
	// ~ { 0.003922, 0, 0.005477 | 0.003922, -0.001344, -0.002790 | 0.003922, 0.006922, 0 }
	.global .align 4 .b8 k601YCbCrFullRange_To_RGB32f[36] = {129,128,128,59,0,0,0,0,188,116,179,59,129,128,128,59,194,50,176,186,179,209,54,187,129,128,128,59,229,208,226,59,0,0,0,0};
	// ~ { 0.2126, 0.7152, 0.0722 | -0.1146, -0.3854, 0.5 | 0.5, -0.4542, -0.0458 }
	.global .align 4 .b8 kRGB32f_To_709YPbPr[36] = {208,179,89,62,89,23,55,63,152,221,147,61,186,164,234,189,210,86,197,190,0,0,0,63,0,0,0,63,190,134,232,190,16,202,59,189};
	// ~ { 1, 0, 1.5748 | 1, -0.1873, -0.4681 | 1, 1.8556, 0 }
	.global .align 4 .b8 k709YPbPr_To_RGB32f[36] = {0,0,128,63,0,0,0,0,12,147,201,63,0,0,128,63,221,209,63,190,243,173,239,190,0,0,128,63,77,132,237,63,0,0,0,0};
	// ~ { 46.559, 156.629, 15.812 | -25.664, -86.336, 112 | 112, -101.730, -10.270 }
	.global .align 4 .b8 kRGB32f_To_709YCbCr[36] = {106,60,58,66,6,161,28,67,244,253,124,65,223,79,205,193,8,172,172,194,0,0,224,66,0,0,224,66,195,117,203,194,236,81,36,193};
	// ~ { 0.004566, 0, 0.007030 | 0.004566, -0.000836, -0.002090 | 0.004566, 0.008284, 0 }
	.global .align 4 .b8 k709YCbCr_To_RGB32f[36] = {37,160,149,59,0,0,0,0,239,94,230,59,37,160,149,59,33,57,91,186,178,245,8,187,37,160,149,59,82,185,7,60,0,0,0,0};
	// Nine little-endian f32 values, decoded (rounded):
	// ~ { 0.1826, 0.6142, 0.0620 | -0.1006, -0.3386, 0.4392 | 0.4392, -0.3989, -0.0403 }
	// Element [3] previously carried bytes 147,24,206,61 (+0.1006). Every other
	// entry of this table equals the matching kRGB32f_To_709YCbCr entry
	// divided by 255, and that table holds -25.664 at [3]; with the positive
	// value the middle row summed to ~0.20 instead of ~0. The sign byte is
	// therefore corrected from 61 (0x3D) to 189 (0xBD).
	.global .align 4 .b8 kRGB8u_To_709YCbCr[36] = {207,247,58,62,53,62,29,63,231,251,125,61,147,24,206,189,23,89,173,190,197,224,224,62,197,224,224,62,12,66,204,190,195,245,36,189};
	// Each table below is nine little-endian IEEE-754 f32 values; the decoded
	// values shown above each table are rounded.
	// ~ { 1.1644, 0, 1.7927 | 1.1644, -0.2133, -0.5329 | 1.1644, 2.1124, 0 }
	.global .align 4 .b8 k709YCbCr_To_RGB8u[36] = {127,10,149,63,0,0,0,0,147,120,229,63,127,10,149,63,53,94,90,190,205,108,8,191,127,10,149,63,154,49,7,64,0,0,0,0};
	// ~ { 1, 0.0993, 0.1917 | 0, 0.9898, -0.1107 | 0, -0.0725, 0.9834 }
	.global .align 4 .b8 k709YCbCr_To_601YCbCr[36] = {0,0,128,63,23,100,203,61,1,77,68,62,0,0,0,0,18,103,125,63,10,158,226,189,0,0,0,0,61,98,148,189,249,191,123,63};
	// ~ { 1, -0.1156, -0.2079 | 0, 1.0186, 0.1146 | 0, 0.0750, 1.0253 }
	.global .align 4 .b8 k601YCbCr_To_709YCbCr[36] = {0,0,128,63,122,165,236,189,179,237,84,190,0,0,0,0,204,98,130,63,216,188,234,61,0,0,0,0,74,179,153,61,234,61,131,63};

	.visible .func (.param .u64 __cudaretf__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_) _Z23ColorSpaceConvertMatrix14IR_PixelFormatS_ (.param .s32 __cudaparmf1__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_, .param .s32 __cudaparmf2__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_)
	{
	// Selects one of the global conversion tables from two pixel-format words
	// and returns its generic address (cvta.global), or 0 when no table
	// matches the combination.
	//   %r2 = first format argument, %r4 = second format argument.
	// Bit-fields tested on each format word:
	//   bit 0        (mask 1)    - set: the "...YCbCr/YPbPr_To_..." tables are
	//                              chosen for the first argument
	//   bits 0x1C0   (mask 448)  - value 256 picks the "32f"/"YPbPr" variants
	//   bits 0x600   (mask 1536) - value 512 picks the "709" variants
	//   bit 0x800    (mask 2048) - clear picks the "FullRange" 601 variants
	// All branches funnel into $LBB53 (result in %rd1) or $Lt_16_29954 (null).
	.reg .u32 %r<53>;
	.reg .u64 %rd<4>;
	.reg .pred %p<19>;
	.loc	20	132	0
$LDWbegin__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_:
	ld.param.u32 	%r1, [__cudaparmf1__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_];
	mov.s32 	%r2, %r1;
	ld.param.u32 	%r3, [__cudaparmf2__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_];
	mov.s32 	%r4, %r3;
	// First format has bit 0 set -> handled at $Lt_16_30210.
	and.b32 	%r5, %r2, 1;
	mov.u32 	%r6, 0;
	setp.ne.s32 	%p1, %r5, %r6;
	@%p1 bra 	$Lt_16_30210;
	// --- First format has bit 0 clear: the "RGB..._To_..." tables. ---
	// %p2 = (second & 448) == 256
	and.b32 	%r7, %r4, 448;
	mov.s32 	%r8, 256;
	setp.eq.s32 	%p2, %r7, %r8;
	// (first & 448) != 256 -> RGB8u source tables at $Lt_16_30722.
	and.b32 	%r9, %r2, 448;
	mov.u32 	%r10, 256;
	setp.ne.s32 	%p3, %r9, %r10;
	@%p3 bra 	$Lt_16_30722;
	.loc	20	137	0
	// RGB32f source. %p4 = (second & 1536) == 512, i.e. the 709 variants.
	and.b32 	%r11, %r4, 1536;
	mov.s32 	%r12, 512;
	setp.eq.s32 	%p4, %r11, %r12;
	@!%p2 bra 	$Lt_16_18690;
	.loc	20	139	0
	// Second field is 256: YPbPr destination, 709 or 601.
	@!%p4 bra 	$Lt_16_18946;
	.loc	20	141	0
	cvta.global.u64 	%rd1, kRGB32f_To_709YPbPr;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_18946:
	.loc	20	145	0
	cvta.global.u64 	%rd1, kRGB32f_To_601YPbPr;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_18690:
	.loc	20	150	0
	// Second field is not 256: YCbCr destination.
	@!%p4 bra 	$Lt_16_19202;
	.loc	20	152	0
	cvta.global.u64 	%rd1, kRGB32f_To_709YCbCr;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_19202:
	.loc	20	154	0
	// 601 destination: bit 2048 of the second format chooses between the
	// FullRange table (bit clear) and the plain table (bit set).
	and.b32 	%r13, %r4, 2048;
	mov.u32 	%r14, 0;
	setp.ne.s32 	%p5, %r13, %r14;
	@%p5 bra 	$Lt_16_19458;
	.loc	20	156	0
	cvta.global.u64 	%rd1, kRGB32f_To_601YCbCrFullRange;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_19458:
	.loc	20	160	0
	cvta.global.u64 	%rd1, kRGB32f_To_601YCbCr;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_30722:
	// RGB8u source with a second field of 256 has no table -> null.
	@!%p2 bra 	$Lt_16_31234;
	bra.uni 	$Lt_16_29954;
$Lt_16_31234:
	.loc	20	179	0
	// RGB8u source, YCbCr destination: 709 when (second & 1536) == 512.
	and.b32 	%r15, %r4, 1536;
	mov.u32 	%r16, 512;
	setp.ne.s32 	%p6, %r15, %r16;
	@%p6 bra 	$Lt_16_20226;
	.loc	20	181	0
	cvta.global.u64 	%rd1, kRGB8u_To_709YCbCr;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_20226:
	.loc	20	183	0
	// 601 destination: bit 2048 of the second format picks the table.
	and.b32 	%r17, %r4, 2048;
	mov.u32 	%r18, 0;
	setp.ne.s32 	%p7, %r17, %r18;
	@%p7 bra 	$Lt_16_20482;
	.loc	20	185	0
	cvta.global.u64 	%rd1, kRGB8u_To_601YCbCrFullRange;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_20482:
	.loc	20	189	0
	cvta.global.u64 	%rd1, kRGB8u_To_601YCbCr;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_30210:
	// --- First format has bit 0 set. ---
	// %p8 = second format has bit 0 clear (the "..._To_RGB..." tables).
	and.b32 	%r19, %r4, 1;
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p8, %r19, %r20;
	// (first & 1536) != 512 -> 601 source handled at $Lt_16_31746.
	and.b32 	%r21, %r2, 1536;
	mov.u32 	%r22, 512;
	setp.ne.s32 	%p9, %r21, %r22;
	@%p9 bra 	$Lt_16_31746;
	// 709 source. %p10 = (first & 448) == 256.
	and.b32 	%r23, %r2, 448;
	mov.s32 	%r24, 256;
	setp.eq.s32 	%p10, %r23, %r24;
	@!%p8 bra 	$Lt_16_32258;
	// Second format has bit 0 clear. %p11 = (second & 448) == 256.
	and.b32 	%r25, %r4, 448;
	mov.s32 	%r26, 256;
	setp.eq.s32 	%p11, %r25, %r26;
	@!%p10 bra 	$Lt_16_32770;
	// First field is 256: only a second field of 256 has a table.
	@!%p11 bra 	$Lt_16_29954;
	.loc	20	202	0
	cvta.global.u64 	%rd1, k709YPbPr_To_RGB32f;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_32770:
	.loc	20	211	0
	// First field is not 256: second field picks RGB32f (256) or RGB8u.
	@!%p11 bra 	$Lt_16_21762;
	.loc	20	213	0
	cvta.global.u64 	%rd1, k709YCbCr_To_RGB32f;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_21762:
	.loc	20	217	0
	cvta.global.u64 	%rd1, k709YCbCr_To_RGB8u;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_32258:
	// Both formats have bit 0 set; a first field of 256 has no table.
	@!%p10 bra 	$Lt_16_33794;
	bra.uni 	$Lt_16_29954;
$Lt_16_33794:
	// %r30 = ((second & 448) == 256) as 0/1 (set.eq yields all-ones, neg
	// turns that into 1); %r34 = ((second & 2048) == 0) as 0/1.
	// The 709->601 table is returned only when both are 0.
	and.b32 	%r27, %r4, 448;
	mov.s32 	%r28, 256;
	set.eq.u32.s32 	%r29, %r27, %r28;
	neg.s32 	%r30, %r29;
	and.b32 	%r31, %r4, 2048;
	mov.s32 	%r32, 0;
	set.eq.u32.s32 	%r33, %r31, %r32;
	neg.s32 	%r34, %r33;
	or.b32 	%r35, %r30, %r34;
	mov.u32 	%r36, 0;
	setp.eq.s32 	%p12, %r35, %r36;
	@%p12 bra 	$Lt_16_34306;
	bra.uni 	$Lt_16_29954;
$Lt_16_34306:
	.loc	20	250	0
	cvta.global.u64 	%rd1, k709YCbCr_To_601YCbCr;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_31746:
	// 601 source. %p13 = first format has bit 2048 clear (FullRange tables).
	and.b32 	%r37, %r2, 2048;
	mov.s32 	%r38, 0;
	setp.eq.s32 	%p13, %r37, %r38;
	@!%p13 bra 	$Lt_16_34818;
	// FullRange source: only a second format with bit 0 clear has a table.
	@!%p8 bra 	$Lt_16_29954;
	.loc	20	259	0
	and.b32 	%r39, %r4, 448;
	mov.u32 	%r40, 256;
	setp.ne.s32 	%p14, %r39, %r40;
	@%p14 bra 	$Lt_16_23298;
	.loc	20	261	0
	cvta.global.u64 	%rd1, k601YCbCrFullRange_To_RGB32f;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_23298:
	.loc	20	265	0
	cvta.global.u64 	%rd1, k601YCbCrFullRange_To_RGB8u;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_34818:
	// 601 source with bit 2048 set. %p15 = (first & 448) == 256.
	and.b32 	%r41, %r2, 448;
	mov.s32 	%r42, 256;
	setp.eq.s32 	%p15, %r41, %r42;
	@!%p8 bra 	$Lt_16_35842;
	// Second format has bit 0 clear. %p16 = (second & 448) == 256.
	and.b32 	%r43, %r4, 448;
	mov.s32 	%r44, 256;
	setp.eq.s32 	%p16, %r43, %r44;
	@!%p15 bra 	$Lt_16_36354;
	// First field is 256: only a second field of 256 has a table.
	@!%p16 bra 	$Lt_16_29954;
	.loc	20	302	0
	cvta.global.u64 	%rd1, k601YPbPr_To_RGB32f;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_36354:
	.loc	20	311	0
	@!%p16 bra 	$Lt_16_25090;
	.loc	20	313	0
	cvta.global.u64 	%rd1, k601YCbCr_To_RGB32f;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_25090:
	.loc	20	317	0
	cvta.global.u64 	%rd1, k601YCbCr_To_RGB8u;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_35842:
	// Both formats have bit 0 set; a first field of 256 has no table.
	@!%p15 bra 	$Lt_16_37378;
	bra.uni 	$Lt_16_29954;
$Lt_16_37378:
	// %r45 = %p13 as 0/1 (always 0 on this path, which is reached only when
	// %p13 is false); %r49 = ((second & 448) == 256) as 0/1.
	// The 601->709 table is returned only when both are 0.
	selp.s32 	%r45, 1, 0, %p13;
	and.b32 	%r46, %r4, 448;
	mov.s32 	%r47, 256;
	set.eq.u32.s32 	%r48, %r46, %r47;
	neg.s32 	%r49, %r48;
	or.b32 	%r50, %r45, %r49;
	mov.u32 	%r51, 0;
	setp.eq.s32 	%p17, %r50, %r51;
	@%p17 bra 	$Lt_16_37890;
	bra.uni 	$Lt_16_29954;
$Lt_16_37890:
	.loc	20	350	0
	cvta.global.u64 	%rd1, k601YCbCr_To_709YCbCr;
	bra.uni 	$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_;
$Lt_16_29954:
	.loc	20	355	0
	// No table for this combination: return a null pointer.
	mov.u64 	%rd1, 0;
$LBB53__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_:
	// Common exit: %rd1 holds the selected generic address (or 0).
	mov.s64 	%rd2, %rd1;
	st.param.u64 	[__cudaretf__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_], %rd2;
	ret;
$LDWend__Z23ColorSpaceConvertMatrix14IR_PixelFormatS_:
	} // _Z23ColorSpaceConvertMatrix14IR_PixelFormatS_

	.visible .func (.param .s32 __cudaretf__Z13IsGammaLinear14IR_PixelFormat) _Z13IsGammaLinear14IR_PixelFormat (.param .s32 __cudaparmf1__Z13IsGammaLinear14IR_PixelFormat)
	{
	// Returns 1 when bit 1 (mask 2) of the format word is set, else 0.
	.reg .u32 %fmt, %bit1, %result;
	.reg .pred %isSet;
	.loc	20	363	0
$LDWbegin__Z13IsGammaLinear14IR_PixelFormat:
	ld.param.u32 	%fmt, [__cudaparmf1__Z13IsGammaLinear14IR_PixelFormat];
	.loc	20	364	0
	and.b32 	%bit1, %fmt, 2;
	setp.ne.s32 	%isSet, %bit1, 0;
	selp.s32 	%result, 1, 0, %isSet;
	st.param.s32 	[__cudaretf__Z13IsGammaLinear14IR_PixelFormat], %result;
	ret;
$LDWend__Z13IsGammaLinear14IR_PixelFormat:
	} // _Z13IsGammaLinear14IR_PixelFormat

	.visible .func (.param .f32 __cudaretf__Z18ApplyGammaFunctionff) _Z18ApplyGammaFunctionff (.param .f32 __cudaparmf1__Z18ApplyGammaFunctionff, .param .f32 __cudaparmf2__Z18ApplyGammaFunctionff)
	{
	// Sign-preserving power function built from the approximate log2/exp2
	// instructions (x = parameter 1, g = parameter 2):
	//     x <  0 : result = -exp2(g * log2(-x))
	//     else   : result =  exp2(g * log2( x))
	// Branch-free form: the operand and the final sign are picked with
	// selects on the same predicate, so each logical path runs exactly the
	// instruction sequence listed above.
	.reg .f32 %x, %g, %negX, %mag, %lg, %prod, %pw, %negPw, %result;
	.reg .pred %isNeg;
	.loc	20	369	0
$LDWbegin__Z18ApplyGammaFunctionff:
	ld.param.f32 	%x, [__cudaparmf1__Z18ApplyGammaFunctionff];
	ld.param.f32 	%g, [__cudaparmf2__Z18ApplyGammaFunctionff];
	setp.lt.ftz.f32 	%isNeg, %x, 0f00000000;     	// x < 0 ?
	.loc	20	372	0
	neg.ftz.f32 	%negX, %x;
	selp.f32 	%mag, %negX, %x, %isNeg;        	// -x when negative, x untouched otherwise
	.loc	20	374	0
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%prod, %g, %lg;
	ex2.approx.ftz.f32 	%pw, %prod;
	.loc	20	372	0
	neg.ftz.f32 	%negPw, %pw;
	selp.f32 	%result, %negPw, %pw, %isNeg;   	// restore the sign of x
	st.param.f32 	[__cudaretf__Z18ApplyGammaFunctionff], %result;
	ret;
$LDWend__Z18ApplyGammaFunctionff:
	} // _Z18ApplyGammaFunctionff

	.visible .func (.param .align 16 .b8 __cudaretf__Z16OpaqueComponents6float414IR_PixelFormat[16]) _Z16OpaqueComponents6float414IR_PixelFormat (.param .align 16 .b8 __cudaparmf1__Z16OpaqueComponents6float414IR_PixelFormat[16], .param .s32 __cudaparmf2__Z16OpaqueComponents6float414IR_PixelFormat)
	{
	// Returns the four-component input with component 0 (bytes 0..3) replaced
	// by T(format); components 1..3 are copied through unchanged. The input's
	// component 0 is never read.
	// T maps format bits 0x1C0 (mask 448) to
	//     0 -> 255, 64 -> 1023, 128 -> 32768, 192 -> -1, otherwise -> 1.
	.reg .u32 %fmt, %depth;
	.reg .f32 %c1, %c2, %c3, %maxv;
	.reg .pred %q<5>;
	.loc	20	401	0
$LDWbegin__Z16OpaqueComponents6float414IR_PixelFormat:
	ld.param.f32 	%c1, [__cudaparmf1__Z16OpaqueComponents6float414IR_PixelFormat+4];
	ld.param.f32 	%c2, [__cudaparmf1__Z16OpaqueComponents6float414IR_PixelFormat+8];
	ld.param.f32 	%c3, [__cudaparmf1__Z16OpaqueComponents6float414IR_PixelFormat+12];
	ld.param.u32 	%fmt, [__cudaparmf2__Z16OpaqueComponents6float414IR_PixelFormat];
	.loc	20	57	0
	and.b32 	%depth, %fmt, 448;
	setp.eq.s32 	%q1, %depth, 0;
	setp.eq.s32 	%q2, %depth, 64;
	setp.eq.s32 	%q3, %depth, 128;
	setp.eq.s32 	%q4, %depth, 192;
	mov.f32 	%maxv, 0f3f800000;               	// 1 (fallback)
	selp.f32 	%maxv, 0fbf800000, %maxv, %q4;   	// -1
	selp.f32 	%maxv, 0f47000000, %maxv, %q3;   	// 32768
	selp.f32 	%maxv, 0f447fc000, %maxv, %q2;   	// 1023
	selp.f32 	%maxv, 0f437f0000, %maxv, %q1;   	// 255
$LDWendi__Z19MaxUnsignedBitValuei_196_1:
	.loc	20	404	0
	st.param.f32 	[__cudaretf__Z16OpaqueComponents6float414IR_PixelFormat+0], %maxv;
	st.param.f32 	[__cudaretf__Z16OpaqueComponents6float414IR_PixelFormat+4], %c1;
	st.param.f32 	[__cudaretf__Z16OpaqueComponents6float414IR_PixelFormat+8], %c2;
	st.param.f32 	[__cudaretf__Z16OpaqueComponents6float414IR_PixelFormat+12], %c3;
	ret;
$LDWend__Z16OpaqueComponents6float414IR_PixelFormat:
	} // _Z16OpaqueComponents6float414IR_PixelFormat

	.visible .func (.param .align 16 .b8 __cudaretf__Z21PremultiplyComponents6float414IR_PixelFormat[16]) _Z21PremultiplyComponents6float414IR_PixelFormat (.param .align 16 .b8 __cudaparmf1__Z21PremultiplyComponents6float414IR_PixelFormat[16], .param .s32 __cudaparmf2__Z21PremultiplyComponents6float414IR_PixelFormat)
	{
	// Scales components 1..3 of a four-component value by
	// (component 0) / T(format); component 0 is returned unchanged.
	// When the format has bit 0 set and its 0x1C0 field is not 256, the
	// table offsets (kYCbCrOffset / kYCbCrFullRangeOffset, scaled by
	// T(format)/255) are subtracted before the scaling and added back after.
	// T maps format bits 0x1C0 (mask 448) to
	//     0 -> 255, 64 -> 1023, 128 -> 32768, 192 -> -1, otherwise -> 1.
	// The same T ladder appears three times (results in %f12, %f13, %f24).
	.reg .u32 %r<26>;
	.reg .f32 %f<35>;
	.reg .pred %p<16>;
	.loc	20	409	0
$LDWbegin__Z21PremultiplyComponents6float414IR_PixelFormat:
	// %f2 = component 0, %f4/%f6/%f8 = components 1..3, %r2 = format word.
	ld.param.f32 	%f1, [__cudaparmf1__Z21PremultiplyComponents6float414IR_PixelFormat+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z21PremultiplyComponents6float414IR_PixelFormat+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z21PremultiplyComponents6float414IR_PixelFormat+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z21PremultiplyComponents6float414IR_PixelFormat+12];
	mov.f32 	%f8, %f7;
	ld.param.u32 	%r1, [__cudaparmf2__Z21PremultiplyComponents6float414IR_PixelFormat];
	mov.s32 	%r2, %r1;
	.loc	20	410	0
	// Working copies of components 1..3; these are what gets returned.
	mov.f32 	%f9, %f4;
	mov.f32 	%f10, %f6;
	mov.f32 	%f11, %f8;
	.loc	20	57	0
	// %r3 = format & 448; %p1 = (%r3 == 0) is reused by the later ladders.
	and.b32 	%r3, %r2, 448;
	mov.s32 	%r4, 0;
	setp.eq.s32 	%p1, %r3, %r4;
	@!%p1 bra 	$Lt_20_18690;
	.loc	20	59	0
	mov.f32 	%f12, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_197_9;
$Lt_20_18690:
	.loc	20	61	0
	mov.u32 	%r5, 64;
	setp.ne.s32 	%p2, %r3, %r5;
	@%p2 bra 	$Lt_20_18946;
	.loc	20	63	0
	mov.f32 	%f12, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_197_9;
$Lt_20_18946:
	.loc	20	65	0
	mov.u32 	%r6, 128;
	setp.ne.s32 	%p3, %r3, %r6;
	@%p3 bra 	$Lt_20_19202;
	.loc	20	68	0
	mov.f32 	%f12, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_197_9;
$Lt_20_19202:
	.loc	20	70	0
	mov.u32 	%r7, 192;
	setp.ne.s32 	%p4, %r3, %r7;
	@%p4 bra 	$Lt_20_19458;
	.loc	20	72	0
	mov.f32 	%f12, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_197_9;
$Lt_20_19458:
	.loc	20	76	0
	mov.f32 	%f12, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_197_9:
	.loc	20	413	0
	// %r12 = (format & 1) & ((format & 448) != 256): non-zero enables the
	// offset removal here and the offset restore further down.
	mov.s32 	%r8, %r2;
	mov.s32 	%r9, 256;
	setp.ne.s32 	%p5, %r3, %r9;
	and.b32 	%r10, %r8, 1;
	selp.s32 	%r11, 1, 0, %p5;
	and.b32 	%r12, %r10, %r11;
	mov.u32 	%r13, 0;
	setp.eq.s32 	%p6, %r12, %r13;
	@%p6 bra 	$Lt_20_29442;
	.loc	20	57	0
	// Second T ladder -> %f13 (same mapping as %f12).
	@!%p1 bra 	$Lt_20_19970;
	.loc	20	59	0
	mov.f32 	%f13, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_197_7;
$Lt_20_19970:
	.loc	20	61	0
	mov.u32 	%r14, 64;
	setp.ne.s32 	%p7, %r3, %r14;
	@%p7 bra 	$Lt_20_20226;
	.loc	20	63	0
	mov.f32 	%f13, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_197_7;
$Lt_20_20226:
	.loc	20	65	0
	mov.u32 	%r15, 128;
	setp.ne.s32 	%p8, %r3, %r15;
	@%p8 bra 	$Lt_20_20482;
	.loc	20	68	0
	mov.f32 	%f13, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_197_7;
$Lt_20_20482:
	.loc	20	70	0
	mov.u32 	%r16, 192;
	setp.ne.s32 	%p9, %r3, %r16;
	@%p9 bra 	$Lt_20_20738;
	.loc	20	72	0
	mov.f32 	%f13, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_197_7;
$Lt_20_20738:
	.loc	20	76	0
	mov.f32 	%f13, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_197_7:
	.loc	20	118	0
	// %p10 = format bit 2048 set: load offsets from kYCbCrOffset, otherwise
	// from kYCbCrFullRangeOffset (entries 0..2 into %f14..%f16).
	and.b32 	%r17, %r2, 2048;
	mov.s32 	%r18, 0;
	setp.ne.s32 	%p10, %r17, %r18;
	@!%p10 bra 	$Lt_20_30210;
	.loc	20	100	0
	ld.const.f32 	%f14, [kYCbCrOffset+0];
	bra.uni 	$Lt_20_29954;
$Lt_20_30210:
	ld.const.f32 	%f14, [kYCbCrFullRangeOffset+0];
$Lt_20_29954:
	.loc	20	118	0
	@!%p10 bra 	$Lt_20_30722;
	.loc	20	100	0
	ld.const.f32 	%f15, [kYCbCrOffset+4];
	bra.uni 	$Lt_20_30466;
$Lt_20_30722:
	ld.const.f32 	%f15, [kYCbCrFullRangeOffset+4];
$Lt_20_30466:
	.loc	20	118	0
	@!%p10 bra 	$Lt_20_31234;
	.loc	20	100	0
	ld.const.f32 	%f16, [kYCbCrOffset+8];
	bra.uni 	$Lt_20_30978;
$Lt_20_31234:
	ld.const.f32 	%f16, [kYCbCrFullRangeOffset+8];
$Lt_20_30978:
	.loc	20	415	0
	// Remove the scaled offsets: component -= (%f13 / 255) * offset.
	mov.f32 	%f17, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f18, %f13, %f17;
	mul.ftz.f32 	%f19, %f18, %f14;
	sub.ftz.f32 	%f9, %f4, %f19;
	mul.ftz.f32 	%f20, %f18, %f15;
	sub.ftz.f32 	%f10, %f6, %f20;
	mul.ftz.f32 	%f21, %f18, %f16;
	sub.ftz.f32 	%f11, %f8, %f21;
$Lt_20_29442:
	.loc	20	418	0
	// %f23 = component0 * (1 / %f12); multiply components 1..3 by it.
	rcp.approx.ftz.f32 	%f22, %f12;
	mul.ftz.f32 	%f23, %f22, %f2;
	mul.ftz.f32 	%f9, %f23, %f9;
	.loc	20	419	0
	mul.ftz.f32 	%f10, %f23, %f10;
	.loc	20	420	0
	mul.ftz.f32 	%f11, %f23, %f11;
	.loc	20	422	0
	// Same %r12 test as above: skip the offset restore when it is zero.
	mov.u32 	%r19, 0;
	setp.eq.s32 	%p11, %r12, %r19;
	@%p11 bra 	$Lt_20_31490;
	.loc	20	57	0
	// Third T ladder -> %f24 (same mapping as %f12).
	@!%p1 bra 	$Lt_20_23042;
	.loc	20	59	0
	mov.f32 	%f24, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_197_3;
$Lt_20_23042:
	.loc	20	61	0
	mov.u32 	%r20, 64;
	setp.ne.s32 	%p12, %r3, %r20;
	@%p12 bra 	$Lt_20_23298;
	.loc	20	63	0
	mov.f32 	%f24, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_197_3;
$Lt_20_23298:
	.loc	20	65	0
	mov.u32 	%r21, 128;
	setp.ne.s32 	%p13, %r3, %r21;
	@%p13 bra 	$Lt_20_23554;
	.loc	20	68	0
	mov.f32 	%f24, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_197_3;
$Lt_20_23554:
	.loc	20	70	0
	mov.u32 	%r22, 192;
	setp.ne.s32 	%p14, %r3, %r22;
	@%p14 bra 	$Lt_20_23810;
	.loc	20	72	0
	mov.f32 	%f24, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_197_3;
$Lt_20_23810:
	.loc	20	76	0
	mov.f32 	%f24, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_197_3:
	.loc	20	107	0
	// %p10 is recomputed here (same bit-2048 test) and reused for the three
	// offset loads into %f25..%f27.
	and.b32 	%r23, %r2, 2048;
	mov.s32 	%r24, 0;
	setp.ne.s32 	%p10, %r23, %r24;
	@!%p10 bra 	$Lt_20_32258;
	.loc	20	100	0
	ld.const.f32 	%f25, [kYCbCrOffset+0];
	bra.uni 	$Lt_20_32002;
$Lt_20_32258:
	ld.const.f32 	%f25, [kYCbCrFullRangeOffset+0];
$Lt_20_32002:
	.loc	20	107	0
	@!%p10 bra 	$Lt_20_32770;
	.loc	20	100	0
	ld.const.f32 	%f26, [kYCbCrOffset+4];
	bra.uni 	$Lt_20_32514;
$Lt_20_32770:
	ld.const.f32 	%f26, [kYCbCrFullRangeOffset+4];
$Lt_20_32514:
	.loc	20	107	0
	@!%p10 bra 	$Lt_20_33282;
	.loc	20	100	0
	ld.const.f32 	%f27, [kYCbCrOffset+8];
	bra.uni 	$Lt_20_33026;
$Lt_20_33282:
	ld.const.f32 	%f27, [kYCbCrFullRangeOffset+8];
$Lt_20_33026:
	.loc	20	424	0
	// Restore the scaled offsets: component = fma(%f24 / 255, offset, component).
	mov.f32 	%f28, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f29, %f24, %f28;
	fma.rn.ftz.f32 	%f9, %f29, %f25, %f9;
	fma.rn.ftz.f32 	%f10, %f29, %f26, %f10;
	fma.rn.ftz.f32 	%f11, %f29, %f27, %f11;
$Lt_20_31490:
	.loc	20	427	0
	// Return (component 0 unchanged, %f9, %f10, %f11).
	mov.f32 	%f30, %f2;
	st.param.f32 	[__cudaretf__Z21PremultiplyComponents6float414IR_PixelFormat+0], %f30;
	mov.f32 	%f31, %f9;
	st.param.f32 	[__cudaretf__Z21PremultiplyComponents6float414IR_PixelFormat+4], %f31;
	mov.f32 	%f32, %f10;
	st.param.f32 	[__cudaretf__Z21PremultiplyComponents6float414IR_PixelFormat+8], %f32;
	mov.f32 	%f33, %f11;
	st.param.f32 	[__cudaretf__Z21PremultiplyComponents6float414IR_PixelFormat+12], %f33;
	ret;
$LDWend__Z21PremultiplyComponents6float414IR_PixelFormat:
	} // _Z21PremultiplyComponents6float414IR_PixelFormat

	// ---------------------------------------------------------------------------
	// _Z23UnpremultiplyComponents6float414IR_PixelFormat
	//   (mangled name reads as UnpremultiplyComponents(float4, IR_PixelFormat))
	//
	// Compiler-generated PTX; the comments describe what the instructions do.
	//
	// Inputs:
	//   param 1 : 16 bytes, read as four f32 components c0..c3 at +0/+4/+8/+12.
	//   param 2 : 32-bit pixel-format bit field ("format" below).
	// Output:
	//   16 bytes, written as four f32 components at +0/+4/+8/+12.
	//
	// Outline:
	//   needOffset = (format & 1) & ((format & 448) != 256)
	//   1. if needOffset: c1..c3 -= (maxValue / 255) * offset[0..2]
	//   2. if (c0 - 8e-6) <= 0: all four outputs = 0
	//      else:                c1..c3 *= maxValue / c0   (c0 passes through)
	//   3. if needOffset: c1..c3 += (maxValue / 255) * offset[0..2]
	//
	//   maxValue is selected from (format & 448):
	//      0 -> 255, 64 -> 1023, 128 -> 32768, 192 -> -1, otherwise -> 1
	//   offset[] is read from kYCbCrOffset when (format & 2048) != 0,
	//   otherwise from kYCbCrFullRangeOffset.
	//
	// NOTE(review): c0 is presumably alpha and bit 0 presumably marks a YCbCr
	// format -- inferred from names only, confirm against the CUDA source.
	// All divisions are div.approx.ftz (approximate, flush-to-zero).
	// ---------------------------------------------------------------------------
	.visible .func (.param .align 16 .b8 __cudaretf__Z23UnpremultiplyComponents6float414IR_PixelFormat[16]) _Z23UnpremultiplyComponents6float414IR_PixelFormat (.param .align 16 .b8 __cudaparmf1__Z23UnpremultiplyComponents6float414IR_PixelFormat[16], .param .s32 __cudaparmf2__Z23UnpremultiplyComponents6float414IR_PixelFormat)
	{
	.reg .u32 %r<28>;
	.reg .f32 %f<38>;
	.reg .pred %p<19>;
	.loc	20	432	0
$LDWbegin__Z23UnpremultiplyComponents6float414IR_PixelFormat:
	// Load the four input components: %f2 = c0, %f4 = c1, %f6 = c2, %f8 = c3.
	ld.param.f32 	%f1, [__cudaparmf1__Z23UnpremultiplyComponents6float414IR_PixelFormat+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z23UnpremultiplyComponents6float414IR_PixelFormat+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z23UnpremultiplyComponents6float414IR_PixelFormat+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z23UnpremultiplyComponents6float414IR_PixelFormat+12];
	mov.f32 	%f8, %f7;
	// %r2 = format bit field.
	ld.param.u32 	%r1, [__cudaparmf2__Z23UnpremultiplyComponents6float414IR_PixelFormat];
	mov.s32 	%r2, %r1;
	.loc	20	433	0
	// Working copy of the result: %f9..%f12 start out equal to c0..c3.
	mov.f32 	%f9, %f2;
	mov.f32 	%f10, %f4;
	mov.f32 	%f11, %f6;
	mov.f32 	%f12, %f8;
	.loc	20	435	0
	// %r3 = format & 448 (bits 6..8); reused by every maxValue selection below.
	// %r8 = needOffset = (format & 1) & ((format & 448) != 256); reused at step 3.
	and.b32 	%r3, %r2, 448;
	mov.s32 	%r4, %r2;
	mov.s32 	%r5, 256;
	setp.ne.s32 	%p1, %r3, %r5;
	and.b32 	%r6, %r4, 1;
	selp.s32 	%r7, 1, 0, %p1;
	and.b32 	%r8, %r6, %r7;
	mov.u32 	%r9, 0;
	setp.eq.s32 	%p2, %r8, %r9;
	// needOffset == 0: skip step 1 entirely.
	@%p2 bra 	$Lt_21_30210;
	// ---- Step 1: remove the offset from c1..c3 -------------------------------
	// Select maxValue into %f13 from (format & 448). The label name suggests
	// this is an inlined copy of MaxUnsignedBitValue(int).
	.loc	20	57	0
	mov.u32 	%r10, 0;
	setp.ne.s32 	%p3, %r3, %r10;
	@%p3 bra 	$Lt_21_19458;
	.loc	20	59	0
	mov.f32 	%f13, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_198_9;
$Lt_21_19458:
	.loc	20	61	0
	mov.u32 	%r11, 64;
	setp.ne.s32 	%p4, %r3, %r11;
	@%p4 bra 	$Lt_21_19714;
	.loc	20	63	0
	mov.f32 	%f13, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_198_9;
$Lt_21_19714:
	.loc	20	65	0
	mov.u32 	%r12, 128;
	setp.ne.s32 	%p5, %r3, %r12;
	@%p5 bra 	$Lt_21_19970;
	.loc	20	68	0
	mov.f32 	%f13, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_198_9;
$Lt_21_19970:
	.loc	20	70	0
	mov.u32 	%r13, 192;
	setp.ne.s32 	%p6, %r3, %r13;
	@%p6 bra 	$Lt_21_20226;
	.loc	20	72	0
	mov.f32 	%f13, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_198_9;
$Lt_21_20226:
	.loc	20	76	0
	mov.f32 	%f13, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_198_9:
	.loc	20	118	0
	// %p7 = (format & 2048) != 0. Each of the three offset components is
	// loaded behind its own test of %p7: kYCbCrOffset when set,
	// kYCbCrFullRangeOffset when clear.
	and.b32 	%r14, %r2, 2048;
	mov.s32 	%r15, 0;
	setp.ne.s32 	%p7, %r14, %r15;
	@!%p7 bra 	$Lt_21_30978;
	.loc	20	100	0
	ld.const.f32 	%f14, [kYCbCrOffset+0];
	bra.uni 	$Lt_21_30722;
$Lt_21_30978:
	ld.const.f32 	%f14, [kYCbCrFullRangeOffset+0];
$Lt_21_30722:
	.loc	20	118	0
	@!%p7 bra 	$Lt_21_31490;
	.loc	20	100	0
	ld.const.f32 	%f15, [kYCbCrOffset+4];
	bra.uni 	$Lt_21_31234;
$Lt_21_31490:
	ld.const.f32 	%f15, [kYCbCrFullRangeOffset+4];
$Lt_21_31234:
	.loc	20	118	0
	@!%p7 bra 	$Lt_21_32002;
	.loc	20	100	0
	ld.const.f32 	%f16, [kYCbCrOffset+8];
	bra.uni 	$Lt_21_31746;
$Lt_21_32002:
	ld.const.f32 	%f16, [kYCbCrFullRangeOffset+8];
$Lt_21_31746:
	.loc	20	437	0
	// %f18 = maxValue / 255; c1..c3 -= %f18 * offset[0..2].
	mov.f32 	%f17, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f18, %f13, %f17;
	mul.ftz.f32 	%f19, %f18, %f14;
	sub.ftz.f32 	%f10, %f4, %f19;
	mul.ftz.f32 	%f20, %f18, %f15;
	sub.ftz.f32 	%f11, %f6, %f20;
	mul.ftz.f32 	%f21, %f18, %f16;
	sub.ftz.f32 	%f12, %f8, %f21;
$Lt_21_30210:
	// ---- Step 2: divide c1..c3 by c0, guarding against c0 near zero ----------
	// If (c0 - 8e-6) <= 0, zero all four outputs instead of dividing.
	mov.f32 	%f22, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f23, %f2, %f22;
	mov.f32 	%f24, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p8, %f23, %f24;
	@!%p8 bra 	$Lt_21_32514;
	mov.f32 	%f12, 0f00000000;    	// 0
	mov.f32 	%f11, 0f00000000;    	// 0
	mov.f32 	%f10, 0f00000000;    	// 0
	mov.f32 	%f9, 0f00000000;     	// 0
	bra.uni 	$Lt_21_32258;
$Lt_21_32514:
	// Select maxValue into %f25 (same mapping as in step 1).
	.loc	20	57	0
	mov.u32 	%r16, 0;
	setp.ne.s32 	%p9, %r3, %r16;
	@%p9 bra 	$Lt_21_22530;
	.loc	20	59	0
	mov.f32 	%f25, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_198_5;
$Lt_21_22530:
	.loc	20	61	0
	mov.u32 	%r17, 64;
	setp.ne.s32 	%p10, %r3, %r17;
	@%p10 bra 	$Lt_21_22786;
	.loc	20	63	0
	mov.f32 	%f25, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_198_5;
$Lt_21_22786:
	.loc	20	65	0
	mov.u32 	%r18, 128;
	setp.ne.s32 	%p11, %r3, %r18;
	@%p11 bra 	$Lt_21_23042;
	.loc	20	68	0
	mov.f32 	%f25, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_198_5;
$Lt_21_23042:
	.loc	20	70	0
	mov.u32 	%r19, 192;
	setp.ne.s32 	%p12, %r3, %r19;
	@%p12 bra 	$Lt_21_23298;
	.loc	20	72	0
	mov.f32 	%f25, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_198_5;
$Lt_21_23298:
	.loc	20	76	0
	mov.f32 	%f25, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_198_5:
	.loc	20	447	0
	// %f26 = maxValue / c0; scale c1..c3 by it. %f9 (c0) is left unchanged.
	div.approx.ftz.f32 	%f26, %f25, %f2;
	mul.ftz.f32 	%f10, %f26, %f10;
	.loc	20	448	0
	mul.ftz.f32 	%f11, %f26, %f11;
	.loc	20	449	0
	mul.ftz.f32 	%f12, %f26, %f12;
$Lt_21_32258:
	.loc	20	452	0
	// ---- Step 3: add the offset back to c1..c3 -------------------------------
	// Both step-2 paths arrive here, so when needOffset is set the zeroed
	// outputs also receive the offset.
	mov.u32 	%r20, 0;
	setp.eq.s32 	%p13, %r8, %r20;
	// needOffset == 0: go straight to the result stores.
	@%p13 bra 	$Lt_21_32770;
	// Select maxValue into %f27 (same mapping as in step 1).
	.loc	20	57	0
	mov.u32 	%r21, 0;
	setp.ne.s32 	%p14, %r3, %r21;
	@%p14 bra 	$Lt_21_23810;
	.loc	20	59	0
	mov.f32 	%f27, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_198_3;
$Lt_21_23810:
	.loc	20	61	0
	mov.u32 	%r22, 64;
	setp.ne.s32 	%p15, %r3, %r22;
	@%p15 bra 	$Lt_21_24066;
	.loc	20	63	0
	mov.f32 	%f27, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_198_3;
$Lt_21_24066:
	.loc	20	65	0
	mov.u32 	%r23, 128;
	setp.ne.s32 	%p16, %r3, %r23;
	@%p16 bra 	$Lt_21_24322;
	.loc	20	68	0
	mov.f32 	%f27, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_198_3;
$Lt_21_24322:
	.loc	20	70	0
	mov.u32 	%r24, 192;
	setp.ne.s32 	%p17, %r3, %r24;
	@%p17 bra 	$Lt_21_24578;
	.loc	20	72	0
	mov.f32 	%f27, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_198_3;
$Lt_21_24578:
	.loc	20	76	0
	mov.f32 	%f27, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_198_3:
	.loc	20	107	0
	// Recompute %p7 = (format & 2048) != 0 (same test as in step 1) and
	// reload the three offset components into %f28..%f30.
	and.b32 	%r25, %r2, 2048;
	mov.s32 	%r26, 0;
	setp.ne.s32 	%p7, %r25, %r26;
	@!%p7 bra 	$Lt_21_33538;
	.loc	20	100	0
	ld.const.f32 	%f28, [kYCbCrOffset+0];
	bra.uni 	$Lt_21_33282;
$Lt_21_33538:
	ld.const.f32 	%f28, [kYCbCrFullRangeOffset+0];
$Lt_21_33282:
	.loc	20	107	0
	@!%p7 bra 	$Lt_21_34050;
	.loc	20	100	0
	ld.const.f32 	%f29, [kYCbCrOffset+4];
	bra.uni 	$Lt_21_33794;
$Lt_21_34050:
	ld.const.f32 	%f29, [kYCbCrFullRangeOffset+4];
$Lt_21_33794:
	.loc	20	107	0
	@!%p7 bra 	$Lt_21_34562;
	.loc	20	100	0
	ld.const.f32 	%f30, [kYCbCrOffset+8];
	bra.uni 	$Lt_21_34306;
$Lt_21_34562:
	ld.const.f32 	%f30, [kYCbCrFullRangeOffset+8];
$Lt_21_34306:
	.loc	20	454	0
	// %f32 = maxValue / 255; c1..c3 = %f32 * offset[0..2] + c1..c3 (fused).
	mov.f32 	%f31, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f32, %f27, %f31;
	fma.rn.ftz.f32 	%f10, %f32, %f28, %f10;
	fma.rn.ftz.f32 	%f11, %f32, %f29, %f11;
	fma.rn.ftz.f32 	%f12, %f32, %f30, %f12;
$Lt_21_32770:
	.loc	20	457	0
	// Write the four result components %f9..%f12 to the return parameter.
	mov.f32 	%f33, %f9;
	st.param.f32 	[__cudaretf__Z23UnpremultiplyComponents6float414IR_PixelFormat+0], %f33;
	mov.f32 	%f34, %f10;
	st.param.f32 	[__cudaretf__Z23UnpremultiplyComponents6float414IR_PixelFormat+4], %f34;
	mov.f32 	%f35, %f11;
	st.param.f32 	[__cudaretf__Z23UnpremultiplyComponents6float414IR_PixelFormat+8], %f35;
	mov.f32 	%f36, %f12;
	st.param.f32 	[__cudaretf__Z23UnpremultiplyComponents6float414IR_PixelFormat+12], %f36;
	ret;
$LDWend__Z23UnpremultiplyComponents6float414IR_PixelFormat:
	} // _Z23UnpremultiplyComponents6float414IR_PixelFormat

	.visible .func (.param .align 16 .b8 __cudaretf__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_[16]) _Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_ (.param .align 16 .b8 __cudaparmf1__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_[16], .param .s32 __cudaparmf2__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_, .param .s32 __cudaparmf3__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_)
	{
	.reg .u32 %r<185>;
	.reg .u64 %rd<3>;
	.reg .f32 %f<198>;
	.reg .pred %p<135>;
	.loc	20	468	0
$LDWbegin__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_:
	ld.param.f32 	%f1, [__cudaparmf1__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_+12];
	mov.f32 	%f8, %f7;
	ld.param.u32 	%r1, [__cudaparmf2__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_];
	mov.s32 	%r2, %r1;
	ld.param.u32 	%r3, [__cudaparmf3__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_];
	mov.s32 	%r4, %r3;
	.loc	20	469	0
	mov.f32 	%f9, %f2;
	mov.f32 	%f10, %f4;
	mov.f32 	%f11, %f6;
	mov.f32 	%f12, %f8;
	and.b32 	%r5, %r2, 4096;
	mov.u32 	%r6, 0;
	setp.ne.s32 	%p1, %r5, %r6;
	@%p1 bra 	$Lt_22_222722;
	.loc	20	473	0
	mov.f32 	%f9, %f8;
	mov.f32 	%f10, %f6;
	mov.f32 	%f11, %f4;
	mov.f32 	%f12, %f2;
$Lt_22_222722:
	.loc	20	476	0
	and.b32 	%r7, %r2, 448;
	mov.s32 	%r8, %r2;
	and.b32 	%r9, %r4, 448;
	mov.s32 	%r10, %r4;
	mov.s32 	%r11, 256;
	setp.ne.s32 	%p2, %r7, %r11;
	and.b32 	%r12, %r8, 1;
	mov.s32 	%r13, 256;
	setp.ne.s32 	%p3, %r9, %r13;
	and.b32 	%r14, %r10, 1;
	selp.s32 	%r15, 1, 0, %p2;
	selp.s32 	%r16, 1, 0, %p3;
	and.b32 	%r17, %r12, %r15;
	and.b32 	%r18, %r14, %r16;
	mov.u32 	%r19, 0;
	setp.eq.s32 	%p4, %r17, %r19;
	@%p4 bra 	$Lt_22_249858;
	mov.u32 	%r20, 0;
	setp.ne.s32 	%p5, %r18, %r20;
	@%p5 bra 	$Lt_22_249858;
	.loc	20	57	0
	mov.u32 	%r21, 0;
	setp.ne.s32 	%p6, %r7, %r21;
	@%p6 bra 	$Lt_22_140802;
	.loc	20	59	0
	mov.f32 	%f13, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_75;
$Lt_22_140802:
	.loc	20	61	0
	mov.u32 	%r22, 64;
	setp.ne.s32 	%p7, %r7, %r22;
	@%p7 bra 	$Lt_22_141058;
	.loc	20	63	0
	mov.f32 	%f13, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_75;
$Lt_22_141058:
	.loc	20	65	0
	mov.u32 	%r23, 128;
	setp.ne.s32 	%p8, %r7, %r23;
	@%p8 bra 	$Lt_22_141314;
	.loc	20	68	0
	mov.f32 	%f13, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_75;
$Lt_22_141314:
	.loc	20	70	0
	mov.u32 	%r24, 192;
	setp.ne.s32 	%p9, %r7, %r24;
	@%p9 bra 	$Lt_22_141570;
	.loc	20	72	0
	mov.f32 	%f13, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_75;
$Lt_22_141570:
	.loc	20	76	0
	mov.f32 	%f13, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_75:
	.loc	20	118	0
	and.b32 	%r25, %r2, 2048;
	mov.s32 	%r26, 0;
	setp.ne.s32 	%p10, %r25, %r26;
	@!%p10 bra 	$Lt_22_223490;
	.loc	20	100	0
	ld.const.f32 	%f14, [kYCbCrOffset+0];
	bra.uni 	$Lt_22_223234;
$Lt_22_223490:
	ld.const.f32 	%f14, [kYCbCrFullRangeOffset+0];
$Lt_22_223234:
	.loc	20	118	0
	@!%p10 bra 	$Lt_22_224002;
	.loc	20	100	0
	ld.const.f32 	%f15, [kYCbCrOffset+4];
	bra.uni 	$Lt_22_223746;
$Lt_22_224002:
	ld.const.f32 	%f15, [kYCbCrFullRangeOffset+4];
$Lt_22_223746:
	.loc	20	118	0
	@!%p10 bra 	$Lt_22_224514;
	.loc	20	100	0
	ld.const.f32 	%f16, [kYCbCrOffset+8];
	bra.uni 	$Lt_22_224258;
$Lt_22_224514:
	ld.const.f32 	%f16, [kYCbCrFullRangeOffset+8];
$Lt_22_224258:
	.loc	20	478	0
	mov.f32 	%f17, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f18, %f13, %f17;
	mul.ftz.f32 	%f19, %f18, %f14;
	sub.ftz.f32 	%f10, %f10, %f19;
	mul.ftz.f32 	%f20, %f18, %f15;
	sub.ftz.f32 	%f11, %f11, %f20;
	mul.ftz.f32 	%f21, %f18, %f16;
	sub.ftz.f32 	%f12, %f12, %f21;
$Lt_22_249858:
$Lt_22_26114:
	.loc	20	481	0
	and.b32 	%r27, %r2, 2;
	and.b32 	%r28, %r4, 2;
	mov.u32 	%r29, 0;
	setp.eq.s32 	%p11, %r27, %r29;
	@%p11 bra 	$Lt_22_250370;
	mov.u32 	%r30, 0;
	setp.ne.s32 	%p12, %r28, %r30;
	@%p12 bra 	$Lt_22_250370;
	.loc	20	483	0
	mov.f32 	%f22, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p13, %f10, %f22;
	@!%p13 bra 	$Lt_22_224770;
	.loc	20	372	0
	neg.ftz.f32 	%f23, %f10;
	lg2.approx.ftz.f32 	%f24, %f23;
	mov.f32 	%f25, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f26, %f24, %f25;
	ex2.approx.ftz.f32 	%f27, %f26;
	neg.ftz.f32 	%f28, %f27;
	bra.uni 	$LDWendi___log2f_199_71;
$Lt_22_224770:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f29, %f10;
	mov.f32 	%f30, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f31, %f29, %f30;
	ex2.approx.ftz.f32 	%f28, %f31;
$LDWendi___log2f_199_71:
	.loc	20	483	0
	mov.f32 	%f32, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p14, %f11, %f32;
	@!%p14 bra 	$Lt_22_225282;
	.loc	20	372	0
	neg.ftz.f32 	%f33, %f11;
	lg2.approx.ftz.f32 	%f34, %f33;
	mov.f32 	%f35, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f36, %f34, %f35;
	ex2.approx.ftz.f32 	%f37, %f36;
	neg.ftz.f32 	%f38, %f37;
	bra.uni 	$LDWendi___log2f_199_69;
$Lt_22_225282:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f39, %f11;
	mov.f32 	%f40, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f41, %f39, %f40;
	ex2.approx.ftz.f32 	%f38, %f41;
$LDWendi___log2f_199_69:
	.loc	20	483	0
	mov.f32 	%f42, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p15, %f12, %f42;
	@!%p15 bra 	$Lt_22_225794;
	.loc	20	372	0
	neg.ftz.f32 	%f43, %f12;
	lg2.approx.ftz.f32 	%f44, %f43;
	mov.f32 	%f45, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f46, %f44, %f45;
	ex2.approx.ftz.f32 	%f47, %f46;
	neg.ftz.f32 	%f48, %f47;
	bra.uni 	$LDWendi___log2f_199_67;
$Lt_22_225794:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f49, %f12;
	mov.f32 	%f50, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f51, %f49, %f50;
	ex2.approx.ftz.f32 	%f48, %f51;
$LDWendi___log2f_199_67:
	.loc	20	483	0
	mov.f32 	%f10, %f28;
	mov.f32 	%f11, %f38;
	mov.f32 	%f12, %f48;
$Lt_22_250370:
$Lt_22_29954:
	.loc	20	486	0
	and.b32 	%r31, %r2, 1;
	and.b32 	%r32, %r4, 1;
	and.b32 	%r33, %r2, 1536;
	and.b32 	%r34, %r4, 1536;
	set.ne.u32.s32 	%r35, %r31, %r32;
	neg.s32 	%r36, %r35;
	set.ne.u32.s32 	%r37, %r33, %r34;
	neg.s32 	%r38, %r37;
	or.b32 	%r39, %r36, %r38;
	mov.u32 	%r40, 0;
	setp.ne.s32 	%p16, %r39, %r40;
	@%p16 bra 	$Lt_22_80642;
	setp.eq.s32 	%p17, %r17, %r18;
	@%p17 bra 	$Lt_22_80898;
$Lt_22_80642:
	.loc	20	490	0
	mov.u32 	%r41, 0;
	setp.ne.s32 	%p18, %r31, %r41;
	@%p18 bra 	$Lt_22_226562;
	mov.s32 	%r42, 256;
	setp.eq.s32 	%p19, %r9, %r42;
	mov.u32 	%r43, 256;
	setp.ne.s32 	%p20, %r7, %r43;
	@%p20 bra 	$Lt_22_227074;
	.loc	20	137	0
	mov.s32 	%r44, 512;
	setp.eq.s32 	%p21, %r34, %r44;
	@!%p19 bra 	$Lt_22_144898;
	.loc	20	139	0
	@!%p21 bra 	$Lt_22_145154;
	.loc	20	141	0
	mov.u64 	%rd1, kRGB32f_To_709YPbPr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_145154:
	.loc	20	145	0
	mov.u64 	%rd1, kRGB32f_To_601YPbPr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_144898:
	.loc	20	150	0
	@!%p21 bra 	$Lt_22_145410;
	.loc	20	152	0
	mov.u64 	%rd1, kRGB32f_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_145410:
	.loc	20	154	0
	and.b32 	%r45, %r4, 2048;
	mov.u32 	%r46, 0;
	setp.ne.s32 	%p22, %r45, %r46;
	@%p22 bra 	$Lt_22_145666;
	.loc	20	156	0
	mov.u64 	%rd1, kRGB32f_To_601YCbCrFullRange;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_145666:
	.loc	20	160	0
	mov.u64 	%rd1, kRGB32f_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_227074:
	@!%p19 bra 	$Lt_22_227586;
	bra.uni 	$Lt_22_226306;
$Lt_22_227586:
	.loc	20	179	0
	mov.u32 	%r47, 512;
	setp.ne.s32 	%p23, %r34, %r47;
	@%p23 bra 	$Lt_22_146434;
	.loc	20	181	0
	mov.u64 	%rd1, kRGB8u_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_146434:
	.loc	20	183	0
	and.b32 	%r48, %r4, 2048;
	mov.u32 	%r49, 0;
	setp.ne.s32 	%p24, %r48, %r49;
	@%p24 bra 	$Lt_22_146690;
	.loc	20	185	0
	mov.u64 	%rd1, kRGB8u_To_601YCbCrFullRange;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_146690:
	.loc	20	189	0
	mov.u64 	%rd1, kRGB8u_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_226562:
	mov.s32 	%r50, 0;
	setp.eq.s32 	%p25, %r32, %r50;
	mov.u32 	%r51, 512;
	setp.ne.s32 	%p26, %r33, %r51;
	@%p26 bra 	$Lt_22_228098;
	mov.s32 	%r52, 256;
	setp.eq.s32 	%p27, %r7, %r52;
	@!%p25 bra 	$Lt_22_228610;
	mov.s32 	%r53, 256;
	setp.eq.s32 	%p28, %r9, %r53;
	@!%p27 bra 	$Lt_22_229122;
	@!%p28 bra 	$Lt_22_226306;
	.loc	20	202	0
	mov.u64 	%rd1, k709YPbPr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_229122:
	.loc	20	211	0
	@!%p28 bra 	$Lt_22_147970;
	.loc	20	213	0
	mov.u64 	%rd1, k709YCbCr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_147970:
	.loc	20	217	0
	mov.u64 	%rd1, k709YCbCr_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_228610:
	@!%p27 bra 	$Lt_22_230146;
	bra.uni 	$Lt_22_226306;
$Lt_22_230146:
	mov.s32 	%r54, 256;
	set.eq.u32.s32 	%r55, %r9, %r54;
	neg.s32 	%r56, %r55;
	and.b32 	%r57, %r4, 2048;
	mov.s32 	%r58, 0;
	set.eq.u32.s32 	%r59, %r57, %r58;
	neg.s32 	%r60, %r59;
	or.b32 	%r61, %r56, %r60;
	mov.u32 	%r62, 0;
	setp.eq.s32 	%p29, %r61, %r62;
	@%p29 bra 	$Lt_22_230658;
	bra.uni 	$Lt_22_226306;
$Lt_22_230658:
	.loc	20	250	0
	mov.u64 	%rd1, k709YCbCr_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_228098:
	and.b32 	%r63, %r2, 2048;
	mov.s32 	%r64, 0;
	setp.eq.s32 	%p30, %r63, %r64;
	@!%p30 bra 	$Lt_22_231170;
	@!%p25 bra 	$Lt_22_226306;
	.loc	20	259	0
	mov.u32 	%r65, 256;
	setp.ne.s32 	%p31, %r9, %r65;
	@%p31 bra 	$Lt_22_149506;
	.loc	20	261	0
	mov.u64 	%rd1, k601YCbCrFullRange_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_149506:
	.loc	20	265	0
	mov.u64 	%rd1, k601YCbCrFullRange_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_231170:
	mov.s32 	%r66, 256;
	setp.eq.s32 	%p27, %r7, %r66;
	@!%p25 bra 	$Lt_22_232194;
	mov.s32 	%r67, 256;
	setp.eq.s32 	%p32, %r9, %r67;
	@!%p27 bra 	$Lt_22_232706;
	@!%p32 bra 	$Lt_22_226306;
	.loc	20	302	0
	mov.u64 	%rd1, k601YPbPr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_232706:
	.loc	20	311	0
	@!%p32 bra 	$Lt_22_151298;
	.loc	20	313	0
	mov.u64 	%rd1, k601YCbCr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_151298:
	.loc	20	317	0
	mov.u64 	%rd1, k601YCbCr_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_232194:
	@!%p27 bra 	$Lt_22_233730;
	bra.uni 	$Lt_22_226306;
$Lt_22_233730:
	selp.s32 	%r68, 1, 0, %p30;
	mov.s32 	%r69, 256;
	set.eq.u32.s32 	%r70, %r9, %r69;
	neg.s32 	%r71, %r70;
	or.b32 	%r72, %r68, %r71;
	mov.u32 	%r73, 0;
	setp.eq.s32 	%p33, %r72, %r73;
	@%p33 bra 	$Lt_22_234242;
	bra.uni 	$Lt_22_226306;
$Lt_22_234242:
	.loc	20	350	0
	mov.u64 	%rd1, k601YCbCr_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65;
$Lt_22_226306:
	.loc	20	355	0
	mov.u64 	%rd1, 0;
$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__199_65:
	.loc	20	490	0
	ld.global.f32 	%f52, [%rd1+16];
	mul.ftz.f32 	%f53, %f52, %f11;
	ld.global.f32 	%f54, [%rd1+12];
	fma.rn.ftz.f32 	%f55, %f54, %f10, %f53;
	ld.global.f32 	%f56, [%rd1+20];
	fma.rn.ftz.f32 	%f57, %f56, %f12, %f55;
	ld.global.f32 	%f58, [%rd1+28];
	mul.ftz.f32 	%f59, %f58, %f11;
	ld.global.f32 	%f60, [%rd1+24];
	fma.rn.ftz.f32 	%f61, %f60, %f10, %f59;
	ld.global.f32 	%f62, [%rd1+32];
	fma.rn.ftz.f32 	%f63, %f62, %f12, %f61;
	ld.global.f32 	%f64, [%rd1+4];
	mul.ftz.f32 	%f65, %f64, %f11;
	ld.global.f32 	%f66, [%rd1+0];
	fma.rn.ftz.f32 	%f67, %f66, %f10, %f65;
	ld.global.f32 	%f68, [%rd1+8];
	fma.rn.ftz.f32 	%f10, %f68, %f12, %f67;
	mov.f32 	%f11, %f57;
	mov.f32 	%f12, %f63;
	setp.eq.s32 	%p34, %r7, %r9;
	@%p34 bra 	$Lt_22_235010;
	.loc	20	494	0
	mov.s32 	%r74, 256;
	setp.eq.s32 	%p27, %r7, %r74;
	@!%p27 bra 	$L_22_220162;
	mov.s32 	%r75, 0;
	setp.eq.s32 	%p35, %r9, %r75;
	@%p35 bra 	$Lt_22_251394;
$L_22_220162:
	mov.s32 	%r76, 0;
	setp.eq.s32 	%p36, %r7, %r76;
	@!%p36 bra 	$Lt_22_251650;
	mov.u32 	%r77, 256;
	setp.ne.s32 	%p37, %r9, %r77;
	@%p37 bra 	$Lt_22_251650;
	mov.s32 	%r78, 0;
	setp.eq.s32 	%p35, %r9, %r78;
	bra.uni 	$L_22_219906;
$Lt_22_251394:
	mov.s32 	%r79, 0;
	setp.eq.s32 	%p36, %r7, %r79;
$L_22_219906:
	.loc	20	57	0
	@!%p35 bra 	$Lt_22_152834;
	.loc	20	59	0
	mov.f32 	%f69, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_63;
$Lt_22_152834:
	.loc	20	61	0
	mov.u32 	%r80, 64;
	setp.ne.s32 	%p38, %r9, %r80;
	@%p38 bra 	$Lt_22_153090;
	.loc	20	63	0
	mov.f32 	%f69, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_63;
$Lt_22_153090:
	.loc	20	65	0
	mov.u32 	%r81, 128;
	setp.ne.s32 	%p39, %r9, %r81;
	@%p39 bra 	$Lt_22_153346;
	.loc	20	68	0
	mov.f32 	%f69, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_63;
$Lt_22_153346:
	.loc	20	70	0
	mov.u32 	%r82, 192;
	setp.ne.s32 	%p40, %r9, %r82;
	@%p40 bra 	$Lt_22_153602;
	.loc	20	72	0
	mov.f32 	%f69, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_63;
$Lt_22_153602:
	.loc	20	76	0
	mov.f32 	%f69, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_63:
	.loc	20	57	0
	@!%p36 bra 	$Lt_22_153858;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_61;
$Lt_22_153858:
	.loc	20	61	0
	mov.u32 	%r83, 64;
	setp.ne.s32 	%p41, %r7, %r83;
	@%p41 bra 	$Lt_22_154114;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_61;
$Lt_22_154114:
	.loc	20	65	0
	mov.u32 	%r84, 128;
	setp.ne.s32 	%p42, %r7, %r84;
	@%p42 bra 	$Lt_22_154370;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_61;
$Lt_22_154370:
	.loc	20	70	0
	mov.u32 	%r85, 192;
	setp.ne.s32 	%p43, %r7, %r85;
	@%p43 bra 	$Lt_22_154626;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_61;
$Lt_22_154626:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_61:
	.loc	20	498	0
	div.approx.ftz.f32 	%f71, %f69, %f70;
	mul.ftz.f32 	%f9, %f9, %f71;
	bra.uni 	$Lt_22_235010;
$Lt_22_251650:
$L_22_219650:
	.loc	20	500	0
	@!%p27 bra 	$L_22_221186;
	@%p3 bra 	$L_22_220930;
$L_22_221186:
	@!%p2 bra 	$Lt_22_252674;
	mov.u32 	%r86, 256;
	setp.ne.s32 	%p44, %r9, %r86;
	@%p44 bra 	$Lt_22_252674;
$L_22_220930:
	.loc	20	57	0
	mov.u32 	%r87, 0;
	setp.ne.s32 	%p45, %r9, %r87;
	@%p45 bra 	$Lt_22_155138;
	.loc	20	59	0
	mov.f32 	%f69, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_59;
$Lt_22_155138:
	.loc	20	61	0
	mov.u32 	%r88, 64;
	setp.ne.s32 	%p46, %r9, %r88;
	@%p46 bra 	$Lt_22_155394;
	.loc	20	63	0
	mov.f32 	%f69, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_59;
$Lt_22_155394:
	.loc	20	65	0
	mov.u32 	%r89, 128;
	setp.ne.s32 	%p47, %r9, %r89;
	@%p47 bra 	$Lt_22_155650;
	.loc	20	68	0
	mov.f32 	%f69, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_59;
$Lt_22_155650:
	.loc	20	70	0
	mov.u32 	%r90, 192;
	setp.ne.s32 	%p48, %r9, %r90;
	@%p48 bra 	$Lt_22_155906;
	.loc	20	72	0
	mov.f32 	%f69, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_59;
$Lt_22_155906:
	.loc	20	76	0
	mov.f32 	%f69, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_59:
	.loc	20	57	0
	@!%p36 bra 	$Lt_22_156162;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_57;
$Lt_22_156162:
	.loc	20	61	0
	mov.u32 	%r91, 64;
	setp.ne.s32 	%p49, %r7, %r91;
	@%p49 bra 	$Lt_22_156418;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_57;
$Lt_22_156418:
	.loc	20	65	0
	mov.u32 	%r92, 128;
	setp.ne.s32 	%p50, %r7, %r92;
	@%p50 bra 	$Lt_22_156674;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_57;
$Lt_22_156674:
	.loc	20	70	0
	mov.u32 	%r93, 192;
	setp.ne.s32 	%p51, %r7, %r93;
	@%p51 bra 	$Lt_22_156930;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_57;
$Lt_22_156930:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_57:
	.loc	20	504	0
	div.approx.ftz.f32 	%f72, %f69, %f70;
	mul.ftz.f32 	%f9, %f9, %f72;
	.loc	20	57	0
	@!%p36 bra 	$Lt_22_158210;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_53;
$Lt_22_158210:
	.loc	20	61	0
	mov.u32 	%r94, 64;
	setp.ne.s32 	%p52, %r7, %r94;
	@%p52 bra 	$Lt_22_158466;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_53;
$Lt_22_158466:
	.loc	20	65	0
	mov.u32 	%r95, 128;
	setp.ne.s32 	%p53, %r7, %r95;
	@%p53 bra 	$Lt_22_158722;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_53;
$Lt_22_158722:
	.loc	20	70	0
	mov.u32 	%r96, 192;
	setp.ne.s32 	%p54, %r7, %r96;
	@%p54 bra 	$Lt_22_158978;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_53;
$Lt_22_158978:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_53:
	.loc	20	505	0
	mov.f32 	%f73, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f74, %f73, %f70;
	mul.ftz.f32 	%f10, %f10, %f74;
	.loc	20	57	0
	@!%p36 bra 	$Lt_22_160258;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_49;
$Lt_22_160258:
	.loc	20	61	0
	mov.u32 	%r97, 64;
	setp.ne.s32 	%p55, %r7, %r97;
	@%p55 bra 	$Lt_22_160514;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_49;
$Lt_22_160514:
	.loc	20	65	0
	mov.u32 	%r98, 128;
	setp.ne.s32 	%p56, %r7, %r98;
	@%p56 bra 	$Lt_22_160770;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_49;
$Lt_22_160770:
	.loc	20	70	0
	mov.u32 	%r99, 192;
	setp.ne.s32 	%p57, %r7, %r99;
	@%p57 bra 	$Lt_22_161026;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_49;
$Lt_22_161026:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_49:
	.loc	20	506	0
	mov.f32 	%f75, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f76, %f75, %f70;
	mul.ftz.f32 	%f11, %f57, %f76;
	.loc	20	57	0
	@!%p36 bra 	$Lt_22_162306;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_45;
$Lt_22_162306:
	.loc	20	61	0
	mov.u32 	%r100, 64;
	setp.ne.s32 	%p58, %r7, %r100;
	@%p58 bra 	$Lt_22_162562;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_45;
$Lt_22_162562:
	.loc	20	65	0
	mov.u32 	%r101, 128;
	setp.ne.s32 	%p59, %r7, %r101;
	@%p59 bra 	$Lt_22_162818;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_45;
$Lt_22_162818:
	.loc	20	70	0
	mov.u32 	%r102, 192;
	setp.ne.s32 	%p60, %r7, %r102;
	@%p60 bra 	$Lt_22_163074;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_45;
$Lt_22_163074:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_45:
	.loc	20	507	0
	mov.f32 	%f77, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f78, %f77, %f70;
	mul.ftz.f32 	%f12, %f63, %f78;
	bra.uni 	$Lt_22_235010;
$Lt_22_252674:
$L_22_220674:
	.loc	20	57	0
	@!%p36 bra 	$Lt_22_163330;
	.loc	20	59	0
	mov.f32 	%f69, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_43;
$Lt_22_163330:
	.loc	20	61	0
	mov.u32 	%r103, 64;
	setp.ne.s32 	%p61, %r7, %r103;
	@%p61 bra 	$Lt_22_163586;
	.loc	20	63	0
	mov.f32 	%f69, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_43;
$Lt_22_163586:
	.loc	20	65	0
	mov.u32 	%r104, 128;
	setp.ne.s32 	%p62, %r7, %r104;
	@%p62 bra 	$Lt_22_163842;
	.loc	20	68	0
	mov.f32 	%f69, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_43;
$Lt_22_163842:
	.loc	20	70	0
	mov.u32 	%r105, 192;
	setp.ne.s32 	%p63, %r7, %r105;
	@%p63 bra 	$Lt_22_164098;
	.loc	20	72	0
	mov.f32 	%f69, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_43;
$Lt_22_164098:
	.loc	20	76	0
	mov.f32 	%f69, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_43:
	.loc	20	511	0
	mov.f32 	%f79, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f80, %f69, %f79;
	mul.ftz.f32 	%f9, %f80, %f9;
	mul.ftz.f32 	%f10, %f80, %f10;
	mul.ftz.f32 	%f11, %f80, %f57;
	mul.ftz.f32 	%f12, %f80, %f63;
	bra.uni 	$Lt_22_235010;
$Lt_22_80898:
	.loc	20	486	0
	setp.eq.s32 	%p64, %r7, %r9;
	@%p64 bra 	$Lt_22_235010;
	.loc	20	57	0
	mov.u32 	%r106, 0;
	setp.ne.s32 	%p65, %r9, %r106;
	@%p65 bra 	$Lt_22_165634;
	.loc	20	59	0
	mov.f32 	%f69, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_39;
$Lt_22_165634:
	.loc	20	61	0
	mov.u32 	%r107, 64;
	setp.ne.s32 	%p66, %r9, %r107;
	@%p66 bra 	$Lt_22_165890;
	.loc	20	63	0
	mov.f32 	%f69, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_39;
$Lt_22_165890:
	.loc	20	65	0
	mov.u32 	%r108, 128;
	setp.ne.s32 	%p67, %r9, %r108;
	@%p67 bra 	$Lt_22_166146;
	.loc	20	68	0
	mov.f32 	%f69, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_39;
$Lt_22_166146:
	.loc	20	70	0
	mov.u32 	%r109, 192;
	setp.ne.s32 	%p68, %r9, %r109;
	@%p68 bra 	$Lt_22_166402;
	.loc	20	72	0
	mov.f32 	%f69, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_39;
$Lt_22_166402:
	.loc	20	76	0
	mov.f32 	%f69, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_39:
	.loc	20	57	0
	mov.u32 	%r110, 0;
	setp.ne.s32 	%p69, %r7, %r110;
	@%p69 bra 	$Lt_22_166658;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_37;
$Lt_22_166658:
	.loc	20	61	0
	mov.u32 	%r111, 64;
	setp.ne.s32 	%p70, %r7, %r111;
	@%p70 bra 	$Lt_22_166914;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_37;
$Lt_22_166914:
	.loc	20	65	0
	mov.u32 	%r112, 128;
	setp.ne.s32 	%p71, %r7, %r112;
	@%p71 bra 	$Lt_22_167170;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_37;
$Lt_22_167170:
	.loc	20	70	0
	mov.u32 	%r113, 192;
	setp.ne.s32 	%p72, %r7, %r113;
	@%p72 bra 	$Lt_22_167426;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_37;
$Lt_22_167426:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_37:
	.loc	20	517	0
	div.approx.ftz.f32 	%f81, %f69, %f70;
	mul.ftz.f32 	%f9, %f81, %f9;
	mul.ftz.f32 	%f10, %f81, %f10;
	mul.ftz.f32 	%f11, %f81, %f11;
	mul.ftz.f32 	%f12, %f81, %f12;
$Lt_22_235010:
$Lt_22_83202:
	.loc	20	520	0
	mov.u32 	%r114, 0;
	setp.eq.s32 	%p73, %r28, %r114;
	@%p73 bra 	$Lt_22_253186;
	mov.u32 	%r115, 0;
	setp.ne.s32 	%p74, %r27, %r115;
	@%p74 bra 	$Lt_22_253186;
	.loc	20	522	0
	mov.f32 	%f82, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p75, %f10, %f82;
	@!%p75 bra 	$Lt_22_235522;
	.loc	20	372	0
	neg.ftz.f32 	%f83, %f10;
	lg2.approx.ftz.f32 	%f84, %f83;
	mov.f32 	%f85, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f86, %f84, %f85;
	ex2.approx.ftz.f32 	%f87, %f86;
	neg.ftz.f32 	%f88, %f87;
	bra.uni 	$LDWendi___log2f_199_35;
$Lt_22_235522:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f89, %f10;
	mov.f32 	%f90, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f91, %f89, %f90;
	ex2.approx.ftz.f32 	%f88, %f91;
$LDWendi___log2f_199_35:
	.loc	20	522	0
	mov.f32 	%f92, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p76, %f11, %f92;
	@!%p76 bra 	$Lt_22_236034;
	.loc	20	372	0
	neg.ftz.f32 	%f93, %f11;
	lg2.approx.ftz.f32 	%f94, %f93;
	mov.f32 	%f95, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f96, %f94, %f95;
	ex2.approx.ftz.f32 	%f97, %f96;
	neg.ftz.f32 	%f98, %f97;
	bra.uni 	$LDWendi___log2f_199_33;
$Lt_22_236034:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f99, %f11;
	mov.f32 	%f100, 0f400e38e4;   	// 2.22222
	mul.ftz.f32 	%f101, %f99, %f100;
	ex2.approx.ftz.f32 	%f98, %f101;
$LDWendi___log2f_199_33:
	.loc	20	522	0
	mov.f32 	%f102, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p77, %f12, %f102;
	@!%p77 bra 	$Lt_22_236546;
	.loc	20	372	0
	neg.ftz.f32 	%f103, %f12;
	lg2.approx.ftz.f32 	%f104, %f103;
	mov.f32 	%f105, 0f400e38e4;   	// 2.22222
	mul.ftz.f32 	%f106, %f104, %f105;
	ex2.approx.ftz.f32 	%f107, %f106;
	neg.ftz.f32 	%f108, %f107;
	bra.uni 	$LDWendi___log2f_199_31;
$Lt_22_236546:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f109, %f12;
	mov.f32 	%f110, 0f400e38e4;   	// 2.22222
	mul.ftz.f32 	%f111, %f109, %f110;
	ex2.approx.ftz.f32 	%f108, %f111;
$LDWendi___log2f_199_31:
	.loc	20	522	0
	mov.f32 	%f10, %f88;
	mov.f32 	%f11, %f98;
	mov.f32 	%f12, %f108;
$Lt_22_253186:
$Lt_22_85250:
	.loc	20	525	0
	mov.u32 	%r116, 0;
	setp.eq.s32 	%p78, %r18, %r116;
	@%p78 bra 	$Lt_22_253698;
	mov.u32 	%r117, 0;
	setp.ne.s32 	%p79, %r17, %r117;
	@%p79 bra 	$Lt_22_253698;
	.loc	20	57	0
	mov.u32 	%r118, 0;
	setp.ne.s32 	%p80, %r9, %r118;
	@%p80 bra 	$Lt_22_168450;
	.loc	20	59	0
	mov.f32 	%f112, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_29;
$Lt_22_168450:
	.loc	20	61	0
	mov.u32 	%r119, 64;
	setp.ne.s32 	%p81, %r9, %r119;
	@%p81 bra 	$Lt_22_168706;
	.loc	20	63	0
	mov.f32 	%f112, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_29;
$Lt_22_168706:
	.loc	20	65	0
	mov.u32 	%r120, 128;
	setp.ne.s32 	%p82, %r9, %r120;
	@%p82 bra 	$Lt_22_168962;
	.loc	20	68	0
	mov.f32 	%f112, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_29;
$Lt_22_168962:
	.loc	20	70	0
	mov.u32 	%r121, 192;
	setp.ne.s32 	%p83, %r9, %r121;
	@%p83 bra 	$Lt_22_169218;
	.loc	20	72	0
	mov.f32 	%f112, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_29;
$Lt_22_169218:
	.loc	20	76	0
	mov.f32 	%f112, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_29:
	.loc	20	107	0
	and.b32 	%r122, %r4, 2048;
	mov.s32 	%r123, 0;
	setp.ne.s32 	%p84, %r122, %r123;
	@!%p84 bra 	$Lt_22_237314;
	.loc	20	100	0
	ld.const.f32 	%f113, [kYCbCrOffset+0];
	bra.uni 	$Lt_22_237058;
$Lt_22_237314:
	ld.const.f32 	%f113, [kYCbCrFullRangeOffset+0];
$Lt_22_237058:
	.loc	20	107	0
	@!%p84 bra 	$Lt_22_237826;
	.loc	20	100	0
	ld.const.f32 	%f114, [kYCbCrOffset+4];
	bra.uni 	$Lt_22_237570;
$Lt_22_237826:
	ld.const.f32 	%f114, [kYCbCrFullRangeOffset+4];
$Lt_22_237570:
	.loc	20	107	0
	@!%p84 bra 	$Lt_22_238338;
	.loc	20	100	0
	ld.const.f32 	%f115, [kYCbCrOffset+8];
	bra.uni 	$Lt_22_238082;
$Lt_22_238338:
	ld.const.f32 	%f115, [kYCbCrFullRangeOffset+8];
$Lt_22_238082:
	.loc	20	527	0
	mov.f32 	%f116, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f117, %f112, %f116;
	fma.rn.ftz.f32 	%f10, %f117, %f113, %f10;
	fma.rn.ftz.f32 	%f11, %f117, %f114, %f11;
	fma.rn.ftz.f32 	%f12, %f117, %f115, %f12;
$Lt_22_253698:
$Lt_22_91650:
	.loc	20	525	0
	and.b32 	%r124, %r2, 12;
	and.b32 	%r125, %r4, 12;
	setp.eq.s32 	%p85, %r124, %r125;
	@%p85 bra 	$Lt_22_239106;
	.loc	20	532	0
	mov.u32 	%r126, 8;
	setp.ne.s32 	%p86, %r124, %r126;
	@%p86 bra 	$L_22_222466;
	mov.u32 	%r127, 12;
	setp.eq.s32 	%p87, %r125, %r127;
	@%p87 bra 	$Lt_22_254466;
$L_22_222466:
	mov.u32 	%r128, 12;
	setp.eq.s32 	%p88, %r124, %r128;
	@%p88 bra 	$Lt_22_254466;
	mov.u32 	%r129, 0;
	setp.ne.s32 	%p89, %r124, %r129;
	@%p89 bra 	$L_22_221698;
$Lt_22_254466:
$L_22_221954:
	.loc	20	57	0
	mov.u32 	%r130, 0;
	setp.ne.s32 	%p90, %r9, %r130;
	@%p90 bra 	$Lt_22_171778;
	.loc	20	59	0
	mov.f32 	%f118, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_25;
$Lt_22_171778:
	.loc	20	61	0
	mov.u32 	%r131, 64;
	setp.ne.s32 	%p91, %r9, %r131;
	@%p91 bra 	$Lt_22_172034;
	.loc	20	63	0
	mov.f32 	%f118, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_25;
$Lt_22_172034:
	.loc	20	65	0
	mov.u32 	%r132, 128;
	setp.ne.s32 	%p92, %r9, %r132;
	@%p92 bra 	$Lt_22_172290;
	.loc	20	68	0
	mov.f32 	%f118, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_25;
$Lt_22_172290:
	.loc	20	70	0
	mov.u32 	%r133, 192;
	setp.ne.s32 	%p93, %r9, %r133;
	@%p93 bra 	$Lt_22_172546;
	.loc	20	72	0
	mov.f32 	%f118, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_25;
$Lt_22_172546:
	.loc	20	76	0
	mov.f32 	%f118, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_25:
	.loc	20	536	0
	mov.f32 	%f9, %f118;
	bra.uni 	$Lt_22_239106;
$L_22_221698:
	.loc	20	540	0
	mov.s32 	%r134, 12;
	setp.eq.s32 	%p94, %r125, %r134;
	mov.s32 	%r135, 4;
	set.eq.u32.s32 	%r136, %r124, %r135;
	neg.s32 	%r137, %r136;
	selp.s32 	%r138, 1, 0, %p94;
	mov.s32 	%r139, 8;
	set.eq.u32.s32 	%r140, %r125, %r139;
	neg.s32 	%r141, %r140;
	or.b32 	%r142, %r138, %r141;
	and.b32 	%r143, %r137, %r142;
	mov.u32 	%r144, 0;
	setp.eq.s32 	%p95, %r143, %r144;
	@%p95 bra 	$Lt_22_239362;
	.loc	20	410	0
	mov.f32 	%f119, %f10;
	mov.f32 	%f120, %f119;
	mov.f32 	%f121, %f11;
	mov.f32 	%f122, %f121;
	mov.f32 	%f123, %f12;
	mov.f32 	%f124, %f123;
	.loc	20	57	0
	mov.s32 	%r145, 0;
	setp.eq.s32 	%p35, %r9, %r145;
	@!%p35 bra 	$Lt_22_173058;
	.loc	20	59	0
	mov.f32 	%f125, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_23;
$Lt_22_173058:
	.loc	20	61	0
	mov.u32 	%r146, 64;
	setp.ne.s32 	%p96, %r9, %r146;
	@%p96 bra 	$Lt_22_173314;
	.loc	20	63	0
	mov.f32 	%f125, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_23;
$Lt_22_173314:
	.loc	20	65	0
	mov.u32 	%r147, 128;
	setp.ne.s32 	%p97, %r9, %r147;
	@%p97 bra 	$Lt_22_173570;
	.loc	20	68	0
	mov.f32 	%f125, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_23;
$Lt_22_173570:
	.loc	20	70	0
	mov.u32 	%r148, 192;
	setp.ne.s32 	%p98, %r9, %r148;
	@%p98 bra 	$Lt_22_173826;
	.loc	20	72	0
	mov.f32 	%f125, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_23;
$Lt_22_173826:
	.loc	20	76	0
	mov.f32 	%f125, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_23:
	.loc	20	413	0
	mov.u32 	%r149, 0;
	setp.eq.s32 	%p99, %r18, %r149;
	@%p99 bra 	$Lt_22_239618;
	.loc	20	57	0
	@!%p35 bra 	$Lt_22_174338;
	.loc	20	59	0
	mov.f32 	%f126, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_21;
$Lt_22_174338:
	.loc	20	61	0
	mov.u32 	%r150, 64;
	setp.ne.s32 	%p100, %r9, %r150;
	@%p100 bra 	$Lt_22_174594;
	.loc	20	63	0
	mov.f32 	%f126, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_21;
$Lt_22_174594:
	.loc	20	65	0
	mov.u32 	%r151, 128;
	setp.ne.s32 	%p101, %r9, %r151;
	@%p101 bra 	$Lt_22_174850;
	.loc	20	68	0
	mov.f32 	%f126, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_21;
$Lt_22_174850:
	.loc	20	70	0
	mov.u32 	%r152, 192;
	setp.ne.s32 	%p102, %r9, %r152;
	@%p102 bra 	$Lt_22_175106;
	.loc	20	72	0
	mov.f32 	%f126, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_21;
$Lt_22_175106:
	.loc	20	76	0
	mov.f32 	%f126, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_21:
	.loc	20	118	0
	and.b32 	%r122, %r4, 2048;
	mov.s32 	%r153, 0;
	setp.ne.s32 	%p84, %r122, %r153;
	@!%p84 bra 	$Lt_22_240386;
	.loc	20	100	0
	ld.const.f32 	%f127, [kYCbCrOffset+0];
	bra.uni 	$Lt_22_240130;
$Lt_22_240386:
	ld.const.f32 	%f127, [kYCbCrFullRangeOffset+0];
$Lt_22_240130:
	.loc	20	118	0
	@!%p84 bra 	$Lt_22_240898;
	.loc	20	100	0
	ld.const.f32 	%f128, [kYCbCrOffset+4];
	bra.uni 	$Lt_22_240642;
$Lt_22_240898:
	ld.const.f32 	%f128, [kYCbCrFullRangeOffset+4];
$Lt_22_240642:
	.loc	20	118	0
	@!%p84 bra 	$Lt_22_241410;
	.loc	20	100	0
	ld.const.f32 	%f129, [kYCbCrOffset+8];
	bra.uni 	$Lt_22_241154;
$Lt_22_241410:
	ld.const.f32 	%f129, [kYCbCrFullRangeOffset+8];
$Lt_22_241154:
	.loc	20	415	0
	mov.f32 	%f130, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f131, %f126, %f130;
	mul.ftz.f32 	%f132, %f131, %f127;
	sub.ftz.f32 	%f120, %f119, %f132;
	mul.ftz.f32 	%f133, %f131, %f128;
	sub.ftz.f32 	%f122, %f121, %f133;
	mul.ftz.f32 	%f134, %f131, %f129;
	sub.ftz.f32 	%f124, %f123, %f134;
$Lt_22_239618:
	.loc	20	418	0
	rcp.approx.ftz.f32 	%f135, %f125;
	mul.ftz.f32 	%f136, %f135, %f9;
	mul.ftz.f32 	%f120, %f136, %f120;
	.loc	20	419	0
	mul.ftz.f32 	%f122, %f136, %f122;
	.loc	20	420	0
	mul.ftz.f32 	%f124, %f136, %f124;
	.loc	20	422	0
	mov.u32 	%r154, 0;
	setp.eq.s32 	%p103, %r18, %r154;
	@%p103 bra 	$Lt_22_241666;
	.loc	20	57	0
	@!%p35 bra 	$Lt_22_177410;
	.loc	20	59	0
	mov.f32 	%f137, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_17;
$Lt_22_177410:
	.loc	20	61	0
	mov.u32 	%r155, 64;
	setp.ne.s32 	%p104, %r9, %r155;
	@%p104 bra 	$Lt_22_177666;
	.loc	20	63	0
	mov.f32 	%f137, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_17;
$Lt_22_177666:
	.loc	20	65	0
	mov.u32 	%r156, 128;
	setp.ne.s32 	%p105, %r9, %r156;
	@%p105 bra 	$Lt_22_177922;
	.loc	20	68	0
	mov.f32 	%f137, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_17;
$Lt_22_177922:
	.loc	20	70	0
	mov.u32 	%r157, 192;
	setp.ne.s32 	%p106, %r9, %r157;
	@%p106 bra 	$Lt_22_178178;
	.loc	20	72	0
	mov.f32 	%f137, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_17;
$Lt_22_178178:
	.loc	20	76	0
	mov.f32 	%f137, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_17:
	.loc	20	107	0
	and.b32 	%r122, %r4, 2048;
	mov.s32 	%r158, 0;
	setp.ne.s32 	%p84, %r122, %r158;
	@!%p84 bra 	$Lt_22_242434;
	.loc	20	100	0
	ld.const.f32 	%f138, [kYCbCrOffset+0];
	bra.uni 	$Lt_22_242178;
$Lt_22_242434:
	ld.const.f32 	%f138, [kYCbCrFullRangeOffset+0];
$Lt_22_242178:
	.loc	20	107	0
	@!%p84 bra 	$Lt_22_242946;
	.loc	20	100	0
	ld.const.f32 	%f139, [kYCbCrOffset+4];
	bra.uni 	$Lt_22_242690;
$Lt_22_242946:
	ld.const.f32 	%f139, [kYCbCrFullRangeOffset+4];
$Lt_22_242690:
	.loc	20	107	0
	@!%p84 bra 	$Lt_22_243458;
	.loc	20	100	0
	ld.const.f32 	%f140, [kYCbCrOffset+8];
	bra.uni 	$Lt_22_243202;
$Lt_22_243458:
	ld.const.f32 	%f140, [kYCbCrFullRangeOffset+8];
$Lt_22_243202:
	.loc	20	424	0
	mov.f32 	%f141, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f142, %f137, %f141;
	fma.rn.ftz.f32 	%f120, %f142, %f138, %f120;
	fma.rn.ftz.f32 	%f122, %f142, %f139, %f122;
	fma.rn.ftz.f32 	%f124, %f142, %f140, %f124;
$Lt_22_241666:
	.loc	20	543	0
	mov.f32 	%f10, %f120;
	mov.f32 	%f11, %f122;
	mov.f32 	%f12, %f124;
	@!%p94 bra 	$Lt_22_239106;
	.loc	20	57	0
	@!%p35 bra 	$Lt_22_180482;
	.loc	20	59	0
	mov.f32 	%f118, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_13;
$Lt_22_180482:
	.loc	20	61	0
	mov.u32 	%r159, 64;
	setp.ne.s32 	%p107, %r9, %r159;
	@%p107 bra 	$Lt_22_180738;
	.loc	20	63	0
	mov.f32 	%f118, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_13;
$Lt_22_180738:
	.loc	20	65	0
	mov.u32 	%r160, 128;
	setp.ne.s32 	%p108, %r9, %r160;
	@%p108 bra 	$Lt_22_180994;
	.loc	20	68	0
	mov.f32 	%f118, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_13;
$Lt_22_180994:
	.loc	20	70	0
	mov.u32 	%r161, 192;
	setp.ne.s32 	%p109, %r9, %r161;
	@%p109 bra 	$Lt_22_181250;
	.loc	20	72	0
	mov.f32 	%f118, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_13;
$Lt_22_181250:
	.loc	20	76	0
	mov.f32 	%f118, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_13:
	.loc	20	546	0
	mov.f32 	%f9, %f118;
	bra.uni 	$Lt_22_239106;
$Lt_22_239362:
	.loc	20	433	0
	mov.f32 	%f143, %f9;
	mov.f32 	%f144, %f10;
	mov.f32 	%f145, %f144;
	mov.f32 	%f146, %f11;
	mov.f32 	%f147, %f146;
	mov.f32 	%f148, %f12;
	mov.f32 	%f149, %f148;
	.loc	20	435	0
	mov.u32 	%r162, 0;
	setp.eq.s32 	%p110, %r18, %r162;
	@%p110 bra 	$Lt_22_244226;
	.loc	20	57	0
	mov.u32 	%r163, 0;
	setp.ne.s32 	%p111, %r9, %r163;
	@%p111 bra 	$Lt_22_181762;
	.loc	20	59	0
	mov.f32 	%f150, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_11;
$Lt_22_181762:
	.loc	20	61	0
	mov.u32 	%r164, 64;
	setp.ne.s32 	%p112, %r9, %r164;
	@%p112 bra 	$Lt_22_182018;
	.loc	20	63	0
	mov.f32 	%f150, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_11;
$Lt_22_182018:
	.loc	20	65	0
	mov.u32 	%r165, 128;
	setp.ne.s32 	%p113, %r9, %r165;
	@%p113 bra 	$Lt_22_182274;
	.loc	20	68	0
	mov.f32 	%f150, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_11;
$Lt_22_182274:
	.loc	20	70	0
	mov.u32 	%r166, 192;
	setp.ne.s32 	%p114, %r9, %r166;
	@%p114 bra 	$Lt_22_182530;
	.loc	20	72	0
	mov.f32 	%f150, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_11;
$Lt_22_182530:
	.loc	20	76	0
	mov.f32 	%f150, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_11:
	.loc	20	118	0
	and.b32 	%r122, %r4, 2048;
	mov.s32 	%r167, 0;
	setp.ne.s32 	%p84, %r122, %r167;
	@!%p84 bra 	$Lt_22_244994;
	.loc	20	100	0
	ld.const.f32 	%f151, [kYCbCrOffset+0];
	bra.uni 	$Lt_22_244738;
$Lt_22_244994:
	ld.const.f32 	%f151, [kYCbCrFullRangeOffset+0];
$Lt_22_244738:
	.loc	20	118	0
	@!%p84 bra 	$Lt_22_245506;
	.loc	20	100	0
	ld.const.f32 	%f152, [kYCbCrOffset+4];
	bra.uni 	$Lt_22_245250;
$Lt_22_245506:
	ld.const.f32 	%f152, [kYCbCrFullRangeOffset+4];
$Lt_22_245250:
	.loc	20	118	0
	@!%p84 bra 	$Lt_22_246018;
	.loc	20	100	0
	ld.const.f32 	%f153, [kYCbCrOffset+8];
	bra.uni 	$Lt_22_245762;
$Lt_22_246018:
	ld.const.f32 	%f153, [kYCbCrFullRangeOffset+8];
$Lt_22_245762:
	.loc	20	437	0
	mov.f32 	%f154, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f155, %f150, %f154;
	mul.ftz.f32 	%f156, %f155, %f151;
	sub.ftz.f32 	%f145, %f144, %f156;
	mul.ftz.f32 	%f157, %f155, %f152;
	sub.ftz.f32 	%f147, %f146, %f157;
	mul.ftz.f32 	%f158, %f155, %f153;
	sub.ftz.f32 	%f149, %f148, %f158;
$Lt_22_244226:
	mov.f32 	%f159, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f160, %f9, %f159;
	mov.f32 	%f161, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p115, %f160, %f161;
	@!%p115 bra 	$Lt_22_246530;
	mov.f32 	%f149, 0f00000000;   	// 0
	mov.f32 	%f147, 0f00000000;   	// 0
	mov.f32 	%f145, 0f00000000;   	// 0
	mov.f32 	%f143, 0f00000000;   	// 0
	bra.uni 	$Lt_22_246274;
$Lt_22_246530:
	.loc	20	57	0
	mov.u32 	%r168, 0;
	setp.ne.s32 	%p116, %r9, %r168;
	@%p116 bra 	$Lt_22_184834;
	.loc	20	59	0
	mov.f32 	%f162, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_7;
$Lt_22_184834:
	.loc	20	61	0
	mov.u32 	%r169, 64;
	setp.ne.s32 	%p117, %r9, %r169;
	@%p117 bra 	$Lt_22_185090;
	.loc	20	63	0
	mov.f32 	%f162, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_7;
$Lt_22_185090:
	.loc	20	65	0
	mov.u32 	%r170, 128;
	setp.ne.s32 	%p118, %r9, %r170;
	@%p118 bra 	$Lt_22_185346;
	.loc	20	68	0
	mov.f32 	%f162, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_7;
$Lt_22_185346:
	.loc	20	70	0
	mov.u32 	%r171, 192;
	setp.ne.s32 	%p119, %r9, %r171;
	@%p119 bra 	$Lt_22_185602;
	.loc	20	72	0
	mov.f32 	%f162, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_7;
$Lt_22_185602:
	.loc	20	76	0
	mov.f32 	%f162, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_7:
	.loc	20	447	0
	div.approx.ftz.f32 	%f163, %f162, %f9;
	mul.ftz.f32 	%f145, %f163, %f145;
	.loc	20	448	0
	mul.ftz.f32 	%f147, %f163, %f147;
	.loc	20	449	0
	mul.ftz.f32 	%f149, %f163, %f149;
$Lt_22_246274:
	.loc	20	452	0
	mov.u32 	%r172, 0;
	setp.eq.s32 	%p120, %r18, %r172;
	@%p120 bra 	$Lt_22_246786;
	.loc	20	57	0
	mov.u32 	%r173, 0;
	setp.ne.s32 	%p121, %r9, %r173;
	@%p121 bra 	$Lt_22_186114;
	.loc	20	59	0
	mov.f32 	%f164, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_5;
$Lt_22_186114:
	.loc	20	61	0
	mov.u32 	%r174, 64;
	setp.ne.s32 	%p122, %r9, %r174;
	@%p122 bra 	$Lt_22_186370;
	.loc	20	63	0
	mov.f32 	%f164, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_5;
$Lt_22_186370:
	.loc	20	65	0
	mov.u32 	%r175, 128;
	setp.ne.s32 	%p123, %r9, %r175;
	@%p123 bra 	$Lt_22_186626;
	.loc	20	68	0
	mov.f32 	%f164, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_5;
$Lt_22_186626:
	.loc	20	70	0
	mov.u32 	%r176, 192;
	setp.ne.s32 	%p124, %r9, %r176;
	@%p124 bra 	$Lt_22_186882;
	.loc	20	72	0
	mov.f32 	%f164, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_5;
$Lt_22_186882:
	.loc	20	76	0
	mov.f32 	%f164, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_5:
	.loc	20	107	0
	and.b32 	%r122, %r4, 2048;
	mov.s32 	%r177, 0;
	setp.ne.s32 	%p84, %r122, %r177;
	@!%p84 bra 	$Lt_22_247554;
	.loc	20	100	0
	ld.const.f32 	%f165, [kYCbCrOffset+0];
	bra.uni 	$Lt_22_247298;
$Lt_22_247554:
	ld.const.f32 	%f165, [kYCbCrFullRangeOffset+0];
$Lt_22_247298:
	.loc	20	107	0
	@!%p84 bra 	$Lt_22_248066;
	.loc	20	100	0
	ld.const.f32 	%f166, [kYCbCrOffset+4];
	bra.uni 	$Lt_22_247810;
$Lt_22_248066:
	ld.const.f32 	%f166, [kYCbCrFullRangeOffset+4];
$Lt_22_247810:
	.loc	20	107	0
	@!%p84 bra 	$Lt_22_248578;
	.loc	20	100	0
	ld.const.f32 	%f167, [kYCbCrOffset+8];
	bra.uni 	$Lt_22_248322;
$Lt_22_248578:
	ld.const.f32 	%f167, [kYCbCrFullRangeOffset+8];
$Lt_22_248322:
	.loc	20	454	0
	mov.f32 	%f168, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f169, %f164, %f168;
	fma.rn.ftz.f32 	%f145, %f169, %f165, %f145;
	fma.rn.ftz.f32 	%f147, %f169, %f166, %f147;
	fma.rn.ftz.f32 	%f149, %f169, %f167, %f149;
$Lt_22_246786:
	.loc	20	551	0
	mov.f32 	%f9, %f143;
	mov.f32 	%f10, %f145;
	mov.f32 	%f11, %f147;
	mov.f32 	%f12, %f149;
$Lt_22_239106:
$L_22_221442:
$Lt_22_238594:
	.loc	20	540	0
	and.b32 	%r178, %r4, 4096;
	mov.u32 	%r179, 0;
	setp.ne.s32 	%p125, %r178, %r179;
	@%p125 bra 	$Lt_22_248834;
	.loc	21	268	0
	mov.f32 	%f170, %f10;
	.loc	21	269	0
	mov.f32 	%f171, %f9;
	.loc	20	558	0
	mov.f32 	%f9, %f12;
	mov.f32 	%f10, %f11;
	mov.f32 	%f11, %f170;
	mov.f32 	%f12, %f171;
$Lt_22_248834:
	@!%p3 bra 	$Lt_22_249346;
	.loc	20	57	0
	mov.u32 	%r180, 0;
	setp.ne.s32 	%p126, %r9, %r180;
	@%p126 bra 	$Lt_22_189442;
	.loc	20	59	0
	mov.f32 	%f172, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_1;
$Lt_22_189442:
	.loc	20	61	0
	mov.u32 	%r181, 64;
	setp.ne.s32 	%p127, %r9, %r181;
	@%p127 bra 	$Lt_22_189698;
	.loc	20	63	0
	mov.f32 	%f172, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_1;
$Lt_22_189698:
	.loc	20	65	0
	mov.u32 	%r182, 128;
	setp.ne.s32 	%p128, %r9, %r182;
	@%p128 bra 	$Lt_22_189954;
	.loc	20	68	0
	mov.f32 	%f172, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_1;
$Lt_22_189954:
	.loc	20	70	0
	mov.u32 	%r183, 192;
	setp.ne.s32 	%p129, %r9, %r183;
	@%p129 bra 	$Lt_22_190210;
	.loc	20	72	0
	mov.f32 	%f172, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_199_1;
$Lt_22_190210:
	.loc	20	76	0
	mov.f32 	%f172, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_199_1:
	.loc	20	564	0
	mov.f32 	%f173, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f174, %f9, %f173;
	mov.f32 	%f175, 0f00000000;   	// 0
	mov.f32 	%f176, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p130, %f174, %f176;
	selp.f32 	%f177, %f174, %f175, %p130;
	min.ftz.f32 	%f9, %f177, %f172;
	mov.f32 	%f178, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f179, %f10, %f178;
	mov.f32 	%f180, 0f00000000;   	// 0
	mov.f32 	%f181, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p131, %f179, %f181;
	selp.f32 	%f182, %f179, %f180, %p131;
	min.ftz.f32 	%f10, %f182, %f172;
	mov.f32 	%f183, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f184, %f11, %f183;
	mov.f32 	%f185, 0f00000000;   	// 0
	mov.f32 	%f186, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p132, %f184, %f186;
	selp.f32 	%f187, %f184, %f185, %p132;
	min.ftz.f32 	%f11, %f187, %f172;
	mov.f32 	%f188, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f189, %f12, %f188;
	mov.f32 	%f190, 0f00000000;   	// 0
	mov.f32 	%f191, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p133, %f189, %f191;
	selp.f32 	%f192, %f189, %f190, %p133;
	min.ftz.f32 	%f12, %f192, %f172;
$Lt_22_249346:
	.loc	20	567	0
	mov.f32 	%f193, %f9;
	st.param.f32 	[__cudaretf__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_+0], %f193;
	mov.f32 	%f194, %f10;
	st.param.f32 	[__cudaretf__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_+4], %f194;
	mov.f32 	%f195, %f11;
	st.param.f32 	[__cudaretf__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_+8], %f195;
	mov.f32 	%f196, %f12;
	st.param.f32 	[__cudaretf__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_+12], %f196;
	ret;
$LDWend__Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_:
	} // _Z23ConvertPixel_444_To_4446float414IR_PixelFormatS0_

	// float4 SwapComponentOrder<float4>(const float4&)  (demangled symbol name).
	// Reads four consecutive f32 values through the pointer parameter and returns
	// them reversed: result bytes +0,+4,+8,+12 <- source bytes +12,+8,+4,+0.
	.visible .func (.param .align 16 .b8 __cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_[16]) _Z18SwapComponentOrderI6float4ET_RKS1_ (.param .u64 __cudaparmf1__Z18SwapComponentOrderI6float4ET_RKS1_)
	{
	.reg .u64 	%src;
	.reg .f32 	%e0, %e1, %e2, %e3;
	.loc	21	264	0
$LDWbegin__Z18SwapComponentOrderI6float4ET_RKS1_:
	ld.param.u64 	%src, [__cudaparmf1__Z18SwapComponentOrderI6float4ET_RKS1_];
	.loc	21	270	0
	// Gather the four source elements (ld with no state-space qualifier).
	ld.f32 	%e0, [%src+0];
	ld.f32 	%e1, [%src+4];
	ld.f32 	%e2, [%src+8];
	ld.f32 	%e3, [%src+12];
	// Write them to the return buffer back to front.
	st.param.f32 	[__cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_+0], %e3;
	st.param.f32 	[__cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_+4], %e2;
	st.param.f32 	[__cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_+8], %e1;
	st.param.f32 	[__cudaretf__Z18SwapComponentOrderI6float4ET_RKS1_+12], %e0;
	ret;
$LDWend__Z18SwapComponentOrderI6float4ET_RKS1_:
	} // _Z18SwapComponentOrderI6float4ET_RKS1_

	// float4 operator+<float4>(const float4&, float)  (demangled symbol name).
	// Adds the scalar parameter to each of the four f32 elements read through the
	// pointer parameter and returns the four sums in the same element order.
	.visible .func (.param .align 16 .b8 __cudaretf__ZplI6float4ET_RKS1_f[16]) _ZplI6float4ET_RKS1_f (.param .u64 __cudaparmf1__ZplI6float4ET_RKS1_f, .param .f32 __cudaparmf2__ZplI6float4ET_RKS1_f)
	{
	.reg .u64 	%vec;
	.reg .f32 	%addend;
	.reg .f32 	%v0, %v1, %v2, %v3;
	.reg .f32 	%s0, %s1, %s2, %s3;
	.loc	21	113	0
$LDWbegin__ZplI6float4ET_RKS1_f:
	ld.param.u64 	%vec, [__cudaparmf1__ZplI6float4ET_RKS1_f];
	ld.param.f32 	%addend, [__cudaparmf2__ZplI6float4ET_RKS1_f];
	// A single 16-byte vector load fetches all four elements.
	ld.v4.f32 	{%v0,%v1,%v2,%v3}, [%vec+0];
	.loc	21	116	0
	// Element-wise add; .ftz flushes subnormal inputs and results to zero.
	add.ftz.f32 	%s0, %v0, %addend;
	add.ftz.f32 	%s1, %v1, %addend;
	add.ftz.f32 	%s2, %v2, %addend;
	add.ftz.f32 	%s3, %v3, %addend;
	st.param.f32 	[__cudaretf__ZplI6float4ET_RKS1_f+0], %s0;
	st.param.f32 	[__cudaretf__ZplI6float4ET_RKS1_f+4], %s1;
	st.param.f32 	[__cudaretf__ZplI6float4ET_RKS1_f+8], %s2;
	st.param.f32 	[__cudaretf__ZplI6float4ET_RKS1_f+12], %s3;
	ret;
$LDWend__ZplI6float4ET_RKS1_f:
	} // _ZplI6float4ET_RKS1_f

	// float4 ClampComponents<float4>(const float4&, float, float)  (demangled symbol name).
	// For each of the four f32 elements read through the pointer parameter:
	//   floored = (element > param2) ? element : param2
	//   result  = min(floored, param3)
	// so param2 acts as the lower bound and param3 as the upper bound.
	.visible .func (.param .align 16 .b8 __cudaretf__Z15ClampComponentsI6float4ET_RKS1_ff[16]) _Z15ClampComponentsI6float4ET_RKS1_ff (.param .u64 __cudaparmf1__Z15ClampComponentsI6float4ET_RKS1_ff, .param .f32 __cudaparmf2__Z15ClampComponentsI6float4ET_RKS1_ff, .param .f32 __cudaparmf3__Z15ClampComponentsI6float4ET_RKS1_ff)
	{
	.reg .u64 	%src;
	.reg .f32 	%lo, %hi;
	.reg .f32 	%x0, %x1, %x2, %x3;
	.reg .f32 	%floored, %clamped;
	.reg .pred 	%above;
	.loc	21	342	0
$LDWbegin__Z15ClampComponentsI6float4ET_RKS1_ff:
	ld.param.u64 	%src, [__cudaparmf1__Z15ClampComponentsI6float4ET_RKS1_ff];
	ld.param.f32 	%lo, [__cudaparmf2__Z15ClampComponentsI6float4ET_RKS1_ff];
	ld.param.f32 	%hi, [__cudaparmf3__Z15ClampComponentsI6float4ET_RKS1_ff];
	.loc	21	344	0
	ld.f32 	%x0, [%src+0];
	.loc	21	345	0
	ld.f32 	%x1, [%src+4];
	.loc	21	346	0
	ld.f32 	%x2, [%src+8];
	.loc	21	347	0
	ld.f32 	%x3, [%src+12];
	.loc	21	348	0
	// The lower bound is applied with compare + select (not max): when the
	// element is NaN the ordered compare is false and %lo is selected.
	// Element at byte offset 0.
	setp.gt.ftz.f32 	%above, %x0, %lo;
	selp.f32 	%floored, %x0, %lo, %above;
	min.ftz.f32 	%clamped, %floored, %hi;
	st.param.f32 	[__cudaretf__Z15ClampComponentsI6float4ET_RKS1_ff+0], %clamped;
	// Element at byte offset 4.
	setp.gt.ftz.f32 	%above, %x1, %lo;
	selp.f32 	%floored, %x1, %lo, %above;
	min.ftz.f32 	%clamped, %floored, %hi;
	st.param.f32 	[__cudaretf__Z15ClampComponentsI6float4ET_RKS1_ff+4], %clamped;
	// Element at byte offset 8.
	setp.gt.ftz.f32 	%above, %x2, %lo;
	selp.f32 	%floored, %x2, %lo, %above;
	min.ftz.f32 	%clamped, %floored, %hi;
	st.param.f32 	[__cudaretf__Z15ClampComponentsI6float4ET_RKS1_ff+8], %clamped;
	// Element at byte offset 12.
	setp.gt.ftz.f32 	%above, %x3, %lo;
	selp.f32 	%floored, %x3, %lo, %above;
	min.ftz.f32 	%clamped, %floored, %hi;
	st.param.f32 	[__cudaretf__Z15ClampComponentsI6float4ET_RKS1_ff+12], %clamped;
	ret;
$LDWend__Z15ClampComponentsI6float4ET_RKS1_ff:
	} // _Z15ClampComponentsI6float4ET_RKS1_ff

	// ushort4 Read2D<ushort4>(const ushort4*, int, int, int)  (demangled symbol name).
	// Loads one 8-byte element (four u16 values) from
	//   base + 8 * (param2 * param4 + param3)
	// and returns it. The element index is computed in wrapping 32-bit
	// arithmetic and then sign-extended when scaled to a byte offset.
	// The dead cvt.s64.s32 of the index (result never read) and the redundant
	// parameter copies emitted by the compiler have been removed.
	.visible .func (.param .align 8 .b8 __cudaretf__Z6Read2DI7ushort4ET_PKS1_iii[8]) _Z6Read2DI7ushort4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI7ushort4ET_PKS1_iii)
	{
	.reg .u32 	%a2, %a3, %a4, %index;
	.reg .u32 	%h0, %h1, %h2, %h3;
	.reg .u64 	%base, %addr;
	.loc	19	114	0
$LDWbegin__Z6Read2DI7ushort4ET_PKS1_iii:
	ld.param.u64 	%base, [__cudaparmf1__Z6Read2DI7ushort4ET_PKS1_iii];
	ld.param.u32 	%a2, [__cudaparmf2__Z6Read2DI7ushort4ET_PKS1_iii];
	ld.param.u32 	%a3, [__cudaparmf3__Z6Read2DI7ushort4ET_PKS1_iii];
	ld.param.u32 	%a4, [__cudaparmf4__Z6Read2DI7ushort4ET_PKS1_iii];
	.loc	19	115	0
	// index = param2 * param4 + param3 (low 32 bits).
	mad.lo.s32 	%index, %a2, %a4, %a3;
	// addr = base + sign_extend(index) * 8 (8 bytes per element).
	mad.wide.s32 	%addr, %index, 8, %base;
	// The u16 lanes are zero-extended into 32-bit registers.
	ld.v4.u16 	{%h0,%h1,%h2,%h3}, [%addr+0];
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+0], %h0;
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+2], %h1;
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+4], %h2;
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+6], %h3;
	ret;
$LDWend__Z6Read2DI7ushort4ET_PKS1_iii:
	} // _Z6Read2DI7ushort4ET_PKS1_iii

	// float4 Read2D<float4>(const float4*, int, int, int)  (demangled symbol name).
	// Loads one 16-byte element (four f32 values) from
	//   base + 16 * (param2 * param4 + param3)
	// and returns it. The element index is computed in wrapping 32-bit
	// arithmetic and then sign-extended when scaled to a byte offset.
	// The dead cvt.s64.s32 of the index (result never read) and the redundant
	// parameter copies emitted by the compiler have been removed.
	.visible .func (.param .align 16 .b8 __cudaretf__Z6Read2DI6float4ET_PKS1_iii[16]) _Z6Read2DI6float4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI6float4ET_PKS1_iii)
	{
	.reg .u32 	%a2, %a3, %a4, %index;
	.reg .u64 	%base, %addr;
	.reg .f32 	%v0, %v1, %v2, %v3;
	.loc	19	114	0
$LDWbegin__Z6Read2DI6float4ET_PKS1_iii:
	ld.param.u64 	%base, [__cudaparmf1__Z6Read2DI6float4ET_PKS1_iii];
	ld.param.u32 	%a2, [__cudaparmf2__Z6Read2DI6float4ET_PKS1_iii];
	ld.param.u32 	%a3, [__cudaparmf3__Z6Read2DI6float4ET_PKS1_iii];
	ld.param.u32 	%a4, [__cudaparmf4__Z6Read2DI6float4ET_PKS1_iii];
	.loc	19	115	0
	// index = param2 * param4 + param3 (low 32 bits).
	mad.lo.s32 	%index, %a2, %a4, %a3;
	// addr = base + sign_extend(index) * 16 (16 bytes per element).
	mad.wide.s32 	%addr, %index, 16, %base;
	ld.v4.f32 	{%v0,%v1,%v2,%v3}, [%addr+0];
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+0], %v0;
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+4], %v1;
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+8], %v2;
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+12], %v3;
	ret;
$LDWend__Z6Read2DI6float4ET_PKS1_iii:
	} // _Z6Read2DI6float4ET_PKS1_iii

	// void Write2D<ushort4>(ushort4, ushort4*, int, int, int)  (demangled symbol name).
	// Stores the 8-byte value in param1 (four u16 lanes) to
	//   base + 8 * (param3 * param5 + param4)
	// where base is param2. The element index is computed in wrapping 32-bit
	// arithmetic and then sign-extended when scaled to a byte offset.
	// The dead cvt.s64.s32 of the index (result never read) and the redundant
	// parameter copies emitted by the compiler have been removed.
	.visible .func _Z7Write2DI7ushort4EvT_PS1_iii (.param .align 8 .b8 __cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii[8], .param .u64 __cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii)
	{
	.reg .u32 	%h0, %h1, %h2, %h3;
	.reg .u32 	%a3, %a4, %a5, %index;
	.reg .u64 	%base, %addr;
	.loc	19	125	0
$LDWbegin__Z7Write2DI7ushort4EvT_PS1_iii:
	// The four u16 lanes of the by-value argument are zero-extended into 32-bit registers.
	ld.param.u16 	%h0, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+0];
	ld.param.u16 	%h1, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+2];
	ld.param.u16 	%h2, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+4];
	ld.param.u16 	%h3, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+6];
	ld.param.u64 	%base, [__cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii];
	ld.param.u32 	%a3, [__cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii];
	ld.param.u32 	%a4, [__cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii];
	ld.param.u32 	%a5, [__cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii];
	.loc	19	126	0
	// index = param3 * param5 + param4 (low 32 bits).
	mad.lo.s32 	%index, %a3, %a5, %a4;
	// addr = base + sign_extend(index) * 8 (8 bytes per element).
	mad.wide.s32 	%addr, %index, 8, %base;
	st.v4.u16 	[%addr+0], {%h0,%h1,%h2,%h3};
	.loc	19	127	0
	ret;
$LDWend__Z7Write2DI7ushort4EvT_PS1_iii:
	} // _Z7Write2DI7ushort4EvT_PS1_iii

	// void Write2D<float4>(float4, float4*, int, int, int)  (demangled symbol name).
	// Stores the 16-byte value in param1 (four f32 lanes) to
	//   base + 16 * (param3 * param5 + param4)
	// where base is param2. The element index is computed in wrapping 32-bit
	// arithmetic and then sign-extended when scaled to a byte offset.
	// The dead cvt.s64.s32 of the index (result never read) and the redundant
	// parameter copies emitted by the compiler have been removed.
	.visible .func _Z7Write2DI6float4EvT_PS1_iii (.param .align 16 .b8 __cudaparmf1__Z7Write2DI6float4EvT_PS1_iii[16], .param .u64 __cudaparmf2__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI6float4EvT_PS1_iii)
	{
	.reg .u32 	%a3, %a4, %a5, %index;
	.reg .u64 	%base, %addr;
	.reg .f32 	%v0, %v1, %v2, %v3;
	.loc	19	125	0
$LDWbegin__Z7Write2DI6float4EvT_PS1_iii:
	ld.param.f32 	%v0, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+0];
	ld.param.f32 	%v1, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+4];
	ld.param.f32 	%v2, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+8];
	ld.param.f32 	%v3, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+12];
	ld.param.u64 	%base, [__cudaparmf2__Z7Write2DI6float4EvT_PS1_iii];
	ld.param.u32 	%a3, [__cudaparmf3__Z7Write2DI6float4EvT_PS1_iii];
	ld.param.u32 	%a4, [__cudaparmf4__Z7Write2DI6float4EvT_PS1_iii];
	ld.param.u32 	%a5, [__cudaparmf5__Z7Write2DI6float4EvT_PS1_iii];
	.loc	19	126	0
	// index = param3 * param5 + param4 (low 32 bits).
	mad.lo.s32 	%index, %a3, %a5, %a4;
	// addr = base + sign_extend(index) * 16 (16 bytes per element).
	mad.wide.s32 	%addr, %index, 16, %base;
	st.v4.f32 	[%addr+0], {%v0,%v1,%v2,%v3};
	.loc	19	127	0
	ret;
$LDWend__Z7Write2DI6float4EvT_PS1_iii:
	} // _Z7Write2DI6float4EvT_PS1_iii

	// UnpremultiplyPixel(PixelRGB)  (demangled symbol name); 16-byte value in, 16-byte value out.
	// The f32 at byte offset 12 is saturated to [0,1] and used as the divisor:
	//   if (sat - 8e-6 <= 0)  all four output floats are 0
	//   else                  out[+0,+4,+8] = in[+0,+4,+8] * rcp.approx(sat), out[+12] = sat
	// NOTE(review): offset 12 is presumably the alpha channel -- the field names
	// of PixelRGB are not visible here.
	.visible .func (.param .align 16 .b8 __cudaretf__Z18UnpremultiplyPixel8PixelRGB[16]) _Z18UnpremultiplyPixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z18UnpremultiplyPixel8PixelRGB[16])
	{
	.reg .f32 	%in0, %in1, %in2, %in3;
	.reg .f32 	%divisor, %margin, %inv;
	.reg .f32 	%out0, %out1, %out2, %out3;
	.reg .pred 	%tiny;
	.loc	4	206	0
$LDWbegin__Z18UnpremultiplyPixel8PixelRGB:
	ld.param.f32 	%in0, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+0];
	ld.param.f32 	%in1, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+4];
	ld.param.f32 	%in2, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+8];
	ld.param.f32 	%in3, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+12];
	.loc	4	208	0
	cvt.ftz.sat.f32.f32 	%divisor, %in3;
	add.ftz.f32 	%margin, %divisor, 0fb70637bd;	// divisor - 8e-006
	setp.le.ftz.f32 	%tiny, %margin, 0f00000000;
	.loc	4	219	0
	// Start from the all-zero result; it is kept when the divisor is too small.
	mov.f32 	%out0, 0f00000000;
	mov.f32 	%out1, 0f00000000;
	mov.f32 	%out2, 0f00000000;
	mov.f32 	%out3, 0f00000000;
	@%tiny bra 	$Lt_30_1026;
	.loc	4	213	0
	// Divide via an approximate reciprocal followed by multiplies.
	rcp.approx.ftz.f32 	%inv, %divisor;
	mul.ftz.f32 	%out2, %inv, %in2;
	.loc	4	214	0
	mul.ftz.f32 	%out1, %inv, %in1;
	.loc	4	215	0
	mul.ftz.f32 	%out0, %inv, %in0;
	mov.f32 	%out3, %divisor;
$Lt_30_1026:
	.loc	4	224	0
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+0], %out0;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+4], %out1;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+8], %out2;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+12], %out3;
	ret;
$LDWend__Z18UnpremultiplyPixel8PixelRGB:
	} // _Z18UnpremultiplyPixel8PixelRGB

	// float ToLinearColor(float)  (demangled symbol name).
	// Sign-preserving power curve with exponent 2.2, built from approximate
	// log2/exp2:
	//   x <  0 : -exp2(2.2 * log2(-x))
	//   else   :  exp2(2.2 * log2(x))
	.visible .func (.param .f32 __cudaretf__Z13ToLinearColorf) _Z13ToLinearColorf (.param .f32 __cudaparmf1__Z13ToLinearColorf)
	{
	.reg .f32 	%x, %mag, %lg, %scaled, %pw;
	.reg .pred 	%neg;
	.loc	4	231	0
$LDWbegin__Z13ToLinearColorf:
	ld.param.f32 	%x, [__cudaparmf1__Z13ToLinearColorf];
	setp.lt.ftz.f32 	%neg, %x, 0f00000000;
	// Operand fed to log2: -x on the negative path, x unchanged otherwise.
	mov.f32 	%mag, %x;
	.loc	4	234	0
	@%neg neg.ftz.f32 	%mag, %x;
	.loc	4	236	0
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%scaled, %lg, 0f400ccccd;	// 2.2
	ex2.approx.ftz.f32 	%pw, %scaled;
	.loc	4	234	0
	// Restore the sign for negative inputs.
	@%neg neg.ftz.f32 	%pw, %pw;
$LBB4__Z13ToLinearColorf:
	st.param.f32 	[__cudaretf__Z13ToLinearColorf], %pw;
	ret;
$LDWend__Z13ToLinearColorf:
	} // _Z13ToLinearColorf

	// float FromLinearColor(float)  (demangled symbol name).
	// Sign-preserving power curve with exponent 0.454545, built from
	// approximate log2/exp2:
	//   x <  0 : -exp2(0.454545 * log2(-x))
	//   else   :  exp2(0.454545 * log2(x))
	.visible .func (.param .f32 __cudaretf__Z15FromLinearColorf) _Z15FromLinearColorf (.param .f32 __cudaparmf1__Z15FromLinearColorf)
	{
	.reg .f32 	%x, %mag, %lg, %scaled, %pw;
	.reg .pred 	%neg;
	.loc	4	239	0
$LDWbegin__Z15FromLinearColorf:
	ld.param.f32 	%x, [__cudaparmf1__Z15FromLinearColorf];
	setp.lt.ftz.f32 	%neg, %x, 0f00000000;
	// Operand fed to log2: -x on the negative path, x unchanged otherwise.
	mov.f32 	%mag, %x;
	.loc	4	242	0
	@%neg neg.ftz.f32 	%mag, %x;
	.loc	4	244	0
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%scaled, %lg, 0f3ee8ba2e;	// 0.454545
	ex2.approx.ftz.f32 	%pw, %scaled;
	.loc	4	242	0
	// Restore the sign for negative inputs.
	@%neg neg.ftz.f32 	%pw, %pw;
$LBB4__Z15FromLinearColorf:
	st.param.f32 	[__cudaretf__Z15FromLinearColorf], %pw;
	ret;
$LDWend__Z15FromLinearColorf:
	} // _Z15FromLinearColorf

	// PremultiplyLinearizePixel(PixelRGB)  (demangled symbol name); 16-byte value in, 16-byte value out.
	// The f32 at byte offset 12 is saturated to [0,1] (call it s). Each of the
	// floats at offsets 0, 4 and 8 goes through the sign-preserving 2.2 power
	// curve (x < 0 ? -exp2(2.2*log2(-x)) : exp2(2.2*log2(x))) and is then
	// multiplied by s. Output: the three scaled values at +0,+4,+8 and s at +12.
	// NOTE(review): offset 12 is presumably the alpha channel -- the field names
	// of PixelRGB are not visible here.
	.visible .func (.param .align 16 .b8 __cudaretf__Z25PremultiplyLinearizePixel8PixelRGB[16]) _Z25PremultiplyLinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB[16])
	{
	.reg .f32 	%in0, %in1, %in2, %in3;
	.reg .f32 	%scale;
	.reg .f32 	%mag, %lg, %scaled, %pw;
	.reg .f32 	%lin0, %lin1, %lin2;
	.reg .f32 	%out0, %out1, %out2;
	.reg .pred 	%neg;
	.loc	4	252	0
$LDWbegin__Z25PremultiplyLinearizePixel8PixelRGB:
	ld.param.f32 	%in0, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+0];
	ld.param.f32 	%in1, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+4];
	ld.param.f32 	%in2, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+8];
	ld.param.f32 	%in3, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+12];
	.loc	4	254	0
	cvt.ftz.sat.f32.f32 	%scale, %in3;
	.loc	4	255	0
	// ---- float at offset 0 ----
	setp.lt.ftz.f32 	%neg, %in0, 0f00000000;
	@!%neg bra 	$Lt_33_4098;
	.loc	4	234	0
	neg.ftz.f32 	%mag, %in0;
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%scaled, %lg, 0f400ccccd;	// 2.2
	ex2.approx.ftz.f32 	%pw, %scaled;
	neg.ftz.f32 	%lin0, %pw;
	bra.uni 	$LDWendi___log2f_211_5;
$Lt_33_4098:
	.loc	4	236	0
	lg2.approx.ftz.f32 	%lg, %in0;
	mul.ftz.f32 	%scaled, %lg, 0f400ccccd;	// 2.2
	ex2.approx.ftz.f32 	%lin0, %scaled;
$LDWendi___log2f_211_5:
	.loc	4	256	0
	// ---- float at offset 4 ----
	setp.lt.ftz.f32 	%neg, %in1, 0f00000000;
	@!%neg bra 	$Lt_33_4610;
	.loc	4	234	0
	neg.ftz.f32 	%mag, %in1;
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%scaled, %lg, 0f400ccccd;	// 2.2
	ex2.approx.ftz.f32 	%pw, %scaled;
	neg.ftz.f32 	%lin1, %pw;
	bra.uni 	$LDWendi___log2f_211_3;
$Lt_33_4610:
	.loc	4	236	0
	lg2.approx.ftz.f32 	%lg, %in1;
	mul.ftz.f32 	%scaled, %lg, 0f400ccccd;	// 2.2
	ex2.approx.ftz.f32 	%lin1, %scaled;
$LDWendi___log2f_211_3:
	.loc	4	257	0
	// ---- float at offset 8 ----
	setp.lt.ftz.f32 	%neg, %in2, 0f00000000;
	@!%neg bra 	$Lt_33_5122;
	.loc	4	234	0
	neg.ftz.f32 	%mag, %in2;
	lg2.approx.ftz.f32 	%lg, %mag;
	mul.ftz.f32 	%scaled, %lg, 0f400ccccd;	// 2.2
	ex2.approx.ftz.f32 	%pw, %scaled;
	neg.ftz.f32 	%lin2, %pw;
	bra.uni 	$LDWendi___log2f_211_1;
$Lt_33_5122:
	.loc	4	236	0
	lg2.approx.ftz.f32 	%lg, %in2;
	mul.ftz.f32 	%scaled, %lg, 0f400ccccd;	// 2.2
	ex2.approx.ftz.f32 	%lin2, %scaled;
$LDWendi___log2f_211_1:
	.loc	4	259	0
	// Scale the three curved values by the saturated factor and return it alongside them.
	mul.ftz.f32 	%out2, %lin2, %scale;
	mul.ftz.f32 	%out1, %lin1, %scale;
	mul.ftz.f32 	%out0, %lin0, %scale;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+0], %out0;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+4], %out1;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+8], %out2;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+12], %scale;
	ret;
$LDWend__Z25PremultiplyLinearizePixel8PixelRGB:
	} // _Z25PremultiplyLinearizePixel8PixelRGB

	.visible .func (.param .align 16 .b8 __cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16]) _Z29UnpremultiplyUnlinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16])
	{
	// Input and output are four packed f32 components at byte offsets
	// 0, 4, 8 and 12 of a 16-byte parameter.
	//   1. Saturate the +12 component (cvt.sat).
	//   2. If (saturated - 8e-006) <= 0, all four working values become 0.
	//      Otherwise the +0/+4/+8 components are multiplied by the
	//      approximate reciprocal of the saturated value, which is itself
	//      kept for the +12 output.
	//   3. Each of the three working components goes through a
	//      sign-preserving power curve with exponent 0.454545.
	// NOTE(review): +12 is presumably the alpha channel -- confirm against
	// the PixelRGB declaration.
	.reg .f32 	%c0, %c1, %c2, %c3;
	.reg .f32 	%cov, %bias, %inv, %expo, %outc;
	.reg .f32 	%u0, %u1, %u2;
	.reg .f32 	%m0, %m1, %m2;
	.reg .f32 	%e0, %e1, %e2;
	.reg .pred 	%off, %n0, %n1, %n2;
	.loc	4	263	0
$LDWbegin__Z29UnpremultiplyUnlinearizePixel8PixelRGB:
	ld.param.f32 	%c0, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+0];
	ld.param.f32 	%c1, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+4];
	ld.param.f32 	%c2, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+8];
	ld.param.f32 	%c3, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+12];
	mov.f32 	%expo, 0f3ee8ba2e;   	// 0.454545, shared by all three components
	.loc	4	208	0
	cvt.ftz.sat.f32.f32 	%cov, %c3;
	add.ftz.f32 	%bias, %cov, 0fb70637bd;	// saturated value + (-8e-006)
	setp.le.ftz.f32 	%off, %bias, 0f00000000;
	// Start from the "all zero" result ...
	.loc	4	219	0
	mov.f32 	%u2, 0f00000000;
	mov.f32 	%u1, 0f00000000;
	mov.f32 	%u0, 0f00000000;
	mov.f32 	%outc, 0f00000000;
	// ... and overwrite it only when the threshold test did not fire.
	.loc	4	213	0
	@!%off rcp.approx.ftz.f32 	%inv, %cov;
	@!%off mul.ftz.f32 	%u2, %inv, %c2;
	.loc	4	214	0
	@!%off mul.ftz.f32 	%u1, %inv, %c1;
	.loc	4	215	0
	@!%off mul.ftz.f32 	%u0, %inv, %c0;
	@!%off mov.f32 	%outc, %cov;

	// ---- component at +0 ----
	.loc	4	266	0
	setp.lt.ftz.f32 	%n0, %u0, 0f00000000;
	mov.f32 	%m0, %u0;
	.loc	4	242	0
	@%n0 neg.ftz.f32 	%m0, %u0;
	.loc	4	244	0
	lg2.approx.ftz.f32 	%e0, %m0;
	mul.ftz.f32 	%e0, %e0, %expo;
	ex2.approx.ftz.f32 	%e0, %e0;
	.loc	4	242	0
	@%n0 neg.ftz.f32 	%e0, %e0;

	// ---- component at +4 ----
	.loc	4	267	0
	setp.lt.ftz.f32 	%n1, %u1, 0f00000000;
	mov.f32 	%m1, %u1;
	.loc	4	242	0
	@%n1 neg.ftz.f32 	%m1, %u1;
	.loc	4	244	0
	lg2.approx.ftz.f32 	%e1, %m1;
	mul.ftz.f32 	%e1, %e1, %expo;
	ex2.approx.ftz.f32 	%e1, %e1;
	.loc	4	242	0
	@%n1 neg.ftz.f32 	%e1, %e1;

	// ---- component at +8 ----
	.loc	4	268	0
	setp.lt.ftz.f32 	%n2, %u2, 0f00000000;
	mov.f32 	%m2, %u2;
	.loc	4	242	0
	@%n2 neg.ftz.f32 	%m2, %u2;
	.loc	4	244	0
	lg2.approx.ftz.f32 	%e2, %m2;
	mul.ftz.f32 	%e2, %e2, %expo;
	ex2.approx.ftz.f32 	%e2, %e2;
	.loc	4	242	0
	@%n2 neg.ftz.f32 	%e2, %e2;

	.loc	4	269	0
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+0], %e0;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+4], %e1;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+8], %e2;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+12], %outc;
	ret;
$LDWend__Z29UnpremultiplyUnlinearizePixel8PixelRGB:
	} // _Z29UnpremultiplyUnlinearizePixel8PixelRGB

	.visible .func (.param .align 16 .b8 __cudaretf__Z20PremultiplyLinearize6float4[16]) _Z20PremultiplyLinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z20PremultiplyLinearize6float4[16])
	{
	// Input and output are four packed f32 components at byte offsets
	// 0, 4, 8 and 12 of a 16-byte parameter.
	//   * The +12 component is saturated (cvt.sat) and returned at +12.
	//   * Each of the +0/+4/+8 components is raised to the power 2.2 with
	//     its sign carried through (negate, lg2 * 2.2, ex2, negate back),
	//     then multiplied by the saturated +12 value.
	// NOTE(review): +12 is presumably float4.w used as alpha -- confirm
	// against the caller.
	.reg .f32 	%c0, %c1, %c2, %c3;
	.reg .f32 	%cov, %expo;
	.reg .f32 	%m0, %m1, %m2;
	.reg .f32 	%l0, %l1, %l2;
	.reg .f32 	%o0, %o1, %o2;
	.reg .pred 	%n0, %n1, %n2;
	.loc	4	277	0
$LDWbegin__Z20PremultiplyLinearize6float4:
	ld.param.f32 	%c0, [__cudaparmf1__Z20PremultiplyLinearize6float4+0];
	ld.param.f32 	%c1, [__cudaparmf1__Z20PremultiplyLinearize6float4+4];
	ld.param.f32 	%c2, [__cudaparmf1__Z20PremultiplyLinearize6float4+8];
	ld.param.f32 	%c3, [__cudaparmf1__Z20PremultiplyLinearize6float4+12];
	mov.f32 	%expo, 0f400ccccd;   	// 2.2, shared by all three components
	.loc	4	254	0
	cvt.ftz.sat.f32.f32 	%cov, %c3;

	// ---- component at +0 ----
	.loc	4	255	0
	setp.lt.ftz.f32 	%n0, %c0, 0f00000000;
	mov.f32 	%m0, %c0;
	.loc	4	234	0
	@%n0 neg.ftz.f32 	%m0, %c0;
	.loc	4	236	0
	lg2.approx.ftz.f32 	%l0, %m0;
	mul.ftz.f32 	%l0, %l0, %expo;
	ex2.approx.ftz.f32 	%l0, %l0;
	.loc	4	234	0
	@%n0 neg.ftz.f32 	%l0, %l0;

	// ---- component at +4 ----
	.loc	4	256	0
	setp.lt.ftz.f32 	%n1, %c1, 0f00000000;
	mov.f32 	%m1, %c1;
	.loc	4	234	0
	@%n1 neg.ftz.f32 	%m1, %c1;
	.loc	4	236	0
	lg2.approx.ftz.f32 	%l1, %m1;
	mul.ftz.f32 	%l1, %l1, %expo;
	ex2.approx.ftz.f32 	%l1, %l1;
	.loc	4	234	0
	@%n1 neg.ftz.f32 	%l1, %l1;

	// ---- component at +8 ----
	.loc	4	257	0
	setp.lt.ftz.f32 	%n2, %c2, 0f00000000;
	mov.f32 	%m2, %c2;
	.loc	4	234	0
	@%n2 neg.ftz.f32 	%m2, %c2;
	.loc	4	236	0
	lg2.approx.ftz.f32 	%l2, %m2;
	mul.ftz.f32 	%l2, %l2, %expo;
	ex2.approx.ftz.f32 	%l2, %l2;
	.loc	4	234	0
	@%n2 neg.ftz.f32 	%l2, %l2;

	// Scale every converted component by the saturated +12 value and
	// write the result struct.
	.loc	4	259	0
	mul.ftz.f32 	%o2, %l2, %cov;
	mul.ftz.f32 	%o1, %l1, %cov;
	.loc	4	278	0
	mul.ftz.f32 	%o0, %l0, %cov;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+0], %o0;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+4], %o1;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+8], %o2;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+12], %cov;
	ret;
$LDWend__Z20PremultiplyLinearize6float4:
	} // _Z20PremultiplyLinearize6float4

	.visible .func (.param .align 16 .b8 __cudaretf__Z24UnpremultiplyUnlinearize6float4[16]) _Z24UnpremultiplyUnlinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z24UnpremultiplyUnlinearize6float4[16])
	{
	// Input and output are four packed f32 components at byte offsets
	// 0, 4, 8 and 12 of a 16-byte parameter.
	//   1. Saturate the +12 component (cvt.sat).
	//   2. If (saturated - 8e-006) <= 0, all four working values become 0.
	//      Otherwise the +0/+4/+8 components are multiplied by the
	//      approximate reciprocal of the saturated value, which is itself
	//      kept for the +12 output.
	//   3. Each of the three working components goes through a
	//      sign-preserving power curve with exponent 0.454545.
	// NOTE(review): +12 is presumably float4.w used as alpha -- confirm
	// against the caller.
	.reg .f32 	%c0, %c1, %c2, %c3;
	.reg .f32 	%cov, %bias, %inv, %expo, %outc;
	.reg .f32 	%u0, %u1, %u2;
	.reg .f32 	%m0, %m1, %m2;
	.reg .f32 	%e0, %e1, %e2;
	.reg .pred 	%off, %n0, %n1, %n2;
	.loc	4	284	0
$LDWbegin__Z24UnpremultiplyUnlinearize6float4:
	ld.param.f32 	%c0, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+0];
	ld.param.f32 	%c1, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+4];
	ld.param.f32 	%c2, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+8];
	ld.param.f32 	%c3, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+12];
	mov.f32 	%expo, 0f3ee8ba2e;   	// 0.454545, shared by all three components
	.loc	4	208	0
	cvt.ftz.sat.f32.f32 	%cov, %c3;
	add.ftz.f32 	%bias, %cov, 0fb70637bd;	// saturated value + (-8e-006)
	setp.le.ftz.f32 	%off, %bias, 0f00000000;
	// Start from the "all zero" result ...
	.loc	4	219	0
	mov.f32 	%u2, 0f00000000;
	mov.f32 	%u1, 0f00000000;
	mov.f32 	%u0, 0f00000000;
	mov.f32 	%outc, 0f00000000;
	// ... and overwrite it only when the threshold test did not fire.
	.loc	4	213	0
	@!%off rcp.approx.ftz.f32 	%inv, %cov;
	@!%off mul.ftz.f32 	%u2, %inv, %c2;
	.loc	4	214	0
	@!%off mul.ftz.f32 	%u1, %inv, %c1;
	.loc	4	215	0
	@!%off mul.ftz.f32 	%u0, %inv, %c0;
	@!%off mov.f32 	%outc, %cov;

	// ---- component at +0 ----
	.loc	4	266	0
	setp.lt.ftz.f32 	%n0, %u0, 0f00000000;
	mov.f32 	%m0, %u0;
	.loc	4	242	0
	@%n0 neg.ftz.f32 	%m0, %u0;
	.loc	4	244	0
	lg2.approx.ftz.f32 	%e0, %m0;
	mul.ftz.f32 	%e0, %e0, %expo;
	ex2.approx.ftz.f32 	%e0, %e0;
	.loc	4	242	0
	@%n0 neg.ftz.f32 	%e0, %e0;

	// ---- component at +4 ----
	.loc	4	267	0
	setp.lt.ftz.f32 	%n1, %u1, 0f00000000;
	mov.f32 	%m1, %u1;
	.loc	4	242	0
	@%n1 neg.ftz.f32 	%m1, %u1;
	.loc	4	244	0
	lg2.approx.ftz.f32 	%e1, %m1;
	mul.ftz.f32 	%e1, %e1, %expo;
	ex2.approx.ftz.f32 	%e1, %e1;
	.loc	4	242	0
	@%n1 neg.ftz.f32 	%e1, %e1;

	// ---- component at +8 ----
	.loc	4	268	0
	setp.lt.ftz.f32 	%n2, %u2, 0f00000000;
	mov.f32 	%m2, %u2;
	.loc	4	242	0
	@%n2 neg.ftz.f32 	%m2, %u2;
	.loc	4	244	0
	lg2.approx.ftz.f32 	%e2, %m2;
	mul.ftz.f32 	%e2, %e2, %expo;
	ex2.approx.ftz.f32 	%e2, %e2;
	.loc	4	242	0
	@%n2 neg.ftz.f32 	%e2, %e2;

	.loc	4	285	0
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+0], %e0;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+4], %e1;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+8], %e2;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+12], %outc;
	ret;
$LDWend__Z24UnpremultiplyUnlinearize6float4:
	} // _Z24UnpremultiplyUnlinearize6float4

	.visible .func (.param .align 16 .b8 __cudaretf__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_[16]) _Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_ (.param .align 4 .b8 __cudaparmf1__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_[4], .param .s32 __cudaparmf2__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_, .param .s32 __cudaparmf3__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_)
	{
	.reg .u32 %r<197>;
	.reg .u64 %rd<3>;
	.reg .f32 %f<192>;
	.reg .pred %p<135>;
	.loc	22	34	0
$LDWbegin__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_:
	ld.param.u8 	%r1, [__cudaparmf1__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_+0];
	mov.s32 	%r2, %r1;
	ld.param.u8 	%r3, [__cudaparmf1__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_+1];
	mov.s32 	%r4, %r3;
	ld.param.u8 	%r5, [__cudaparmf1__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_+2];
	mov.s32 	%r6, %r5;
	ld.param.u8 	%r7, [__cudaparmf1__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_+3];
	mov.s32 	%r8, %r7;
	ld.param.u32 	%r9, [__cudaparmf2__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_];
	mov.s32 	%r10, %r9;
	ld.param.u32 	%r11, [__cudaparmf3__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_];
	mov.s32 	%r12, %r11;
	.loc	20	469	0
	cvt.u8.u32 	%r13, %r2;
	cvt.rn.f32.u32 	%f1, %r13;
	mov.f32 	%f2, %f1;
	cvt.u8.u32 	%r14, %r4;
	cvt.rn.f32.u32 	%f3, %r14;
	mov.f32 	%f4, %f3;
	cvt.u8.u32 	%r15, %r6;
	cvt.rn.f32.u32 	%f5, %r15;
	cvt.u8.u32 	%r16, %r8;
	cvt.rn.f32.u32 	%f6, %r16;
	and.b32 	%r17, %r10, 4096;
	mov.u32 	%r18, 0;
	setp.ne.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_37_222722;
	.loc	20	473	0
	mov.f32 	%f2, %f6;
	mov.f32 	%f4, %f5;
	mov.f32 	%f5, %f3;
	mov.f32 	%f6, %f1;
$Lt_37_222722:
	.loc	20	476	0
	and.b32 	%r19, %r10, 448;
	mov.s32 	%r20, %r10;
	and.b32 	%r21, %r12, 448;
	mov.s32 	%r22, %r12;
	mov.s32 	%r23, 256;
	setp.ne.s32 	%p2, %r19, %r23;
	and.b32 	%r24, %r20, 1;
	mov.s32 	%r25, 256;
	setp.ne.s32 	%p3, %r21, %r25;
	and.b32 	%r26, %r22, 1;
	selp.s32 	%r27, 1, 0, %p2;
	selp.s32 	%r28, 1, 0, %p3;
	and.b32 	%r29, %r24, %r27;
	and.b32 	%r30, %r26, %r28;
	mov.u32 	%r31, 0;
	setp.eq.s32 	%p4, %r29, %r31;
	@%p4 bra 	$Lt_37_249858;
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p5, %r30, %r32;
	@%p5 bra 	$Lt_37_249858;
	.loc	20	57	0
	mov.u32 	%r33, 0;
	setp.ne.s32 	%p6, %r19, %r33;
	@%p6 bra 	$Lt_37_140802;
	.loc	20	59	0
	mov.f32 	%f7, 0f437f0000;     	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_75;
$Lt_37_140802:
	.loc	20	61	0
	mov.u32 	%r34, 64;
	setp.ne.s32 	%p7, %r19, %r34;
	@%p7 bra 	$Lt_37_141058;
	.loc	20	63	0
	mov.f32 	%f7, 0f447fc000;     	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_75;
$Lt_37_141058:
	.loc	20	65	0
	mov.u32 	%r35, 128;
	setp.ne.s32 	%p8, %r19, %r35;
	@%p8 bra 	$Lt_37_141314;
	.loc	20	68	0
	mov.f32 	%f7, 0f47000000;     	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_75;
$Lt_37_141314:
	.loc	20	70	0
	mov.u32 	%r36, 192;
	setp.ne.s32 	%p9, %r19, %r36;
	@%p9 bra 	$Lt_37_141570;
	.loc	20	72	0
	mov.f32 	%f7, 0fbf800000;     	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_75;
$Lt_37_141570:
	.loc	20	76	0
	mov.f32 	%f7, 0f3f800000;     	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_75:
	.loc	20	118	0
	and.b32 	%r37, %r10, 2048;
	mov.s32 	%r38, 0;
	setp.ne.s32 	%p10, %r37, %r38;
	@!%p10 bra 	$Lt_37_223490;
	.loc	20	100	0
	ld.const.f32 	%f8, [kYCbCrOffset+0];
	bra.uni 	$Lt_37_223234;
$Lt_37_223490:
	ld.const.f32 	%f8, [kYCbCrFullRangeOffset+0];
$Lt_37_223234:
	.loc	20	118	0
	@!%p10 bra 	$Lt_37_224002;
	.loc	20	100	0
	ld.const.f32 	%f9, [kYCbCrOffset+4];
	bra.uni 	$Lt_37_223746;
$Lt_37_224002:
	ld.const.f32 	%f9, [kYCbCrFullRangeOffset+4];
$Lt_37_223746:
	.loc	20	118	0
	@!%p10 bra 	$Lt_37_224514;
	.loc	20	100	0
	ld.const.f32 	%f10, [kYCbCrOffset+8];
	bra.uni 	$Lt_37_224258;
$Lt_37_224514:
	ld.const.f32 	%f10, [kYCbCrFullRangeOffset+8];
$Lt_37_224258:
	.loc	20	478	0
	mov.f32 	%f11, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f12, %f7, %f11;
	mul.ftz.f32 	%f13, %f12, %f8;
	sub.ftz.f32 	%f4, %f4, %f13;
	mul.ftz.f32 	%f14, %f12, %f9;
	sub.ftz.f32 	%f5, %f5, %f14;
	mul.ftz.f32 	%f15, %f12, %f10;
	sub.ftz.f32 	%f6, %f6, %f15;
$Lt_37_249858:
$Lt_37_26114:
	.loc	20	481	0
	and.b32 	%r39, %r10, 2;
	and.b32 	%r40, %r12, 2;
	mov.u32 	%r41, 0;
	setp.eq.s32 	%p11, %r39, %r41;
	@%p11 bra 	$Lt_37_250370;
	mov.u32 	%r42, 0;
	setp.ne.s32 	%p12, %r40, %r42;
	@%p12 bra 	$Lt_37_250370;
	.loc	20	483	0
	mov.f32 	%f16, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p13, %f4, %f16;
	@!%p13 bra 	$Lt_37_224770;
	.loc	20	372	0
	neg.ftz.f32 	%f17, %f4;
	lg2.approx.ftz.f32 	%f18, %f17;
	mov.f32 	%f19, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f20, %f18, %f19;
	ex2.approx.ftz.f32 	%f21, %f20;
	neg.ftz.f32 	%f22, %f21;
	bra.uni 	$LDWendi___log2f_215_71;
$Lt_37_224770:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f23, %f4;
	mov.f32 	%f24, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f25, %f23, %f24;
	ex2.approx.ftz.f32 	%f22, %f25;
$LDWendi___log2f_215_71:
	.loc	20	483	0
	mov.f32 	%f26, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p14, %f5, %f26;
	@!%p14 bra 	$Lt_37_225282;
	.loc	20	372	0
	neg.ftz.f32 	%f27, %f5;
	lg2.approx.ftz.f32 	%f28, %f27;
	mov.f32 	%f29, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f30, %f28, %f29;
	ex2.approx.ftz.f32 	%f31, %f30;
	neg.ftz.f32 	%f32, %f31;
	bra.uni 	$LDWendi___log2f_215_69;
$Lt_37_225282:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f33, %f5;
	mov.f32 	%f34, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f35, %f33, %f34;
	ex2.approx.ftz.f32 	%f32, %f35;
$LDWendi___log2f_215_69:
	.loc	20	483	0
	mov.f32 	%f36, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p15, %f6, %f36;
	@!%p15 bra 	$Lt_37_225794;
	.loc	20	372	0
	neg.ftz.f32 	%f37, %f6;
	lg2.approx.ftz.f32 	%f38, %f37;
	mov.f32 	%f39, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f40, %f38, %f39;
	ex2.approx.ftz.f32 	%f41, %f40;
	neg.ftz.f32 	%f42, %f41;
	bra.uni 	$LDWendi___log2f_215_67;
$Lt_37_225794:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f43, %f6;
	mov.f32 	%f44, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f45, %f43, %f44;
	ex2.approx.ftz.f32 	%f42, %f45;
$LDWendi___log2f_215_67:
	.loc	20	483	0
	mov.f32 	%f4, %f22;
	mov.f32 	%f5, %f32;
	mov.f32 	%f6, %f42;
$Lt_37_250370:
$Lt_37_29954:
	.loc	20	486	0
	and.b32 	%r43, %r10, 1;
	and.b32 	%r44, %r12, 1;
	and.b32 	%r45, %r10, 1536;
	and.b32 	%r46, %r12, 1536;
	set.ne.u32.s32 	%r47, %r43, %r44;
	neg.s32 	%r48, %r47;
	set.ne.u32.s32 	%r49, %r45, %r46;
	neg.s32 	%r50, %r49;
	or.b32 	%r51, %r48, %r50;
	mov.u32 	%r52, 0;
	setp.ne.s32 	%p16, %r51, %r52;
	@%p16 bra 	$Lt_37_80642;
	setp.eq.s32 	%p17, %r29, %r30;
	@%p17 bra 	$Lt_37_80898;
$Lt_37_80642:
	.loc	20	490	0
	mov.u32 	%r53, 0;
	setp.ne.s32 	%p18, %r43, %r53;
	@%p18 bra 	$Lt_37_226562;
	mov.s32 	%r54, 256;
	setp.eq.s32 	%p19, %r21, %r54;
	mov.u32 	%r55, 256;
	setp.ne.s32 	%p20, %r19, %r55;
	@%p20 bra 	$Lt_37_227074;
	.loc	20	137	0
	mov.s32 	%r56, 512;
	setp.eq.s32 	%p21, %r46, %r56;
	@!%p19 bra 	$Lt_37_144898;
	.loc	20	139	0
	@!%p21 bra 	$Lt_37_145154;
	.loc	20	141	0
	mov.u64 	%rd1, kRGB32f_To_709YPbPr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_145154:
	.loc	20	145	0
	mov.u64 	%rd1, kRGB32f_To_601YPbPr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_144898:
	.loc	20	150	0
	@!%p21 bra 	$Lt_37_145410;
	.loc	20	152	0
	mov.u64 	%rd1, kRGB32f_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_145410:
	.loc	20	154	0
	and.b32 	%r57, %r12, 2048;
	mov.u32 	%r58, 0;
	setp.ne.s32 	%p22, %r57, %r58;
	@%p22 bra 	$Lt_37_145666;
	.loc	20	156	0
	mov.u64 	%rd1, kRGB32f_To_601YCbCrFullRange;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_145666:
	.loc	20	160	0
	mov.u64 	%rd1, kRGB32f_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_227074:
	@!%p19 bra 	$Lt_37_227586;
	bra.uni 	$Lt_37_226306;
$Lt_37_227586:
	.loc	20	179	0
	mov.u32 	%r59, 512;
	setp.ne.s32 	%p23, %r46, %r59;
	@%p23 bra 	$Lt_37_146434;
	.loc	20	181	0
	mov.u64 	%rd1, kRGB8u_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_146434:
	.loc	20	183	0
	and.b32 	%r60, %r12, 2048;
	mov.u32 	%r61, 0;
	setp.ne.s32 	%p24, %r60, %r61;
	@%p24 bra 	$Lt_37_146690;
	.loc	20	185	0
	mov.u64 	%rd1, kRGB8u_To_601YCbCrFullRange;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_146690:
	.loc	20	189	0
	mov.u64 	%rd1, kRGB8u_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_226562:
	mov.s32 	%r62, 0;
	setp.eq.s32 	%p25, %r44, %r62;
	mov.u32 	%r63, 512;
	setp.ne.s32 	%p26, %r45, %r63;
	@%p26 bra 	$Lt_37_228098;
	mov.s32 	%r64, 256;
	setp.eq.s32 	%p27, %r19, %r64;
	@!%p25 bra 	$Lt_37_228610;
	mov.s32 	%r65, 256;
	setp.eq.s32 	%p28, %r21, %r65;
	@!%p27 bra 	$Lt_37_229122;
	@!%p28 bra 	$Lt_37_226306;
	.loc	20	202	0
	mov.u64 	%rd1, k709YPbPr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_229122:
	.loc	20	211	0
	@!%p28 bra 	$Lt_37_147970;
	.loc	20	213	0
	mov.u64 	%rd1, k709YCbCr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_147970:
	.loc	20	217	0
	mov.u64 	%rd1, k709YCbCr_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_228610:
	@!%p27 bra 	$Lt_37_230146;
	bra.uni 	$Lt_37_226306;
$Lt_37_230146:
	mov.s32 	%r66, 256;
	set.eq.u32.s32 	%r67, %r21, %r66;
	neg.s32 	%r68, %r67;
	and.b32 	%r69, %r12, 2048;
	mov.s32 	%r70, 0;
	set.eq.u32.s32 	%r71, %r69, %r70;
	neg.s32 	%r72, %r71;
	or.b32 	%r73, %r68, %r72;
	mov.u32 	%r74, 0;
	setp.eq.s32 	%p29, %r73, %r74;
	@%p29 bra 	$Lt_37_230658;
	bra.uni 	$Lt_37_226306;
$Lt_37_230658:
	.loc	20	250	0
	mov.u64 	%rd1, k709YCbCr_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_228098:
	and.b32 	%r75, %r10, 2048;
	mov.s32 	%r76, 0;
	setp.eq.s32 	%p30, %r75, %r76;
	@!%p30 bra 	$Lt_37_231170;
	@!%p25 bra 	$Lt_37_226306;
	.loc	20	259	0
	mov.u32 	%r77, 256;
	setp.ne.s32 	%p31, %r21, %r77;
	@%p31 bra 	$Lt_37_149506;
	.loc	20	261	0
	mov.u64 	%rd1, k601YCbCrFullRange_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_149506:
	.loc	20	265	0
	mov.u64 	%rd1, k601YCbCrFullRange_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_231170:
	mov.s32 	%r78, 256;
	setp.eq.s32 	%p27, %r19, %r78;
	@!%p25 bra 	$Lt_37_232194;
	mov.s32 	%r79, 256;
	setp.eq.s32 	%p32, %r21, %r79;
	@!%p27 bra 	$Lt_37_232706;
	@!%p32 bra 	$Lt_37_226306;
	.loc	20	302	0
	mov.u64 	%rd1, k601YPbPr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_232706:
	.loc	20	311	0
	@!%p32 bra 	$Lt_37_151298;
	.loc	20	313	0
	mov.u64 	%rd1, k601YCbCr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_151298:
	.loc	20	317	0
	mov.u64 	%rd1, k601YCbCr_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_232194:
	@!%p27 bra 	$Lt_37_233730;
	bra.uni 	$Lt_37_226306;
$Lt_37_233730:
	selp.s32 	%r80, 1, 0, %p30;
	mov.s32 	%r81, 256;
	set.eq.u32.s32 	%r82, %r21, %r81;
	neg.s32 	%r83, %r82;
	or.b32 	%r84, %r80, %r83;
	mov.u32 	%r85, 0;
	setp.eq.s32 	%p33, %r84, %r85;
	@%p33 bra 	$Lt_37_234242;
	bra.uni 	$Lt_37_226306;
$Lt_37_234242:
	.loc	20	350	0
	mov.u64 	%rd1, k601YCbCr_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65;
$Lt_37_226306:
	.loc	20	355	0
	mov.u64 	%rd1, 0;
$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__215_65:
	.loc	20	490	0
	ld.global.f32 	%f46, [%rd1+16];
	mul.ftz.f32 	%f47, %f46, %f5;
	ld.global.f32 	%f48, [%rd1+12];
	fma.rn.ftz.f32 	%f49, %f48, %f4, %f47;
	ld.global.f32 	%f50, [%rd1+20];
	fma.rn.ftz.f32 	%f51, %f50, %f6, %f49;
	ld.global.f32 	%f52, [%rd1+28];
	mul.ftz.f32 	%f53, %f52, %f5;
	ld.global.f32 	%f54, [%rd1+24];
	fma.rn.ftz.f32 	%f55, %f54, %f4, %f53;
	ld.global.f32 	%f56, [%rd1+32];
	fma.rn.ftz.f32 	%f57, %f56, %f6, %f55;
	ld.global.f32 	%f58, [%rd1+4];
	mul.ftz.f32 	%f59, %f58, %f5;
	ld.global.f32 	%f60, [%rd1+0];
	fma.rn.ftz.f32 	%f61, %f60, %f4, %f59;
	ld.global.f32 	%f62, [%rd1+8];
	fma.rn.ftz.f32 	%f4, %f62, %f6, %f61;
	mov.f32 	%f5, %f51;
	mov.f32 	%f6, %f57;
	setp.eq.s32 	%p34, %r19, %r21;
	@%p34 bra 	$Lt_37_235010;
	.loc	20	494	0
	mov.s32 	%r86, 256;
	setp.eq.s32 	%p27, %r19, %r86;
	@!%p27 bra 	$L_37_220162;
	mov.s32 	%r87, 0;
	setp.eq.s32 	%p35, %r21, %r87;
	@%p35 bra 	$Lt_37_251394;
$L_37_220162:
	mov.s32 	%r88, 0;
	setp.eq.s32 	%p36, %r19, %r88;
	@!%p36 bra 	$Lt_37_251650;
	mov.u32 	%r89, 256;
	setp.ne.s32 	%p37, %r21, %r89;
	@%p37 bra 	$Lt_37_251650;
	mov.s32 	%r90, 0;
	setp.eq.s32 	%p35, %r21, %r90;
	bra.uni 	$L_37_219906;
$Lt_37_251394:
	mov.s32 	%r91, 0;
	setp.eq.s32 	%p36, %r19, %r91;
$L_37_219906:
	.loc	20	57	0
	@!%p35 bra 	$Lt_37_152834;
	.loc	20	59	0
	mov.f32 	%f63, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_63;
$Lt_37_152834:
	.loc	20	61	0
	mov.u32 	%r92, 64;
	setp.ne.s32 	%p38, %r21, %r92;
	@%p38 bra 	$Lt_37_153090;
	.loc	20	63	0
	mov.f32 	%f63, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_63;
$Lt_37_153090:
	.loc	20	65	0
	mov.u32 	%r93, 128;
	setp.ne.s32 	%p39, %r21, %r93;
	@%p39 bra 	$Lt_37_153346;
	.loc	20	68	0
	mov.f32 	%f63, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_63;
$Lt_37_153346:
	.loc	20	70	0
	mov.u32 	%r94, 192;
	setp.ne.s32 	%p40, %r21, %r94;
	@%p40 bra 	$Lt_37_153602;
	.loc	20	72	0
	mov.f32 	%f63, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_63;
$Lt_37_153602:
	.loc	20	76	0
	mov.f32 	%f63, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_63:
	.loc	20	57	0
	@!%p36 bra 	$Lt_37_153858;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_61;
$Lt_37_153858:
	.loc	20	61	0
	mov.u32 	%r95, 64;
	setp.ne.s32 	%p41, %r19, %r95;
	@%p41 bra 	$Lt_37_154114;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_61;
$Lt_37_154114:
	.loc	20	65	0
	mov.u32 	%r96, 128;
	setp.ne.s32 	%p42, %r19, %r96;
	@%p42 bra 	$Lt_37_154370;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_61;
$Lt_37_154370:
	.loc	20	70	0
	mov.u32 	%r97, 192;
	setp.ne.s32 	%p43, %r19, %r97;
	@%p43 bra 	$Lt_37_154626;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_61;
$Lt_37_154626:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_61:
	.loc	20	498	0
	div.approx.ftz.f32 	%f65, %f63, %f64;
	mul.ftz.f32 	%f2, %f2, %f65;
	bra.uni 	$Lt_37_235010;
$Lt_37_251650:
$L_37_219650:
	.loc	20	500	0
	@!%p27 bra 	$L_37_221186;
	@%p3 bra 	$L_37_220930;
$L_37_221186:
	@!%p2 bra 	$Lt_37_252674;
	mov.u32 	%r98, 256;
	setp.ne.s32 	%p44, %r21, %r98;
	@%p44 bra 	$Lt_37_252674;
$L_37_220930:
	.loc	20	57	0
	mov.u32 	%r99, 0;
	setp.ne.s32 	%p45, %r21, %r99;
	@%p45 bra 	$Lt_37_155138;
	.loc	20	59	0
	mov.f32 	%f63, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_59;
$Lt_37_155138:
	.loc	20	61	0
	mov.u32 	%r100, 64;
	setp.ne.s32 	%p46, %r21, %r100;
	@%p46 bra 	$Lt_37_155394;
	.loc	20	63	0
	mov.f32 	%f63, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_59;
$Lt_37_155394:
	.loc	20	65	0
	mov.u32 	%r101, 128;
	setp.ne.s32 	%p47, %r21, %r101;
	@%p47 bra 	$Lt_37_155650;
	.loc	20	68	0
	mov.f32 	%f63, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_59;
$Lt_37_155650:
	.loc	20	70	0
	mov.u32 	%r102, 192;
	setp.ne.s32 	%p48, %r21, %r102;
	@%p48 bra 	$Lt_37_155906;
	.loc	20	72	0
	mov.f32 	%f63, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_59;
$Lt_37_155906:
	.loc	20	76	0
	mov.f32 	%f63, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_59:
	.loc	20	57	0
	@!%p36 bra 	$Lt_37_156162;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_57;
$Lt_37_156162:
	.loc	20	61	0
	mov.u32 	%r103, 64;
	setp.ne.s32 	%p49, %r19, %r103;
	@%p49 bra 	$Lt_37_156418;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_57;
$Lt_37_156418:
	.loc	20	65	0
	mov.u32 	%r104, 128;
	setp.ne.s32 	%p50, %r19, %r104;
	@%p50 bra 	$Lt_37_156674;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_57;
$Lt_37_156674:
	.loc	20	70	0
	mov.u32 	%r105, 192;
	setp.ne.s32 	%p51, %r19, %r105;
	@%p51 bra 	$Lt_37_156930;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_57;
$Lt_37_156930:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_57:
	.loc	20	504	0
	div.approx.ftz.f32 	%f66, %f63, %f64;
	mul.ftz.f32 	%f2, %f2, %f66;
	.loc	20	57	0
	@!%p36 bra 	$Lt_37_158210;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_53;
$Lt_37_158210:
	.loc	20	61	0
	mov.u32 	%r106, 64;
	setp.ne.s32 	%p52, %r19, %r106;
	@%p52 bra 	$Lt_37_158466;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_53;
$Lt_37_158466:
	.loc	20	65	0
	mov.u32 	%r107, 128;
	setp.ne.s32 	%p53, %r19, %r107;
	@%p53 bra 	$Lt_37_158722;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_53;
$Lt_37_158722:
	.loc	20	70	0
	mov.u32 	%r108, 192;
	setp.ne.s32 	%p54, %r19, %r108;
	@%p54 bra 	$Lt_37_158978;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_53;
$Lt_37_158978:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_53:
	.loc	20	505	0
	mov.f32 	%f67, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f68, %f67, %f64;
	mul.ftz.f32 	%f4, %f4, %f68;
	.loc	20	57	0
	@!%p36 bra 	$Lt_37_160258;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_49;
$Lt_37_160258:
	.loc	20	61	0
	mov.u32 	%r109, 64;
	setp.ne.s32 	%p55, %r19, %r109;
	@%p55 bra 	$Lt_37_160514;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_49;
$Lt_37_160514:
	.loc	20	65	0
	mov.u32 	%r110, 128;
	setp.ne.s32 	%p56, %r19, %r110;
	@%p56 bra 	$Lt_37_160770;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_49;
$Lt_37_160770:
	.loc	20	70	0
	mov.u32 	%r111, 192;
	setp.ne.s32 	%p57, %r19, %r111;
	@%p57 bra 	$Lt_37_161026;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_49;
$Lt_37_161026:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_49:
	.loc	20	506	0
	mov.f32 	%f69, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f70, %f69, %f64;
	mul.ftz.f32 	%f5, %f51, %f70;
	.loc	20	57	0
	@!%p36 bra 	$Lt_37_162306;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_45;
$Lt_37_162306:
	.loc	20	61	0
	mov.u32 	%r112, 64;
	setp.ne.s32 	%p58, %r19, %r112;
	@%p58 bra 	$Lt_37_162562;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_45;
$Lt_37_162562:
	.loc	20	65	0
	mov.u32 	%r113, 128;
	setp.ne.s32 	%p59, %r19, %r113;
	@%p59 bra 	$Lt_37_162818;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_45;
$Lt_37_162818:
	.loc	20	70	0
	mov.u32 	%r114, 192;
	setp.ne.s32 	%p60, %r19, %r114;
	@%p60 bra 	$Lt_37_163074;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_45;
$Lt_37_163074:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_45:
	.loc	20	507	0
	mov.f32 	%f71, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f72, %f71, %f64;
	mul.ftz.f32 	%f6, %f57, %f72;
	bra.uni 	$Lt_37_235010;
$Lt_37_252674:
$L_37_220674:
	.loc	20	57	0
	@!%p36 bra 	$Lt_37_163330;
	.loc	20	59	0
	mov.f32 	%f63, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_43;
$Lt_37_163330:
	.loc	20	61	0
	mov.u32 	%r115, 64;
	setp.ne.s32 	%p61, %r19, %r115;
	@%p61 bra 	$Lt_37_163586;
	.loc	20	63	0
	mov.f32 	%f63, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_43;
$Lt_37_163586:
	.loc	20	65	0
	mov.u32 	%r116, 128;
	setp.ne.s32 	%p62, %r19, %r116;
	@%p62 bra 	$Lt_37_163842;
	.loc	20	68	0
	mov.f32 	%f63, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_43;
$Lt_37_163842:
	.loc	20	70	0
	mov.u32 	%r117, 192;
	setp.ne.s32 	%p63, %r19, %r117;
	@%p63 bra 	$Lt_37_164098;
	.loc	20	72	0
	mov.f32 	%f63, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_43;
$Lt_37_164098:
	.loc	20	76	0
	mov.f32 	%f63, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_43:
	.loc	20	511	0
	mov.f32 	%f73, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f74, %f63, %f73;
	mul.ftz.f32 	%f2, %f74, %f2;
	mul.ftz.f32 	%f4, %f74, %f4;
	mul.ftz.f32 	%f5, %f74, %f51;
	mul.ftz.f32 	%f6, %f74, %f57;
	bra.uni 	$Lt_37_235010;
$Lt_37_80898:
	.loc	20	486	0
	setp.eq.s32 	%p64, %r19, %r21;
	@%p64 bra 	$Lt_37_235010;
	.loc	20	57	0
	mov.u32 	%r118, 0;
	setp.ne.s32 	%p65, %r21, %r118;
	@%p65 bra 	$Lt_37_165634;
	.loc	20	59	0
	mov.f32 	%f63, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_39;
$Lt_37_165634:
	.loc	20	61	0
	mov.u32 	%r119, 64;
	setp.ne.s32 	%p66, %r21, %r119;
	@%p66 bra 	$Lt_37_165890;
	.loc	20	63	0
	mov.f32 	%f63, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_39;
$Lt_37_165890:
	.loc	20	65	0
	mov.u32 	%r120, 128;
	setp.ne.s32 	%p67, %r21, %r120;
	@%p67 bra 	$Lt_37_166146;
	.loc	20	68	0
	mov.f32 	%f63, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_39;
$Lt_37_166146:
	.loc	20	70	0
	mov.u32 	%r121, 192;
	setp.ne.s32 	%p68, %r21, %r121;
	@%p68 bra 	$Lt_37_166402;
	.loc	20	72	0
	mov.f32 	%f63, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_39;
$Lt_37_166402:
	.loc	20	76	0
	mov.f32 	%f63, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_39:
	.loc	20	57	0
	mov.u32 	%r122, 0;
	setp.ne.s32 	%p69, %r19, %r122;
	@%p69 bra 	$Lt_37_166658;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_37;
$Lt_37_166658:
	.loc	20	61	0
	mov.u32 	%r123, 64;
	setp.ne.s32 	%p70, %r19, %r123;
	@%p70 bra 	$Lt_37_166914;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_37;
$Lt_37_166914:
	.loc	20	65	0
	mov.u32 	%r124, 128;
	setp.ne.s32 	%p71, %r19, %r124;
	@%p71 bra 	$Lt_37_167170;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_37;
$Lt_37_167170:
	.loc	20	70	0
	mov.u32 	%r125, 192;
	setp.ne.s32 	%p72, %r19, %r125;
	@%p72 bra 	$Lt_37_167426;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_37;
$Lt_37_167426:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_37:
	.loc	20	517	0
	div.approx.ftz.f32 	%f75, %f63, %f64;
	mul.ftz.f32 	%f2, %f75, %f2;
	mul.ftz.f32 	%f4, %f75, %f4;
	mul.ftz.f32 	%f5, %f75, %f5;
	mul.ftz.f32 	%f6, %f75, %f6;
$Lt_37_235010:
$Lt_37_83202:
	.loc	20	520	0
	mov.u32 	%r126, 0;
	setp.eq.s32 	%p73, %r40, %r126;
	@%p73 bra 	$Lt_37_253186;
	mov.u32 	%r127, 0;
	setp.ne.s32 	%p74, %r39, %r127;
	@%p74 bra 	$Lt_37_253186;
	.loc	20	522	0
	mov.f32 	%f76, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p75, %f4, %f76;
	@!%p75 bra 	$Lt_37_235522;
	.loc	20	372	0
	neg.ftz.f32 	%f77, %f4;
	lg2.approx.ftz.f32 	%f78, %f77;
	mov.f32 	%f79, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f80, %f78, %f79;
	ex2.approx.ftz.f32 	%f81, %f80;
	neg.ftz.f32 	%f82, %f81;
	bra.uni 	$LDWendi___log2f_215_35;
$Lt_37_235522:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f83, %f4;
	mov.f32 	%f84, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f85, %f83, %f84;
	ex2.approx.ftz.f32 	%f82, %f85;
$LDWendi___log2f_215_35:
	.loc	20	522	0
	mov.f32 	%f86, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p76, %f5, %f86;
	@!%p76 bra 	$Lt_37_236034;
	.loc	20	372	0
	neg.ftz.f32 	%f87, %f5;
	lg2.approx.ftz.f32 	%f88, %f87;
	mov.f32 	%f89, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f90, %f88, %f89;
	ex2.approx.ftz.f32 	%f91, %f90;
	neg.ftz.f32 	%f92, %f91;
	bra.uni 	$LDWendi___log2f_215_33;
$Lt_37_236034:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f93, %f5;
	mov.f32 	%f94, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f95, %f93, %f94;
	ex2.approx.ftz.f32 	%f92, %f95;
$LDWendi___log2f_215_33:
	.loc	20	522	0
	mov.f32 	%f96, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p77, %f6, %f96;
	@!%p77 bra 	$Lt_37_236546;
	.loc	20	372	0
	neg.ftz.f32 	%f97, %f6;
	lg2.approx.ftz.f32 	%f98, %f97;
	mov.f32 	%f99, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f100, %f98, %f99;
	ex2.approx.ftz.f32 	%f101, %f100;
	neg.ftz.f32 	%f102, %f101;
	bra.uni 	$LDWendi___log2f_215_31;
$Lt_37_236546:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f103, %f6;
	mov.f32 	%f104, 0f400e38e4;   	// 2.22222
	mul.ftz.f32 	%f105, %f103, %f104;
	ex2.approx.ftz.f32 	%f102, %f105;
$LDWendi___log2f_215_31:
	.loc	20	522	0
	mov.f32 	%f4, %f82;
	mov.f32 	%f5, %f92;
	mov.f32 	%f6, %f102;
$Lt_37_253186:
$Lt_37_85250:
	.loc	20	525	0
	mov.u32 	%r128, 0;
	setp.eq.s32 	%p78, %r30, %r128;
	@%p78 bra 	$Lt_37_253698;
	mov.u32 	%r129, 0;
	setp.ne.s32 	%p79, %r29, %r129;
	@%p79 bra 	$Lt_37_253698;
	.loc	20	57	0
	mov.u32 	%r130, 0;
	setp.ne.s32 	%p80, %r21, %r130;
	@%p80 bra 	$Lt_37_168450;
	.loc	20	59	0
	mov.f32 	%f106, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_29;
$Lt_37_168450:
	.loc	20	61	0
	mov.u32 	%r131, 64;
	setp.ne.s32 	%p81, %r21, %r131;
	@%p81 bra 	$Lt_37_168706;
	.loc	20	63	0
	mov.f32 	%f106, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_29;
$Lt_37_168706:
	.loc	20	65	0
	mov.u32 	%r132, 128;
	setp.ne.s32 	%p82, %r21, %r132;
	@%p82 bra 	$Lt_37_168962;
	.loc	20	68	0
	mov.f32 	%f106, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_29;
$Lt_37_168962:
	.loc	20	70	0
	mov.u32 	%r133, 192;
	setp.ne.s32 	%p83, %r21, %r133;
	@%p83 bra 	$Lt_37_169218;
	.loc	20	72	0
	mov.f32 	%f106, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_29;
$Lt_37_169218:
	.loc	20	76	0
	mov.f32 	%f106, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_29:
	.loc	20	107	0
	and.b32 	%r134, %r12, 2048;
	mov.s32 	%r135, 0;
	setp.ne.s32 	%p84, %r134, %r135;
	@!%p84 bra 	$Lt_37_237314;
	.loc	20	100	0
	ld.const.f32 	%f107, [kYCbCrOffset+0];
	bra.uni 	$Lt_37_237058;
$Lt_37_237314:
	ld.const.f32 	%f107, [kYCbCrFullRangeOffset+0];
$Lt_37_237058:
	.loc	20	107	0
	@!%p84 bra 	$Lt_37_237826;
	.loc	20	100	0
	ld.const.f32 	%f108, [kYCbCrOffset+4];
	bra.uni 	$Lt_37_237570;
$Lt_37_237826:
	ld.const.f32 	%f108, [kYCbCrFullRangeOffset+4];
$Lt_37_237570:
	.loc	20	107	0
	@!%p84 bra 	$Lt_37_238338;
	.loc	20	100	0
	ld.const.f32 	%f109, [kYCbCrOffset+8];
	bra.uni 	$Lt_37_238082;
$Lt_37_238338:
	ld.const.f32 	%f109, [kYCbCrFullRangeOffset+8];
$Lt_37_238082:
	.loc	20	527	0
	mov.f32 	%f110, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f111, %f106, %f110;
	fma.rn.ftz.f32 	%f4, %f111, %f107, %f4;
	fma.rn.ftz.f32 	%f5, %f111, %f108, %f5;
	fma.rn.ftz.f32 	%f6, %f111, %f109, %f6;
$Lt_37_253698:
$Lt_37_91650:
	.loc	20	525	0
	and.b32 	%r136, %r10, 12;
	and.b32 	%r137, %r12, 12;
	setp.eq.s32 	%p85, %r136, %r137;
	@%p85 bra 	$Lt_37_239106;
	.loc	20	532	0
	mov.u32 	%r138, 8;
	setp.ne.s32 	%p86, %r136, %r138;
	@%p86 bra 	$L_37_222466;
	mov.u32 	%r139, 12;
	setp.eq.s32 	%p87, %r137, %r139;
	@%p87 bra 	$Lt_37_254466;
$L_37_222466:
	mov.u32 	%r140, 12;
	setp.eq.s32 	%p88, %r136, %r140;
	@%p88 bra 	$Lt_37_254466;
	mov.u32 	%r141, 0;
	setp.ne.s32 	%p89, %r136, %r141;
	@%p89 bra 	$L_37_221698;
$Lt_37_254466:
$L_37_221954:
	.loc	20	57	0
	mov.u32 	%r142, 0;
	setp.ne.s32 	%p90, %r21, %r142;
	@%p90 bra 	$Lt_37_171778;
	.loc	20	59	0
	mov.f32 	%f112, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_25;
$Lt_37_171778:
	.loc	20	61	0
	mov.u32 	%r143, 64;
	setp.ne.s32 	%p91, %r21, %r143;
	@%p91 bra 	$Lt_37_172034;
	.loc	20	63	0
	mov.f32 	%f112, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_25;
$Lt_37_172034:
	.loc	20	65	0
	mov.u32 	%r144, 128;
	setp.ne.s32 	%p92, %r21, %r144;
	@%p92 bra 	$Lt_37_172290;
	.loc	20	68	0
	mov.f32 	%f112, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_25;
$Lt_37_172290:
	.loc	20	70	0
	mov.u32 	%r145, 192;
	setp.ne.s32 	%p93, %r21, %r145;
	@%p93 bra 	$Lt_37_172546;
	.loc	20	72	0
	mov.f32 	%f112, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_25;
$Lt_37_172546:
	.loc	20	76	0
	mov.f32 	%f112, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_25:
	.loc	20	536	0
	mov.f32 	%f2, %f112;
	bra.uni 	$Lt_37_239106;
$L_37_221698:
	.loc	20	540	0
	mov.s32 	%r146, 12;
	setp.eq.s32 	%p94, %r137, %r146;
	mov.s32 	%r147, 4;
	set.eq.u32.s32 	%r148, %r136, %r147;
	neg.s32 	%r149, %r148;
	selp.s32 	%r150, 1, 0, %p94;
	mov.s32 	%r151, 8;
	set.eq.u32.s32 	%r152, %r137, %r151;
	neg.s32 	%r153, %r152;
	or.b32 	%r154, %r150, %r153;
	and.b32 	%r155, %r149, %r154;
	mov.u32 	%r156, 0;
	setp.eq.s32 	%p95, %r155, %r156;
	@%p95 bra 	$Lt_37_239362;
	.loc	20	410	0
	mov.f32 	%f113, %f4;
	mov.f32 	%f114, %f113;
	mov.f32 	%f115, %f5;
	mov.f32 	%f116, %f115;
	mov.f32 	%f117, %f6;
	mov.f32 	%f118, %f117;
	.loc	20	57	0
	mov.s32 	%r157, 0;
	setp.eq.s32 	%p35, %r21, %r157;
	@!%p35 bra 	$Lt_37_173058;
	.loc	20	59	0
	mov.f32 	%f119, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_23;
$Lt_37_173058:
	.loc	20	61	0
	mov.u32 	%r158, 64;
	setp.ne.s32 	%p96, %r21, %r158;
	@%p96 bra 	$Lt_37_173314;
	.loc	20	63	0
	mov.f32 	%f119, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_23;
$Lt_37_173314:
	.loc	20	65	0
	mov.u32 	%r159, 128;
	setp.ne.s32 	%p97, %r21, %r159;
	@%p97 bra 	$Lt_37_173570;
	.loc	20	68	0
	mov.f32 	%f119, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_23;
$Lt_37_173570:
	.loc	20	70	0
	mov.u32 	%r160, 192;
	setp.ne.s32 	%p98, %r21, %r160;
	@%p98 bra 	$Lt_37_173826;
	.loc	20	72	0
	mov.f32 	%f119, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_23;
$Lt_37_173826:
	.loc	20	76	0
	mov.f32 	%f119, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_23:
	.loc	20	413	0
	mov.u32 	%r161, 0;
	setp.eq.s32 	%p99, %r30, %r161;
	@%p99 bra 	$Lt_37_239618;
	.loc	20	57	0
	@!%p35 bra 	$Lt_37_174338;
	.loc	20	59	0
	mov.f32 	%f120, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_21;
$Lt_37_174338:
	.loc	20	61	0
	mov.u32 	%r162, 64;
	setp.ne.s32 	%p100, %r21, %r162;
	@%p100 bra 	$Lt_37_174594;
	.loc	20	63	0
	mov.f32 	%f120, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_21;
$Lt_37_174594:
	.loc	20	65	0
	mov.u32 	%r163, 128;
	setp.ne.s32 	%p101, %r21, %r163;
	@%p101 bra 	$Lt_37_174850;
	.loc	20	68	0
	mov.f32 	%f120, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_21;
$Lt_37_174850:
	.loc	20	70	0
	mov.u32 	%r164, 192;
	setp.ne.s32 	%p102, %r21, %r164;
	@%p102 bra 	$Lt_37_175106;
	.loc	20	72	0
	mov.f32 	%f120, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_21;
$Lt_37_175106:
	.loc	20	76	0
	mov.f32 	%f120, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_21:
	.loc	20	118	0
	and.b32 	%r134, %r12, 2048;
	mov.s32 	%r165, 0;
	setp.ne.s32 	%p84, %r134, %r165;
	@!%p84 bra 	$Lt_37_240386;
	.loc	20	100	0
	ld.const.f32 	%f121, [kYCbCrOffset+0];
	bra.uni 	$Lt_37_240130;
$Lt_37_240386:
	ld.const.f32 	%f121, [kYCbCrFullRangeOffset+0];
$Lt_37_240130:
	.loc	20	118	0
	@!%p84 bra 	$Lt_37_240898;
	.loc	20	100	0
	ld.const.f32 	%f122, [kYCbCrOffset+4];
	bra.uni 	$Lt_37_240642;
$Lt_37_240898:
	ld.const.f32 	%f122, [kYCbCrFullRangeOffset+4];
$Lt_37_240642:
	.loc	20	118	0
	@!%p84 bra 	$Lt_37_241410;
	.loc	20	100	0
	ld.const.f32 	%f123, [kYCbCrOffset+8];
	bra.uni 	$Lt_37_241154;
$Lt_37_241410:
	ld.const.f32 	%f123, [kYCbCrFullRangeOffset+8];
$Lt_37_241154:
	.loc	20	415	0
	mov.f32 	%f124, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f125, %f120, %f124;
	mul.ftz.f32 	%f126, %f125, %f121;
	sub.ftz.f32 	%f114, %f113, %f126;
	mul.ftz.f32 	%f127, %f125, %f122;
	sub.ftz.f32 	%f116, %f115, %f127;
	mul.ftz.f32 	%f128, %f125, %f123;
	sub.ftz.f32 	%f118, %f117, %f128;
$Lt_37_239618:
	.loc	20	418	0
	rcp.approx.ftz.f32 	%f129, %f119;
	mul.ftz.f32 	%f130, %f129, %f2;
	mul.ftz.f32 	%f114, %f130, %f114;
	.loc	20	419	0
	mul.ftz.f32 	%f116, %f130, %f116;
	.loc	20	420	0
	mul.ftz.f32 	%f118, %f130, %f118;
	.loc	20	422	0
	mov.u32 	%r166, 0;
	setp.eq.s32 	%p103, %r30, %r166;
	@%p103 bra 	$Lt_37_241666;
	.loc	20	57	0
	@!%p35 bra 	$Lt_37_177410;
	.loc	20	59	0
	mov.f32 	%f131, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_17;
$Lt_37_177410:
	.loc	20	61	0
	mov.u32 	%r167, 64;
	setp.ne.s32 	%p104, %r21, %r167;
	@%p104 bra 	$Lt_37_177666;
	.loc	20	63	0
	mov.f32 	%f131, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_17;
$Lt_37_177666:
	.loc	20	65	0
	mov.u32 	%r168, 128;
	setp.ne.s32 	%p105, %r21, %r168;
	@%p105 bra 	$Lt_37_177922;
	.loc	20	68	0
	mov.f32 	%f131, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_17;
$Lt_37_177922:
	.loc	20	70	0
	mov.u32 	%r169, 192;
	setp.ne.s32 	%p106, %r21, %r169;
	@%p106 bra 	$Lt_37_178178;
	.loc	20	72	0
	mov.f32 	%f131, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_17;
$Lt_37_178178:
	.loc	20	76	0
	mov.f32 	%f131, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_17:
	.loc	20	107	0
	and.b32 	%r134, %r12, 2048;
	mov.s32 	%r170, 0;
	setp.ne.s32 	%p84, %r134, %r170;
	@!%p84 bra 	$Lt_37_242434;
	.loc	20	100	0
	ld.const.f32 	%f132, [kYCbCrOffset+0];
	bra.uni 	$Lt_37_242178;
$Lt_37_242434:
	ld.const.f32 	%f132, [kYCbCrFullRangeOffset+0];
$Lt_37_242178:
	.loc	20	107	0
	@!%p84 bra 	$Lt_37_242946;
	.loc	20	100	0
	ld.const.f32 	%f133, [kYCbCrOffset+4];
	bra.uni 	$Lt_37_242690;
$Lt_37_242946:
	ld.const.f32 	%f133, [kYCbCrFullRangeOffset+4];
$Lt_37_242690:
	.loc	20	107	0
	@!%p84 bra 	$Lt_37_243458;
	.loc	20	100	0
	ld.const.f32 	%f134, [kYCbCrOffset+8];
	bra.uni 	$Lt_37_243202;
$Lt_37_243458:
	ld.const.f32 	%f134, [kYCbCrFullRangeOffset+8];
$Lt_37_243202:
	.loc	20	424	0
	mov.f32 	%f135, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f136, %f131, %f135;
	fma.rn.ftz.f32 	%f114, %f136, %f132, %f114;
	fma.rn.ftz.f32 	%f116, %f136, %f133, %f116;
	fma.rn.ftz.f32 	%f118, %f136, %f134, %f118;
$Lt_37_241666:
	.loc	20	543	0
	mov.f32 	%f4, %f114;
	mov.f32 	%f5, %f116;
	mov.f32 	%f6, %f118;
	@!%p94 bra 	$Lt_37_239106;
	.loc	20	57	0
	@!%p35 bra 	$Lt_37_180482;
	.loc	20	59	0
	mov.f32 	%f112, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_13;
$Lt_37_180482:
	.loc	20	61	0
	mov.u32 	%r171, 64;
	setp.ne.s32 	%p107, %r21, %r171;
	@%p107 bra 	$Lt_37_180738;
	.loc	20	63	0
	mov.f32 	%f112, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_13;
$Lt_37_180738:
	.loc	20	65	0
	mov.u32 	%r172, 128;
	setp.ne.s32 	%p108, %r21, %r172;
	@%p108 bra 	$Lt_37_180994;
	.loc	20	68	0
	mov.f32 	%f112, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_13;
$Lt_37_180994:
	.loc	20	70	0
	mov.u32 	%r173, 192;
	setp.ne.s32 	%p109, %r21, %r173;
	@%p109 bra 	$Lt_37_181250;
	.loc	20	72	0
	mov.f32 	%f112, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_13;
$Lt_37_181250:
	.loc	20	76	0
	mov.f32 	%f112, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_13:
	.loc	20	546	0
	mov.f32 	%f2, %f112;
	bra.uni 	$Lt_37_239106;
$Lt_37_239362:
	.loc	20	433	0
	mov.f32 	%f137, %f2;
	mov.f32 	%f138, %f4;
	mov.f32 	%f139, %f138;
	mov.f32 	%f140, %f5;
	mov.f32 	%f141, %f140;
	mov.f32 	%f142, %f6;
	mov.f32 	%f143, %f142;
	.loc	20	435	0
	mov.u32 	%r174, 0;
	setp.eq.s32 	%p110, %r30, %r174;
	@%p110 bra 	$Lt_37_244226;
	.loc	20	57	0
	mov.u32 	%r175, 0;
	setp.ne.s32 	%p111, %r21, %r175;
	@%p111 bra 	$Lt_37_181762;
	.loc	20	59	0
	mov.f32 	%f144, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_11;
$Lt_37_181762:
	.loc	20	61	0
	mov.u32 	%r176, 64;
	setp.ne.s32 	%p112, %r21, %r176;
	@%p112 bra 	$Lt_37_182018;
	.loc	20	63	0
	mov.f32 	%f144, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_11;
$Lt_37_182018:
	.loc	20	65	0
	mov.u32 	%r177, 128;
	setp.ne.s32 	%p113, %r21, %r177;
	@%p113 bra 	$Lt_37_182274;
	.loc	20	68	0
	mov.f32 	%f144, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_11;
$Lt_37_182274:
	.loc	20	70	0
	mov.u32 	%r178, 192;
	setp.ne.s32 	%p114, %r21, %r178;
	@%p114 bra 	$Lt_37_182530;
	.loc	20	72	0
	mov.f32 	%f144, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_11;
$Lt_37_182530:
	.loc	20	76	0
	mov.f32 	%f144, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_11:
	.loc	20	118	0
	and.b32 	%r134, %r12, 2048;
	mov.s32 	%r179, 0;
	setp.ne.s32 	%p84, %r134, %r179;
	@!%p84 bra 	$Lt_37_244994;
	.loc	20	100	0
	ld.const.f32 	%f145, [kYCbCrOffset+0];
	bra.uni 	$Lt_37_244738;
$Lt_37_244994:
	ld.const.f32 	%f145, [kYCbCrFullRangeOffset+0];
$Lt_37_244738:
	.loc	20	118	0
	@!%p84 bra 	$Lt_37_245506;
	.loc	20	100	0
	ld.const.f32 	%f146, [kYCbCrOffset+4];
	bra.uni 	$Lt_37_245250;
$Lt_37_245506:
	ld.const.f32 	%f146, [kYCbCrFullRangeOffset+4];
$Lt_37_245250:
	.loc	20	118	0
	@!%p84 bra 	$Lt_37_246018;
	.loc	20	100	0
	ld.const.f32 	%f147, [kYCbCrOffset+8];
	bra.uni 	$Lt_37_245762;
$Lt_37_246018:
	ld.const.f32 	%f147, [kYCbCrFullRangeOffset+8];
$Lt_37_245762:
	.loc	20	437	0
	mov.f32 	%f148, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f149, %f144, %f148;
	mul.ftz.f32 	%f150, %f149, %f145;
	sub.ftz.f32 	%f139, %f138, %f150;
	mul.ftz.f32 	%f151, %f149, %f146;
	sub.ftz.f32 	%f141, %f140, %f151;
	mul.ftz.f32 	%f152, %f149, %f147;
	sub.ftz.f32 	%f143, %f142, %f152;
$Lt_37_244226:
	mov.f32 	%f153, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f154, %f2, %f153;
	mov.f32 	%f155, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p115, %f154, %f155;
	@!%p115 bra 	$Lt_37_246530;
	mov.f32 	%f143, 0f00000000;   	// 0
	mov.f32 	%f141, 0f00000000;   	// 0
	mov.f32 	%f139, 0f00000000;   	// 0
	mov.f32 	%f137, 0f00000000;   	// 0
	bra.uni 	$Lt_37_246274;
$Lt_37_246530:
	.loc	20	57	0
	mov.u32 	%r180, 0;
	setp.ne.s32 	%p116, %r21, %r180;
	@%p116 bra 	$Lt_37_184834;
	.loc	20	59	0
	mov.f32 	%f156, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_7;
$Lt_37_184834:
	.loc	20	61	0
	mov.u32 	%r181, 64;
	setp.ne.s32 	%p117, %r21, %r181;
	@%p117 bra 	$Lt_37_185090;
	.loc	20	63	0
	mov.f32 	%f156, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_7;
$Lt_37_185090:
	.loc	20	65	0
	mov.u32 	%r182, 128;
	setp.ne.s32 	%p118, %r21, %r182;
	@%p118 bra 	$Lt_37_185346;
	.loc	20	68	0
	mov.f32 	%f156, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_7;
$Lt_37_185346:
	.loc	20	70	0
	mov.u32 	%r183, 192;
	setp.ne.s32 	%p119, %r21, %r183;
	@%p119 bra 	$Lt_37_185602;
	.loc	20	72	0
	mov.f32 	%f156, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_7;
$Lt_37_185602:
	.loc	20	76	0
	mov.f32 	%f156, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_7:
	.loc	20	447	0
	div.approx.ftz.f32 	%f157, %f156, %f2;
	mul.ftz.f32 	%f139, %f157, %f139;
	.loc	20	448	0
	mul.ftz.f32 	%f141, %f157, %f141;
	.loc	20	449	0
	mul.ftz.f32 	%f143, %f157, %f143;
$Lt_37_246274:
	.loc	20	452	0
	mov.u32 	%r184, 0;
	setp.eq.s32 	%p120, %r30, %r184;
	@%p120 bra 	$Lt_37_246786;
	.loc	20	57	0
	mov.u32 	%r185, 0;
	setp.ne.s32 	%p121, %r21, %r185;
	@%p121 bra 	$Lt_37_186114;
	.loc	20	59	0
	mov.f32 	%f158, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_5;
$Lt_37_186114:
	.loc	20	61	0
	mov.u32 	%r186, 64;
	setp.ne.s32 	%p122, %r21, %r186;
	@%p122 bra 	$Lt_37_186370;
	.loc	20	63	0
	mov.f32 	%f158, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_5;
$Lt_37_186370:
	.loc	20	65	0
	mov.u32 	%r187, 128;
	setp.ne.s32 	%p123, %r21, %r187;
	@%p123 bra 	$Lt_37_186626;
	.loc	20	68	0
	mov.f32 	%f158, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_5;
$Lt_37_186626:
	.loc	20	70	0
	mov.u32 	%r188, 192;
	setp.ne.s32 	%p124, %r21, %r188;
	@%p124 bra 	$Lt_37_186882;
	.loc	20	72	0
	mov.f32 	%f158, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_5;
$Lt_37_186882:
	.loc	20	76	0
	mov.f32 	%f158, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_5:
	.loc	20	107	0
	and.b32 	%r134, %r12, 2048;
	mov.s32 	%r189, 0;
	setp.ne.s32 	%p84, %r134, %r189;
	@!%p84 bra 	$Lt_37_247554;
	.loc	20	100	0
	ld.const.f32 	%f159, [kYCbCrOffset+0];
	bra.uni 	$Lt_37_247298;
$Lt_37_247554:
	ld.const.f32 	%f159, [kYCbCrFullRangeOffset+0];
$Lt_37_247298:
	.loc	20	107	0
	@!%p84 bra 	$Lt_37_248066;
	.loc	20	100	0
	ld.const.f32 	%f160, [kYCbCrOffset+4];
	bra.uni 	$Lt_37_247810;
$Lt_37_248066:
	ld.const.f32 	%f160, [kYCbCrFullRangeOffset+4];
$Lt_37_247810:
	.loc	20	107	0
	@!%p84 bra 	$Lt_37_248578;
	.loc	20	100	0
	ld.const.f32 	%f161, [kYCbCrOffset+8];
	bra.uni 	$Lt_37_248322;
$Lt_37_248578:
	ld.const.f32 	%f161, [kYCbCrFullRangeOffset+8];
$Lt_37_248322:
	.loc	20	454	0
	mov.f32 	%f162, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f163, %f158, %f162;
	fma.rn.ftz.f32 	%f139, %f163, %f159, %f139;
	fma.rn.ftz.f32 	%f141, %f163, %f160, %f141;
	fma.rn.ftz.f32 	%f143, %f163, %f161, %f143;
$Lt_37_246786:
	.loc	20	551	0
	mov.f32 	%f2, %f137;
	mov.f32 	%f4, %f139;
	mov.f32 	%f5, %f141;
	mov.f32 	%f6, %f143;
$Lt_37_239106:
$L_37_221442:
$Lt_37_238594:
	.loc	20	540	0
	and.b32 	%r190, %r12, 4096;
	mov.u32 	%r191, 0;
	setp.ne.s32 	%p125, %r190, %r191;
	@%p125 bra 	$Lt_37_248834;
	.loc	21	268	0
	mov.f32 	%f164, %f4;
	.loc	21	269	0
	mov.f32 	%f165, %f2;
	.loc	20	558	0
	mov.f32 	%f2, %f6;
	mov.f32 	%f4, %f5;
	mov.f32 	%f5, %f164;
	mov.f32 	%f6, %f165;
$Lt_37_248834:
	@!%p3 bra 	$Lt_37_249346;
	.loc	20	57	0
	mov.u32 	%r192, 0;
	setp.ne.s32 	%p126, %r21, %r192;
	@%p126 bra 	$Lt_37_189442;
	.loc	20	59	0
	mov.f32 	%f166, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_1;
$Lt_37_189442:
	.loc	20	61	0
	mov.u32 	%r193, 64;
	setp.ne.s32 	%p127, %r21, %r193;
	@%p127 bra 	$Lt_37_189698;
	.loc	20	63	0
	mov.f32 	%f166, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_1;
$Lt_37_189698:
	.loc	20	65	0
	mov.u32 	%r194, 128;
	setp.ne.s32 	%p128, %r21, %r194;
	@%p128 bra 	$Lt_37_189954;
	.loc	20	68	0
	mov.f32 	%f166, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_1;
$Lt_37_189954:
	.loc	20	70	0
	mov.u32 	%r195, 192;
	setp.ne.s32 	%p129, %r21, %r195;
	@%p129 bra 	$Lt_37_190210;
	.loc	20	72	0
	mov.f32 	%f166, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_215_1;
$Lt_37_190210:
	.loc	20	76	0
	mov.f32 	%f166, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_215_1:
	.loc	20	564	0
	mov.f32 	%f167, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f168, %f2, %f167;
	mov.f32 	%f169, 0f00000000;   	// 0
	mov.f32 	%f170, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p130, %f168, %f170;
	selp.f32 	%f171, %f168, %f169, %p130;
	min.ftz.f32 	%f2, %f171, %f166;
	mov.f32 	%f172, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f173, %f4, %f172;
	mov.f32 	%f174, 0f00000000;   	// 0
	mov.f32 	%f175, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p131, %f173, %f175;
	selp.f32 	%f176, %f173, %f174, %p131;
	min.ftz.f32 	%f4, %f176, %f166;
	mov.f32 	%f177, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f178, %f5, %f177;
	mov.f32 	%f179, 0f00000000;   	// 0
	mov.f32 	%f180, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p132, %f178, %f180;
	selp.f32 	%f181, %f178, %f179, %p132;
	min.ftz.f32 	%f5, %f181, %f166;
	mov.f32 	%f182, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f183, %f6, %f182;
	mov.f32 	%f184, 0f00000000;   	// 0
	mov.f32 	%f185, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p133, %f183, %f185;
	selp.f32 	%f186, %f183, %f184, %p133;
	min.ftz.f32 	%f6, %f186, %f166;
$Lt_37_249346:
	.loc	22	35	0
	mov.f32 	%f187, %f2;
	st.param.f32 	[__cudaretf__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_+0], %f187;
	mov.f32 	%f188, %f4;
	st.param.f32 	[__cudaretf__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_+4], %f188;
	mov.f32 	%f189, %f5;
	st.param.f32 	[__cudaretf__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_+8], %f189;
	mov.f32 	%f190, %f6;
	st.param.f32 	[__cudaretf__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_+12], %f190;
	ret;
$LDWend__Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_:
	} // _Z30ConvertPixel_444_8u_To_444_32f6uchar414IR_PixelFormatS0_

	.visible .func (.param .align 16 .b8 __cudaretf__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_[16]) _Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_ (.param .align 8 .b8 __cudaparmf1__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_[8], .param .s32 __cudaparmf2__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_, .param .s32 __cudaparmf3__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_)
	{
	.reg .u32 %r<197>;
	.reg .u64 %rd<3>;
	.reg .f32 %f<192>;
	.reg .pred %p<135>;
	.loc	22	44	0
$LDWbegin__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_:
	ld.param.u16 	%r1, [__cudaparmf1__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_+0];
	mov.s32 	%r2, %r1;
	ld.param.u16 	%r3, [__cudaparmf1__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_+2];
	mov.s32 	%r4, %r3;
	ld.param.u16 	%r5, [__cudaparmf1__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_+4];
	mov.s32 	%r6, %r5;
	ld.param.u16 	%r7, [__cudaparmf1__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_+6];
	mov.s32 	%r8, %r7;
	ld.param.u32 	%r9, [__cudaparmf2__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_];
	mov.s32 	%r10, %r9;
	ld.param.u32 	%r11, [__cudaparmf3__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_];
	mov.s32 	%r12, %r11;
	.loc	20	469	0
	cvt.u16.u32 	%r13, %r2;
	cvt.rn.f32.u32 	%f1, %r13;
	mov.f32 	%f2, %f1;
	cvt.u16.u32 	%r14, %r4;
	cvt.rn.f32.u32 	%f3, %r14;
	mov.f32 	%f4, %f3;
	cvt.u16.u32 	%r15, %r6;
	cvt.rn.f32.u32 	%f5, %r15;
	cvt.u16.u32 	%r16, %r8;
	cvt.rn.f32.u32 	%f6, %r16;
	and.b32 	%r17, %r10, 4096;
	mov.u32 	%r18, 0;
	setp.ne.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_38_222722;
	.loc	20	473	0
	mov.f32 	%f2, %f6;
	mov.f32 	%f4, %f5;
	mov.f32 	%f5, %f3;
	mov.f32 	%f6, %f1;
$Lt_38_222722:
	.loc	20	476	0
	and.b32 	%r19, %r10, 448;
	mov.s32 	%r20, %r10;
	and.b32 	%r21, %r12, 448;
	mov.s32 	%r22, %r12;
	mov.s32 	%r23, 256;
	setp.ne.s32 	%p2, %r19, %r23;
	and.b32 	%r24, %r20, 1;
	mov.s32 	%r25, 256;
	setp.ne.s32 	%p3, %r21, %r25;
	and.b32 	%r26, %r22, 1;
	selp.s32 	%r27, 1, 0, %p2;
	selp.s32 	%r28, 1, 0, %p3;
	and.b32 	%r29, %r24, %r27;
	and.b32 	%r30, %r26, %r28;
	mov.u32 	%r31, 0;
	setp.eq.s32 	%p4, %r29, %r31;
	@%p4 bra 	$Lt_38_249858;
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p5, %r30, %r32;
	@%p5 bra 	$Lt_38_249858;
	.loc	20	57	0
	mov.u32 	%r33, 0;
	setp.ne.s32 	%p6, %r19, %r33;
	@%p6 bra 	$Lt_38_140802;
	.loc	20	59	0
	mov.f32 	%f7, 0f437f0000;     	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_75;
$Lt_38_140802:
	.loc	20	61	0
	mov.u32 	%r34, 64;
	setp.ne.s32 	%p7, %r19, %r34;
	@%p7 bra 	$Lt_38_141058;
	.loc	20	63	0
	mov.f32 	%f7, 0f447fc000;     	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_75;
$Lt_38_141058:
	.loc	20	65	0
	mov.u32 	%r35, 128;
	setp.ne.s32 	%p8, %r19, %r35;
	@%p8 bra 	$Lt_38_141314;
	.loc	20	68	0
	mov.f32 	%f7, 0f47000000;     	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_75;
$Lt_38_141314:
	.loc	20	70	0
	mov.u32 	%r36, 192;
	setp.ne.s32 	%p9, %r19, %r36;
	@%p9 bra 	$Lt_38_141570;
	.loc	20	72	0
	mov.f32 	%f7, 0fbf800000;     	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_75;
$Lt_38_141570:
	.loc	20	76	0
	mov.f32 	%f7, 0f3f800000;     	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_75:
	.loc	20	118	0
	and.b32 	%r37, %r10, 2048;
	mov.s32 	%r38, 0;
	setp.ne.s32 	%p10, %r37, %r38;
	@!%p10 bra 	$Lt_38_223490;
	.loc	20	100	0
	ld.const.f32 	%f8, [kYCbCrOffset+0];
	bra.uni 	$Lt_38_223234;
$Lt_38_223490:
	ld.const.f32 	%f8, [kYCbCrFullRangeOffset+0];
$Lt_38_223234:
	.loc	20	118	0
	@!%p10 bra 	$Lt_38_224002;
	.loc	20	100	0
	ld.const.f32 	%f9, [kYCbCrOffset+4];
	bra.uni 	$Lt_38_223746;
$Lt_38_224002:
	ld.const.f32 	%f9, [kYCbCrFullRangeOffset+4];
$Lt_38_223746:
	.loc	20	118	0
	@!%p10 bra 	$Lt_38_224514;
	.loc	20	100	0
	ld.const.f32 	%f10, [kYCbCrOffset+8];
	bra.uni 	$Lt_38_224258;
$Lt_38_224514:
	ld.const.f32 	%f10, [kYCbCrFullRangeOffset+8];
$Lt_38_224258:
	.loc	20	478	0
	mov.f32 	%f11, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f12, %f7, %f11;
	mul.ftz.f32 	%f13, %f12, %f8;
	sub.ftz.f32 	%f4, %f4, %f13;
	mul.ftz.f32 	%f14, %f12, %f9;
	sub.ftz.f32 	%f5, %f5, %f14;
	mul.ftz.f32 	%f15, %f12, %f10;
	sub.ftz.f32 	%f6, %f6, %f15;
$Lt_38_249858:
$Lt_38_26114:
	.loc	20	481	0
	and.b32 	%r39, %r10, 2;
	and.b32 	%r40, %r12, 2;
	mov.u32 	%r41, 0;
	setp.eq.s32 	%p11, %r39, %r41;
	@%p11 bra 	$Lt_38_250370;
	mov.u32 	%r42, 0;
	setp.ne.s32 	%p12, %r40, %r42;
	@%p12 bra 	$Lt_38_250370;
	.loc	20	483	0
	mov.f32 	%f16, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p13, %f4, %f16;
	@!%p13 bra 	$Lt_38_224770;
	.loc	20	372	0
	neg.ftz.f32 	%f17, %f4;
	lg2.approx.ftz.f32 	%f18, %f17;
	mov.f32 	%f19, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f20, %f18, %f19;
	ex2.approx.ftz.f32 	%f21, %f20;
	neg.ftz.f32 	%f22, %f21;
	bra.uni 	$LDWendi___log2f_216_71;
$Lt_38_224770:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f23, %f4;
	mov.f32 	%f24, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f25, %f23, %f24;
	ex2.approx.ftz.f32 	%f22, %f25;
$LDWendi___log2f_216_71:
	.loc	20	483	0
	mov.f32 	%f26, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p14, %f5, %f26;
	@!%p14 bra 	$Lt_38_225282;
	.loc	20	372	0
	neg.ftz.f32 	%f27, %f5;
	lg2.approx.ftz.f32 	%f28, %f27;
	mov.f32 	%f29, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f30, %f28, %f29;
	ex2.approx.ftz.f32 	%f31, %f30;
	neg.ftz.f32 	%f32, %f31;
	bra.uni 	$LDWendi___log2f_216_69;
$Lt_38_225282:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f33, %f5;
	mov.f32 	%f34, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f35, %f33, %f34;
	ex2.approx.ftz.f32 	%f32, %f35;
$LDWendi___log2f_216_69:
	.loc	20	483	0
	mov.f32 	%f36, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p15, %f6, %f36;
	@!%p15 bra 	$Lt_38_225794;
	.loc	20	372	0
	neg.ftz.f32 	%f37, %f6;
	lg2.approx.ftz.f32 	%f38, %f37;
	mov.f32 	%f39, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f40, %f38, %f39;
	ex2.approx.ftz.f32 	%f41, %f40;
	neg.ftz.f32 	%f42, %f41;
	bra.uni 	$LDWendi___log2f_216_67;
$Lt_38_225794:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f43, %f6;
	mov.f32 	%f44, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f45, %f43, %f44;
	ex2.approx.ftz.f32 	%f42, %f45;
$LDWendi___log2f_216_67:
	.loc	20	483	0
	mov.f32 	%f4, %f22;
	mov.f32 	%f5, %f32;
	mov.f32 	%f6, %f42;
$Lt_38_250370:
$Lt_38_29954:
	.loc	20	486	0
	and.b32 	%r43, %r10, 1;
	and.b32 	%r44, %r12, 1;
	and.b32 	%r45, %r10, 1536;
	and.b32 	%r46, %r12, 1536;
	set.ne.u32.s32 	%r47, %r43, %r44;
	neg.s32 	%r48, %r47;
	set.ne.u32.s32 	%r49, %r45, %r46;
	neg.s32 	%r50, %r49;
	or.b32 	%r51, %r48, %r50;
	mov.u32 	%r52, 0;
	setp.ne.s32 	%p16, %r51, %r52;
	@%p16 bra 	$Lt_38_80642;
	setp.eq.s32 	%p17, %r29, %r30;
	@%p17 bra 	$Lt_38_80898;
$Lt_38_80642:
	.loc	20	490	0
	mov.u32 	%r53, 0;
	setp.ne.s32 	%p18, %r43, %r53;
	@%p18 bra 	$Lt_38_226562;
	mov.s32 	%r54, 256;
	setp.eq.s32 	%p19, %r21, %r54;
	mov.u32 	%r55, 256;
	setp.ne.s32 	%p20, %r19, %r55;
	@%p20 bra 	$Lt_38_227074;
	.loc	20	137	0
	mov.s32 	%r56, 512;
	setp.eq.s32 	%p21, %r46, %r56;
	@!%p19 bra 	$Lt_38_144898;
	.loc	20	139	0
	@!%p21 bra 	$Lt_38_145154;
	.loc	20	141	0
	mov.u64 	%rd1, kRGB32f_To_709YPbPr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_145154:
	.loc	20	145	0
	mov.u64 	%rd1, kRGB32f_To_601YPbPr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_144898:
	.loc	20	150	0
	@!%p21 bra 	$Lt_38_145410;
	.loc	20	152	0
	mov.u64 	%rd1, kRGB32f_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_145410:
	.loc	20	154	0
	and.b32 	%r57, %r12, 2048;
	mov.u32 	%r58, 0;
	setp.ne.s32 	%p22, %r57, %r58;
	@%p22 bra 	$Lt_38_145666;
	.loc	20	156	0
	mov.u64 	%rd1, kRGB32f_To_601YCbCrFullRange;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_145666:
	.loc	20	160	0
	mov.u64 	%rd1, kRGB32f_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_227074:
	@!%p19 bra 	$Lt_38_227586;
	bra.uni 	$Lt_38_226306;
$Lt_38_227586:
	.loc	20	179	0
	mov.u32 	%r59, 512;
	setp.ne.s32 	%p23, %r46, %r59;
	@%p23 bra 	$Lt_38_146434;
	.loc	20	181	0
	mov.u64 	%rd1, kRGB8u_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_146434:
	.loc	20	183	0
	and.b32 	%r60, %r12, 2048;
	mov.u32 	%r61, 0;
	setp.ne.s32 	%p24, %r60, %r61;
	@%p24 bra 	$Lt_38_146690;
	.loc	20	185	0
	mov.u64 	%rd1, kRGB8u_To_601YCbCrFullRange;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_146690:
	.loc	20	189	0
	mov.u64 	%rd1, kRGB8u_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_226562:
	mov.s32 	%r62, 0;
	setp.eq.s32 	%p25, %r44, %r62;
	mov.u32 	%r63, 512;
	setp.ne.s32 	%p26, %r45, %r63;
	@%p26 bra 	$Lt_38_228098;
	mov.s32 	%r64, 256;
	setp.eq.s32 	%p27, %r19, %r64;
	@!%p25 bra 	$Lt_38_228610;
	mov.s32 	%r65, 256;
	setp.eq.s32 	%p28, %r21, %r65;
	@!%p27 bra 	$Lt_38_229122;
	@!%p28 bra 	$Lt_38_226306;
	.loc	20	202	0
	mov.u64 	%rd1, k709YPbPr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_229122:
	.loc	20	211	0
	@!%p28 bra 	$Lt_38_147970;
	.loc	20	213	0
	mov.u64 	%rd1, k709YCbCr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_147970:
	.loc	20	217	0
	mov.u64 	%rd1, k709YCbCr_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_228610:
	@!%p27 bra 	$Lt_38_230146;
	bra.uni 	$Lt_38_226306;
$Lt_38_230146:
	mov.s32 	%r66, 256;
	set.eq.u32.s32 	%r67, %r21, %r66;
	neg.s32 	%r68, %r67;
	and.b32 	%r69, %r12, 2048;
	mov.s32 	%r70, 0;
	set.eq.u32.s32 	%r71, %r69, %r70;
	neg.s32 	%r72, %r71;
	or.b32 	%r73, %r68, %r72;
	mov.u32 	%r74, 0;
	setp.eq.s32 	%p29, %r73, %r74;
	@%p29 bra 	$Lt_38_230658;
	bra.uni 	$Lt_38_226306;
$Lt_38_230658:
	.loc	20	250	0
	mov.u64 	%rd1, k709YCbCr_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_228098:
	and.b32 	%r75, %r10, 2048;
	mov.s32 	%r76, 0;
	setp.eq.s32 	%p30, %r75, %r76;
	@!%p30 bra 	$Lt_38_231170;
	@!%p25 bra 	$Lt_38_226306;
	.loc	20	259	0
	mov.u32 	%r77, 256;
	setp.ne.s32 	%p31, %r21, %r77;
	@%p31 bra 	$Lt_38_149506;
	.loc	20	261	0
	mov.u64 	%rd1, k601YCbCrFullRange_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_149506:
	.loc	20	265	0
	mov.u64 	%rd1, k601YCbCrFullRange_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_231170:
	mov.s32 	%r78, 256;
	setp.eq.s32 	%p27, %r19, %r78;
	@!%p25 bra 	$Lt_38_232194;
	mov.s32 	%r79, 256;
	setp.eq.s32 	%p32, %r21, %r79;
	@!%p27 bra 	$Lt_38_232706;
	@!%p32 bra 	$Lt_38_226306;
	.loc	20	302	0
	mov.u64 	%rd1, k601YPbPr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_232706:
	.loc	20	311	0
	@!%p32 bra 	$Lt_38_151298;
	.loc	20	313	0
	mov.u64 	%rd1, k601YCbCr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_151298:
	.loc	20	317	0
	mov.u64 	%rd1, k601YCbCr_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_232194:
	@!%p27 bra 	$Lt_38_233730;
	bra.uni 	$Lt_38_226306;
$Lt_38_233730:
	selp.s32 	%r80, 1, 0, %p30;
	mov.s32 	%r81, 256;
	set.eq.u32.s32 	%r82, %r21, %r81;
	neg.s32 	%r83, %r82;
	or.b32 	%r84, %r80, %r83;
	mov.u32 	%r85, 0;
	setp.eq.s32 	%p33, %r84, %r85;
	@%p33 bra 	$Lt_38_234242;
	bra.uni 	$Lt_38_226306;
$Lt_38_234242:
	.loc	20	350	0
	mov.u64 	%rd1, k601YCbCr_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65;
$Lt_38_226306:
	.loc	20	355	0
	mov.u64 	%rd1, 0;
$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__216_65:
	.loc	20	490	0
	ld.global.f32 	%f46, [%rd1+16];
	mul.ftz.f32 	%f47, %f46, %f5;
	ld.global.f32 	%f48, [%rd1+12];
	fma.rn.ftz.f32 	%f49, %f48, %f4, %f47;
	ld.global.f32 	%f50, [%rd1+20];
	fma.rn.ftz.f32 	%f51, %f50, %f6, %f49;
	ld.global.f32 	%f52, [%rd1+28];
	mul.ftz.f32 	%f53, %f52, %f5;
	ld.global.f32 	%f54, [%rd1+24];
	fma.rn.ftz.f32 	%f55, %f54, %f4, %f53;
	ld.global.f32 	%f56, [%rd1+32];
	fma.rn.ftz.f32 	%f57, %f56, %f6, %f55;
	ld.global.f32 	%f58, [%rd1+4];
	mul.ftz.f32 	%f59, %f58, %f5;
	ld.global.f32 	%f60, [%rd1+0];
	fma.rn.ftz.f32 	%f61, %f60, %f4, %f59;
	ld.global.f32 	%f62, [%rd1+8];
	fma.rn.ftz.f32 	%f4, %f62, %f6, %f61;
	mov.f32 	%f5, %f51;
	mov.f32 	%f6, %f57;
	setp.eq.s32 	%p34, %r19, %r21;
	@%p34 bra 	$Lt_38_235010;
	.loc	20	494	0
	mov.s32 	%r86, 256;
	setp.eq.s32 	%p27, %r19, %r86;
	@!%p27 bra 	$L_38_220162;
	mov.s32 	%r87, 0;
	setp.eq.s32 	%p35, %r21, %r87;
	@%p35 bra 	$Lt_38_251394;
$L_38_220162:
	mov.s32 	%r88, 0;
	setp.eq.s32 	%p36, %r19, %r88;
	@!%p36 bra 	$Lt_38_251650;
	mov.u32 	%r89, 256;
	setp.ne.s32 	%p37, %r21, %r89;
	@%p37 bra 	$Lt_38_251650;
	mov.s32 	%r90, 0;
	setp.eq.s32 	%p35, %r21, %r90;
	bra.uni 	$L_38_219906;
$Lt_38_251394:
	mov.s32 	%r91, 0;
	setp.eq.s32 	%p36, %r19, %r91;
$L_38_219906:
	.loc	20	57	0
	@!%p35 bra 	$Lt_38_152834;
	.loc	20	59	0
	mov.f32 	%f63, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_63;
$Lt_38_152834:
	.loc	20	61	0
	mov.u32 	%r92, 64;
	setp.ne.s32 	%p38, %r21, %r92;
	@%p38 bra 	$Lt_38_153090;
	.loc	20	63	0
	mov.f32 	%f63, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_63;
$Lt_38_153090:
	.loc	20	65	0
	mov.u32 	%r93, 128;
	setp.ne.s32 	%p39, %r21, %r93;
	@%p39 bra 	$Lt_38_153346;
	.loc	20	68	0
	mov.f32 	%f63, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_63;
$Lt_38_153346:
	.loc	20	70	0
	mov.u32 	%r94, 192;
	setp.ne.s32 	%p40, %r21, %r94;
	@%p40 bra 	$Lt_38_153602;
	.loc	20	72	0
	mov.f32 	%f63, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_63;
$Lt_38_153602:
	.loc	20	76	0
	mov.f32 	%f63, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_63:
	.loc	20	57	0
	@!%p36 bra 	$Lt_38_153858;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_61;
$Lt_38_153858:
	.loc	20	61	0
	mov.u32 	%r95, 64;
	setp.ne.s32 	%p41, %r19, %r95;
	@%p41 bra 	$Lt_38_154114;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_61;
$Lt_38_154114:
	.loc	20	65	0
	mov.u32 	%r96, 128;
	setp.ne.s32 	%p42, %r19, %r96;
	@%p42 bra 	$Lt_38_154370;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_61;
$Lt_38_154370:
	.loc	20	70	0
	mov.u32 	%r97, 192;
	setp.ne.s32 	%p43, %r19, %r97;
	@%p43 bra 	$Lt_38_154626;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_61;
$Lt_38_154626:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_61:
	.loc	20	498	0
	div.approx.ftz.f32 	%f65, %f63, %f64;
	mul.ftz.f32 	%f2, %f2, %f65;
	bra.uni 	$Lt_38_235010;
$Lt_38_251650:
$L_38_219650:
	.loc	20	500	0
	@!%p27 bra 	$L_38_221186;
	@%p3 bra 	$L_38_220930;
$L_38_221186:
	@!%p2 bra 	$Lt_38_252674;
	mov.u32 	%r98, 256;
	setp.ne.s32 	%p44, %r21, %r98;
	@%p44 bra 	$Lt_38_252674;
$L_38_220930:
	.loc	20	57	0
	mov.u32 	%r99, 0;
	setp.ne.s32 	%p45, %r21, %r99;
	@%p45 bra 	$Lt_38_155138;
	.loc	20	59	0
	mov.f32 	%f63, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_59;
$Lt_38_155138:
	.loc	20	61	0
	mov.u32 	%r100, 64;
	setp.ne.s32 	%p46, %r21, %r100;
	@%p46 bra 	$Lt_38_155394;
	.loc	20	63	0
	mov.f32 	%f63, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_59;
$Lt_38_155394:
	.loc	20	65	0
	mov.u32 	%r101, 128;
	setp.ne.s32 	%p47, %r21, %r101;
	@%p47 bra 	$Lt_38_155650;
	.loc	20	68	0
	mov.f32 	%f63, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_59;
$Lt_38_155650:
	.loc	20	70	0
	mov.u32 	%r102, 192;
	setp.ne.s32 	%p48, %r21, %r102;
	@%p48 bra 	$Lt_38_155906;
	.loc	20	72	0
	mov.f32 	%f63, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_59;
$Lt_38_155906:
	.loc	20	76	0
	mov.f32 	%f63, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_59:
	.loc	20	57	0
	@!%p36 bra 	$Lt_38_156162;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_57;
$Lt_38_156162:
	.loc	20	61	0
	mov.u32 	%r103, 64;
	setp.ne.s32 	%p49, %r19, %r103;
	@%p49 bra 	$Lt_38_156418;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_57;
$Lt_38_156418:
	.loc	20	65	0
	mov.u32 	%r104, 128;
	setp.ne.s32 	%p50, %r19, %r104;
	@%p50 bra 	$Lt_38_156674;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_57;
$Lt_38_156674:
	.loc	20	70	0
	mov.u32 	%r105, 192;
	setp.ne.s32 	%p51, %r19, %r105;
	@%p51 bra 	$Lt_38_156930;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_57;
$Lt_38_156930:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_57:
	.loc	20	504	0
	div.approx.ftz.f32 	%f66, %f63, %f64;
	mul.ftz.f32 	%f2, %f2, %f66;
	.loc	20	57	0
	@!%p36 bra 	$Lt_38_158210;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_53;
$Lt_38_158210:
	.loc	20	61	0
	mov.u32 	%r106, 64;
	setp.ne.s32 	%p52, %r19, %r106;
	@%p52 bra 	$Lt_38_158466;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_53;
$Lt_38_158466:
	.loc	20	65	0
	mov.u32 	%r107, 128;
	setp.ne.s32 	%p53, %r19, %r107;
	@%p53 bra 	$Lt_38_158722;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_53;
$Lt_38_158722:
	.loc	20	70	0
	mov.u32 	%r108, 192;
	setp.ne.s32 	%p54, %r19, %r108;
	@%p54 bra 	$Lt_38_158978;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_53;
$Lt_38_158978:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_53:
	.loc	20	505	0
	mov.f32 	%f67, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f68, %f67, %f64;
	mul.ftz.f32 	%f4, %f4, %f68;
	.loc	20	57	0
	@!%p36 bra 	$Lt_38_160258;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_49;
$Lt_38_160258:
	.loc	20	61	0
	mov.u32 	%r109, 64;
	setp.ne.s32 	%p55, %r19, %r109;
	@%p55 bra 	$Lt_38_160514;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_49;
$Lt_38_160514:
	.loc	20	65	0
	mov.u32 	%r110, 128;
	setp.ne.s32 	%p56, %r19, %r110;
	@%p56 bra 	$Lt_38_160770;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_49;
$Lt_38_160770:
	.loc	20	70	0
	mov.u32 	%r111, 192;
	setp.ne.s32 	%p57, %r19, %r111;
	@%p57 bra 	$Lt_38_161026;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_49;
$Lt_38_161026:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_49:
	.loc	20	506	0
	mov.f32 	%f69, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f70, %f69, %f64;
	mul.ftz.f32 	%f5, %f51, %f70;
	.loc	20	57	0
	@!%p36 bra 	$Lt_38_162306;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_45;
$Lt_38_162306:
	.loc	20	61	0
	mov.u32 	%r112, 64;
	setp.ne.s32 	%p58, %r19, %r112;
	@%p58 bra 	$Lt_38_162562;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_45;
$Lt_38_162562:
	.loc	20	65	0
	mov.u32 	%r113, 128;
	setp.ne.s32 	%p59, %r19, %r113;
	@%p59 bra 	$Lt_38_162818;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_45;
$Lt_38_162818:
	.loc	20	70	0
	mov.u32 	%r114, 192;
	setp.ne.s32 	%p60, %r19, %r114;
	@%p60 bra 	$Lt_38_163074;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_45;
$Lt_38_163074:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_45:
	.loc	20	507	0
	mov.f32 	%f71, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f72, %f71, %f64;
	mul.ftz.f32 	%f6, %f57, %f72;
	bra.uni 	$Lt_38_235010;
$Lt_38_252674:
$L_38_220674:
	.loc	20	57	0
	@!%p36 bra 	$Lt_38_163330;
	.loc	20	59	0
	mov.f32 	%f63, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_43;
$Lt_38_163330:
	.loc	20	61	0
	mov.u32 	%r115, 64;
	setp.ne.s32 	%p61, %r19, %r115;
	@%p61 bra 	$Lt_38_163586;
	.loc	20	63	0
	mov.f32 	%f63, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_43;
$Lt_38_163586:
	.loc	20	65	0
	mov.u32 	%r116, 128;
	setp.ne.s32 	%p62, %r19, %r116;
	@%p62 bra 	$Lt_38_163842;
	.loc	20	68	0
	mov.f32 	%f63, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_43;
$Lt_38_163842:
	.loc	20	70	0
	mov.u32 	%r117, 192;
	setp.ne.s32 	%p63, %r19, %r117;
	@%p63 bra 	$Lt_38_164098;
	.loc	20	72	0
	mov.f32 	%f63, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_43;
$Lt_38_164098:
	.loc	20	76	0
	mov.f32 	%f63, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_43:
	.loc	20	511	0
	mov.f32 	%f73, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f74, %f63, %f73;
	mul.ftz.f32 	%f2, %f74, %f2;
	mul.ftz.f32 	%f4, %f74, %f4;
	mul.ftz.f32 	%f5, %f74, %f51;
	mul.ftz.f32 	%f6, %f74, %f57;
	bra.uni 	$Lt_38_235010;
$Lt_38_80898:
	.loc	20	486	0
	setp.eq.s32 	%p64, %r19, %r21;
	@%p64 bra 	$Lt_38_235010;
	.loc	20	57	0
	mov.u32 	%r118, 0;
	setp.ne.s32 	%p65, %r21, %r118;
	@%p65 bra 	$Lt_38_165634;
	.loc	20	59	0
	mov.f32 	%f63, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_39;
$Lt_38_165634:
	.loc	20	61	0
	mov.u32 	%r119, 64;
	setp.ne.s32 	%p66, %r21, %r119;
	@%p66 bra 	$Lt_38_165890;
	.loc	20	63	0
	mov.f32 	%f63, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_39;
$Lt_38_165890:
	.loc	20	65	0
	mov.u32 	%r120, 128;
	setp.ne.s32 	%p67, %r21, %r120;
	@%p67 bra 	$Lt_38_166146;
	.loc	20	68	0
	mov.f32 	%f63, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_39;
$Lt_38_166146:
	.loc	20	70	0
	mov.u32 	%r121, 192;
	setp.ne.s32 	%p68, %r21, %r121;
	@%p68 bra 	$Lt_38_166402;
	.loc	20	72	0
	mov.f32 	%f63, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_39;
$Lt_38_166402:
	.loc	20	76	0
	mov.f32 	%f63, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_39:
	.loc	20	57	0
	mov.u32 	%r122, 0;
	setp.ne.s32 	%p69, %r19, %r122;
	@%p69 bra 	$Lt_38_166658;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_37;
$Lt_38_166658:
	.loc	20	61	0
	mov.u32 	%r123, 64;
	setp.ne.s32 	%p70, %r19, %r123;
	@%p70 bra 	$Lt_38_166914;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_37;
$Lt_38_166914:
	.loc	20	65	0
	mov.u32 	%r124, 128;
	setp.ne.s32 	%p71, %r19, %r124;
	@%p71 bra 	$Lt_38_167170;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_37;
$Lt_38_167170:
	.loc	20	70	0
	mov.u32 	%r125, 192;
	setp.ne.s32 	%p72, %r19, %r125;
	@%p72 bra 	$Lt_38_167426;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_37;
$Lt_38_167426:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_37:
	.loc	20	517	0
	div.approx.ftz.f32 	%f75, %f63, %f64;
	mul.ftz.f32 	%f2, %f75, %f2;
	mul.ftz.f32 	%f4, %f75, %f4;
	mul.ftz.f32 	%f5, %f75, %f5;
	mul.ftz.f32 	%f6, %f75, %f6;
$Lt_38_235010:
$Lt_38_83202:
	.loc	20	520	0
	mov.u32 	%r126, 0;
	setp.eq.s32 	%p73, %r40, %r126;
	@%p73 bra 	$Lt_38_253186;
	mov.u32 	%r127, 0;
	setp.ne.s32 	%p74, %r39, %r127;
	@%p74 bra 	$Lt_38_253186;
	.loc	20	522	0
	mov.f32 	%f76, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p75, %f4, %f76;
	@!%p75 bra 	$Lt_38_235522;
	.loc	20	372	0
	neg.ftz.f32 	%f77, %f4;
	lg2.approx.ftz.f32 	%f78, %f77;
	mov.f32 	%f79, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f80, %f78, %f79;
	ex2.approx.ftz.f32 	%f81, %f80;
	neg.ftz.f32 	%f82, %f81;
	bra.uni 	$LDWendi___log2f_216_35;
$Lt_38_235522:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f83, %f4;
	mov.f32 	%f84, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f85, %f83, %f84;
	ex2.approx.ftz.f32 	%f82, %f85;
$LDWendi___log2f_216_35:
	.loc	20	522	0
	mov.f32 	%f86, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p76, %f5, %f86;
	@!%p76 bra 	$Lt_38_236034;
	.loc	20	372	0
	neg.ftz.f32 	%f87, %f5;
	lg2.approx.ftz.f32 	%f88, %f87;
	mov.f32 	%f89, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f90, %f88, %f89;
	ex2.approx.ftz.f32 	%f91, %f90;
	neg.ftz.f32 	%f92, %f91;
	bra.uni 	$LDWendi___log2f_216_33;
$Lt_38_236034:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f93, %f5;
	mov.f32 	%f94, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f95, %f93, %f94;
	ex2.approx.ftz.f32 	%f92, %f95;
$LDWendi___log2f_216_33:
	.loc	20	522	0
	mov.f32 	%f96, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p77, %f6, %f96;
	@!%p77 bra 	$Lt_38_236546;
	.loc	20	372	0
	neg.ftz.f32 	%f97, %f6;
	lg2.approx.ftz.f32 	%f98, %f97;
	mov.f32 	%f99, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f100, %f98, %f99;
	ex2.approx.ftz.f32 	%f101, %f100;
	neg.ftz.f32 	%f102, %f101;
	bra.uni 	$LDWendi___log2f_216_31;
$Lt_38_236546:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f103, %f6;
	mov.f32 	%f104, 0f400e38e4;   	// 2.22222
	mul.ftz.f32 	%f105, %f103, %f104;
	ex2.approx.ftz.f32 	%f102, %f105;
$LDWendi___log2f_216_31:
	.loc	20	522	0
	mov.f32 	%f4, %f82;
	mov.f32 	%f5, %f92;
	mov.f32 	%f6, %f102;
$Lt_38_253186:
$Lt_38_85250:
	.loc	20	525	0
	mov.u32 	%r128, 0;
	setp.eq.s32 	%p78, %r30, %r128;
	@%p78 bra 	$Lt_38_253698;
	mov.u32 	%r129, 0;
	setp.ne.s32 	%p79, %r29, %r129;
	@%p79 bra 	$Lt_38_253698;
	.loc	20	57	0
	mov.u32 	%r130, 0;
	setp.ne.s32 	%p80, %r21, %r130;
	@%p80 bra 	$Lt_38_168450;
	.loc	20	59	0
	mov.f32 	%f106, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_29;
$Lt_38_168450:
	.loc	20	61	0
	mov.u32 	%r131, 64;
	setp.ne.s32 	%p81, %r21, %r131;
	@%p81 bra 	$Lt_38_168706;
	.loc	20	63	0
	mov.f32 	%f106, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_29;
$Lt_38_168706:
	.loc	20	65	0
	mov.u32 	%r132, 128;
	setp.ne.s32 	%p82, %r21, %r132;
	@%p82 bra 	$Lt_38_168962;
	.loc	20	68	0
	mov.f32 	%f106, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_29;
$Lt_38_168962:
	.loc	20	70	0
	mov.u32 	%r133, 192;
	setp.ne.s32 	%p83, %r21, %r133;
	@%p83 bra 	$Lt_38_169218;
	.loc	20	72	0
	mov.f32 	%f106, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_29;
$Lt_38_169218:
	.loc	20	76	0
	mov.f32 	%f106, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_29:
	.loc	20	107	0
	and.b32 	%r134, %r12, 2048;
	mov.s32 	%r135, 0;
	setp.ne.s32 	%p84, %r134, %r135;
	@!%p84 bra 	$Lt_38_237314;
	.loc	20	100	0
	ld.const.f32 	%f107, [kYCbCrOffset+0];
	bra.uni 	$Lt_38_237058;
$Lt_38_237314:
	ld.const.f32 	%f107, [kYCbCrFullRangeOffset+0];
$Lt_38_237058:
	.loc	20	107	0
	@!%p84 bra 	$Lt_38_237826;
	.loc	20	100	0
	ld.const.f32 	%f108, [kYCbCrOffset+4];
	bra.uni 	$Lt_38_237570;
$Lt_38_237826:
	ld.const.f32 	%f108, [kYCbCrFullRangeOffset+4];
$Lt_38_237570:
	.loc	20	107	0
	@!%p84 bra 	$Lt_38_238338;
	.loc	20	100	0
	ld.const.f32 	%f109, [kYCbCrOffset+8];
	bra.uni 	$Lt_38_238082;
$Lt_38_238338:
	ld.const.f32 	%f109, [kYCbCrFullRangeOffset+8];
$Lt_38_238082:
	.loc	20	527	0
	mov.f32 	%f110, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f111, %f106, %f110;
	fma.rn.ftz.f32 	%f4, %f111, %f107, %f4;
	fma.rn.ftz.f32 	%f5, %f111, %f108, %f5;
	fma.rn.ftz.f32 	%f6, %f111, %f109, %f6;
$Lt_38_253698:
$Lt_38_91650:
	.loc	20	525	0
	and.b32 	%r136, %r10, 12;
	and.b32 	%r137, %r12, 12;
	setp.eq.s32 	%p85, %r136, %r137;
	@%p85 bra 	$Lt_38_239106;
	.loc	20	532	0
	mov.u32 	%r138, 8;
	setp.ne.s32 	%p86, %r136, %r138;
	@%p86 bra 	$L_38_222466;
	mov.u32 	%r139, 12;
	setp.eq.s32 	%p87, %r137, %r139;
	@%p87 bra 	$Lt_38_254466;
$L_38_222466:
	mov.u32 	%r140, 12;
	setp.eq.s32 	%p88, %r136, %r140;
	@%p88 bra 	$Lt_38_254466;
	mov.u32 	%r141, 0;
	setp.ne.s32 	%p89, %r136, %r141;
	@%p89 bra 	$L_38_221698;
$Lt_38_254466:
$L_38_221954:
	.loc	20	57	0
	mov.u32 	%r142, 0;
	setp.ne.s32 	%p90, %r21, %r142;
	@%p90 bra 	$Lt_38_171778;
	.loc	20	59	0
	mov.f32 	%f112, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_25;
$Lt_38_171778:
	.loc	20	61	0
	mov.u32 	%r143, 64;
	setp.ne.s32 	%p91, %r21, %r143;
	@%p91 bra 	$Lt_38_172034;
	.loc	20	63	0
	mov.f32 	%f112, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_25;
$Lt_38_172034:
	.loc	20	65	0
	mov.u32 	%r144, 128;
	setp.ne.s32 	%p92, %r21, %r144;
	@%p92 bra 	$Lt_38_172290;
	.loc	20	68	0
	mov.f32 	%f112, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_25;
$Lt_38_172290:
	.loc	20	70	0
	mov.u32 	%r145, 192;
	setp.ne.s32 	%p93, %r21, %r145;
	@%p93 bra 	$Lt_38_172546;
	.loc	20	72	0
	mov.f32 	%f112, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_25;
$Lt_38_172546:
	.loc	20	76	0
	mov.f32 	%f112, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_25:
	.loc	20	536	0
	mov.f32 	%f2, %f112;
	bra.uni 	$Lt_38_239106;
$L_38_221698:
	.loc	20	540	0
	mov.s32 	%r146, 12;
	setp.eq.s32 	%p94, %r137, %r146;
	mov.s32 	%r147, 4;
	set.eq.u32.s32 	%r148, %r136, %r147;
	neg.s32 	%r149, %r148;
	selp.s32 	%r150, 1, 0, %p94;
	mov.s32 	%r151, 8;
	set.eq.u32.s32 	%r152, %r137, %r151;
	neg.s32 	%r153, %r152;
	or.b32 	%r154, %r150, %r153;
	and.b32 	%r155, %r149, %r154;
	mov.u32 	%r156, 0;
	setp.eq.s32 	%p95, %r155, %r156;
	@%p95 bra 	$Lt_38_239362;
	.loc	20	410	0
	mov.f32 	%f113, %f4;
	mov.f32 	%f114, %f113;
	mov.f32 	%f115, %f5;
	mov.f32 	%f116, %f115;
	mov.f32 	%f117, %f6;
	mov.f32 	%f118, %f117;
	.loc	20	57	0
	mov.s32 	%r157, 0;
	setp.eq.s32 	%p35, %r21, %r157;
	@!%p35 bra 	$Lt_38_173058;
	.loc	20	59	0
	mov.f32 	%f119, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_23;
$Lt_38_173058:
	.loc	20	61	0
	mov.u32 	%r158, 64;
	setp.ne.s32 	%p96, %r21, %r158;
	@%p96 bra 	$Lt_38_173314;
	.loc	20	63	0
	mov.f32 	%f119, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_23;
$Lt_38_173314:
	.loc	20	65	0
	mov.u32 	%r159, 128;
	setp.ne.s32 	%p97, %r21, %r159;
	@%p97 bra 	$Lt_38_173570;
	.loc	20	68	0
	mov.f32 	%f119, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_23;
$Lt_38_173570:
	.loc	20	70	0
	mov.u32 	%r160, 192;
	setp.ne.s32 	%p98, %r21, %r160;
	@%p98 bra 	$Lt_38_173826;
	.loc	20	72	0
	mov.f32 	%f119, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_23;
$Lt_38_173826:
	.loc	20	76	0
	mov.f32 	%f119, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_23:
	.loc	20	413	0
	mov.u32 	%r161, 0;
	setp.eq.s32 	%p99, %r30, %r161;
	@%p99 bra 	$Lt_38_239618;
	.loc	20	57	0
	@!%p35 bra 	$Lt_38_174338;
	.loc	20	59	0
	mov.f32 	%f120, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_21;
$Lt_38_174338:
	.loc	20	61	0
	mov.u32 	%r162, 64;
	setp.ne.s32 	%p100, %r21, %r162;
	@%p100 bra 	$Lt_38_174594;
	.loc	20	63	0
	mov.f32 	%f120, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_21;
$Lt_38_174594:
	.loc	20	65	0
	mov.u32 	%r163, 128;
	setp.ne.s32 	%p101, %r21, %r163;
	@%p101 bra 	$Lt_38_174850;
	.loc	20	68	0
	mov.f32 	%f120, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_21;
$Lt_38_174850:
	.loc	20	70	0
	mov.u32 	%r164, 192;
	setp.ne.s32 	%p102, %r21, %r164;
	@%p102 bra 	$Lt_38_175106;
	.loc	20	72	0
	mov.f32 	%f120, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_21;
$Lt_38_175106:
	.loc	20	76	0
	mov.f32 	%f120, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_21:
	.loc	20	118	0
	and.b32 	%r134, %r12, 2048;
	mov.s32 	%r165, 0;
	setp.ne.s32 	%p84, %r134, %r165;
	@!%p84 bra 	$Lt_38_240386;
	.loc	20	100	0
	ld.const.f32 	%f121, [kYCbCrOffset+0];
	bra.uni 	$Lt_38_240130;
$Lt_38_240386:
	ld.const.f32 	%f121, [kYCbCrFullRangeOffset+0];
$Lt_38_240130:
	.loc	20	118	0
	@!%p84 bra 	$Lt_38_240898;
	.loc	20	100	0
	ld.const.f32 	%f122, [kYCbCrOffset+4];
	bra.uni 	$Lt_38_240642;
$Lt_38_240898:
	ld.const.f32 	%f122, [kYCbCrFullRangeOffset+4];
$Lt_38_240642:
	.loc	20	118	0
	@!%p84 bra 	$Lt_38_241410;
	.loc	20	100	0
	ld.const.f32 	%f123, [kYCbCrOffset+8];
	bra.uni 	$Lt_38_241154;
$Lt_38_241410:
	ld.const.f32 	%f123, [kYCbCrFullRangeOffset+8];
$Lt_38_241154:
	.loc	20	415	0
	mov.f32 	%f124, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f125, %f120, %f124;
	mul.ftz.f32 	%f126, %f125, %f121;
	sub.ftz.f32 	%f114, %f113, %f126;
	mul.ftz.f32 	%f127, %f125, %f122;
	sub.ftz.f32 	%f116, %f115, %f127;
	mul.ftz.f32 	%f128, %f125, %f123;
	sub.ftz.f32 	%f118, %f117, %f128;
$Lt_38_239618:
	.loc	20	418	0
	rcp.approx.ftz.f32 	%f129, %f119;
	mul.ftz.f32 	%f130, %f129, %f2;
	mul.ftz.f32 	%f114, %f130, %f114;
	.loc	20	419	0
	mul.ftz.f32 	%f116, %f130, %f116;
	.loc	20	420	0
	mul.ftz.f32 	%f118, %f130, %f118;
	.loc	20	422	0
	mov.u32 	%r166, 0;
	setp.eq.s32 	%p103, %r30, %r166;
	@%p103 bra 	$Lt_38_241666;
	.loc	20	57	0
	@!%p35 bra 	$Lt_38_177410;
	.loc	20	59	0
	mov.f32 	%f131, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_17;
$Lt_38_177410:
	.loc	20	61	0
	mov.u32 	%r167, 64;
	setp.ne.s32 	%p104, %r21, %r167;
	@%p104 bra 	$Lt_38_177666;
	.loc	20	63	0
	mov.f32 	%f131, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_17;
$Lt_38_177666:
	.loc	20	65	0
	mov.u32 	%r168, 128;
	setp.ne.s32 	%p105, %r21, %r168;
	@%p105 bra 	$Lt_38_177922;
	.loc	20	68	0
	mov.f32 	%f131, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_17;
$Lt_38_177922:
	.loc	20	70	0
	mov.u32 	%r169, 192;
	setp.ne.s32 	%p106, %r21, %r169;
	@%p106 bra 	$Lt_38_178178;
	.loc	20	72	0
	mov.f32 	%f131, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_17;
$Lt_38_178178:
	.loc	20	76	0
	mov.f32 	%f131, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_17:
	.loc	20	107	0
	and.b32 	%r134, %r12, 2048;
	mov.s32 	%r170, 0;
	setp.ne.s32 	%p84, %r134, %r170;
	@!%p84 bra 	$Lt_38_242434;
	.loc	20	100	0
	ld.const.f32 	%f132, [kYCbCrOffset+0];
	bra.uni 	$Lt_38_242178;
$Lt_38_242434:
	ld.const.f32 	%f132, [kYCbCrFullRangeOffset+0];
$Lt_38_242178:
	.loc	20	107	0
	@!%p84 bra 	$Lt_38_242946;
	.loc	20	100	0
	ld.const.f32 	%f133, [kYCbCrOffset+4];
	bra.uni 	$Lt_38_242690;
$Lt_38_242946:
	ld.const.f32 	%f133, [kYCbCrFullRangeOffset+4];
$Lt_38_242690:
	.loc	20	107	0
	@!%p84 bra 	$Lt_38_243458;
	.loc	20	100	0
	ld.const.f32 	%f134, [kYCbCrOffset+8];
	bra.uni 	$Lt_38_243202;
$Lt_38_243458:
	ld.const.f32 	%f134, [kYCbCrFullRangeOffset+8];
$Lt_38_243202:
	.loc	20	424	0
	mov.f32 	%f135, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f136, %f131, %f135;
	fma.rn.ftz.f32 	%f114, %f136, %f132, %f114;
	fma.rn.ftz.f32 	%f116, %f136, %f133, %f116;
	fma.rn.ftz.f32 	%f118, %f136, %f134, %f118;
$Lt_38_241666:
	.loc	20	543	0
	mov.f32 	%f4, %f114;
	mov.f32 	%f5, %f116;
	mov.f32 	%f6, %f118;
	@!%p94 bra 	$Lt_38_239106;
	.loc	20	57	0
	@!%p35 bra 	$Lt_38_180482;
	.loc	20	59	0
	mov.f32 	%f112, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_13;
$Lt_38_180482:
	.loc	20	61	0
	mov.u32 	%r171, 64;
	setp.ne.s32 	%p107, %r21, %r171;
	@%p107 bra 	$Lt_38_180738;
	.loc	20	63	0
	mov.f32 	%f112, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_13;
$Lt_38_180738:
	.loc	20	65	0
	mov.u32 	%r172, 128;
	setp.ne.s32 	%p108, %r21, %r172;
	@%p108 bra 	$Lt_38_180994;
	.loc	20	68	0
	mov.f32 	%f112, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_13;
$Lt_38_180994:
	.loc	20	70	0
	mov.u32 	%r173, 192;
	setp.ne.s32 	%p109, %r21, %r173;
	@%p109 bra 	$Lt_38_181250;
	.loc	20	72	0
	mov.f32 	%f112, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_13;
$Lt_38_181250:
	.loc	20	76	0
	mov.f32 	%f112, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_13:
	.loc	20	546	0
	mov.f32 	%f2, %f112;
	bra.uni 	$Lt_38_239106;
$Lt_38_239362:
	.loc	20	433	0
	mov.f32 	%f137, %f2;
	mov.f32 	%f138, %f4;
	mov.f32 	%f139, %f138;
	mov.f32 	%f140, %f5;
	mov.f32 	%f141, %f140;
	mov.f32 	%f142, %f6;
	mov.f32 	%f143, %f142;
	.loc	20	435	0
	mov.u32 	%r174, 0;
	setp.eq.s32 	%p110, %r30, %r174;
	@%p110 bra 	$Lt_38_244226;
	.loc	20	57	0
	mov.u32 	%r175, 0;
	setp.ne.s32 	%p111, %r21, %r175;
	@%p111 bra 	$Lt_38_181762;
	.loc	20	59	0
	mov.f32 	%f144, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_11;
$Lt_38_181762:
	.loc	20	61	0
	mov.u32 	%r176, 64;
	setp.ne.s32 	%p112, %r21, %r176;
	@%p112 bra 	$Lt_38_182018;
	.loc	20	63	0
	mov.f32 	%f144, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_11;
$Lt_38_182018:
	.loc	20	65	0
	mov.u32 	%r177, 128;
	setp.ne.s32 	%p113, %r21, %r177;
	@%p113 bra 	$Lt_38_182274;
	.loc	20	68	0
	mov.f32 	%f144, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_11;
$Lt_38_182274:
	.loc	20	70	0
	mov.u32 	%r178, 192;
	setp.ne.s32 	%p114, %r21, %r178;
	@%p114 bra 	$Lt_38_182530;
	.loc	20	72	0
	mov.f32 	%f144, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_11;
$Lt_38_182530:
	.loc	20	76	0
	mov.f32 	%f144, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_11:
	.loc	20	118	0
	and.b32 	%r134, %r12, 2048;
	mov.s32 	%r179, 0;
	setp.ne.s32 	%p84, %r134, %r179;
	@!%p84 bra 	$Lt_38_244994;
	.loc	20	100	0
	ld.const.f32 	%f145, [kYCbCrOffset+0];
	bra.uni 	$Lt_38_244738;
$Lt_38_244994:
	ld.const.f32 	%f145, [kYCbCrFullRangeOffset+0];
$Lt_38_244738:
	.loc	20	118	0
	@!%p84 bra 	$Lt_38_245506;
	.loc	20	100	0
	ld.const.f32 	%f146, [kYCbCrOffset+4];
	bra.uni 	$Lt_38_245250;
$Lt_38_245506:
	ld.const.f32 	%f146, [kYCbCrFullRangeOffset+4];
$Lt_38_245250:
	.loc	20	118	0
	@!%p84 bra 	$Lt_38_246018;
	.loc	20	100	0
	ld.const.f32 	%f147, [kYCbCrOffset+8];
	bra.uni 	$Lt_38_245762;
$Lt_38_246018:
	ld.const.f32 	%f147, [kYCbCrFullRangeOffset+8];
$Lt_38_245762:
	.loc	20	437	0
	mov.f32 	%f148, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f149, %f144, %f148;
	mul.ftz.f32 	%f150, %f149, %f145;
	sub.ftz.f32 	%f139, %f138, %f150;
	mul.ftz.f32 	%f151, %f149, %f146;
	sub.ftz.f32 	%f141, %f140, %f151;
	mul.ftz.f32 	%f152, %f149, %f147;
	sub.ftz.f32 	%f143, %f142, %f152;
$Lt_38_244226:
	mov.f32 	%f153, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f154, %f2, %f153;
	mov.f32 	%f155, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p115, %f154, %f155;
	@!%p115 bra 	$Lt_38_246530;
	mov.f32 	%f143, 0f00000000;   	// 0
	mov.f32 	%f141, 0f00000000;   	// 0
	mov.f32 	%f139, 0f00000000;   	// 0
	mov.f32 	%f137, 0f00000000;   	// 0
	bra.uni 	$Lt_38_246274;
$Lt_38_246530:
	.loc	20	57	0
	mov.u32 	%r180, 0;
	setp.ne.s32 	%p116, %r21, %r180;
	@%p116 bra 	$Lt_38_184834;
	.loc	20	59	0
	mov.f32 	%f156, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_7;
$Lt_38_184834:
	.loc	20	61	0
	mov.u32 	%r181, 64;
	setp.ne.s32 	%p117, %r21, %r181;
	@%p117 bra 	$Lt_38_185090;
	.loc	20	63	0
	mov.f32 	%f156, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_7;
$Lt_38_185090:
	.loc	20	65	0
	mov.u32 	%r182, 128;
	setp.ne.s32 	%p118, %r21, %r182;
	@%p118 bra 	$Lt_38_185346;
	.loc	20	68	0
	mov.f32 	%f156, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_7;
$Lt_38_185346:
	.loc	20	70	0
	mov.u32 	%r183, 192;
	setp.ne.s32 	%p119, %r21, %r183;
	@%p119 bra 	$Lt_38_185602;
	.loc	20	72	0
	mov.f32 	%f156, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_7;
$Lt_38_185602:
	.loc	20	76	0
	mov.f32 	%f156, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_7:
	.loc	20	447	0
	div.approx.ftz.f32 	%f157, %f156, %f2;
	mul.ftz.f32 	%f139, %f157, %f139;
	.loc	20	448	0
	mul.ftz.f32 	%f141, %f157, %f141;
	.loc	20	449	0
	mul.ftz.f32 	%f143, %f157, %f143;
$Lt_38_246274:
	.loc	20	452	0
	mov.u32 	%r184, 0;
	setp.eq.s32 	%p120, %r30, %r184;
	@%p120 bra 	$Lt_38_246786;
	.loc	20	57	0
	mov.u32 	%r185, 0;
	setp.ne.s32 	%p121, %r21, %r185;
	@%p121 bra 	$Lt_38_186114;
	.loc	20	59	0
	mov.f32 	%f158, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_5;
$Lt_38_186114:
	.loc	20	61	0
	mov.u32 	%r186, 64;
	setp.ne.s32 	%p122, %r21, %r186;
	@%p122 bra 	$Lt_38_186370;
	.loc	20	63	0
	mov.f32 	%f158, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_5;
$Lt_38_186370:
	.loc	20	65	0
	mov.u32 	%r187, 128;
	setp.ne.s32 	%p123, %r21, %r187;
	@%p123 bra 	$Lt_38_186626;
	.loc	20	68	0
	mov.f32 	%f158, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_5;
$Lt_38_186626:
	.loc	20	70	0
	mov.u32 	%r188, 192;
	setp.ne.s32 	%p124, %r21, %r188;
	@%p124 bra 	$Lt_38_186882;
	.loc	20	72	0
	mov.f32 	%f158, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_5;
$Lt_38_186882:
	.loc	20	76	0
	mov.f32 	%f158, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_5:
	.loc	20	107	0
	and.b32 	%r134, %r12, 2048;
	mov.s32 	%r189, 0;
	setp.ne.s32 	%p84, %r134, %r189;
	@!%p84 bra 	$Lt_38_247554;
	.loc	20	100	0
	ld.const.f32 	%f159, [kYCbCrOffset+0];
	bra.uni 	$Lt_38_247298;
$Lt_38_247554:
	ld.const.f32 	%f159, [kYCbCrFullRangeOffset+0];
$Lt_38_247298:
	.loc	20	107	0
	@!%p84 bra 	$Lt_38_248066;
	.loc	20	100	0
	ld.const.f32 	%f160, [kYCbCrOffset+4];
	bra.uni 	$Lt_38_247810;
$Lt_38_248066:
	ld.const.f32 	%f160, [kYCbCrFullRangeOffset+4];
$Lt_38_247810:
	.loc	20	107	0
	@!%p84 bra 	$Lt_38_248578;
	.loc	20	100	0
	ld.const.f32 	%f161, [kYCbCrOffset+8];
	bra.uni 	$Lt_38_248322;
$Lt_38_248578:
	ld.const.f32 	%f161, [kYCbCrFullRangeOffset+8];
$Lt_38_248322:
	.loc	20	454	0
	mov.f32 	%f162, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f163, %f158, %f162;
	fma.rn.ftz.f32 	%f139, %f163, %f159, %f139;
	fma.rn.ftz.f32 	%f141, %f163, %f160, %f141;
	fma.rn.ftz.f32 	%f143, %f163, %f161, %f143;
$Lt_38_246786:
	.loc	20	551	0
	mov.f32 	%f2, %f137;
	mov.f32 	%f4, %f139;
	mov.f32 	%f5, %f141;
	mov.f32 	%f6, %f143;
$Lt_38_239106:
$L_38_221442:
$Lt_38_238594:
	.loc	20	540	0
	and.b32 	%r190, %r12, 4096;
	mov.u32 	%r191, 0;
	setp.ne.s32 	%p125, %r190, %r191;
	@%p125 bra 	$Lt_38_248834;
	.loc	21	268	0
	mov.f32 	%f164, %f4;
	.loc	21	269	0
	mov.f32 	%f165, %f2;
	.loc	20	558	0
	mov.f32 	%f2, %f6;
	mov.f32 	%f4, %f5;
	mov.f32 	%f5, %f164;
	mov.f32 	%f6, %f165;
$Lt_38_248834:
	@!%p3 bra 	$Lt_38_249346;
	.loc	20	57	0
	mov.u32 	%r192, 0;
	setp.ne.s32 	%p126, %r21, %r192;
	@%p126 bra 	$Lt_38_189442;
	.loc	20	59	0
	mov.f32 	%f166, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_1;
$Lt_38_189442:
	.loc	20	61	0
	mov.u32 	%r193, 64;
	setp.ne.s32 	%p127, %r21, %r193;
	@%p127 bra 	$Lt_38_189698;
	.loc	20	63	0
	mov.f32 	%f166, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_1;
$Lt_38_189698:
	.loc	20	65	0
	mov.u32 	%r194, 128;
	setp.ne.s32 	%p128, %r21, %r194;
	@%p128 bra 	$Lt_38_189954;
	.loc	20	68	0
	mov.f32 	%f166, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_1;
$Lt_38_189954:
	.loc	20	70	0
	mov.u32 	%r195, 192;
	setp.ne.s32 	%p129, %r21, %r195;
	@%p129 bra 	$Lt_38_190210;
	.loc	20	72	0
	mov.f32 	%f166, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_216_1;
$Lt_38_190210:
	.loc	20	76	0
	mov.f32 	%f166, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_216_1:
	.loc	20	564	0
	mov.f32 	%f167, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f168, %f2, %f167;
	mov.f32 	%f169, 0f00000000;   	// 0
	mov.f32 	%f170, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p130, %f168, %f170;
	selp.f32 	%f171, %f168, %f169, %p130;
	min.ftz.f32 	%f2, %f171, %f166;
	mov.f32 	%f172, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f173, %f4, %f172;
	mov.f32 	%f174, 0f00000000;   	// 0
	mov.f32 	%f175, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p131, %f173, %f175;
	selp.f32 	%f176, %f173, %f174, %p131;
	min.ftz.f32 	%f4, %f176, %f166;
	mov.f32 	%f177, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f178, %f5, %f177;
	mov.f32 	%f179, 0f00000000;   	// 0
	mov.f32 	%f180, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p132, %f178, %f180;
	selp.f32 	%f181, %f178, %f179, %p132;
	min.ftz.f32 	%f5, %f181, %f166;
	mov.f32 	%f182, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f183, %f6, %f182;
	mov.f32 	%f184, 0f00000000;   	// 0
	mov.f32 	%f185, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p133, %f183, %f185;
	selp.f32 	%f186, %f183, %f184, %p133;
	min.ftz.f32 	%f6, %f186, %f166;
$Lt_38_249346:
	.loc	22	45	0
	mov.f32 	%f187, %f2;
	st.param.f32 	[__cudaretf__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_+0], %f187;
	mov.f32 	%f188, %f4;
	st.param.f32 	[__cudaretf__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_+4], %f188;
	mov.f32 	%f189, %f5;
	st.param.f32 	[__cudaretf__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_+8], %f189;
	mov.f32 	%f190, %f6;
	st.param.f32 	[__cudaretf__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_+12], %f190;
	ret;
$LDWend__Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_:
	} // _Z31ConvertPixel_444_15u_To_444_32f7ushort414IR_PixelFormatS0_

	.visible .func (.param .align 4 .b8 __cudaretf__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_[4]) _Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_ (.param .align 16 .b8 __cudaparmf1__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_[16], .param .s32 __cudaparmf2__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_, .param .s32 __cudaparmf3__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_)
	{
	.reg .u32 %r<196>;
	.reg .u64 %rd<3>;
	.reg .f32 %f<194>;
	.reg .pred %p<135>;
	.loc	22	54	0
$LDWbegin__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_:
	ld.param.f32 	%f1, [__cudaparmf1__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_+12];
	mov.f32 	%f8, %f7;
	ld.param.u32 	%r1, [__cudaparmf2__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_];
	mov.s32 	%r2, %r1;
	ld.param.u32 	%r3, [__cudaparmf3__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_];
	mov.s32 	%r4, %r3;
	.loc	20	469	0
	mov.f32 	%f9, %f2;
	mov.f32 	%f10, %f4;
	mov.f32 	%f11, %f6;
	mov.f32 	%f12, %f8;
	and.b32 	%r5, %r2, 4096;
	mov.u32 	%r6, 0;
	setp.ne.s32 	%p1, %r5, %r6;
	@%p1 bra 	$Lt_39_222722;
	.loc	20	473	0
	mov.f32 	%f9, %f8;
	mov.f32 	%f10, %f6;
	mov.f32 	%f11, %f4;
	mov.f32 	%f12, %f2;
$Lt_39_222722:
	.loc	20	476	0
	and.b32 	%r7, %r2, 448;
	mov.s32 	%r8, %r2;
	and.b32 	%r9, %r4, 448;
	mov.s32 	%r10, %r4;
	mov.s32 	%r11, 256;
	setp.ne.s32 	%p2, %r7, %r11;
	and.b32 	%r12, %r8, 1;
	mov.s32 	%r13, 256;
	setp.ne.s32 	%p3, %r9, %r13;
	and.b32 	%r14, %r10, 1;
	selp.s32 	%r15, 1, 0, %p2;
	selp.s32 	%r16, 1, 0, %p3;
	and.b32 	%r17, %r12, %r15;
	and.b32 	%r18, %r14, %r16;
	mov.u32 	%r19, 0;
	setp.eq.s32 	%p4, %r17, %r19;
	@%p4 bra 	$Lt_39_249858;
	mov.u32 	%r20, 0;
	setp.ne.s32 	%p5, %r18, %r20;
	@%p5 bra 	$Lt_39_249858;
	.loc	20	57	0
	mov.u32 	%r21, 0;
	setp.ne.s32 	%p6, %r7, %r21;
	@%p6 bra 	$Lt_39_140802;
	.loc	20	59	0
	mov.f32 	%f13, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_75;
$Lt_39_140802:
	.loc	20	61	0
	mov.u32 	%r22, 64;
	setp.ne.s32 	%p7, %r7, %r22;
	@%p7 bra 	$Lt_39_141058;
	.loc	20	63	0
	mov.f32 	%f13, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_75;
$Lt_39_141058:
	.loc	20	65	0
	mov.u32 	%r23, 128;
	setp.ne.s32 	%p8, %r7, %r23;
	@%p8 bra 	$Lt_39_141314;
	.loc	20	68	0
	mov.f32 	%f13, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_75;
$Lt_39_141314:
	.loc	20	70	0
	mov.u32 	%r24, 192;
	setp.ne.s32 	%p9, %r7, %r24;
	@%p9 bra 	$Lt_39_141570;
	.loc	20	72	0
	mov.f32 	%f13, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_75;
$Lt_39_141570:
	.loc	20	76	0
	mov.f32 	%f13, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_75:
	.loc	20	118	0
	and.b32 	%r25, %r2, 2048;
	mov.s32 	%r26, 0;
	setp.ne.s32 	%p10, %r25, %r26;
	@!%p10 bra 	$Lt_39_223490;
	.loc	20	100	0
	ld.const.f32 	%f14, [kYCbCrOffset+0];
	bra.uni 	$Lt_39_223234;
$Lt_39_223490:
	ld.const.f32 	%f14, [kYCbCrFullRangeOffset+0];
$Lt_39_223234:
	.loc	20	118	0
	@!%p10 bra 	$Lt_39_224002;
	.loc	20	100	0
	ld.const.f32 	%f15, [kYCbCrOffset+4];
	bra.uni 	$Lt_39_223746;
$Lt_39_224002:
	ld.const.f32 	%f15, [kYCbCrFullRangeOffset+4];
$Lt_39_223746:
	.loc	20	118	0
	@!%p10 bra 	$Lt_39_224514;
	.loc	20	100	0
	ld.const.f32 	%f16, [kYCbCrOffset+8];
	bra.uni 	$Lt_39_224258;
$Lt_39_224514:
	ld.const.f32 	%f16, [kYCbCrFullRangeOffset+8];
$Lt_39_224258:
	.loc	20	478	0
	mov.f32 	%f17, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f18, %f13, %f17;
	mul.ftz.f32 	%f19, %f18, %f14;
	sub.ftz.f32 	%f10, %f10, %f19;
	mul.ftz.f32 	%f20, %f18, %f15;
	sub.ftz.f32 	%f11, %f11, %f20;
	mul.ftz.f32 	%f21, %f18, %f16;
	sub.ftz.f32 	%f12, %f12, %f21;
$Lt_39_249858:
$Lt_39_26114:
	.loc	20	481	0
	and.b32 	%r27, %r2, 2;
	and.b32 	%r28, %r4, 2;
	mov.u32 	%r29, 0;
	setp.eq.s32 	%p11, %r27, %r29;
	@%p11 bra 	$Lt_39_250370;
	mov.u32 	%r30, 0;
	setp.ne.s32 	%p12, %r28, %r30;
	@%p12 bra 	$Lt_39_250370;
	.loc	20	483	0
	mov.f32 	%f22, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p13, %f10, %f22;
	@!%p13 bra 	$Lt_39_224770;
	.loc	20	372	0
	neg.ftz.f32 	%f23, %f10;
	lg2.approx.ftz.f32 	%f24, %f23;
	mov.f32 	%f25, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f26, %f24, %f25;
	ex2.approx.ftz.f32 	%f27, %f26;
	neg.ftz.f32 	%f28, %f27;
	bra.uni 	$LDWendi___log2f_217_71;
$Lt_39_224770:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f29, %f10;
	mov.f32 	%f30, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f31, %f29, %f30;
	ex2.approx.ftz.f32 	%f28, %f31;
$LDWendi___log2f_217_71:
	.loc	20	483	0
	mov.f32 	%f32, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p14, %f11, %f32;
	@!%p14 bra 	$Lt_39_225282;
	.loc	20	372	0
	neg.ftz.f32 	%f33, %f11;
	lg2.approx.ftz.f32 	%f34, %f33;
	mov.f32 	%f35, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f36, %f34, %f35;
	ex2.approx.ftz.f32 	%f37, %f36;
	neg.ftz.f32 	%f38, %f37;
	bra.uni 	$LDWendi___log2f_217_69;
$Lt_39_225282:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f39, %f11;
	mov.f32 	%f40, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f41, %f39, %f40;
	ex2.approx.ftz.f32 	%f38, %f41;
$LDWendi___log2f_217_69:
	.loc	20	483	0
	mov.f32 	%f42, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p15, %f12, %f42;
	@!%p15 bra 	$Lt_39_225794;
	.loc	20	372	0
	neg.ftz.f32 	%f43, %f12;
	lg2.approx.ftz.f32 	%f44, %f43;
	mov.f32 	%f45, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f46, %f44, %f45;
	ex2.approx.ftz.f32 	%f47, %f46;
	neg.ftz.f32 	%f48, %f47;
	bra.uni 	$LDWendi___log2f_217_67;
$Lt_39_225794:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f49, %f12;
	mov.f32 	%f50, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f51, %f49, %f50;
	ex2.approx.ftz.f32 	%f48, %f51;
$LDWendi___log2f_217_67:
	.loc	20	483	0
	mov.f32 	%f10, %f28;
	mov.f32 	%f11, %f38;
	mov.f32 	%f12, %f48;
$Lt_39_250370:
$Lt_39_29954:
	.loc	20	486	0
	and.b32 	%r31, %r2, 1;
	and.b32 	%r32, %r4, 1;
	and.b32 	%r33, %r2, 1536;
	and.b32 	%r34, %r4, 1536;
	set.ne.u32.s32 	%r35, %r31, %r32;
	neg.s32 	%r36, %r35;
	set.ne.u32.s32 	%r37, %r33, %r34;
	neg.s32 	%r38, %r37;
	or.b32 	%r39, %r36, %r38;
	mov.u32 	%r40, 0;
	setp.ne.s32 	%p16, %r39, %r40;
	@%p16 bra 	$Lt_39_80642;
	setp.eq.s32 	%p17, %r17, %r18;
	@%p17 bra 	$Lt_39_80898;
$Lt_39_80642:
	.loc	20	490	0
	mov.u32 	%r41, 0;
	setp.ne.s32 	%p18, %r31, %r41;
	@%p18 bra 	$Lt_39_226562;
	mov.s32 	%r42, 256;
	setp.eq.s32 	%p19, %r9, %r42;
	mov.u32 	%r43, 256;
	setp.ne.s32 	%p20, %r7, %r43;
	@%p20 bra 	$Lt_39_227074;
	.loc	20	137	0
	mov.s32 	%r44, 512;
	setp.eq.s32 	%p21, %r34, %r44;
	@!%p19 bra 	$Lt_39_144898;
	.loc	20	139	0
	@!%p21 bra 	$Lt_39_145154;
	.loc	20	141	0
	mov.u64 	%rd1, kRGB32f_To_709YPbPr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_145154:
	.loc	20	145	0
	mov.u64 	%rd1, kRGB32f_To_601YPbPr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_144898:
	.loc	20	150	0
	@!%p21 bra 	$Lt_39_145410;
	.loc	20	152	0
	mov.u64 	%rd1, kRGB32f_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_145410:
	.loc	20	154	0
	and.b32 	%r45, %r4, 2048;
	mov.u32 	%r46, 0;
	setp.ne.s32 	%p22, %r45, %r46;
	@%p22 bra 	$Lt_39_145666;
	.loc	20	156	0
	mov.u64 	%rd1, kRGB32f_To_601YCbCrFullRange;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_145666:
	.loc	20	160	0
	mov.u64 	%rd1, kRGB32f_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_227074:
	@!%p19 bra 	$Lt_39_227586;
	bra.uni 	$Lt_39_226306;
$Lt_39_227586:
	.loc	20	179	0
	mov.u32 	%r47, 512;
	setp.ne.s32 	%p23, %r34, %r47;
	@%p23 bra 	$Lt_39_146434;
	.loc	20	181	0
	mov.u64 	%rd1, kRGB8u_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_146434:
	.loc	20	183	0
	and.b32 	%r48, %r4, 2048;
	mov.u32 	%r49, 0;
	setp.ne.s32 	%p24, %r48, %r49;
	@%p24 bra 	$Lt_39_146690;
	.loc	20	185	0
	mov.u64 	%rd1, kRGB8u_To_601YCbCrFullRange;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_146690:
	.loc	20	189	0
	mov.u64 	%rd1, kRGB8u_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_226562:
	mov.s32 	%r50, 0;
	setp.eq.s32 	%p25, %r32, %r50;
	mov.u32 	%r51, 512;
	setp.ne.s32 	%p26, %r33, %r51;
	@%p26 bra 	$Lt_39_228098;
	mov.s32 	%r52, 256;
	setp.eq.s32 	%p27, %r7, %r52;
	@!%p25 bra 	$Lt_39_228610;
	mov.s32 	%r53, 256;
	setp.eq.s32 	%p28, %r9, %r53;
	@!%p27 bra 	$Lt_39_229122;
	@!%p28 bra 	$Lt_39_226306;
	.loc	20	202	0
	mov.u64 	%rd1, k709YPbPr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_229122:
	.loc	20	211	0
	@!%p28 bra 	$Lt_39_147970;
	.loc	20	213	0
	mov.u64 	%rd1, k709YCbCr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_147970:
	.loc	20	217	0
	mov.u64 	%rd1, k709YCbCr_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_228610:
	@!%p27 bra 	$Lt_39_230146;
	bra.uni 	$Lt_39_226306;
$Lt_39_230146:
	mov.s32 	%r54, 256;
	set.eq.u32.s32 	%r55, %r9, %r54;
	neg.s32 	%r56, %r55;
	and.b32 	%r57, %r4, 2048;
	mov.s32 	%r58, 0;
	set.eq.u32.s32 	%r59, %r57, %r58;
	neg.s32 	%r60, %r59;
	or.b32 	%r61, %r56, %r60;
	mov.u32 	%r62, 0;
	setp.eq.s32 	%p29, %r61, %r62;
	@%p29 bra 	$Lt_39_230658;
	bra.uni 	$Lt_39_226306;
$Lt_39_230658:
	.loc	20	250	0
	mov.u64 	%rd1, k709YCbCr_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_228098:
	and.b32 	%r63, %r2, 2048;
	mov.s32 	%r64, 0;
	setp.eq.s32 	%p30, %r63, %r64;
	@!%p30 bra 	$Lt_39_231170;
	@!%p25 bra 	$Lt_39_226306;
	.loc	20	259	0
	mov.u32 	%r65, 256;
	setp.ne.s32 	%p31, %r9, %r65;
	@%p31 bra 	$Lt_39_149506;
	.loc	20	261	0
	mov.u64 	%rd1, k601YCbCrFullRange_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_149506:
	.loc	20	265	0
	mov.u64 	%rd1, k601YCbCrFullRange_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_231170:
	mov.s32 	%r66, 256;
	setp.eq.s32 	%p27, %r7, %r66;
	@!%p25 bra 	$Lt_39_232194;
	mov.s32 	%r67, 256;
	setp.eq.s32 	%p32, %r9, %r67;
	@!%p27 bra 	$Lt_39_232706;
	@!%p32 bra 	$Lt_39_226306;
	.loc	20	302	0
	mov.u64 	%rd1, k601YPbPr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_232706:
	.loc	20	311	0
	@!%p32 bra 	$Lt_39_151298;
	.loc	20	313	0
	mov.u64 	%rd1, k601YCbCr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_151298:
	.loc	20	317	0
	mov.u64 	%rd1, k601YCbCr_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_232194:
	@!%p27 bra 	$Lt_39_233730;
	bra.uni 	$Lt_39_226306;
$Lt_39_233730:
	selp.s32 	%r68, 1, 0, %p30;
	mov.s32 	%r69, 256;
	set.eq.u32.s32 	%r70, %r9, %r69;
	neg.s32 	%r71, %r70;
	or.b32 	%r72, %r68, %r71;
	mov.u32 	%r73, 0;
	setp.eq.s32 	%p33, %r72, %r73;
	@%p33 bra 	$Lt_39_234242;
	bra.uni 	$Lt_39_226306;
$Lt_39_234242:
	.loc	20	350	0
	mov.u64 	%rd1, k601YCbCr_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65;
$Lt_39_226306:
	.loc	20	355	0
	mov.u64 	%rd1, 0;
$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__217_65:
	.loc	20	490	0
	ld.global.f32 	%f52, [%rd1+16];
	mul.ftz.f32 	%f53, %f52, %f11;
	ld.global.f32 	%f54, [%rd1+12];
	fma.rn.ftz.f32 	%f55, %f54, %f10, %f53;
	ld.global.f32 	%f56, [%rd1+20];
	fma.rn.ftz.f32 	%f57, %f56, %f12, %f55;
	ld.global.f32 	%f58, [%rd1+28];
	mul.ftz.f32 	%f59, %f58, %f11;
	ld.global.f32 	%f60, [%rd1+24];
	fma.rn.ftz.f32 	%f61, %f60, %f10, %f59;
	ld.global.f32 	%f62, [%rd1+32];
	fma.rn.ftz.f32 	%f63, %f62, %f12, %f61;
	ld.global.f32 	%f64, [%rd1+4];
	mul.ftz.f32 	%f65, %f64, %f11;
	ld.global.f32 	%f66, [%rd1+0];
	fma.rn.ftz.f32 	%f67, %f66, %f10, %f65;
	ld.global.f32 	%f68, [%rd1+8];
	fma.rn.ftz.f32 	%f10, %f68, %f12, %f67;
	mov.f32 	%f11, %f57;
	mov.f32 	%f12, %f63;
	setp.eq.s32 	%p34, %r7, %r9;
	@%p34 bra 	$Lt_39_235010;
	.loc	20	494	0
	mov.s32 	%r74, 256;
	setp.eq.s32 	%p27, %r7, %r74;
	@!%p27 bra 	$L_39_220162;
	mov.s32 	%r75, 0;
	setp.eq.s32 	%p35, %r9, %r75;
	@%p35 bra 	$Lt_39_251394;
$L_39_220162:
	mov.s32 	%r76, 0;
	setp.eq.s32 	%p36, %r7, %r76;
	@!%p36 bra 	$Lt_39_251650;
	mov.u32 	%r77, 256;
	setp.ne.s32 	%p37, %r9, %r77;
	@%p37 bra 	$Lt_39_251650;
	mov.s32 	%r78, 0;
	setp.eq.s32 	%p35, %r9, %r78;
	bra.uni 	$L_39_219906;
$Lt_39_251394:
	mov.s32 	%r79, 0;
	setp.eq.s32 	%p36, %r7, %r79;
$L_39_219906:
	.loc	20	57	0
	@!%p35 bra 	$Lt_39_152834;
	.loc	20	59	0
	mov.f32 	%f69, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_63;
$Lt_39_152834:
	.loc	20	61	0
	mov.u32 	%r80, 64;
	setp.ne.s32 	%p38, %r9, %r80;
	@%p38 bra 	$Lt_39_153090;
	.loc	20	63	0
	mov.f32 	%f69, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_63;
$Lt_39_153090:
	.loc	20	65	0
	mov.u32 	%r81, 128;
	setp.ne.s32 	%p39, %r9, %r81;
	@%p39 bra 	$Lt_39_153346;
	.loc	20	68	0
	mov.f32 	%f69, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_63;
$Lt_39_153346:
	.loc	20	70	0
	mov.u32 	%r82, 192;
	setp.ne.s32 	%p40, %r9, %r82;
	@%p40 bra 	$Lt_39_153602;
	.loc	20	72	0
	mov.f32 	%f69, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_63;
$Lt_39_153602:
	.loc	20	76	0
	mov.f32 	%f69, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_63:
	.loc	20	57	0
	@!%p36 bra 	$Lt_39_153858;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_61;
$Lt_39_153858:
	.loc	20	61	0
	mov.u32 	%r83, 64;
	setp.ne.s32 	%p41, %r7, %r83;
	@%p41 bra 	$Lt_39_154114;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_61;
$Lt_39_154114:
	.loc	20	65	0
	mov.u32 	%r84, 128;
	setp.ne.s32 	%p42, %r7, %r84;
	@%p42 bra 	$Lt_39_154370;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_61;
$Lt_39_154370:
	.loc	20	70	0
	mov.u32 	%r85, 192;
	setp.ne.s32 	%p43, %r7, %r85;
	@%p43 bra 	$Lt_39_154626;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_61;
$Lt_39_154626:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_61:
	.loc	20	498	0
	div.approx.ftz.f32 	%f71, %f69, %f70;
	mul.ftz.f32 	%f9, %f9, %f71;
	bra.uni 	$Lt_39_235010;
$Lt_39_251650:
$L_39_219650:
	.loc	20	500	0
	@!%p27 bra 	$L_39_221186;
	@%p3 bra 	$L_39_220930;
$L_39_221186:
	@!%p2 bra 	$Lt_39_252674;
	mov.u32 	%r86, 256;
	setp.ne.s32 	%p44, %r9, %r86;
	@%p44 bra 	$Lt_39_252674;
$L_39_220930:
	.loc	20	57	0
	mov.u32 	%r87, 0;
	setp.ne.s32 	%p45, %r9, %r87;
	@%p45 bra 	$Lt_39_155138;
	.loc	20	59	0
	mov.f32 	%f69, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_59;
$Lt_39_155138:
	.loc	20	61	0
	mov.u32 	%r88, 64;
	setp.ne.s32 	%p46, %r9, %r88;
	@%p46 bra 	$Lt_39_155394;
	.loc	20	63	0
	mov.f32 	%f69, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_59;
$Lt_39_155394:
	.loc	20	65	0
	mov.u32 	%r89, 128;
	setp.ne.s32 	%p47, %r9, %r89;
	@%p47 bra 	$Lt_39_155650;
	.loc	20	68	0
	mov.f32 	%f69, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_59;
$Lt_39_155650:
	.loc	20	70	0
	mov.u32 	%r90, 192;
	setp.ne.s32 	%p48, %r9, %r90;
	@%p48 bra 	$Lt_39_155906;
	.loc	20	72	0
	mov.f32 	%f69, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_59;
$Lt_39_155906:
	.loc	20	76	0
	mov.f32 	%f69, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_59:
	.loc	20	57	0
	@!%p36 bra 	$Lt_39_156162;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_57;
$Lt_39_156162:
	.loc	20	61	0
	mov.u32 	%r91, 64;
	setp.ne.s32 	%p49, %r7, %r91;
	@%p49 bra 	$Lt_39_156418;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_57;
$Lt_39_156418:
	.loc	20	65	0
	mov.u32 	%r92, 128;
	setp.ne.s32 	%p50, %r7, %r92;
	@%p50 bra 	$Lt_39_156674;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_57;
$Lt_39_156674:
	.loc	20	70	0
	mov.u32 	%r93, 192;
	setp.ne.s32 	%p51, %r7, %r93;
	@%p51 bra 	$Lt_39_156930;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_57;
$Lt_39_156930:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_57:
	.loc	20	504	0
	div.approx.ftz.f32 	%f72, %f69, %f70;
	mul.ftz.f32 	%f9, %f9, %f72;
	.loc	20	57	0
	@!%p36 bra 	$Lt_39_158210;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_53;
$Lt_39_158210:
	.loc	20	61	0
	mov.u32 	%r94, 64;
	setp.ne.s32 	%p52, %r7, %r94;
	@%p52 bra 	$Lt_39_158466;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_53;
$Lt_39_158466:
	.loc	20	65	0
	mov.u32 	%r95, 128;
	setp.ne.s32 	%p53, %r7, %r95;
	@%p53 bra 	$Lt_39_158722;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_53;
$Lt_39_158722:
	.loc	20	70	0
	mov.u32 	%r96, 192;
	setp.ne.s32 	%p54, %r7, %r96;
	@%p54 bra 	$Lt_39_158978;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_53;
$Lt_39_158978:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_53:
	.loc	20	505	0
	mov.f32 	%f73, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f74, %f73, %f70;
	mul.ftz.f32 	%f10, %f10, %f74;
	.loc	20	57	0
	@!%p36 bra 	$Lt_39_160258;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_49;
$Lt_39_160258:
	.loc	20	61	0
	mov.u32 	%r97, 64;
	setp.ne.s32 	%p55, %r7, %r97;
	@%p55 bra 	$Lt_39_160514;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_49;
$Lt_39_160514:
	.loc	20	65	0
	mov.u32 	%r98, 128;
	setp.ne.s32 	%p56, %r7, %r98;
	@%p56 bra 	$Lt_39_160770;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_49;
$Lt_39_160770:
	.loc	20	70	0
	mov.u32 	%r99, 192;
	setp.ne.s32 	%p57, %r7, %r99;
	@%p57 bra 	$Lt_39_161026;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_49;
$Lt_39_161026:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_49:
	.loc	20	506	0
	mov.f32 	%f75, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f76, %f75, %f70;
	mul.ftz.f32 	%f11, %f57, %f76;
	.loc	20	57	0
	@!%p36 bra 	$Lt_39_162306;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_45;
$Lt_39_162306:
	.loc	20	61	0
	mov.u32 	%r100, 64;
	setp.ne.s32 	%p58, %r7, %r100;
	@%p58 bra 	$Lt_39_162562;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_45;
$Lt_39_162562:
	.loc	20	65	0
	mov.u32 	%r101, 128;
	setp.ne.s32 	%p59, %r7, %r101;
	@%p59 bra 	$Lt_39_162818;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_45;
$Lt_39_162818:
	.loc	20	70	0
	mov.u32 	%r102, 192;
	setp.ne.s32 	%p60, %r7, %r102;
	@%p60 bra 	$Lt_39_163074;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_45;
$Lt_39_163074:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_45:
	.loc	20	507	0
	mov.f32 	%f77, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f78, %f77, %f70;
	mul.ftz.f32 	%f12, %f63, %f78;
	bra.uni 	$Lt_39_235010;
$Lt_39_252674:
$L_39_220674:
	.loc	20	57	0
	@!%p36 bra 	$Lt_39_163330;
	.loc	20	59	0
	mov.f32 	%f69, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_43;
$Lt_39_163330:
	.loc	20	61	0
	mov.u32 	%r103, 64;
	setp.ne.s32 	%p61, %r7, %r103;
	@%p61 bra 	$Lt_39_163586;
	.loc	20	63	0
	mov.f32 	%f69, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_43;
$Lt_39_163586:
	.loc	20	65	0
	mov.u32 	%r104, 128;
	setp.ne.s32 	%p62, %r7, %r104;
	@%p62 bra 	$Lt_39_163842;
	.loc	20	68	0
	mov.f32 	%f69, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_43;
$Lt_39_163842:
	.loc	20	70	0
	mov.u32 	%r105, 192;
	setp.ne.s32 	%p63, %r7, %r105;
	@%p63 bra 	$Lt_39_164098;
	.loc	20	72	0
	mov.f32 	%f69, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_43;
$Lt_39_164098:
	.loc	20	76	0
	mov.f32 	%f69, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_43:
	.loc	20	511	0
	mov.f32 	%f79, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f80, %f69, %f79;
	mul.ftz.f32 	%f9, %f80, %f9;
	mul.ftz.f32 	%f10, %f80, %f10;
	mul.ftz.f32 	%f11, %f80, %f57;
	mul.ftz.f32 	%f12, %f80, %f63;
	bra.uni 	$Lt_39_235010;
$Lt_39_80898:
	.loc	20	486	0
	setp.eq.s32 	%p64, %r7, %r9;
	@%p64 bra 	$Lt_39_235010;
	.loc	20	57	0
	mov.u32 	%r106, 0;
	setp.ne.s32 	%p65, %r9, %r106;
	@%p65 bra 	$Lt_39_165634;
	.loc	20	59	0
	mov.f32 	%f69, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_39;
$Lt_39_165634:
	.loc	20	61	0
	mov.u32 	%r107, 64;
	setp.ne.s32 	%p66, %r9, %r107;
	@%p66 bra 	$Lt_39_165890;
	.loc	20	63	0
	mov.f32 	%f69, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_39;
$Lt_39_165890:
	.loc	20	65	0
	mov.u32 	%r108, 128;
	setp.ne.s32 	%p67, %r9, %r108;
	@%p67 bra 	$Lt_39_166146;
	.loc	20	68	0
	mov.f32 	%f69, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_39;
$Lt_39_166146:
	.loc	20	70	0
	mov.u32 	%r109, 192;
	setp.ne.s32 	%p68, %r9, %r109;
	@%p68 bra 	$Lt_39_166402;
	.loc	20	72	0
	mov.f32 	%f69, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_39;
$Lt_39_166402:
	.loc	20	76	0
	mov.f32 	%f69, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_39:
	.loc	20	57	0
	mov.u32 	%r110, 0;
	setp.ne.s32 	%p69, %r7, %r110;
	@%p69 bra 	$Lt_39_166658;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_37;
$Lt_39_166658:
	.loc	20	61	0
	mov.u32 	%r111, 64;
	setp.ne.s32 	%p70, %r7, %r111;
	@%p70 bra 	$Lt_39_166914;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_37;
$Lt_39_166914:
	.loc	20	65	0
	mov.u32 	%r112, 128;
	setp.ne.s32 	%p71, %r7, %r112;
	@%p71 bra 	$Lt_39_167170;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_37;
$Lt_39_167170:
	.loc	20	70	0
	mov.u32 	%r113, 192;
	setp.ne.s32 	%p72, %r7, %r113;
	@%p72 bra 	$Lt_39_167426;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_37;
$Lt_39_167426:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_37:
	.loc	20	517	0
	div.approx.ftz.f32 	%f81, %f69, %f70;
	mul.ftz.f32 	%f9, %f81, %f9;
	mul.ftz.f32 	%f10, %f81, %f10;
	mul.ftz.f32 	%f11, %f81, %f11;
	mul.ftz.f32 	%f12, %f81, %f12;
$Lt_39_235010:
$Lt_39_83202:
	.loc	20	520	0
	mov.u32 	%r114, 0;
	setp.eq.s32 	%p73, %r28, %r114;
	@%p73 bra 	$Lt_39_253186;
	mov.u32 	%r115, 0;
	setp.ne.s32 	%p74, %r27, %r115;
	@%p74 bra 	$Lt_39_253186;
	.loc	20	522	0
	mov.f32 	%f82, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p75, %f10, %f82;
	@!%p75 bra 	$Lt_39_235522;
	.loc	20	372	0
	neg.ftz.f32 	%f83, %f10;
	lg2.approx.ftz.f32 	%f84, %f83;
	mov.f32 	%f85, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f86, %f84, %f85;
	ex2.approx.ftz.f32 	%f87, %f86;
	neg.ftz.f32 	%f88, %f87;
	bra.uni 	$LDWendi___log2f_217_35;
$Lt_39_235522:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f89, %f10;
	mov.f32 	%f90, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f91, %f89, %f90;
	ex2.approx.ftz.f32 	%f88, %f91;
$LDWendi___log2f_217_35:
	.loc	20	522	0
	mov.f32 	%f92, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p76, %f11, %f92;
	@!%p76 bra 	$Lt_39_236034;
	.loc	20	372	0
	neg.ftz.f32 	%f93, %f11;
	lg2.approx.ftz.f32 	%f94, %f93;
	mov.f32 	%f95, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f96, %f94, %f95;
	ex2.approx.ftz.f32 	%f97, %f96;
	neg.ftz.f32 	%f98, %f97;
	bra.uni 	$LDWendi___log2f_217_33;
$Lt_39_236034:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f99, %f11;
	mov.f32 	%f100, 0f400e38e4;   	// 2.22222
	mul.ftz.f32 	%f101, %f99, %f100;
	ex2.approx.ftz.f32 	%f98, %f101;
$LDWendi___log2f_217_33:
	.loc	20	522	0
	mov.f32 	%f102, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p77, %f12, %f102;
	@!%p77 bra 	$Lt_39_236546;
	.loc	20	372	0
	neg.ftz.f32 	%f103, %f12;
	lg2.approx.ftz.f32 	%f104, %f103;
	mov.f32 	%f105, 0f400e38e4;   	// 2.22222
	mul.ftz.f32 	%f106, %f104, %f105;
	ex2.approx.ftz.f32 	%f107, %f106;
	neg.ftz.f32 	%f108, %f107;
	bra.uni 	$LDWendi___log2f_217_31;
$Lt_39_236546:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f109, %f12;
	mov.f32 	%f110, 0f400e38e4;   	// 2.22222
	mul.ftz.f32 	%f111, %f109, %f110;
	ex2.approx.ftz.f32 	%f108, %f111;
$LDWendi___log2f_217_31:
	.loc	20	522	0
	mov.f32 	%f10, %f88;
	mov.f32 	%f11, %f98;
	mov.f32 	%f12, %f108;
$Lt_39_253186:
$Lt_39_85250:
	.loc	20	525	0
	mov.u32 	%r116, 0;
	setp.eq.s32 	%p78, %r18, %r116;
	@%p78 bra 	$Lt_39_253698;
	mov.u32 	%r117, 0;
	setp.ne.s32 	%p79, %r17, %r117;
	@%p79 bra 	$Lt_39_253698;
	.loc	20	57	0
	mov.u32 	%r118, 0;
	setp.ne.s32 	%p80, %r9, %r118;
	@%p80 bra 	$Lt_39_168450;
	.loc	20	59	0
	mov.f32 	%f112, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_29;
$Lt_39_168450:
	.loc	20	61	0
	mov.u32 	%r119, 64;
	setp.ne.s32 	%p81, %r9, %r119;
	@%p81 bra 	$Lt_39_168706;
	.loc	20	63	0
	mov.f32 	%f112, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_29;
$Lt_39_168706:
	.loc	20	65	0
	mov.u32 	%r120, 128;
	setp.ne.s32 	%p82, %r9, %r120;
	@%p82 bra 	$Lt_39_168962;
	.loc	20	68	0
	mov.f32 	%f112, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_29;
$Lt_39_168962:
	.loc	20	70	0
	mov.u32 	%r121, 192;
	setp.ne.s32 	%p83, %r9, %r121;
	@%p83 bra 	$Lt_39_169218;
	.loc	20	72	0
	mov.f32 	%f112, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_29;
$Lt_39_169218:
	.loc	20	76	0
	mov.f32 	%f112, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_29:
	.loc	20	107	0
	and.b32 	%r122, %r4, 2048;
	mov.s32 	%r123, 0;
	setp.ne.s32 	%p84, %r122, %r123;
	@!%p84 bra 	$Lt_39_237314;
	.loc	20	100	0
	ld.const.f32 	%f113, [kYCbCrOffset+0];
	bra.uni 	$Lt_39_237058;
$Lt_39_237314:
	ld.const.f32 	%f113, [kYCbCrFullRangeOffset+0];
$Lt_39_237058:
	.loc	20	107	0
	@!%p84 bra 	$Lt_39_237826;
	.loc	20	100	0
	ld.const.f32 	%f114, [kYCbCrOffset+4];
	bra.uni 	$Lt_39_237570;
$Lt_39_237826:
	ld.const.f32 	%f114, [kYCbCrFullRangeOffset+4];
$Lt_39_237570:
	.loc	20	107	0
	@!%p84 bra 	$Lt_39_238338;
	.loc	20	100	0
	ld.const.f32 	%f115, [kYCbCrOffset+8];
	bra.uni 	$Lt_39_238082;
$Lt_39_238338:
	ld.const.f32 	%f115, [kYCbCrFullRangeOffset+8];
$Lt_39_238082:
	.loc	20	527	0
	mov.f32 	%f116, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f117, %f112, %f116;
	fma.rn.ftz.f32 	%f10, %f117, %f113, %f10;
	fma.rn.ftz.f32 	%f11, %f117, %f114, %f11;
	fma.rn.ftz.f32 	%f12, %f117, %f115, %f12;
$Lt_39_253698:
$Lt_39_91650:
	.loc	20	525	0
	and.b32 	%r124, %r2, 12;
	and.b32 	%r125, %r4, 12;
	setp.eq.s32 	%p85, %r124, %r125;
	@%p85 bra 	$Lt_39_239106;
	.loc	20	532	0
	mov.u32 	%r126, 8;
	setp.ne.s32 	%p86, %r124, %r126;
	@%p86 bra 	$L_39_222466;
	mov.u32 	%r127, 12;
	setp.eq.s32 	%p87, %r125, %r127;
	@%p87 bra 	$Lt_39_254466;
$L_39_222466:
	mov.u32 	%r128, 12;
	setp.eq.s32 	%p88, %r124, %r128;
	@%p88 bra 	$Lt_39_254466;
	mov.u32 	%r129, 0;
	setp.ne.s32 	%p89, %r124, %r129;
	@%p89 bra 	$L_39_221698;
$Lt_39_254466:
$L_39_221954:
	.loc	20	57	0
	mov.u32 	%r130, 0;
	setp.ne.s32 	%p90, %r9, %r130;
	@%p90 bra 	$Lt_39_171778;
	.loc	20	59	0
	mov.f32 	%f118, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_25;
$Lt_39_171778:
	.loc	20	61	0
	mov.u32 	%r131, 64;
	setp.ne.s32 	%p91, %r9, %r131;
	@%p91 bra 	$Lt_39_172034;
	.loc	20	63	0
	mov.f32 	%f118, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_25;
$Lt_39_172034:
	.loc	20	65	0
	mov.u32 	%r132, 128;
	setp.ne.s32 	%p92, %r9, %r132;
	@%p92 bra 	$Lt_39_172290;
	.loc	20	68	0
	mov.f32 	%f118, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_25;
$Lt_39_172290:
	.loc	20	70	0
	mov.u32 	%r133, 192;
	setp.ne.s32 	%p93, %r9, %r133;
	@%p93 bra 	$Lt_39_172546;
	.loc	20	72	0
	mov.f32 	%f118, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_25;
$Lt_39_172546:
	.loc	20	76	0
	mov.f32 	%f118, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_25:
	.loc	20	536	0
	mov.f32 	%f9, %f118;
	bra.uni 	$Lt_39_239106;
$L_39_221698:
	.loc	20	540	0
	mov.s32 	%r134, 12;
	setp.eq.s32 	%p94, %r125, %r134;
	mov.s32 	%r135, 4;
	set.eq.u32.s32 	%r136, %r124, %r135;
	neg.s32 	%r137, %r136;
	selp.s32 	%r138, 1, 0, %p94;
	mov.s32 	%r139, 8;
	set.eq.u32.s32 	%r140, %r125, %r139;
	neg.s32 	%r141, %r140;
	or.b32 	%r142, %r138, %r141;
	and.b32 	%r143, %r137, %r142;
	mov.u32 	%r144, 0;
	setp.eq.s32 	%p95, %r143, %r144;
	@%p95 bra 	$Lt_39_239362;
	.loc	20	410	0
	mov.f32 	%f119, %f10;
	mov.f32 	%f120, %f119;
	mov.f32 	%f121, %f11;
	mov.f32 	%f122, %f121;
	mov.f32 	%f123, %f12;
	mov.f32 	%f124, %f123;
	.loc	20	57	0
	mov.s32 	%r145, 0;
	setp.eq.s32 	%p35, %r9, %r145;
	@!%p35 bra 	$Lt_39_173058;
	.loc	20	59	0
	mov.f32 	%f125, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_23;
$Lt_39_173058:
	.loc	20	61	0
	mov.u32 	%r146, 64;
	setp.ne.s32 	%p96, %r9, %r146;
	@%p96 bra 	$Lt_39_173314;
	.loc	20	63	0
	mov.f32 	%f125, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_23;
$Lt_39_173314:
	.loc	20	65	0
	mov.u32 	%r147, 128;
	setp.ne.s32 	%p97, %r9, %r147;
	@%p97 bra 	$Lt_39_173570;
	.loc	20	68	0
	mov.f32 	%f125, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_23;
$Lt_39_173570:
	.loc	20	70	0
	mov.u32 	%r148, 192;
	setp.ne.s32 	%p98, %r9, %r148;
	@%p98 bra 	$Lt_39_173826;
	.loc	20	72	0
	mov.f32 	%f125, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_23;
$Lt_39_173826:
	.loc	20	76	0
	mov.f32 	%f125, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_23:
	.loc	20	413	0
	mov.u32 	%r149, 0;
	setp.eq.s32 	%p99, %r18, %r149;
	@%p99 bra 	$Lt_39_239618;
	.loc	20	57	0
	@!%p35 bra 	$Lt_39_174338;
	.loc	20	59	0
	mov.f32 	%f126, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_21;
$Lt_39_174338:
	.loc	20	61	0
	mov.u32 	%r150, 64;
	setp.ne.s32 	%p100, %r9, %r150;
	@%p100 bra 	$Lt_39_174594;
	.loc	20	63	0
	mov.f32 	%f126, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_21;
$Lt_39_174594:
	.loc	20	65	0
	mov.u32 	%r151, 128;
	setp.ne.s32 	%p101, %r9, %r151;
	@%p101 bra 	$Lt_39_174850;
	.loc	20	68	0
	mov.f32 	%f126, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_21;
$Lt_39_174850:
	.loc	20	70	0
	mov.u32 	%r152, 192;
	setp.ne.s32 	%p102, %r9, %r152;
	@%p102 bra 	$Lt_39_175106;
	.loc	20	72	0
	mov.f32 	%f126, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_21;
$Lt_39_175106:
	.loc	20	76	0
	mov.f32 	%f126, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_21:
	.loc	20	118	0
	and.b32 	%r122, %r4, 2048;
	mov.s32 	%r153, 0;
	setp.ne.s32 	%p84, %r122, %r153;
	@!%p84 bra 	$Lt_39_240386;
	.loc	20	100	0
	ld.const.f32 	%f127, [kYCbCrOffset+0];
	bra.uni 	$Lt_39_240130;
$Lt_39_240386:
	ld.const.f32 	%f127, [kYCbCrFullRangeOffset+0];
$Lt_39_240130:
	.loc	20	118	0
	@!%p84 bra 	$Lt_39_240898;
	.loc	20	100	0
	ld.const.f32 	%f128, [kYCbCrOffset+4];
	bra.uni 	$Lt_39_240642;
$Lt_39_240898:
	ld.const.f32 	%f128, [kYCbCrFullRangeOffset+4];
$Lt_39_240642:
	.loc	20	118	0
	@!%p84 bra 	$Lt_39_241410;
	.loc	20	100	0
	ld.const.f32 	%f129, [kYCbCrOffset+8];
	bra.uni 	$Lt_39_241154;
$Lt_39_241410:
	ld.const.f32 	%f129, [kYCbCrFullRangeOffset+8];
$Lt_39_241154:
	.loc	20	415	0
	mov.f32 	%f130, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f131, %f126, %f130;
	mul.ftz.f32 	%f132, %f131, %f127;
	sub.ftz.f32 	%f120, %f119, %f132;
	mul.ftz.f32 	%f133, %f131, %f128;
	sub.ftz.f32 	%f122, %f121, %f133;
	mul.ftz.f32 	%f134, %f131, %f129;
	sub.ftz.f32 	%f124, %f123, %f134;
$Lt_39_239618:
	.loc	20	418	0
	rcp.approx.ftz.f32 	%f135, %f125;
	mul.ftz.f32 	%f136, %f135, %f9;
	mul.ftz.f32 	%f120, %f136, %f120;
	.loc	20	419	0
	mul.ftz.f32 	%f122, %f136, %f122;
	.loc	20	420	0
	mul.ftz.f32 	%f124, %f136, %f124;
	.loc	20	422	0
	mov.u32 	%r154, 0;
	setp.eq.s32 	%p103, %r18, %r154;
	@%p103 bra 	$Lt_39_241666;
	.loc	20	57	0
	@!%p35 bra 	$Lt_39_177410;
	.loc	20	59	0
	mov.f32 	%f137, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_17;
$Lt_39_177410:
	.loc	20	61	0
	mov.u32 	%r155, 64;
	setp.ne.s32 	%p104, %r9, %r155;
	@%p104 bra 	$Lt_39_177666;
	.loc	20	63	0
	mov.f32 	%f137, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_17;
$Lt_39_177666:
	.loc	20	65	0
	mov.u32 	%r156, 128;
	setp.ne.s32 	%p105, %r9, %r156;
	@%p105 bra 	$Lt_39_177922;
	.loc	20	68	0
	mov.f32 	%f137, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_17;
$Lt_39_177922:
	.loc	20	70	0
	mov.u32 	%r157, 192;
	setp.ne.s32 	%p106, %r9, %r157;
	@%p106 bra 	$Lt_39_178178;
	.loc	20	72	0
	mov.f32 	%f137, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_17;
$Lt_39_178178:
	.loc	20	76	0
	mov.f32 	%f137, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_17:
	.loc	20	107	0
	and.b32 	%r122, %r4, 2048;
	mov.s32 	%r158, 0;
	setp.ne.s32 	%p84, %r122, %r158;
	@!%p84 bra 	$Lt_39_242434;
	.loc	20	100	0
	ld.const.f32 	%f138, [kYCbCrOffset+0];
	bra.uni 	$Lt_39_242178;
$Lt_39_242434:
	ld.const.f32 	%f138, [kYCbCrFullRangeOffset+0];
$Lt_39_242178:
	.loc	20	107	0
	@!%p84 bra 	$Lt_39_242946;
	.loc	20	100	0
	ld.const.f32 	%f139, [kYCbCrOffset+4];
	bra.uni 	$Lt_39_242690;
$Lt_39_242946:
	ld.const.f32 	%f139, [kYCbCrFullRangeOffset+4];
$Lt_39_242690:
	.loc	20	107	0
	@!%p84 bra 	$Lt_39_243458;
	.loc	20	100	0
	ld.const.f32 	%f140, [kYCbCrOffset+8];
	bra.uni 	$Lt_39_243202;
$Lt_39_243458:
	ld.const.f32 	%f140, [kYCbCrFullRangeOffset+8];
$Lt_39_243202:
	.loc	20	424	0
	mov.f32 	%f141, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f142, %f137, %f141;
	fma.rn.ftz.f32 	%f120, %f142, %f138, %f120;
	fma.rn.ftz.f32 	%f122, %f142, %f139, %f122;
	fma.rn.ftz.f32 	%f124, %f142, %f140, %f124;
$Lt_39_241666:
	.loc	20	543	0
	mov.f32 	%f10, %f120;
	mov.f32 	%f11, %f122;
	mov.f32 	%f12, %f124;
	@!%p94 bra 	$Lt_39_239106;
	.loc	20	57	0
	@!%p35 bra 	$Lt_39_180482;
	.loc	20	59	0
	mov.f32 	%f118, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_13;
$Lt_39_180482:
	.loc	20	61	0
	mov.u32 	%r159, 64;
	setp.ne.s32 	%p107, %r9, %r159;
	@%p107 bra 	$Lt_39_180738;
	.loc	20	63	0
	mov.f32 	%f118, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_13;
$Lt_39_180738:
	.loc	20	65	0
	mov.u32 	%r160, 128;
	setp.ne.s32 	%p108, %r9, %r160;
	@%p108 bra 	$Lt_39_180994;
	.loc	20	68	0
	mov.f32 	%f118, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_13;
$Lt_39_180994:
	.loc	20	70	0
	mov.u32 	%r161, 192;
	setp.ne.s32 	%p109, %r9, %r161;
	@%p109 bra 	$Lt_39_181250;
	.loc	20	72	0
	mov.f32 	%f118, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_13;
$Lt_39_181250:
	.loc	20	76	0
	mov.f32 	%f118, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_13:
	.loc	20	546	0
	mov.f32 	%f9, %f118;
	bra.uni 	$Lt_39_239106;
$Lt_39_239362:
	.loc	20	433	0
	mov.f32 	%f143, %f9;
	mov.f32 	%f144, %f10;
	mov.f32 	%f145, %f144;
	mov.f32 	%f146, %f11;
	mov.f32 	%f147, %f146;
	mov.f32 	%f148, %f12;
	mov.f32 	%f149, %f148;
	.loc	20	435	0
	mov.u32 	%r162, 0;
	setp.eq.s32 	%p110, %r18, %r162;
	@%p110 bra 	$Lt_39_244226;
	.loc	20	57	0
	mov.u32 	%r163, 0;
	setp.ne.s32 	%p111, %r9, %r163;
	@%p111 bra 	$Lt_39_181762;
	.loc	20	59	0
	mov.f32 	%f150, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_11;
$Lt_39_181762:
	.loc	20	61	0
	mov.u32 	%r164, 64;
	setp.ne.s32 	%p112, %r9, %r164;
	@%p112 bra 	$Lt_39_182018;
	.loc	20	63	0
	mov.f32 	%f150, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_11;
$Lt_39_182018:
	.loc	20	65	0
	mov.u32 	%r165, 128;
	setp.ne.s32 	%p113, %r9, %r165;
	@%p113 bra 	$Lt_39_182274;
	.loc	20	68	0
	mov.f32 	%f150, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_11;
$Lt_39_182274:
	.loc	20	70	0
	mov.u32 	%r166, 192;
	setp.ne.s32 	%p114, %r9, %r166;
	@%p114 bra 	$Lt_39_182530;
	.loc	20	72	0
	mov.f32 	%f150, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_11;
$Lt_39_182530:
	.loc	20	76	0
	mov.f32 	%f150, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_11:
	.loc	20	118	0
	and.b32 	%r122, %r4, 2048;
	mov.s32 	%r167, 0;
	setp.ne.s32 	%p84, %r122, %r167;
	@!%p84 bra 	$Lt_39_244994;
	.loc	20	100	0
	ld.const.f32 	%f151, [kYCbCrOffset+0];
	bra.uni 	$Lt_39_244738;
$Lt_39_244994:
	ld.const.f32 	%f151, [kYCbCrFullRangeOffset+0];
$Lt_39_244738:
	.loc	20	118	0
	@!%p84 bra 	$Lt_39_245506;
	.loc	20	100	0
	ld.const.f32 	%f152, [kYCbCrOffset+4];
	bra.uni 	$Lt_39_245250;
$Lt_39_245506:
	ld.const.f32 	%f152, [kYCbCrFullRangeOffset+4];
$Lt_39_245250:
	.loc	20	118	0
	@!%p84 bra 	$Lt_39_246018;
	.loc	20	100	0
	ld.const.f32 	%f153, [kYCbCrOffset+8];
	bra.uni 	$Lt_39_245762;
$Lt_39_246018:
	ld.const.f32 	%f153, [kYCbCrFullRangeOffset+8];
$Lt_39_245762:
	.loc	20	437	0
	mov.f32 	%f154, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f155, %f150, %f154;
	mul.ftz.f32 	%f156, %f155, %f151;
	sub.ftz.f32 	%f145, %f144, %f156;
	mul.ftz.f32 	%f157, %f155, %f152;
	sub.ftz.f32 	%f147, %f146, %f157;
	mul.ftz.f32 	%f158, %f155, %f153;
	sub.ftz.f32 	%f149, %f148, %f158;
$Lt_39_244226:
	mov.f32 	%f159, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f160, %f9, %f159;
	mov.f32 	%f161, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p115, %f160, %f161;
	@!%p115 bra 	$Lt_39_246530;
	mov.f32 	%f149, 0f00000000;   	// 0
	mov.f32 	%f147, 0f00000000;   	// 0
	mov.f32 	%f145, 0f00000000;   	// 0
	mov.f32 	%f143, 0f00000000;   	// 0
	bra.uni 	$Lt_39_246274;
$Lt_39_246530:
	.loc	20	57	0
	mov.u32 	%r168, 0;
	setp.ne.s32 	%p116, %r9, %r168;
	@%p116 bra 	$Lt_39_184834;
	.loc	20	59	0
	mov.f32 	%f162, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_7;
$Lt_39_184834:
	.loc	20	61	0
	mov.u32 	%r169, 64;
	setp.ne.s32 	%p117, %r9, %r169;
	@%p117 bra 	$Lt_39_185090;
	.loc	20	63	0
	mov.f32 	%f162, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_7;
$Lt_39_185090:
	.loc	20	65	0
	mov.u32 	%r170, 128;
	setp.ne.s32 	%p118, %r9, %r170;
	@%p118 bra 	$Lt_39_185346;
	.loc	20	68	0
	mov.f32 	%f162, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_7;
$Lt_39_185346:
	.loc	20	70	0
	mov.u32 	%r171, 192;
	setp.ne.s32 	%p119, %r9, %r171;
	@%p119 bra 	$Lt_39_185602;
	.loc	20	72	0
	mov.f32 	%f162, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_7;
$Lt_39_185602:
	.loc	20	76	0
	mov.f32 	%f162, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_7:
	.loc	20	447	0
	div.approx.ftz.f32 	%f163, %f162, %f9;
	mul.ftz.f32 	%f145, %f163, %f145;
	.loc	20	448	0
	mul.ftz.f32 	%f147, %f163, %f147;
	.loc	20	449	0
	mul.ftz.f32 	%f149, %f163, %f149;
$Lt_39_246274:
	.loc	20	452	0
	mov.u32 	%r172, 0;
	setp.eq.s32 	%p120, %r18, %r172;
	@%p120 bra 	$Lt_39_246786;
	.loc	20	57	0
	mov.u32 	%r173, 0;
	setp.ne.s32 	%p121, %r9, %r173;
	@%p121 bra 	$Lt_39_186114;
	.loc	20	59	0
	mov.f32 	%f164, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_5;
$Lt_39_186114:
	.loc	20	61	0
	mov.u32 	%r174, 64;
	setp.ne.s32 	%p122, %r9, %r174;
	@%p122 bra 	$Lt_39_186370;
	.loc	20	63	0
	mov.f32 	%f164, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_5;
$Lt_39_186370:
	.loc	20	65	0
	mov.u32 	%r175, 128;
	setp.ne.s32 	%p123, %r9, %r175;
	@%p123 bra 	$Lt_39_186626;
	.loc	20	68	0
	mov.f32 	%f164, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_5;
$Lt_39_186626:
	.loc	20	70	0
	mov.u32 	%r176, 192;
	setp.ne.s32 	%p124, %r9, %r176;
	@%p124 bra 	$Lt_39_186882;
	.loc	20	72	0
	mov.f32 	%f164, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_5;
$Lt_39_186882:
	.loc	20	76	0
	mov.f32 	%f164, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_5:
	.loc	20	107	0
	and.b32 	%r122, %r4, 2048;
	mov.s32 	%r177, 0;
	setp.ne.s32 	%p84, %r122, %r177;
	@!%p84 bra 	$Lt_39_247554;
	.loc	20	100	0
	ld.const.f32 	%f165, [kYCbCrOffset+0];
	bra.uni 	$Lt_39_247298;
$Lt_39_247554:
	ld.const.f32 	%f165, [kYCbCrFullRangeOffset+0];
$Lt_39_247298:
	.loc	20	107	0
	@!%p84 bra 	$Lt_39_248066;
	.loc	20	100	0
	ld.const.f32 	%f166, [kYCbCrOffset+4];
	bra.uni 	$Lt_39_247810;
$Lt_39_248066:
	ld.const.f32 	%f166, [kYCbCrFullRangeOffset+4];
$Lt_39_247810:
	.loc	20	107	0
	@!%p84 bra 	$Lt_39_248578;
	.loc	20	100	0
	ld.const.f32 	%f167, [kYCbCrOffset+8];
	bra.uni 	$Lt_39_248322;
$Lt_39_248578:
	ld.const.f32 	%f167, [kYCbCrFullRangeOffset+8];
$Lt_39_248322:
	.loc	20	454	0
	mov.f32 	%f168, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f169, %f164, %f168;
	fma.rn.ftz.f32 	%f145, %f169, %f165, %f145;
	fma.rn.ftz.f32 	%f147, %f169, %f166, %f147;
	fma.rn.ftz.f32 	%f149, %f169, %f167, %f149;
$Lt_39_246786:
	.loc	20	551	0
	mov.f32 	%f9, %f143;
	mov.f32 	%f10, %f145;
	mov.f32 	%f11, %f147;
	mov.f32 	%f12, %f149;
$Lt_39_239106:
$L_39_221442:
$Lt_39_238594:
	.loc	20	540	0
	and.b32 	%r178, %r4, 4096;
	mov.u32 	%r179, 0;
	setp.ne.s32 	%p125, %r178, %r179;
	@%p125 bra 	$Lt_39_248834;
	.loc	21	268	0
	mov.f32 	%f170, %f10;
	.loc	21	269	0
	mov.f32 	%f171, %f9;
	.loc	20	558	0
	mov.f32 	%f9, %f12;
	mov.f32 	%f10, %f11;
	mov.f32 	%f11, %f170;
	mov.f32 	%f12, %f171;
$Lt_39_248834:
	@!%p3 bra 	$Lt_39_249346;
	.loc	20	57	0
	mov.u32 	%r180, 0;
	setp.ne.s32 	%p126, %r9, %r180;
	@%p126 bra 	$Lt_39_189442;
	.loc	20	59	0
	mov.f32 	%f172, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_1;
$Lt_39_189442:
	.loc	20	61	0
	mov.u32 	%r181, 64;
	setp.ne.s32 	%p127, %r9, %r181;
	@%p127 bra 	$Lt_39_189698;
	.loc	20	63	0
	mov.f32 	%f172, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_1;
$Lt_39_189698:
	.loc	20	65	0
	mov.u32 	%r182, 128;
	setp.ne.s32 	%p128, %r9, %r182;
	@%p128 bra 	$Lt_39_189954;
	.loc	20	68	0
	mov.f32 	%f172, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_1;
$Lt_39_189954:
	.loc	20	70	0
	mov.u32 	%r183, 192;
	setp.ne.s32 	%p129, %r9, %r183;
	@%p129 bra 	$Lt_39_190210;
	.loc	20	72	0
	mov.f32 	%f172, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_217_1;
$Lt_39_190210:
	.loc	20	76	0
	mov.f32 	%f172, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_217_1:
	.loc	20	564	0
	mov.f32 	%f173, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f174, %f9, %f173;
	mov.f32 	%f175, 0f00000000;   	// 0
	mov.f32 	%f176, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p130, %f174, %f176;
	selp.f32 	%f177, %f174, %f175, %p130;
	min.ftz.f32 	%f9, %f177, %f172;
	mov.f32 	%f178, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f179, %f10, %f178;
	mov.f32 	%f180, 0f00000000;   	// 0
	mov.f32 	%f181, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p131, %f179, %f181;
	selp.f32 	%f182, %f179, %f180, %p131;
	min.ftz.f32 	%f10, %f182, %f172;
	mov.f32 	%f183, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f184, %f11, %f183;
	mov.f32 	%f185, 0f00000000;   	// 0
	mov.f32 	%f186, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p132, %f184, %f186;
	selp.f32 	%f187, %f184, %f185, %p132;
	min.ftz.f32 	%f11, %f187, %f172;
	mov.f32 	%f188, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f189, %f12, %f188;
	mov.f32 	%f190, 0f00000000;   	// 0
	mov.f32 	%f191, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p133, %f189, %f191;
	selp.f32 	%f192, %f189, %f190, %p133;
	min.ftz.f32 	%f12, %f192, %f172;
$Lt_39_249346:
	.loc	22	56	0
	cvt.rzi.ftz.u32.f32 	%r184, %f10;
	cvt.u8.u32 	%r185, %r184;
	cvt.rzi.ftz.u32.f32 	%r186, %f11;
	cvt.u8.u32 	%r187, %r186;
	cvt.rzi.ftz.u32.f32 	%r188, %f12;
	cvt.u8.u32 	%r189, %r188;
	cvt.rzi.ftz.u32.f32 	%r190, %f9;
	cvt.u8.u32 	%r191, %r190;
	st.param.u8 	[__cudaretf__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_+0], %r191;
	mov.s32 	%r192, %r185;
	st.param.u8 	[__cudaretf__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_+1], %r192;
	mov.s32 	%r193, %r187;
	st.param.u8 	[__cudaretf__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_+2], %r193;
	mov.s32 	%r194, %r189;
	st.param.u8 	[__cudaretf__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_+3], %r194;
	ret;
$LDWend__Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_:
	} // _Z30ConvertPixel_444_32f_To_444_8u6float414IR_PixelFormatS0_

	.visible .func (.param .align 8 .b8 __cudaretf__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_[8]) _Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_ (.param .align 16 .b8 __cudaparmf1__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_[16], .param .s32 __cudaparmf2__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_, .param .s32 __cudaparmf3__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_)
	{
	.reg .u32 %r<196>;
	.reg .u64 %rd<3>;
	.reg .f32 %f<194>;
	.reg .pred %p<135>;
	.loc	22	62	0
$LDWbegin__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_:
	ld.param.f32 	%f1, [__cudaparmf1__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_+12];
	mov.f32 	%f8, %f7;
	ld.param.u32 	%r1, [__cudaparmf2__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_];
	mov.s32 	%r2, %r1;
	ld.param.u32 	%r3, [__cudaparmf3__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_];
	mov.s32 	%r4, %r3;
	.loc	20	469	0
	mov.f32 	%f9, %f2;
	mov.f32 	%f10, %f4;
	mov.f32 	%f11, %f6;
	mov.f32 	%f12, %f8;
	and.b32 	%r5, %r2, 4096;
	mov.u32 	%r6, 0;
	setp.ne.s32 	%p1, %r5, %r6;
	@%p1 bra 	$Lt_40_222722;
	.loc	20	473	0
	mov.f32 	%f9, %f8;
	mov.f32 	%f10, %f6;
	mov.f32 	%f11, %f4;
	mov.f32 	%f12, %f2;
$Lt_40_222722:
	.loc	20	476	0
	and.b32 	%r7, %r2, 448;
	mov.s32 	%r8, %r2;
	and.b32 	%r9, %r4, 448;
	mov.s32 	%r10, %r4;
	mov.s32 	%r11, 256;
	setp.ne.s32 	%p2, %r7, %r11;
	and.b32 	%r12, %r8, 1;
	mov.s32 	%r13, 256;
	setp.ne.s32 	%p3, %r9, %r13;
	and.b32 	%r14, %r10, 1;
	selp.s32 	%r15, 1, 0, %p2;
	selp.s32 	%r16, 1, 0, %p3;
	and.b32 	%r17, %r12, %r15;
	and.b32 	%r18, %r14, %r16;
	mov.u32 	%r19, 0;
	setp.eq.s32 	%p4, %r17, %r19;
	@%p4 bra 	$Lt_40_249858;
	mov.u32 	%r20, 0;
	setp.ne.s32 	%p5, %r18, %r20;
	@%p5 bra 	$Lt_40_249858;
	.loc	20	57	0
	mov.u32 	%r21, 0;
	setp.ne.s32 	%p6, %r7, %r21;
	@%p6 bra 	$Lt_40_140802;
	.loc	20	59	0
	mov.f32 	%f13, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_75;
$Lt_40_140802:
	.loc	20	61	0
	mov.u32 	%r22, 64;
	setp.ne.s32 	%p7, %r7, %r22;
	@%p7 bra 	$Lt_40_141058;
	.loc	20	63	0
	mov.f32 	%f13, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_75;
$Lt_40_141058:
	.loc	20	65	0
	mov.u32 	%r23, 128;
	setp.ne.s32 	%p8, %r7, %r23;
	@%p8 bra 	$Lt_40_141314;
	.loc	20	68	0
	mov.f32 	%f13, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_75;
$Lt_40_141314:
	.loc	20	70	0
	mov.u32 	%r24, 192;
	setp.ne.s32 	%p9, %r7, %r24;
	@%p9 bra 	$Lt_40_141570;
	.loc	20	72	0
	mov.f32 	%f13, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_75;
$Lt_40_141570:
	.loc	20	76	0
	mov.f32 	%f13, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_75:
	.loc	20	118	0
	and.b32 	%r25, %r2, 2048;
	mov.s32 	%r26, 0;
	setp.ne.s32 	%p10, %r25, %r26;
	@!%p10 bra 	$Lt_40_223490;
	.loc	20	100	0
	ld.const.f32 	%f14, [kYCbCrOffset+0];
	bra.uni 	$Lt_40_223234;
$Lt_40_223490:
	ld.const.f32 	%f14, [kYCbCrFullRangeOffset+0];
$Lt_40_223234:
	.loc	20	118	0
	@!%p10 bra 	$Lt_40_224002;
	.loc	20	100	0
	ld.const.f32 	%f15, [kYCbCrOffset+4];
	bra.uni 	$Lt_40_223746;
$Lt_40_224002:
	ld.const.f32 	%f15, [kYCbCrFullRangeOffset+4];
$Lt_40_223746:
	.loc	20	118	0
	@!%p10 bra 	$Lt_40_224514;
	.loc	20	100	0
	ld.const.f32 	%f16, [kYCbCrOffset+8];
	bra.uni 	$Lt_40_224258;
$Lt_40_224514:
	ld.const.f32 	%f16, [kYCbCrFullRangeOffset+8];
$Lt_40_224258:
	.loc	20	478	0
	mov.f32 	%f17, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f18, %f13, %f17;
	mul.ftz.f32 	%f19, %f18, %f14;
	sub.ftz.f32 	%f10, %f10, %f19;
	mul.ftz.f32 	%f20, %f18, %f15;
	sub.ftz.f32 	%f11, %f11, %f20;
	mul.ftz.f32 	%f21, %f18, %f16;
	sub.ftz.f32 	%f12, %f12, %f21;
$Lt_40_249858:
$Lt_40_26114:
	.loc	20	481	0
	and.b32 	%r27, %r2, 2;
	and.b32 	%r28, %r4, 2;
	mov.u32 	%r29, 0;
	setp.eq.s32 	%p11, %r27, %r29;
	@%p11 bra 	$Lt_40_250370;
	mov.u32 	%r30, 0;
	setp.ne.s32 	%p12, %r28, %r30;
	@%p12 bra 	$Lt_40_250370;
	.loc	20	483	0
	mov.f32 	%f22, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p13, %f10, %f22;
	@!%p13 bra 	$Lt_40_224770;
	.loc	20	372	0
	neg.ftz.f32 	%f23, %f10;
	lg2.approx.ftz.f32 	%f24, %f23;
	mov.f32 	%f25, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f26, %f24, %f25;
	ex2.approx.ftz.f32 	%f27, %f26;
	neg.ftz.f32 	%f28, %f27;
	bra.uni 	$LDWendi___log2f_218_71;
$Lt_40_224770:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f29, %f10;
	mov.f32 	%f30, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f31, %f29, %f30;
	ex2.approx.ftz.f32 	%f28, %f31;
$LDWendi___log2f_218_71:
	.loc	20	483	0
	mov.f32 	%f32, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p14, %f11, %f32;
	@!%p14 bra 	$Lt_40_225282;
	.loc	20	372	0
	neg.ftz.f32 	%f33, %f11;
	lg2.approx.ftz.f32 	%f34, %f33;
	mov.f32 	%f35, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f36, %f34, %f35;
	ex2.approx.ftz.f32 	%f37, %f36;
	neg.ftz.f32 	%f38, %f37;
	bra.uni 	$LDWendi___log2f_218_69;
$Lt_40_225282:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f39, %f11;
	mov.f32 	%f40, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f41, %f39, %f40;
	ex2.approx.ftz.f32 	%f38, %f41;
$LDWendi___log2f_218_69:
	.loc	20	483	0
	mov.f32 	%f42, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p15, %f12, %f42;
	@!%p15 bra 	$Lt_40_225794;
	.loc	20	372	0
	neg.ftz.f32 	%f43, %f12;
	lg2.approx.ftz.f32 	%f44, %f43;
	mov.f32 	%f45, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f46, %f44, %f45;
	ex2.approx.ftz.f32 	%f47, %f46;
	neg.ftz.f32 	%f48, %f47;
	bra.uni 	$LDWendi___log2f_218_67;
$Lt_40_225794:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f49, %f12;
	mov.f32 	%f50, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f51, %f49, %f50;
	ex2.approx.ftz.f32 	%f48, %f51;
$LDWendi___log2f_218_67:
	.loc	20	483	0
	mov.f32 	%f10, %f28;
	mov.f32 	%f11, %f38;
	mov.f32 	%f12, %f48;
$Lt_40_250370:
$Lt_40_29954:
	.loc	20	486	0
	and.b32 	%r31, %r2, 1;
	and.b32 	%r32, %r4, 1;
	and.b32 	%r33, %r2, 1536;
	and.b32 	%r34, %r4, 1536;
	set.ne.u32.s32 	%r35, %r31, %r32;
	neg.s32 	%r36, %r35;
	set.ne.u32.s32 	%r37, %r33, %r34;
	neg.s32 	%r38, %r37;
	or.b32 	%r39, %r36, %r38;
	mov.u32 	%r40, 0;
	setp.ne.s32 	%p16, %r39, %r40;
	@%p16 bra 	$Lt_40_80642;
	setp.eq.s32 	%p17, %r17, %r18;
	@%p17 bra 	$Lt_40_80898;
$Lt_40_80642:
	.loc	20	490	0
	mov.u32 	%r41, 0;
	setp.ne.s32 	%p18, %r31, %r41;
	@%p18 bra 	$Lt_40_226562;
	mov.s32 	%r42, 256;
	setp.eq.s32 	%p19, %r9, %r42;
	mov.u32 	%r43, 256;
	setp.ne.s32 	%p20, %r7, %r43;
	@%p20 bra 	$Lt_40_227074;
	.loc	20	137	0
	mov.s32 	%r44, 512;
	setp.eq.s32 	%p21, %r34, %r44;
	@!%p19 bra 	$Lt_40_144898;
	.loc	20	139	0
	@!%p21 bra 	$Lt_40_145154;
	.loc	20	141	0
	mov.u64 	%rd1, kRGB32f_To_709YPbPr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_145154:
	.loc	20	145	0
	mov.u64 	%rd1, kRGB32f_To_601YPbPr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_144898:
	.loc	20	150	0
	@!%p21 bra 	$Lt_40_145410;
	.loc	20	152	0
	mov.u64 	%rd1, kRGB32f_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_145410:
	.loc	20	154	0
	and.b32 	%r45, %r4, 2048;
	mov.u32 	%r46, 0;
	setp.ne.s32 	%p22, %r45, %r46;
	@%p22 bra 	$Lt_40_145666;
	.loc	20	156	0
	mov.u64 	%rd1, kRGB32f_To_601YCbCrFullRange;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_145666:
	.loc	20	160	0
	mov.u64 	%rd1, kRGB32f_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_227074:
	@!%p19 bra 	$Lt_40_227586;
	bra.uni 	$Lt_40_226306;
$Lt_40_227586:
	.loc	20	179	0
	mov.u32 	%r47, 512;
	setp.ne.s32 	%p23, %r34, %r47;
	@%p23 bra 	$Lt_40_146434;
	.loc	20	181	0
	mov.u64 	%rd1, kRGB8u_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_146434:
	.loc	20	183	0
	and.b32 	%r48, %r4, 2048;
	mov.u32 	%r49, 0;
	setp.ne.s32 	%p24, %r48, %r49;
	@%p24 bra 	$Lt_40_146690;
	.loc	20	185	0
	mov.u64 	%rd1, kRGB8u_To_601YCbCrFullRange;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_146690:
	.loc	20	189	0
	mov.u64 	%rd1, kRGB8u_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_226562:
	mov.s32 	%r50, 0;
	setp.eq.s32 	%p25, %r32, %r50;
	mov.u32 	%r51, 512;
	setp.ne.s32 	%p26, %r33, %r51;
	@%p26 bra 	$Lt_40_228098;
	mov.s32 	%r52, 256;
	setp.eq.s32 	%p27, %r7, %r52;
	@!%p25 bra 	$Lt_40_228610;
	mov.s32 	%r53, 256;
	setp.eq.s32 	%p28, %r9, %r53;
	@!%p27 bra 	$Lt_40_229122;
	@!%p28 bra 	$Lt_40_226306;
	.loc	20	202	0
	mov.u64 	%rd1, k709YPbPr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_229122:
	.loc	20	211	0
	@!%p28 bra 	$Lt_40_147970;
	.loc	20	213	0
	mov.u64 	%rd1, k709YCbCr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_147970:
	.loc	20	217	0
	mov.u64 	%rd1, k709YCbCr_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_228610:
	@!%p27 bra 	$Lt_40_230146;
	bra.uni 	$Lt_40_226306;
$Lt_40_230146:
	mov.s32 	%r54, 256;
	set.eq.u32.s32 	%r55, %r9, %r54;
	neg.s32 	%r56, %r55;
	and.b32 	%r57, %r4, 2048;
	mov.s32 	%r58, 0;
	set.eq.u32.s32 	%r59, %r57, %r58;
	neg.s32 	%r60, %r59;
	or.b32 	%r61, %r56, %r60;
	mov.u32 	%r62, 0;
	setp.eq.s32 	%p29, %r61, %r62;
	@%p29 bra 	$Lt_40_230658;
	bra.uni 	$Lt_40_226306;
$Lt_40_230658:
	.loc	20	250	0
	mov.u64 	%rd1, k709YCbCr_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_228098:
	and.b32 	%r63, %r2, 2048;
	mov.s32 	%r64, 0;
	setp.eq.s32 	%p30, %r63, %r64;
	@!%p30 bra 	$Lt_40_231170;
	@!%p25 bra 	$Lt_40_226306;
	.loc	20	259	0
	mov.u32 	%r65, 256;
	setp.ne.s32 	%p31, %r9, %r65;
	@%p31 bra 	$Lt_40_149506;
	.loc	20	261	0
	mov.u64 	%rd1, k601YCbCrFullRange_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_149506:
	.loc	20	265	0
	mov.u64 	%rd1, k601YCbCrFullRange_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_231170:
	mov.s32 	%r66, 256;
	setp.eq.s32 	%p27, %r7, %r66;
	@!%p25 bra 	$Lt_40_232194;
	mov.s32 	%r67, 256;
	setp.eq.s32 	%p32, %r9, %r67;
	@!%p27 bra 	$Lt_40_232706;
	@!%p32 bra 	$Lt_40_226306;
	.loc	20	302	0
	mov.u64 	%rd1, k601YPbPr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_232706:
	.loc	20	311	0
	@!%p32 bra 	$Lt_40_151298;
	.loc	20	313	0
	mov.u64 	%rd1, k601YCbCr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_151298:
	.loc	20	317	0
	mov.u64 	%rd1, k601YCbCr_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_232194:
	@!%p27 bra 	$Lt_40_233730;
	bra.uni 	$Lt_40_226306;
$Lt_40_233730:
	selp.s32 	%r68, 1, 0, %p30;
	mov.s32 	%r69, 256;
	set.eq.u32.s32 	%r70, %r9, %r69;
	neg.s32 	%r71, %r70;
	or.b32 	%r72, %r68, %r71;
	mov.u32 	%r73, 0;
	setp.eq.s32 	%p33, %r72, %r73;
	@%p33 bra 	$Lt_40_234242;
	bra.uni 	$Lt_40_226306;
$Lt_40_234242:
	.loc	20	350	0
	mov.u64 	%rd1, k601YCbCr_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65;
$Lt_40_226306:
	.loc	20	355	0
	mov.u64 	%rd1, 0;
$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__218_65:
	.loc	20	490	0
	ld.global.f32 	%f52, [%rd1+16];
	mul.ftz.f32 	%f53, %f52, %f11;
	ld.global.f32 	%f54, [%rd1+12];
	fma.rn.ftz.f32 	%f55, %f54, %f10, %f53;
	ld.global.f32 	%f56, [%rd1+20];
	fma.rn.ftz.f32 	%f57, %f56, %f12, %f55;
	ld.global.f32 	%f58, [%rd1+28];
	mul.ftz.f32 	%f59, %f58, %f11;
	ld.global.f32 	%f60, [%rd1+24];
	fma.rn.ftz.f32 	%f61, %f60, %f10, %f59;
	ld.global.f32 	%f62, [%rd1+32];
	fma.rn.ftz.f32 	%f63, %f62, %f12, %f61;
	ld.global.f32 	%f64, [%rd1+4];
	mul.ftz.f32 	%f65, %f64, %f11;
	ld.global.f32 	%f66, [%rd1+0];
	fma.rn.ftz.f32 	%f67, %f66, %f10, %f65;
	ld.global.f32 	%f68, [%rd1+8];
	fma.rn.ftz.f32 	%f10, %f68, %f12, %f67;
	mov.f32 	%f11, %f57;
	mov.f32 	%f12, %f63;
	setp.eq.s32 	%p34, %r7, %r9;
	@%p34 bra 	$Lt_40_235010;
	.loc	20	494	0
	mov.s32 	%r74, 256;
	setp.eq.s32 	%p27, %r7, %r74;
	@!%p27 bra 	$L_40_220162;
	mov.s32 	%r75, 0;
	setp.eq.s32 	%p35, %r9, %r75;
	@%p35 bra 	$Lt_40_251394;
$L_40_220162:
	mov.s32 	%r76, 0;
	setp.eq.s32 	%p36, %r7, %r76;
	@!%p36 bra 	$Lt_40_251650;
	mov.u32 	%r77, 256;
	setp.ne.s32 	%p37, %r9, %r77;
	@%p37 bra 	$Lt_40_251650;
	mov.s32 	%r78, 0;
	setp.eq.s32 	%p35, %r9, %r78;
	bra.uni 	$L_40_219906;
$Lt_40_251394:
	mov.s32 	%r79, 0;
	setp.eq.s32 	%p36, %r7, %r79;
$L_40_219906:
	.loc	20	57	0
	@!%p35 bra 	$Lt_40_152834;
	.loc	20	59	0
	mov.f32 	%f69, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_63;
$Lt_40_152834:
	.loc	20	61	0
	mov.u32 	%r80, 64;
	setp.ne.s32 	%p38, %r9, %r80;
	@%p38 bra 	$Lt_40_153090;
	.loc	20	63	0
	mov.f32 	%f69, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_63;
$Lt_40_153090:
	.loc	20	65	0
	mov.u32 	%r81, 128;
	setp.ne.s32 	%p39, %r9, %r81;
	@%p39 bra 	$Lt_40_153346;
	.loc	20	68	0
	mov.f32 	%f69, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_63;
$Lt_40_153346:
	.loc	20	70	0
	mov.u32 	%r82, 192;
	setp.ne.s32 	%p40, %r9, %r82;
	@%p40 bra 	$Lt_40_153602;
	.loc	20	72	0
	mov.f32 	%f69, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_63;
$Lt_40_153602:
	.loc	20	76	0
	mov.f32 	%f69, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_63:
	.loc	20	57	0
	@!%p36 bra 	$Lt_40_153858;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_61;
$Lt_40_153858:
	.loc	20	61	0
	mov.u32 	%r83, 64;
	setp.ne.s32 	%p41, %r7, %r83;
	@%p41 bra 	$Lt_40_154114;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_61;
$Lt_40_154114:
	.loc	20	65	0
	mov.u32 	%r84, 128;
	setp.ne.s32 	%p42, %r7, %r84;
	@%p42 bra 	$Lt_40_154370;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_61;
$Lt_40_154370:
	.loc	20	70	0
	mov.u32 	%r85, 192;
	setp.ne.s32 	%p43, %r7, %r85;
	@%p43 bra 	$Lt_40_154626;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_61;
$Lt_40_154626:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_61:
	.loc	20	498	0
	div.approx.ftz.f32 	%f71, %f69, %f70;
	mul.ftz.f32 	%f9, %f9, %f71;
	bra.uni 	$Lt_40_235010;
$Lt_40_251650:
$L_40_219650:
	.loc	20	500	0
	@!%p27 bra 	$L_40_221186;
	@%p3 bra 	$L_40_220930;
$L_40_221186:
	@!%p2 bra 	$Lt_40_252674;
	mov.u32 	%r86, 256;
	setp.ne.s32 	%p44, %r9, %r86;
	@%p44 bra 	$Lt_40_252674;
$L_40_220930:
	.loc	20	57	0
	mov.u32 	%r87, 0;
	setp.ne.s32 	%p45, %r9, %r87;
	@%p45 bra 	$Lt_40_155138;
	.loc	20	59	0
	mov.f32 	%f69, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_59;
$Lt_40_155138:
	.loc	20	61	0
	mov.u32 	%r88, 64;
	setp.ne.s32 	%p46, %r9, %r88;
	@%p46 bra 	$Lt_40_155394;
	.loc	20	63	0
	mov.f32 	%f69, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_59;
$Lt_40_155394:
	.loc	20	65	0
	mov.u32 	%r89, 128;
	setp.ne.s32 	%p47, %r9, %r89;
	@%p47 bra 	$Lt_40_155650;
	.loc	20	68	0
	mov.f32 	%f69, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_59;
$Lt_40_155650:
	.loc	20	70	0
	mov.u32 	%r90, 192;
	setp.ne.s32 	%p48, %r9, %r90;
	@%p48 bra 	$Lt_40_155906;
	.loc	20	72	0
	mov.f32 	%f69, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_59;
$Lt_40_155906:
	.loc	20	76	0
	mov.f32 	%f69, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_59:
	.loc	20	57	0
	@!%p36 bra 	$Lt_40_156162;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_57;
$Lt_40_156162:
	.loc	20	61	0
	mov.u32 	%r91, 64;
	setp.ne.s32 	%p49, %r7, %r91;
	@%p49 bra 	$Lt_40_156418;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_57;
$Lt_40_156418:
	.loc	20	65	0
	mov.u32 	%r92, 128;
	setp.ne.s32 	%p50, %r7, %r92;
	@%p50 bra 	$Lt_40_156674;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_57;
$Lt_40_156674:
	.loc	20	70	0
	mov.u32 	%r93, 192;
	setp.ne.s32 	%p51, %r7, %r93;
	@%p51 bra 	$Lt_40_156930;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_57;
$Lt_40_156930:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_57:
	.loc	20	504	0
	div.approx.ftz.f32 	%f72, %f69, %f70;
	mul.ftz.f32 	%f9, %f9, %f72;
	.loc	20	57	0
	@!%p36 bra 	$Lt_40_158210;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_53;
$Lt_40_158210:
	.loc	20	61	0
	mov.u32 	%r94, 64;
	setp.ne.s32 	%p52, %r7, %r94;
	@%p52 bra 	$Lt_40_158466;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_53;
$Lt_40_158466:
	.loc	20	65	0
	mov.u32 	%r95, 128;
	setp.ne.s32 	%p53, %r7, %r95;
	@%p53 bra 	$Lt_40_158722;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_53;
$Lt_40_158722:
	.loc	20	70	0
	mov.u32 	%r96, 192;
	setp.ne.s32 	%p54, %r7, %r96;
	@%p54 bra 	$Lt_40_158978;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_53;
$Lt_40_158978:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_53:
	.loc	20	505	0
	mov.f32 	%f73, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f74, %f73, %f70;
	mul.ftz.f32 	%f10, %f10, %f74;
	.loc	20	57	0
	@!%p36 bra 	$Lt_40_160258;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_49;
$Lt_40_160258:
	.loc	20	61	0
	mov.u32 	%r97, 64;
	setp.ne.s32 	%p55, %r7, %r97;
	@%p55 bra 	$Lt_40_160514;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_49;
$Lt_40_160514:
	.loc	20	65	0
	mov.u32 	%r98, 128;
	setp.ne.s32 	%p56, %r7, %r98;
	@%p56 bra 	$Lt_40_160770;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_49;
$Lt_40_160770:
	.loc	20	70	0
	mov.u32 	%r99, 192;
	setp.ne.s32 	%p57, %r7, %r99;
	@%p57 bra 	$Lt_40_161026;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_49;
$Lt_40_161026:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_49:
	.loc	20	506	0
	mov.f32 	%f75, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f76, %f75, %f70;
	mul.ftz.f32 	%f11, %f57, %f76;
	.loc	20	57	0
	@!%p36 bra 	$Lt_40_162306;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_45;
$Lt_40_162306:
	.loc	20	61	0
	mov.u32 	%r100, 64;
	setp.ne.s32 	%p58, %r7, %r100;
	@%p58 bra 	$Lt_40_162562;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_45;
$Lt_40_162562:
	.loc	20	65	0
	mov.u32 	%r101, 128;
	setp.ne.s32 	%p59, %r7, %r101;
	@%p59 bra 	$Lt_40_162818;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_45;
$Lt_40_162818:
	.loc	20	70	0
	mov.u32 	%r102, 192;
	setp.ne.s32 	%p60, %r7, %r102;
	@%p60 bra 	$Lt_40_163074;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_45;
$Lt_40_163074:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_45:
	.loc	20	507	0
	mov.f32 	%f77, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f78, %f77, %f70;
	mul.ftz.f32 	%f12, %f63, %f78;
	bra.uni 	$Lt_40_235010;
$Lt_40_252674:
$L_40_220674:
	.loc	20	57	0
	@!%p36 bra 	$Lt_40_163330;
	.loc	20	59	0
	mov.f32 	%f69, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_43;
$Lt_40_163330:
	.loc	20	61	0
	mov.u32 	%r103, 64;
	setp.ne.s32 	%p61, %r7, %r103;
	@%p61 bra 	$Lt_40_163586;
	.loc	20	63	0
	mov.f32 	%f69, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_43;
$Lt_40_163586:
	.loc	20	65	0
	mov.u32 	%r104, 128;
	setp.ne.s32 	%p62, %r7, %r104;
	@%p62 bra 	$Lt_40_163842;
	.loc	20	68	0
	mov.f32 	%f69, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_43;
$Lt_40_163842:
	.loc	20	70	0
	mov.u32 	%r105, 192;
	setp.ne.s32 	%p63, %r7, %r105;
	@%p63 bra 	$Lt_40_164098;
	.loc	20	72	0
	mov.f32 	%f69, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_43;
$Lt_40_164098:
	.loc	20	76	0
	mov.f32 	%f69, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_43:
	.loc	20	511	0
	mov.f32 	%f79, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f80, %f69, %f79;
	mul.ftz.f32 	%f9, %f80, %f9;
	mul.ftz.f32 	%f10, %f80, %f10;
	mul.ftz.f32 	%f11, %f80, %f57;
	mul.ftz.f32 	%f12, %f80, %f63;
	bra.uni 	$Lt_40_235010;
$Lt_40_80898:
	.loc	20	486	0
	setp.eq.s32 	%p64, %r7, %r9;
	@%p64 bra 	$Lt_40_235010;
	.loc	20	57	0
	mov.u32 	%r106, 0;
	setp.ne.s32 	%p65, %r9, %r106;
	@%p65 bra 	$Lt_40_165634;
	.loc	20	59	0
	mov.f32 	%f69, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_39;
$Lt_40_165634:
	.loc	20	61	0
	mov.u32 	%r107, 64;
	setp.ne.s32 	%p66, %r9, %r107;
	@%p66 bra 	$Lt_40_165890;
	.loc	20	63	0
	mov.f32 	%f69, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_39;
$Lt_40_165890:
	.loc	20	65	0
	mov.u32 	%r108, 128;
	setp.ne.s32 	%p67, %r9, %r108;
	@%p67 bra 	$Lt_40_166146;
	.loc	20	68	0
	mov.f32 	%f69, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_39;
$Lt_40_166146:
	.loc	20	70	0
	mov.u32 	%r109, 192;
	setp.ne.s32 	%p68, %r9, %r109;
	@%p68 bra 	$Lt_40_166402;
	.loc	20	72	0
	mov.f32 	%f69, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_39;
$Lt_40_166402:
	.loc	20	76	0
	mov.f32 	%f69, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_39:
	.loc	20	57	0
	mov.u32 	%r110, 0;
	setp.ne.s32 	%p69, %r7, %r110;
	@%p69 bra 	$Lt_40_166658;
	.loc	20	59	0
	mov.f32 	%f70, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_37;
$Lt_40_166658:
	.loc	20	61	0
	mov.u32 	%r111, 64;
	setp.ne.s32 	%p70, %r7, %r111;
	@%p70 bra 	$Lt_40_166914;
	.loc	20	63	0
	mov.f32 	%f70, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_37;
$Lt_40_166914:
	.loc	20	65	0
	mov.u32 	%r112, 128;
	setp.ne.s32 	%p71, %r7, %r112;
	@%p71 bra 	$Lt_40_167170;
	.loc	20	68	0
	mov.f32 	%f70, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_37;
$Lt_40_167170:
	.loc	20	70	0
	mov.u32 	%r113, 192;
	setp.ne.s32 	%p72, %r7, %r113;
	@%p72 bra 	$Lt_40_167426;
	.loc	20	72	0
	mov.f32 	%f70, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_37;
$Lt_40_167426:
	.loc	20	76	0
	mov.f32 	%f70, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_37:
	.loc	20	517	0
	div.approx.ftz.f32 	%f81, %f69, %f70;
	mul.ftz.f32 	%f9, %f81, %f9;
	mul.ftz.f32 	%f10, %f81, %f10;
	mul.ftz.f32 	%f11, %f81, %f11;
	mul.ftz.f32 	%f12, %f81, %f12;
$Lt_40_235010:
$Lt_40_83202:
	.loc	20	520	0
	mov.u32 	%r114, 0;
	setp.eq.s32 	%p73, %r28, %r114;
	@%p73 bra 	$Lt_40_253186;
	mov.u32 	%r115, 0;
	setp.ne.s32 	%p74, %r27, %r115;
	@%p74 bra 	$Lt_40_253186;
	.loc	20	522	0
	mov.f32 	%f82, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p75, %f10, %f82;
	@!%p75 bra 	$Lt_40_235522;
	.loc	20	372	0
	neg.ftz.f32 	%f83, %f10;
	lg2.approx.ftz.f32 	%f84, %f83;
	mov.f32 	%f85, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f86, %f84, %f85;
	ex2.approx.ftz.f32 	%f87, %f86;
	neg.ftz.f32 	%f88, %f87;
	bra.uni 	$LDWendi___log2f_218_35;
$Lt_40_235522:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f89, %f10;
	mov.f32 	%f90, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f91, %f89, %f90;
	ex2.approx.ftz.f32 	%f88, %f91;
$LDWendi___log2f_218_35:
	.loc	20	522	0
	mov.f32 	%f92, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p76, %f11, %f92;
	@!%p76 bra 	$Lt_40_236034;
	.loc	20	372	0
	neg.ftz.f32 	%f93, %f11;
	lg2.approx.ftz.f32 	%f94, %f93;
	mov.f32 	%f95, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f96, %f94, %f95;
	ex2.approx.ftz.f32 	%f97, %f96;
	neg.ftz.f32 	%f98, %f97;
	bra.uni 	$LDWendi___log2f_218_33;
$Lt_40_236034:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f99, %f11;
	mov.f32 	%f100, 0f400e38e4;   	// 2.22222
	mul.ftz.f32 	%f101, %f99, %f100;
	ex2.approx.ftz.f32 	%f98, %f101;
$LDWendi___log2f_218_33:
	.loc	20	522	0
	mov.f32 	%f102, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p77, %f12, %f102;
	@!%p77 bra 	$Lt_40_236546;
	.loc	20	372	0
	neg.ftz.f32 	%f103, %f12;
	lg2.approx.ftz.f32 	%f104, %f103;
	mov.f32 	%f105, 0f400e38e4;   	// 2.22222
	mul.ftz.f32 	%f106, %f104, %f105;
	ex2.approx.ftz.f32 	%f107, %f106;
	neg.ftz.f32 	%f108, %f107;
	bra.uni 	$LDWendi___log2f_218_31;
$Lt_40_236546:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f109, %f12;
	mov.f32 	%f110, 0f400e38e4;   	// 2.22222
	mul.ftz.f32 	%f111, %f109, %f110;
	ex2.approx.ftz.f32 	%f108, %f111;
$LDWendi___log2f_218_31:
	.loc	20	522	0
	mov.f32 	%f10, %f88;
	mov.f32 	%f11, %f98;
	mov.f32 	%f12, %f108;
$Lt_40_253186:
$Lt_40_85250:
	.loc	20	525	0
	mov.u32 	%r116, 0;
	setp.eq.s32 	%p78, %r18, %r116;
	@%p78 bra 	$Lt_40_253698;
	mov.u32 	%r117, 0;
	setp.ne.s32 	%p79, %r17, %r117;
	@%p79 bra 	$Lt_40_253698;
	.loc	20	57	0
	mov.u32 	%r118, 0;
	setp.ne.s32 	%p80, %r9, %r118;
	@%p80 bra 	$Lt_40_168450;
	.loc	20	59	0
	mov.f32 	%f112, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_29;
$Lt_40_168450:
	.loc	20	61	0
	mov.u32 	%r119, 64;
	setp.ne.s32 	%p81, %r9, %r119;
	@%p81 bra 	$Lt_40_168706;
	.loc	20	63	0
	mov.f32 	%f112, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_29;
$Lt_40_168706:
	.loc	20	65	0
	mov.u32 	%r120, 128;
	setp.ne.s32 	%p82, %r9, %r120;
	@%p82 bra 	$Lt_40_168962;
	.loc	20	68	0
	mov.f32 	%f112, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_29;
$Lt_40_168962:
	.loc	20	70	0
	mov.u32 	%r121, 192;
	setp.ne.s32 	%p83, %r9, %r121;
	@%p83 bra 	$Lt_40_169218;
	.loc	20	72	0
	mov.f32 	%f112, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_29;
$Lt_40_169218:
	.loc	20	76	0
	mov.f32 	%f112, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_29:
	.loc	20	107	0
	and.b32 	%r122, %r4, 2048;
	mov.s32 	%r123, 0;
	setp.ne.s32 	%p84, %r122, %r123;
	@!%p84 bra 	$Lt_40_237314;
	.loc	20	100	0
	ld.const.f32 	%f113, [kYCbCrOffset+0];
	bra.uni 	$Lt_40_237058;
$Lt_40_237314:
	ld.const.f32 	%f113, [kYCbCrFullRangeOffset+0];
$Lt_40_237058:
	.loc	20	107	0
	@!%p84 bra 	$Lt_40_237826;
	.loc	20	100	0
	ld.const.f32 	%f114, [kYCbCrOffset+4];
	bra.uni 	$Lt_40_237570;
$Lt_40_237826:
	ld.const.f32 	%f114, [kYCbCrFullRangeOffset+4];
$Lt_40_237570:
	.loc	20	107	0
	@!%p84 bra 	$Lt_40_238338;
	.loc	20	100	0
	ld.const.f32 	%f115, [kYCbCrOffset+8];
	bra.uni 	$Lt_40_238082;
$Lt_40_238338:
	ld.const.f32 	%f115, [kYCbCrFullRangeOffset+8];
$Lt_40_238082:
	.loc	20	527	0
	mov.f32 	%f116, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f117, %f112, %f116;
	fma.rn.ftz.f32 	%f10, %f117, %f113, %f10;
	fma.rn.ftz.f32 	%f11, %f117, %f114, %f11;
	fma.rn.ftz.f32 	%f12, %f117, %f115, %f12;
$Lt_40_253698:
$Lt_40_91650:
	.loc	20	525	0
	and.b32 	%r124, %r2, 12;
	and.b32 	%r125, %r4, 12;
	setp.eq.s32 	%p85, %r124, %r125;
	@%p85 bra 	$Lt_40_239106;
	.loc	20	532	0
	mov.u32 	%r126, 8;
	setp.ne.s32 	%p86, %r124, %r126;
	@%p86 bra 	$L_40_222466;
	mov.u32 	%r127, 12;
	setp.eq.s32 	%p87, %r125, %r127;
	@%p87 bra 	$Lt_40_254466;
$L_40_222466:
	mov.u32 	%r128, 12;
	setp.eq.s32 	%p88, %r124, %r128;
	@%p88 bra 	$Lt_40_254466;
	mov.u32 	%r129, 0;
	setp.ne.s32 	%p89, %r124, %r129;
	@%p89 bra 	$L_40_221698;
$Lt_40_254466:
$L_40_221954:
	.loc	20	57	0
	mov.u32 	%r130, 0;
	setp.ne.s32 	%p90, %r9, %r130;
	@%p90 bra 	$Lt_40_171778;
	.loc	20	59	0
	mov.f32 	%f118, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_25;
$Lt_40_171778:
	.loc	20	61	0
	mov.u32 	%r131, 64;
	setp.ne.s32 	%p91, %r9, %r131;
	@%p91 bra 	$Lt_40_172034;
	.loc	20	63	0
	mov.f32 	%f118, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_25;
$Lt_40_172034:
	.loc	20	65	0
	mov.u32 	%r132, 128;
	setp.ne.s32 	%p92, %r9, %r132;
	@%p92 bra 	$Lt_40_172290;
	.loc	20	68	0
	mov.f32 	%f118, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_25;
$Lt_40_172290:
	.loc	20	70	0
	mov.u32 	%r133, 192;
	setp.ne.s32 	%p93, %r9, %r133;
	@%p93 bra 	$Lt_40_172546;
	.loc	20	72	0
	mov.f32 	%f118, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_25;
$Lt_40_172546:
	.loc	20	76	0
	mov.f32 	%f118, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_25:
	.loc	20	536	0
	mov.f32 	%f9, %f118;
	bra.uni 	$Lt_40_239106;
$L_40_221698:
	.loc	20	540	0
	mov.s32 	%r134, 12;
	setp.eq.s32 	%p94, %r125, %r134;
	mov.s32 	%r135, 4;
	set.eq.u32.s32 	%r136, %r124, %r135;
	neg.s32 	%r137, %r136;
	selp.s32 	%r138, 1, 0, %p94;
	mov.s32 	%r139, 8;
	set.eq.u32.s32 	%r140, %r125, %r139;
	neg.s32 	%r141, %r140;
	or.b32 	%r142, %r138, %r141;
	and.b32 	%r143, %r137, %r142;
	mov.u32 	%r144, 0;
	setp.eq.s32 	%p95, %r143, %r144;
	@%p95 bra 	$Lt_40_239362;
	.loc	20	410	0
	mov.f32 	%f119, %f10;
	mov.f32 	%f120, %f119;
	mov.f32 	%f121, %f11;
	mov.f32 	%f122, %f121;
	mov.f32 	%f123, %f12;
	mov.f32 	%f124, %f123;
	.loc	20	57	0
	mov.s32 	%r145, 0;
	setp.eq.s32 	%p35, %r9, %r145;
	@!%p35 bra 	$Lt_40_173058;
	.loc	20	59	0
	mov.f32 	%f125, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_23;
$Lt_40_173058:
	.loc	20	61	0
	mov.u32 	%r146, 64;
	setp.ne.s32 	%p96, %r9, %r146;
	@%p96 bra 	$Lt_40_173314;
	.loc	20	63	0
	mov.f32 	%f125, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_23;
$Lt_40_173314:
	.loc	20	65	0
	mov.u32 	%r147, 128;
	setp.ne.s32 	%p97, %r9, %r147;
	@%p97 bra 	$Lt_40_173570;
	.loc	20	68	0
	mov.f32 	%f125, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_23;
$Lt_40_173570:
	.loc	20	70	0
	mov.u32 	%r148, 192;
	setp.ne.s32 	%p98, %r9, %r148;
	@%p98 bra 	$Lt_40_173826;
	.loc	20	72	0
	mov.f32 	%f125, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_23;
$Lt_40_173826:
	.loc	20	76	0
	mov.f32 	%f125, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_23:
	.loc	20	413	0
	mov.u32 	%r149, 0;
	setp.eq.s32 	%p99, %r18, %r149;
	@%p99 bra 	$Lt_40_239618;
	.loc	20	57	0
	@!%p35 bra 	$Lt_40_174338;
	.loc	20	59	0
	mov.f32 	%f126, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_21;
$Lt_40_174338:
	.loc	20	61	0
	mov.u32 	%r150, 64;
	setp.ne.s32 	%p100, %r9, %r150;
	@%p100 bra 	$Lt_40_174594;
	.loc	20	63	0
	mov.f32 	%f126, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_21;
$Lt_40_174594:
	.loc	20	65	0
	mov.u32 	%r151, 128;
	setp.ne.s32 	%p101, %r9, %r151;
	@%p101 bra 	$Lt_40_174850;
	.loc	20	68	0
	mov.f32 	%f126, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_21;
$Lt_40_174850:
	.loc	20	70	0
	mov.u32 	%r152, 192;
	setp.ne.s32 	%p102, %r9, %r152;
	@%p102 bra 	$Lt_40_175106;
	.loc	20	72	0
	mov.f32 	%f126, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_21;
$Lt_40_175106:
	.loc	20	76	0
	mov.f32 	%f126, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_21:
	.loc	20	118	0
	and.b32 	%r122, %r4, 2048;
	mov.s32 	%r153, 0;
	setp.ne.s32 	%p84, %r122, %r153;
	@!%p84 bra 	$Lt_40_240386;
	.loc	20	100	0
	ld.const.f32 	%f127, [kYCbCrOffset+0];
	bra.uni 	$Lt_40_240130;
$Lt_40_240386:
	ld.const.f32 	%f127, [kYCbCrFullRangeOffset+0];
$Lt_40_240130:
	.loc	20	118	0
	@!%p84 bra 	$Lt_40_240898;
	.loc	20	100	0
	ld.const.f32 	%f128, [kYCbCrOffset+4];
	bra.uni 	$Lt_40_240642;
$Lt_40_240898:
	ld.const.f32 	%f128, [kYCbCrFullRangeOffset+4];
$Lt_40_240642:
	.loc	20	118	0
	@!%p84 bra 	$Lt_40_241410;
	.loc	20	100	0
	ld.const.f32 	%f129, [kYCbCrOffset+8];
	bra.uni 	$Lt_40_241154;
$Lt_40_241410:
	ld.const.f32 	%f129, [kYCbCrFullRangeOffset+8];
$Lt_40_241154:
	.loc	20	415	0
	mov.f32 	%f130, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f131, %f126, %f130;
	mul.ftz.f32 	%f132, %f131, %f127;
	sub.ftz.f32 	%f120, %f119, %f132;
	mul.ftz.f32 	%f133, %f131, %f128;
	sub.ftz.f32 	%f122, %f121, %f133;
	mul.ftz.f32 	%f134, %f131, %f129;
	sub.ftz.f32 	%f124, %f123, %f134;
$Lt_40_239618:
	.loc	20	418	0
	rcp.approx.ftz.f32 	%f135, %f125;
	mul.ftz.f32 	%f136, %f135, %f9;
	mul.ftz.f32 	%f120, %f136, %f120;
	.loc	20	419	0
	mul.ftz.f32 	%f122, %f136, %f122;
	.loc	20	420	0
	mul.ftz.f32 	%f124, %f136, %f124;
	.loc	20	422	0
	mov.u32 	%r154, 0;
	setp.eq.s32 	%p103, %r18, %r154;
	@%p103 bra 	$Lt_40_241666;
	.loc	20	57	0
	@!%p35 bra 	$Lt_40_177410;
	.loc	20	59	0
	mov.f32 	%f137, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_17;
$Lt_40_177410:
	.loc	20	61	0
	mov.u32 	%r155, 64;
	setp.ne.s32 	%p104, %r9, %r155;
	@%p104 bra 	$Lt_40_177666;
	.loc	20	63	0
	mov.f32 	%f137, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_17;
$Lt_40_177666:
	.loc	20	65	0
	mov.u32 	%r156, 128;
	setp.ne.s32 	%p105, %r9, %r156;
	@%p105 bra 	$Lt_40_177922;
	.loc	20	68	0
	mov.f32 	%f137, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_17;
$Lt_40_177922:
	.loc	20	70	0
	mov.u32 	%r157, 192;
	setp.ne.s32 	%p106, %r9, %r157;
	@%p106 bra 	$Lt_40_178178;
	.loc	20	72	0
	mov.f32 	%f137, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_17;
$Lt_40_178178:
	.loc	20	76	0
	mov.f32 	%f137, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_17:
	.loc	20	107	0
	and.b32 	%r122, %r4, 2048;
	mov.s32 	%r158, 0;
	setp.ne.s32 	%p84, %r122, %r158;
	@!%p84 bra 	$Lt_40_242434;
	.loc	20	100	0
	ld.const.f32 	%f138, [kYCbCrOffset+0];
	bra.uni 	$Lt_40_242178;
$Lt_40_242434:
	ld.const.f32 	%f138, [kYCbCrFullRangeOffset+0];
$Lt_40_242178:
	.loc	20	107	0
	@!%p84 bra 	$Lt_40_242946;
	.loc	20	100	0
	ld.const.f32 	%f139, [kYCbCrOffset+4];
	bra.uni 	$Lt_40_242690;
$Lt_40_242946:
	ld.const.f32 	%f139, [kYCbCrFullRangeOffset+4];
$Lt_40_242690:
	.loc	20	107	0
	@!%p84 bra 	$Lt_40_243458;
	.loc	20	100	0
	ld.const.f32 	%f140, [kYCbCrOffset+8];
	bra.uni 	$Lt_40_243202;
$Lt_40_243458:
	ld.const.f32 	%f140, [kYCbCrFullRangeOffset+8];
$Lt_40_243202:
	.loc	20	424	0
	mov.f32 	%f141, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f142, %f137, %f141;
	fma.rn.ftz.f32 	%f120, %f142, %f138, %f120;
	fma.rn.ftz.f32 	%f122, %f142, %f139, %f122;
	fma.rn.ftz.f32 	%f124, %f142, %f140, %f124;
$Lt_40_241666:
	.loc	20	543	0
	mov.f32 	%f10, %f120;
	mov.f32 	%f11, %f122;
	mov.f32 	%f12, %f124;
	@!%p94 bra 	$Lt_40_239106;
	.loc	20	57	0
	@!%p35 bra 	$Lt_40_180482;
	.loc	20	59	0
	mov.f32 	%f118, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_13;
$Lt_40_180482:
	.loc	20	61	0
	mov.u32 	%r159, 64;
	setp.ne.s32 	%p107, %r9, %r159;
	@%p107 bra 	$Lt_40_180738;
	.loc	20	63	0
	mov.f32 	%f118, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_13;
$Lt_40_180738:
	.loc	20	65	0
	mov.u32 	%r160, 128;
	setp.ne.s32 	%p108, %r9, %r160;
	@%p108 bra 	$Lt_40_180994;
	.loc	20	68	0
	mov.f32 	%f118, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_13;
$Lt_40_180994:
	.loc	20	70	0
	mov.u32 	%r161, 192;
	setp.ne.s32 	%p109, %r9, %r161;
	@%p109 bra 	$Lt_40_181250;
	.loc	20	72	0
	mov.f32 	%f118, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_13;
$Lt_40_181250:
	.loc	20	76	0
	mov.f32 	%f118, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_13:
	.loc	20	546	0
	mov.f32 	%f9, %f118;
	bra.uni 	$Lt_40_239106;
$Lt_40_239362:
	.loc	20	433	0
	mov.f32 	%f143, %f9;
	mov.f32 	%f144, %f10;
	mov.f32 	%f145, %f144;
	mov.f32 	%f146, %f11;
	mov.f32 	%f147, %f146;
	mov.f32 	%f148, %f12;
	mov.f32 	%f149, %f148;
	.loc	20	435	0
	mov.u32 	%r162, 0;
	setp.eq.s32 	%p110, %r18, %r162;
	@%p110 bra 	$Lt_40_244226;
	.loc	20	57	0
	mov.u32 	%r163, 0;
	setp.ne.s32 	%p111, %r9, %r163;
	@%p111 bra 	$Lt_40_181762;
	.loc	20	59	0
	mov.f32 	%f150, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_11;
$Lt_40_181762:
	.loc	20	61	0
	mov.u32 	%r164, 64;
	setp.ne.s32 	%p112, %r9, %r164;
	@%p112 bra 	$Lt_40_182018;
	.loc	20	63	0
	mov.f32 	%f150, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_11;
$Lt_40_182018:
	.loc	20	65	0
	mov.u32 	%r165, 128;
	setp.ne.s32 	%p113, %r9, %r165;
	@%p113 bra 	$Lt_40_182274;
	.loc	20	68	0
	mov.f32 	%f150, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_11;
$Lt_40_182274:
	.loc	20	70	0
	mov.u32 	%r166, 192;
	setp.ne.s32 	%p114, %r9, %r166;
	@%p114 bra 	$Lt_40_182530;
	.loc	20	72	0
	mov.f32 	%f150, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_11;
$Lt_40_182530:
	.loc	20	76	0
	mov.f32 	%f150, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_11:
	.loc	20	118	0
	and.b32 	%r122, %r4, 2048;
	mov.s32 	%r167, 0;
	setp.ne.s32 	%p84, %r122, %r167;
	@!%p84 bra 	$Lt_40_244994;
	.loc	20	100	0
	ld.const.f32 	%f151, [kYCbCrOffset+0];
	bra.uni 	$Lt_40_244738;
$Lt_40_244994:
	ld.const.f32 	%f151, [kYCbCrFullRangeOffset+0];
$Lt_40_244738:
	.loc	20	118	0
	@!%p84 bra 	$Lt_40_245506;
	.loc	20	100	0
	ld.const.f32 	%f152, [kYCbCrOffset+4];
	bra.uni 	$Lt_40_245250;
$Lt_40_245506:
	ld.const.f32 	%f152, [kYCbCrFullRangeOffset+4];
$Lt_40_245250:
	.loc	20	118	0
	@!%p84 bra 	$Lt_40_246018;
	.loc	20	100	0
	ld.const.f32 	%f153, [kYCbCrOffset+8];
	bra.uni 	$Lt_40_245762;
$Lt_40_246018:
	ld.const.f32 	%f153, [kYCbCrFullRangeOffset+8];
$Lt_40_245762:
	.loc	20	437	0
	mov.f32 	%f154, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f155, %f150, %f154;
	mul.ftz.f32 	%f156, %f155, %f151;
	sub.ftz.f32 	%f145, %f144, %f156;
	mul.ftz.f32 	%f157, %f155, %f152;
	sub.ftz.f32 	%f147, %f146, %f157;
	mul.ftz.f32 	%f158, %f155, %f153;
	sub.ftz.f32 	%f149, %f148, %f158;
$Lt_40_244226:
	mov.f32 	%f159, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f160, %f9, %f159;
	mov.f32 	%f161, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p115, %f160, %f161;
	@!%p115 bra 	$Lt_40_246530;
	mov.f32 	%f149, 0f00000000;   	// 0
	mov.f32 	%f147, 0f00000000;   	// 0
	mov.f32 	%f145, 0f00000000;   	// 0
	mov.f32 	%f143, 0f00000000;   	// 0
	bra.uni 	$Lt_40_246274;
$Lt_40_246530:
	.loc	20	57	0
	mov.u32 	%r168, 0;
	setp.ne.s32 	%p116, %r9, %r168;
	@%p116 bra 	$Lt_40_184834;
	.loc	20	59	0
	mov.f32 	%f162, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_7;
$Lt_40_184834:
	.loc	20	61	0
	mov.u32 	%r169, 64;
	setp.ne.s32 	%p117, %r9, %r169;
	@%p117 bra 	$Lt_40_185090;
	.loc	20	63	0
	mov.f32 	%f162, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_7;
$Lt_40_185090:
	.loc	20	65	0
	mov.u32 	%r170, 128;
	setp.ne.s32 	%p118, %r9, %r170;
	@%p118 bra 	$Lt_40_185346;
	.loc	20	68	0
	mov.f32 	%f162, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_7;
$Lt_40_185346:
	.loc	20	70	0
	mov.u32 	%r171, 192;
	setp.ne.s32 	%p119, %r9, %r171;
	@%p119 bra 	$Lt_40_185602;
	.loc	20	72	0
	mov.f32 	%f162, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_7;
$Lt_40_185602:
	.loc	20	76	0
	mov.f32 	%f162, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_7:
	.loc	20	447	0
	div.approx.ftz.f32 	%f163, %f162, %f9;
	mul.ftz.f32 	%f145, %f163, %f145;
	.loc	20	448	0
	mul.ftz.f32 	%f147, %f163, %f147;
	.loc	20	449	0
	mul.ftz.f32 	%f149, %f163, %f149;
$Lt_40_246274:
	.loc	20	452	0
	mov.u32 	%r172, 0;
	setp.eq.s32 	%p120, %r18, %r172;
	@%p120 bra 	$Lt_40_246786;
	.loc	20	57	0
	mov.u32 	%r173, 0;
	setp.ne.s32 	%p121, %r9, %r173;
	@%p121 bra 	$Lt_40_186114;
	.loc	20	59	0
	mov.f32 	%f164, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_5;
$Lt_40_186114:
	.loc	20	61	0
	mov.u32 	%r174, 64;
	setp.ne.s32 	%p122, %r9, %r174;
	@%p122 bra 	$Lt_40_186370;
	.loc	20	63	0
	mov.f32 	%f164, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_5;
$Lt_40_186370:
	.loc	20	65	0
	mov.u32 	%r175, 128;
	setp.ne.s32 	%p123, %r9, %r175;
	@%p123 bra 	$Lt_40_186626;
	.loc	20	68	0
	mov.f32 	%f164, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_5;
$Lt_40_186626:
	.loc	20	70	0
	mov.u32 	%r176, 192;
	setp.ne.s32 	%p124, %r9, %r176;
	@%p124 bra 	$Lt_40_186882;
	.loc	20	72	0
	mov.f32 	%f164, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_5;
$Lt_40_186882:
	.loc	20	76	0
	mov.f32 	%f164, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_5:
	.loc	20	107	0
	and.b32 	%r122, %r4, 2048;
	mov.s32 	%r177, 0;
	setp.ne.s32 	%p84, %r122, %r177;
	@!%p84 bra 	$Lt_40_247554;
	.loc	20	100	0
	ld.const.f32 	%f165, [kYCbCrOffset+0];
	bra.uni 	$Lt_40_247298;
$Lt_40_247554:
	ld.const.f32 	%f165, [kYCbCrFullRangeOffset+0];
$Lt_40_247298:
	.loc	20	107	0
	@!%p84 bra 	$Lt_40_248066;
	.loc	20	100	0
	ld.const.f32 	%f166, [kYCbCrOffset+4];
	bra.uni 	$Lt_40_247810;
$Lt_40_248066:
	ld.const.f32 	%f166, [kYCbCrFullRangeOffset+4];
$Lt_40_247810:
	.loc	20	107	0
	@!%p84 bra 	$Lt_40_248578;
	.loc	20	100	0
	ld.const.f32 	%f167, [kYCbCrOffset+8];
	bra.uni 	$Lt_40_248322;
$Lt_40_248578:
	ld.const.f32 	%f167, [kYCbCrFullRangeOffset+8];
$Lt_40_248322:
	.loc	20	454	0
	mov.f32 	%f168, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f169, %f164, %f168;
	fma.rn.ftz.f32 	%f145, %f169, %f165, %f145;
	fma.rn.ftz.f32 	%f147, %f169, %f166, %f147;
	fma.rn.ftz.f32 	%f149, %f169, %f167, %f149;
$Lt_40_246786:
	.loc	20	551	0
	mov.f32 	%f9, %f143;
	mov.f32 	%f10, %f145;
	mov.f32 	%f11, %f147;
	mov.f32 	%f12, %f149;
$Lt_40_239106:
$L_40_221442:
$Lt_40_238594:
	.loc	20	540	0
	and.b32 	%r178, %r4, 4096;
	mov.u32 	%r179, 0;
	setp.ne.s32 	%p125, %r178, %r179;
	@%p125 bra 	$Lt_40_248834;
	.loc	21	268	0
	mov.f32 	%f170, %f10;
	.loc	21	269	0
	mov.f32 	%f171, %f9;
	.loc	20	558	0
	mov.f32 	%f9, %f12;
	mov.f32 	%f10, %f11;
	mov.f32 	%f11, %f170;
	mov.f32 	%f12, %f171;
$Lt_40_248834:
	@!%p3 bra 	$Lt_40_249346;
	.loc	20	57	0
	mov.u32 	%r180, 0;
	setp.ne.s32 	%p126, %r9, %r180;
	@%p126 bra 	$Lt_40_189442;
	.loc	20	59	0
	mov.f32 	%f172, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_1;
$Lt_40_189442:
	.loc	20	61	0
	mov.u32 	%r181, 64;
	setp.ne.s32 	%p127, %r9, %r181;
	@%p127 bra 	$Lt_40_189698;
	.loc	20	63	0
	mov.f32 	%f172, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_1;
$Lt_40_189698:
	.loc	20	65	0
	mov.u32 	%r182, 128;
	setp.ne.s32 	%p128, %r9, %r182;
	@%p128 bra 	$Lt_40_189954;
	.loc	20	68	0
	mov.f32 	%f172, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_1;
$Lt_40_189954:
	.loc	20	70	0
	mov.u32 	%r183, 192;
	setp.ne.s32 	%p129, %r9, %r183;
	@%p129 bra 	$Lt_40_190210;
	.loc	20	72	0
	mov.f32 	%f172, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_218_1;
$Lt_40_190210:
	.loc	20	76	0
	mov.f32 	%f172, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_218_1:
	.loc	20	564	0
	mov.f32 	%f173, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f174, %f9, %f173;
	mov.f32 	%f175, 0f00000000;   	// 0
	mov.f32 	%f176, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p130, %f174, %f176;
	selp.f32 	%f177, %f174, %f175, %p130;
	min.ftz.f32 	%f9, %f177, %f172;
	mov.f32 	%f178, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f179, %f10, %f178;
	mov.f32 	%f180, 0f00000000;   	// 0
	mov.f32 	%f181, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p131, %f179, %f181;
	selp.f32 	%f182, %f179, %f180, %p131;
	min.ftz.f32 	%f10, %f182, %f172;
	mov.f32 	%f183, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f184, %f11, %f183;
	mov.f32 	%f185, 0f00000000;   	// 0
	mov.f32 	%f186, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p132, %f184, %f186;
	selp.f32 	%f187, %f184, %f185, %p132;
	min.ftz.f32 	%f11, %f187, %f172;
	mov.f32 	%f188, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f189, %f12, %f188;
	mov.f32 	%f190, 0f00000000;   	// 0
	mov.f32 	%f191, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p133, %f189, %f191;
	selp.f32 	%f192, %f189, %f190, %p133;
	min.ftz.f32 	%f12, %f192, %f172;
$Lt_40_249346:
	.loc	22	64	0
	cvt.rzi.ftz.u32.f32 	%r184, %f10;
	cvt.u16.u32 	%r185, %r184;
	cvt.rzi.ftz.u32.f32 	%r186, %f11;
	cvt.u16.u32 	%r187, %r186;
	cvt.rzi.ftz.u32.f32 	%r188, %f12;
	cvt.u16.u32 	%r189, %r188;
	cvt.rzi.ftz.u32.f32 	%r190, %f9;
	cvt.u16.u32 	%r191, %r190;
	st.param.u16 	[__cudaretf__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_+0], %r191;
	mov.s32 	%r192, %r185;
	st.param.u16 	[__cudaretf__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_+2], %r192;
	mov.s32 	%r193, %r187;
	st.param.u16 	[__cudaretf__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_+4], %r193;
	mov.s32 	%r194, %r189;
	st.param.u16 	[__cudaretf__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_+6], %r194;
	ret;
$LDWend__Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_:
	} // _Z31ConvertPixel_444_32f_To_444_15u6float414IR_PixelFormatS0_

	.visible .func (.param .align 4 .b8 __cudaretf__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_[4]) _Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_ (.param .align 4 .b8 __cudaparmf1__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_[4], .param .s32 __cudaparmf2__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_, .param .s32 __cudaparmf3__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_)
	{
	.reg .u32 %r<208>;
	.reg .u64 %rd<3>;
	.reg .f32 %f<188>;
	.reg .pred %p<135>;
	.loc	22	71	0
$LDWbegin__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_:
	ld.param.u8 	%r1, [__cudaparmf1__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_+0];
	mov.s32 	%r2, %r1;
	ld.param.u8 	%r3, [__cudaparmf1__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_+1];
	mov.s32 	%r4, %r3;
	ld.param.u8 	%r5, [__cudaparmf1__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_+2];
	mov.s32 	%r6, %r5;
	ld.param.u8 	%r7, [__cudaparmf1__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_+3];
	mov.s32 	%r8, %r7;
	ld.param.u32 	%r9, [__cudaparmf2__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_];
	mov.s32 	%r10, %r9;
	ld.param.u32 	%r11, [__cudaparmf3__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_];
	mov.s32 	%r12, %r11;
	.loc	20	469	0
	cvt.u8.u32 	%r13, %r2;
	cvt.rn.f32.u32 	%f1, %r13;
	mov.f32 	%f2, %f1;
	cvt.u8.u32 	%r14, %r4;
	cvt.rn.f32.u32 	%f3, %r14;
	mov.f32 	%f4, %f3;
	cvt.u8.u32 	%r15, %r6;
	cvt.rn.f32.u32 	%f5, %r15;
	cvt.u8.u32 	%r16, %r8;
	cvt.rn.f32.u32 	%f6, %r16;
	and.b32 	%r17, %r10, 4096;
	mov.u32 	%r18, 0;
	setp.ne.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_41_222722;
	.loc	20	473	0
	mov.f32 	%f2, %f6;
	mov.f32 	%f4, %f5;
	mov.f32 	%f5, %f3;
	mov.f32 	%f6, %f1;
$Lt_41_222722:
	.loc	20	476	0
	and.b32 	%r19, %r10, 448;
	mov.s32 	%r20, %r10;
	and.b32 	%r21, %r12, 448;
	mov.s32 	%r22, %r12;
	mov.s32 	%r23, 256;
	setp.ne.s32 	%p2, %r19, %r23;
	and.b32 	%r24, %r20, 1;
	mov.s32 	%r25, 256;
	setp.ne.s32 	%p3, %r21, %r25;
	and.b32 	%r26, %r22, 1;
	selp.s32 	%r27, 1, 0, %p2;
	selp.s32 	%r28, 1, 0, %p3;
	and.b32 	%r29, %r24, %r27;
	and.b32 	%r30, %r26, %r28;
	mov.u32 	%r31, 0;
	setp.eq.s32 	%p4, %r29, %r31;
	@%p4 bra 	$Lt_41_249858;
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p5, %r30, %r32;
	@%p5 bra 	$Lt_41_249858;
	.loc	20	57	0
	mov.u32 	%r33, 0;
	setp.ne.s32 	%p6, %r19, %r33;
	@%p6 bra 	$Lt_41_140802;
	.loc	20	59	0
	mov.f32 	%f7, 0f437f0000;     	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_75;
$Lt_41_140802:
	.loc	20	61	0
	mov.u32 	%r34, 64;
	setp.ne.s32 	%p7, %r19, %r34;
	@%p7 bra 	$Lt_41_141058;
	.loc	20	63	0
	mov.f32 	%f7, 0f447fc000;     	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_75;
$Lt_41_141058:
	.loc	20	65	0
	mov.u32 	%r35, 128;
	setp.ne.s32 	%p8, %r19, %r35;
	@%p8 bra 	$Lt_41_141314;
	.loc	20	68	0
	mov.f32 	%f7, 0f47000000;     	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_75;
$Lt_41_141314:
	.loc	20	70	0
	mov.u32 	%r36, 192;
	setp.ne.s32 	%p9, %r19, %r36;
	@%p9 bra 	$Lt_41_141570;
	.loc	20	72	0
	mov.f32 	%f7, 0fbf800000;     	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_75;
$Lt_41_141570:
	.loc	20	76	0
	mov.f32 	%f7, 0f3f800000;     	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_75:
	.loc	20	118	0
	and.b32 	%r37, %r10, 2048;
	mov.s32 	%r38, 0;
	setp.ne.s32 	%p10, %r37, %r38;
	@!%p10 bra 	$Lt_41_223490;
	.loc	20	100	0
	ld.const.f32 	%f8, [kYCbCrOffset+0];
	bra.uni 	$Lt_41_223234;
$Lt_41_223490:
	ld.const.f32 	%f8, [kYCbCrFullRangeOffset+0];
$Lt_41_223234:
	.loc	20	118	0
	@!%p10 bra 	$Lt_41_224002;
	.loc	20	100	0
	ld.const.f32 	%f9, [kYCbCrOffset+4];
	bra.uni 	$Lt_41_223746;
$Lt_41_224002:
	ld.const.f32 	%f9, [kYCbCrFullRangeOffset+4];
$Lt_41_223746:
	.loc	20	118	0
	@!%p10 bra 	$Lt_41_224514;
	.loc	20	100	0
	ld.const.f32 	%f10, [kYCbCrOffset+8];
	bra.uni 	$Lt_41_224258;
$Lt_41_224514:
	ld.const.f32 	%f10, [kYCbCrFullRangeOffset+8];
$Lt_41_224258:
	.loc	20	478	0
	mov.f32 	%f11, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f12, %f7, %f11;
	mul.ftz.f32 	%f13, %f12, %f8;
	sub.ftz.f32 	%f4, %f4, %f13;
	mul.ftz.f32 	%f14, %f12, %f9;
	sub.ftz.f32 	%f5, %f5, %f14;
	mul.ftz.f32 	%f15, %f12, %f10;
	sub.ftz.f32 	%f6, %f6, %f15;
$Lt_41_249858:
$Lt_41_26114:
	.loc	20	481	0
	and.b32 	%r39, %r10, 2;
	and.b32 	%r40, %r12, 2;
	mov.u32 	%r41, 0;
	setp.eq.s32 	%p11, %r39, %r41;
	@%p11 bra 	$Lt_41_250370;
	mov.u32 	%r42, 0;
	setp.ne.s32 	%p12, %r40, %r42;
	@%p12 bra 	$Lt_41_250370;
	.loc	20	483	0
	mov.f32 	%f16, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p13, %f4, %f16;
	@!%p13 bra 	$Lt_41_224770;
	.loc	20	372	0
	neg.ftz.f32 	%f17, %f4;
	lg2.approx.ftz.f32 	%f18, %f17;
	mov.f32 	%f19, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f20, %f18, %f19;
	ex2.approx.ftz.f32 	%f21, %f20;
	neg.ftz.f32 	%f22, %f21;
	bra.uni 	$LDWendi___log2f_219_71;
$Lt_41_224770:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f23, %f4;
	mov.f32 	%f24, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f25, %f23, %f24;
	ex2.approx.ftz.f32 	%f22, %f25;
$LDWendi___log2f_219_71:
	.loc	20	483	0
	mov.f32 	%f26, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p14, %f5, %f26;
	@!%p14 bra 	$Lt_41_225282;
	.loc	20	372	0
	neg.ftz.f32 	%f27, %f5;
	lg2.approx.ftz.f32 	%f28, %f27;
	mov.f32 	%f29, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f30, %f28, %f29;
	ex2.approx.ftz.f32 	%f31, %f30;
	neg.ftz.f32 	%f32, %f31;
	bra.uni 	$LDWendi___log2f_219_69;
$Lt_41_225282:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f33, %f5;
	mov.f32 	%f34, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f35, %f33, %f34;
	ex2.approx.ftz.f32 	%f32, %f35;
$LDWendi___log2f_219_69:
	.loc	20	483	0
	mov.f32 	%f36, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p15, %f6, %f36;
	@!%p15 bra 	$Lt_41_225794;
	.loc	20	372	0
	neg.ftz.f32 	%f37, %f6;
	lg2.approx.ftz.f32 	%f38, %f37;
	mov.f32 	%f39, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f40, %f38, %f39;
	ex2.approx.ftz.f32 	%f41, %f40;
	neg.ftz.f32 	%f42, %f41;
	bra.uni 	$LDWendi___log2f_219_67;
$Lt_41_225794:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f43, %f6;
	mov.f32 	%f44, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f45, %f43, %f44;
	ex2.approx.ftz.f32 	%f42, %f45;
$LDWendi___log2f_219_67:
	.loc	20	483	0
	mov.f32 	%f4, %f22;
	mov.f32 	%f5, %f32;
	mov.f32 	%f6, %f42;
$Lt_41_250370:
$Lt_41_29954:
	.loc	20	486	0
	and.b32 	%r43, %r10, 1;
	and.b32 	%r44, %r12, 1;
	and.b32 	%r45, %r10, 1536;
	and.b32 	%r46, %r12, 1536;
	set.ne.u32.s32 	%r47, %r43, %r44;
	neg.s32 	%r48, %r47;
	set.ne.u32.s32 	%r49, %r45, %r46;
	neg.s32 	%r50, %r49;
	or.b32 	%r51, %r48, %r50;
	mov.u32 	%r52, 0;
	setp.ne.s32 	%p16, %r51, %r52;
	@%p16 bra 	$Lt_41_80642;
	setp.eq.s32 	%p17, %r29, %r30;
	@%p17 bra 	$Lt_41_80898;
$Lt_41_80642:
	.loc	20	490	0
	mov.u32 	%r53, 0;
	setp.ne.s32 	%p18, %r43, %r53;
	@%p18 bra 	$Lt_41_226562;
	mov.s32 	%r54, 256;
	setp.eq.s32 	%p19, %r21, %r54;
	mov.u32 	%r55, 256;
	setp.ne.s32 	%p20, %r19, %r55;
	@%p20 bra 	$Lt_41_227074;
	.loc	20	137	0
	mov.s32 	%r56, 512;
	setp.eq.s32 	%p21, %r46, %r56;
	@!%p19 bra 	$Lt_41_144898;
	.loc	20	139	0
	@!%p21 bra 	$Lt_41_145154;
	.loc	20	141	0
	mov.u64 	%rd1, kRGB32f_To_709YPbPr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_145154:
	.loc	20	145	0
	mov.u64 	%rd1, kRGB32f_To_601YPbPr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_144898:
	.loc	20	150	0
	@!%p21 bra 	$Lt_41_145410;
	.loc	20	152	0
	mov.u64 	%rd1, kRGB32f_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_145410:
	.loc	20	154	0
	and.b32 	%r57, %r12, 2048;
	mov.u32 	%r58, 0;
	setp.ne.s32 	%p22, %r57, %r58;
	@%p22 bra 	$Lt_41_145666;
	.loc	20	156	0
	mov.u64 	%rd1, kRGB32f_To_601YCbCrFullRange;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_145666:
	.loc	20	160	0
	mov.u64 	%rd1, kRGB32f_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_227074:
	@!%p19 bra 	$Lt_41_227586;
	bra.uni 	$Lt_41_226306;
$Lt_41_227586:
	.loc	20	179	0
	mov.u32 	%r59, 512;
	setp.ne.s32 	%p23, %r46, %r59;
	@%p23 bra 	$Lt_41_146434;
	.loc	20	181	0
	mov.u64 	%rd1, kRGB8u_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_146434:
	.loc	20	183	0
	and.b32 	%r60, %r12, 2048;
	mov.u32 	%r61, 0;
	setp.ne.s32 	%p24, %r60, %r61;
	@%p24 bra 	$Lt_41_146690;
	.loc	20	185	0
	mov.u64 	%rd1, kRGB8u_To_601YCbCrFullRange;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_146690:
	.loc	20	189	0
	mov.u64 	%rd1, kRGB8u_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_226562:
	mov.s32 	%r62, 0;
	setp.eq.s32 	%p25, %r44, %r62;
	mov.u32 	%r63, 512;
	setp.ne.s32 	%p26, %r45, %r63;
	@%p26 bra 	$Lt_41_228098;
	mov.s32 	%r64, 256;
	setp.eq.s32 	%p27, %r19, %r64;
	@!%p25 bra 	$Lt_41_228610;
	mov.s32 	%r65, 256;
	setp.eq.s32 	%p28, %r21, %r65;
	@!%p27 bra 	$Lt_41_229122;
	@!%p28 bra 	$Lt_41_226306;
	.loc	20	202	0
	mov.u64 	%rd1, k709YPbPr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_229122:
	.loc	20	211	0
	@!%p28 bra 	$Lt_41_147970;
	.loc	20	213	0
	mov.u64 	%rd1, k709YCbCr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_147970:
	.loc	20	217	0
	mov.u64 	%rd1, k709YCbCr_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_228610:
	@!%p27 bra 	$Lt_41_230146;
	bra.uni 	$Lt_41_226306;
$Lt_41_230146:
	mov.s32 	%r66, 256;
	set.eq.u32.s32 	%r67, %r21, %r66;
	neg.s32 	%r68, %r67;
	and.b32 	%r69, %r12, 2048;
	mov.s32 	%r70, 0;
	set.eq.u32.s32 	%r71, %r69, %r70;
	neg.s32 	%r72, %r71;
	or.b32 	%r73, %r68, %r72;
	mov.u32 	%r74, 0;
	setp.eq.s32 	%p29, %r73, %r74;
	@%p29 bra 	$Lt_41_230658;
	bra.uni 	$Lt_41_226306;
$Lt_41_230658:
	.loc	20	250	0
	mov.u64 	%rd1, k709YCbCr_To_601YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_228098:
	and.b32 	%r75, %r10, 2048;
	mov.s32 	%r76, 0;
	setp.eq.s32 	%p30, %r75, %r76;
	@!%p30 bra 	$Lt_41_231170;
	@!%p25 bra 	$Lt_41_226306;
	.loc	20	259	0
	mov.u32 	%r77, 256;
	setp.ne.s32 	%p31, %r21, %r77;
	@%p31 bra 	$Lt_41_149506;
	.loc	20	261	0
	mov.u64 	%rd1, k601YCbCrFullRange_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_149506:
	.loc	20	265	0
	mov.u64 	%rd1, k601YCbCrFullRange_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_231170:
	mov.s32 	%r78, 256;
	setp.eq.s32 	%p27, %r19, %r78;
	@!%p25 bra 	$Lt_41_232194;
	mov.s32 	%r79, 256;
	setp.eq.s32 	%p32, %r21, %r79;
	@!%p27 bra 	$Lt_41_232706;
	@!%p32 bra 	$Lt_41_226306;
	.loc	20	302	0
	mov.u64 	%rd1, k601YPbPr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_232706:
	.loc	20	311	0
	@!%p32 bra 	$Lt_41_151298;
	.loc	20	313	0
	mov.u64 	%rd1, k601YCbCr_To_RGB32f;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_151298:
	.loc	20	317	0
	mov.u64 	%rd1, k601YCbCr_To_RGB8u;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_232194:
	@!%p27 bra 	$Lt_41_233730;
	bra.uni 	$Lt_41_226306;
$Lt_41_233730:
	selp.s32 	%r80, 1, 0, %p30;
	mov.s32 	%r81, 256;
	set.eq.u32.s32 	%r82, %r21, %r81;
	neg.s32 	%r83, %r82;
	or.b32 	%r84, %r80, %r83;
	mov.u32 	%r85, 0;
	setp.eq.s32 	%p33, %r84, %r85;
	@%p33 bra 	$Lt_41_234242;
	bra.uni 	$Lt_41_226306;
$Lt_41_234242:
	.loc	20	350	0
	mov.u64 	%rd1, k601YCbCr_To_709YCbCr;
	bra.uni 	$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65;
$Lt_41_226306:
	.loc	20	355	0
	mov.u64 	%rd1, 0;
$LDWendi__Z23ColorSpaceConvertMatrix14IR_PixelFormatS__219_65:
	.loc	20	490	0
	ld.global.f32 	%f46, [%rd1+16];
	mul.ftz.f32 	%f47, %f46, %f5;
	ld.global.f32 	%f48, [%rd1+12];
	fma.rn.ftz.f32 	%f49, %f48, %f4, %f47;
	ld.global.f32 	%f50, [%rd1+20];
	fma.rn.ftz.f32 	%f51, %f50, %f6, %f49;
	ld.global.f32 	%f52, [%rd1+28];
	mul.ftz.f32 	%f53, %f52, %f5;
	ld.global.f32 	%f54, [%rd1+24];
	fma.rn.ftz.f32 	%f55, %f54, %f4, %f53;
	ld.global.f32 	%f56, [%rd1+32];
	fma.rn.ftz.f32 	%f57, %f56, %f6, %f55;
	ld.global.f32 	%f58, [%rd1+4];
	mul.ftz.f32 	%f59, %f58, %f5;
	ld.global.f32 	%f60, [%rd1+0];
	fma.rn.ftz.f32 	%f61, %f60, %f4, %f59;
	ld.global.f32 	%f62, [%rd1+8];
	fma.rn.ftz.f32 	%f4, %f62, %f6, %f61;
	mov.f32 	%f5, %f51;
	mov.f32 	%f6, %f57;
	setp.eq.s32 	%p34, %r19, %r21;
	@%p34 bra 	$Lt_41_235010;
	.loc	20	494	0
	mov.s32 	%r86, 256;
	setp.eq.s32 	%p27, %r19, %r86;
	@!%p27 bra 	$L_41_220162;
	mov.s32 	%r87, 0;
	setp.eq.s32 	%p35, %r21, %r87;
	@%p35 bra 	$Lt_41_251394;
$L_41_220162:
	mov.s32 	%r88, 0;
	setp.eq.s32 	%p36, %r19, %r88;
	@!%p36 bra 	$Lt_41_251650;
	mov.u32 	%r89, 256;
	setp.ne.s32 	%p37, %r21, %r89;
	@%p37 bra 	$Lt_41_251650;
	mov.s32 	%r90, 0;
	setp.eq.s32 	%p35, %r21, %r90;
	bra.uni 	$L_41_219906;
$Lt_41_251394:
	mov.s32 	%r91, 0;
	setp.eq.s32 	%p36, %r19, %r91;
$L_41_219906:
	.loc	20	57	0
	@!%p35 bra 	$Lt_41_152834;
	.loc	20	59	0
	mov.f32 	%f63, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_63;
$Lt_41_152834:
	.loc	20	61	0
	mov.u32 	%r92, 64;
	setp.ne.s32 	%p38, %r21, %r92;
	@%p38 bra 	$Lt_41_153090;
	.loc	20	63	0
	mov.f32 	%f63, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_63;
$Lt_41_153090:
	.loc	20	65	0
	mov.u32 	%r93, 128;
	setp.ne.s32 	%p39, %r21, %r93;
	@%p39 bra 	$Lt_41_153346;
	.loc	20	68	0
	mov.f32 	%f63, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_63;
$Lt_41_153346:
	.loc	20	70	0
	mov.u32 	%r94, 192;
	setp.ne.s32 	%p40, %r21, %r94;
	@%p40 bra 	$Lt_41_153602;
	.loc	20	72	0
	mov.f32 	%f63, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_63;
$Lt_41_153602:
	.loc	20	76	0
	mov.f32 	%f63, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_63:
	.loc	20	57	0
	@!%p36 bra 	$Lt_41_153858;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_61;
$Lt_41_153858:
	.loc	20	61	0
	mov.u32 	%r95, 64;
	setp.ne.s32 	%p41, %r19, %r95;
	@%p41 bra 	$Lt_41_154114;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_61;
$Lt_41_154114:
	.loc	20	65	0
	mov.u32 	%r96, 128;
	setp.ne.s32 	%p42, %r19, %r96;
	@%p42 bra 	$Lt_41_154370;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_61;
$Lt_41_154370:
	.loc	20	70	0
	mov.u32 	%r97, 192;
	setp.ne.s32 	%p43, %r19, %r97;
	@%p43 bra 	$Lt_41_154626;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_61;
$Lt_41_154626:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_61:
	.loc	20	498	0
	div.approx.ftz.f32 	%f65, %f63, %f64;
	mul.ftz.f32 	%f2, %f2, %f65;
	bra.uni 	$Lt_41_235010;
$Lt_41_251650:
$L_41_219650:
	.loc	20	500	0
	@!%p27 bra 	$L_41_221186;
	@%p3 bra 	$L_41_220930;
$L_41_221186:
	@!%p2 bra 	$Lt_41_252674;
	mov.u32 	%r98, 256;
	setp.ne.s32 	%p44, %r21, %r98;
	@%p44 bra 	$Lt_41_252674;
$L_41_220930:
	.loc	20	57	0
	mov.u32 	%r99, 0;
	setp.ne.s32 	%p45, %r21, %r99;
	@%p45 bra 	$Lt_41_155138;
	.loc	20	59	0
	mov.f32 	%f63, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_59;
$Lt_41_155138:
	.loc	20	61	0
	mov.u32 	%r100, 64;
	setp.ne.s32 	%p46, %r21, %r100;
	@%p46 bra 	$Lt_41_155394;
	.loc	20	63	0
	mov.f32 	%f63, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_59;
$Lt_41_155394:
	.loc	20	65	0
	mov.u32 	%r101, 128;
	setp.ne.s32 	%p47, %r21, %r101;
	@%p47 bra 	$Lt_41_155650;
	.loc	20	68	0
	mov.f32 	%f63, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_59;
$Lt_41_155650:
	.loc	20	70	0
	mov.u32 	%r102, 192;
	setp.ne.s32 	%p48, %r21, %r102;
	@%p48 bra 	$Lt_41_155906;
	.loc	20	72	0
	mov.f32 	%f63, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_59;
$Lt_41_155906:
	.loc	20	76	0
	mov.f32 	%f63, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_59:
	.loc	20	57	0
	@!%p36 bra 	$Lt_41_156162;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_57;
$Lt_41_156162:
	.loc	20	61	0
	mov.u32 	%r103, 64;
	setp.ne.s32 	%p49, %r19, %r103;
	@%p49 bra 	$Lt_41_156418;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_57;
$Lt_41_156418:
	.loc	20	65	0
	mov.u32 	%r104, 128;
	setp.ne.s32 	%p50, %r19, %r104;
	@%p50 bra 	$Lt_41_156674;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_57;
$Lt_41_156674:
	.loc	20	70	0
	mov.u32 	%r105, 192;
	setp.ne.s32 	%p51, %r19, %r105;
	@%p51 bra 	$Lt_41_156930;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_57;
$Lt_41_156930:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_57:
	.loc	20	504	0
	div.approx.ftz.f32 	%f66, %f63, %f64;
	mul.ftz.f32 	%f2, %f2, %f66;
	.loc	20	57	0
	@!%p36 bra 	$Lt_41_158210;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_53;
$Lt_41_158210:
	.loc	20	61	0
	mov.u32 	%r106, 64;
	setp.ne.s32 	%p52, %r19, %r106;
	@%p52 bra 	$Lt_41_158466;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_53;
$Lt_41_158466:
	.loc	20	65	0
	mov.u32 	%r107, 128;
	setp.ne.s32 	%p53, %r19, %r107;
	@%p53 bra 	$Lt_41_158722;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_53;
$Lt_41_158722:
	.loc	20	70	0
	mov.u32 	%r108, 192;
	setp.ne.s32 	%p54, %r19, %r108;
	@%p54 bra 	$Lt_41_158978;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_53;
$Lt_41_158978:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_53:
	.loc	20	505	0
	mov.f32 	%f67, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f68, %f67, %f64;
	mul.ftz.f32 	%f4, %f4, %f68;
	.loc	20	57	0
	@!%p36 bra 	$Lt_41_160258;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_49;
$Lt_41_160258:
	.loc	20	61	0
	mov.u32 	%r109, 64;
	setp.ne.s32 	%p55, %r19, %r109;
	@%p55 bra 	$Lt_41_160514;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_49;
$Lt_41_160514:
	.loc	20	65	0
	mov.u32 	%r110, 128;
	setp.ne.s32 	%p56, %r19, %r110;
	@%p56 bra 	$Lt_41_160770;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_49;
$Lt_41_160770:
	.loc	20	70	0
	mov.u32 	%r111, 192;
	setp.ne.s32 	%p57, %r19, %r111;
	@%p57 bra 	$Lt_41_161026;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_49;
$Lt_41_161026:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_49:
	.loc	20	506	0
	mov.f32 	%f69, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f70, %f69, %f64;
	mul.ftz.f32 	%f5, %f51, %f70;
	.loc	20	57	0
	@!%p36 bra 	$Lt_41_162306;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_45;
$Lt_41_162306:
	.loc	20	61	0
	mov.u32 	%r112, 64;
	setp.ne.s32 	%p58, %r19, %r112;
	@%p58 bra 	$Lt_41_162562;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_45;
$Lt_41_162562:
	.loc	20	65	0
	mov.u32 	%r113, 128;
	setp.ne.s32 	%p59, %r19, %r113;
	@%p59 bra 	$Lt_41_162818;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_45;
$Lt_41_162818:
	.loc	20	70	0
	mov.u32 	%r114, 192;
	setp.ne.s32 	%p60, %r19, %r114;
	@%p60 bra 	$Lt_41_163074;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_45;
$Lt_41_163074:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_45:
	.loc	20	507	0
	mov.f32 	%f71, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f72, %f71, %f64;
	mul.ftz.f32 	%f6, %f57, %f72;
	bra.uni 	$Lt_41_235010;
$Lt_41_252674:
$L_41_220674:
	.loc	20	57	0
	@!%p36 bra 	$Lt_41_163330;
	.loc	20	59	0
	mov.f32 	%f63, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_43;
$Lt_41_163330:
	.loc	20	61	0
	mov.u32 	%r115, 64;
	setp.ne.s32 	%p61, %r19, %r115;
	@%p61 bra 	$Lt_41_163586;
	.loc	20	63	0
	mov.f32 	%f63, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_43;
$Lt_41_163586:
	.loc	20	65	0
	mov.u32 	%r116, 128;
	setp.ne.s32 	%p62, %r19, %r116;
	@%p62 bra 	$Lt_41_163842;
	.loc	20	68	0
	mov.f32 	%f63, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_43;
$Lt_41_163842:
	.loc	20	70	0
	mov.u32 	%r117, 192;
	setp.ne.s32 	%p63, %r19, %r117;
	@%p63 bra 	$Lt_41_164098;
	.loc	20	72	0
	mov.f32 	%f63, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_43;
$Lt_41_164098:
	.loc	20	76	0
	mov.f32 	%f63, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_43:
	.loc	20	511	0
	mov.f32 	%f73, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f74, %f63, %f73;
	mul.ftz.f32 	%f2, %f74, %f2;
	mul.ftz.f32 	%f4, %f74, %f4;
	mul.ftz.f32 	%f5, %f74, %f51;
	mul.ftz.f32 	%f6, %f74, %f57;
	bra.uni 	$Lt_41_235010;
$Lt_41_80898:
	.loc	20	486	0
	setp.eq.s32 	%p64, %r19, %r21;
	@%p64 bra 	$Lt_41_235010;
	.loc	20	57	0
	mov.u32 	%r118, 0;
	setp.ne.s32 	%p65, %r21, %r118;
	@%p65 bra 	$Lt_41_165634;
	.loc	20	59	0
	mov.f32 	%f63, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_39;
$Lt_41_165634:
	.loc	20	61	0
	mov.u32 	%r119, 64;
	setp.ne.s32 	%p66, %r21, %r119;
	@%p66 bra 	$Lt_41_165890;
	.loc	20	63	0
	mov.f32 	%f63, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_39;
$Lt_41_165890:
	.loc	20	65	0
	mov.u32 	%r120, 128;
	setp.ne.s32 	%p67, %r21, %r120;
	@%p67 bra 	$Lt_41_166146;
	.loc	20	68	0
	mov.f32 	%f63, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_39;
$Lt_41_166146:
	.loc	20	70	0
	mov.u32 	%r121, 192;
	setp.ne.s32 	%p68, %r21, %r121;
	@%p68 bra 	$Lt_41_166402;
	.loc	20	72	0
	mov.f32 	%f63, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_39;
$Lt_41_166402:
	.loc	20	76	0
	mov.f32 	%f63, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_39:
	.loc	20	57	0
	mov.u32 	%r122, 0;
	setp.ne.s32 	%p69, %r19, %r122;
	@%p69 bra 	$Lt_41_166658;
	.loc	20	59	0
	mov.f32 	%f64, 0f437f0000;    	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_37;
$Lt_41_166658:
	.loc	20	61	0
	mov.u32 	%r123, 64;
	setp.ne.s32 	%p70, %r19, %r123;
	@%p70 bra 	$Lt_41_166914;
	.loc	20	63	0
	mov.f32 	%f64, 0f447fc000;    	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_37;
$Lt_41_166914:
	.loc	20	65	0
	mov.u32 	%r124, 128;
	setp.ne.s32 	%p71, %r19, %r124;
	@%p71 bra 	$Lt_41_167170;
	.loc	20	68	0
	mov.f32 	%f64, 0f47000000;    	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_37;
$Lt_41_167170:
	.loc	20	70	0
	mov.u32 	%r125, 192;
	setp.ne.s32 	%p72, %r19, %r125;
	@%p72 bra 	$Lt_41_167426;
	.loc	20	72	0
	mov.f32 	%f64, 0fbf800000;    	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_37;
$Lt_41_167426:
	.loc	20	76	0
	mov.f32 	%f64, 0f3f800000;    	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_37:
	.loc	20	517	0
	div.approx.ftz.f32 	%f75, %f63, %f64;
	mul.ftz.f32 	%f2, %f75, %f2;
	mul.ftz.f32 	%f4, %f75, %f4;
	mul.ftz.f32 	%f5, %f75, %f5;
	mul.ftz.f32 	%f6, %f75, %f6;
$Lt_41_235010:
$Lt_41_83202:
	.loc	20	520	0
	mov.u32 	%r126, 0;
	setp.eq.s32 	%p73, %r40, %r126;
	@%p73 bra 	$Lt_41_253186;
	mov.u32 	%r127, 0;
	setp.ne.s32 	%p74, %r39, %r127;
	@%p74 bra 	$Lt_41_253186;
	.loc	20	522	0
	mov.f32 	%f76, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p75, %f4, %f76;
	@!%p75 bra 	$Lt_41_235522;
	.loc	20	372	0
	neg.ftz.f32 	%f77, %f4;
	lg2.approx.ftz.f32 	%f78, %f77;
	mov.f32 	%f79, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f80, %f78, %f79;
	ex2.approx.ftz.f32 	%f81, %f80;
	neg.ftz.f32 	%f82, %f81;
	bra.uni 	$LDWendi___log2f_219_35;
$Lt_41_235522:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f83, %f4;
	mov.f32 	%f84, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f85, %f83, %f84;
	ex2.approx.ftz.f32 	%f82, %f85;
$LDWendi___log2f_219_35:
	.loc	20	522	0
	mov.f32 	%f86, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p76, %f5, %f86;
	@!%p76 bra 	$Lt_41_236034;
	.loc	20	372	0
	neg.ftz.f32 	%f87, %f5;
	lg2.approx.ftz.f32 	%f88, %f87;
	mov.f32 	%f89, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f90, %f88, %f89;
	ex2.approx.ftz.f32 	%f91, %f90;
	neg.ftz.f32 	%f92, %f91;
	bra.uni 	$LDWendi___log2f_219_33;
$Lt_41_236034:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f93, %f5;
	mov.f32 	%f94, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f95, %f93, %f94;
	ex2.approx.ftz.f32 	%f92, %f95;
$LDWendi___log2f_219_33:
	.loc	20	522	0
	mov.f32 	%f96, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p77, %f6, %f96;
	@!%p77 bra 	$Lt_41_236546;
	.loc	20	372	0
	neg.ftz.f32 	%f97, %f6;
	lg2.approx.ftz.f32 	%f98, %f97;
	mov.f32 	%f99, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f100, %f98, %f99;
	ex2.approx.ftz.f32 	%f101, %f100;
	neg.ftz.f32 	%f102, %f101;
	bra.uni 	$LDWendi___log2f_219_31;
$Lt_41_236546:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f103, %f6;
	mov.f32 	%f104, 0f400e38e4;   	// 2.22222
	mul.ftz.f32 	%f105, %f103, %f104;
	ex2.approx.ftz.f32 	%f102, %f105;
$LDWendi___log2f_219_31:
	.loc	20	522	0
	mov.f32 	%f4, %f82;
	mov.f32 	%f5, %f92;
	mov.f32 	%f6, %f102;
$Lt_41_253186:
$Lt_41_85250:
	.loc	20	525	0
	mov.u32 	%r128, 0;
	setp.eq.s32 	%p78, %r30, %r128;
	@%p78 bra 	$Lt_41_253698;
	mov.u32 	%r129, 0;
	setp.ne.s32 	%p79, %r29, %r129;
	@%p79 bra 	$Lt_41_253698;
	.loc	20	57	0
	mov.u32 	%r130, 0;
	setp.ne.s32 	%p80, %r21, %r130;
	@%p80 bra 	$Lt_41_168450;
	.loc	20	59	0
	mov.f32 	%f106, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_29;
$Lt_41_168450:
	.loc	20	61	0
	mov.u32 	%r131, 64;
	setp.ne.s32 	%p81, %r21, %r131;
	@%p81 bra 	$Lt_41_168706;
	.loc	20	63	0
	mov.f32 	%f106, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_29;
$Lt_41_168706:
	.loc	20	65	0
	mov.u32 	%r132, 128;
	setp.ne.s32 	%p82, %r21, %r132;
	@%p82 bra 	$Lt_41_168962;
	.loc	20	68	0
	mov.f32 	%f106, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_29;
$Lt_41_168962:
	.loc	20	70	0
	mov.u32 	%r133, 192;
	setp.ne.s32 	%p83, %r21, %r133;
	@%p83 bra 	$Lt_41_169218;
	.loc	20	72	0
	mov.f32 	%f106, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_29;
$Lt_41_169218:
	.loc	20	76	0
	mov.f32 	%f106, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_29:
	.loc	20	107	0
	and.b32 	%r134, %r12, 2048;
	mov.s32 	%r135, 0;
	setp.ne.s32 	%p84, %r134, %r135;
	@!%p84 bra 	$Lt_41_237314;
	.loc	20	100	0
	ld.const.f32 	%f107, [kYCbCrOffset+0];
	bra.uni 	$Lt_41_237058;
$Lt_41_237314:
	ld.const.f32 	%f107, [kYCbCrFullRangeOffset+0];
$Lt_41_237058:
	.loc	20	107	0
	@!%p84 bra 	$Lt_41_237826;
	.loc	20	100	0
	ld.const.f32 	%f108, [kYCbCrOffset+4];
	bra.uni 	$Lt_41_237570;
$Lt_41_237826:
	ld.const.f32 	%f108, [kYCbCrFullRangeOffset+4];
$Lt_41_237570:
	.loc	20	107	0
	@!%p84 bra 	$Lt_41_238338;
	.loc	20	100	0
	ld.const.f32 	%f109, [kYCbCrOffset+8];
	bra.uni 	$Lt_41_238082;
$Lt_41_238338:
	ld.const.f32 	%f109, [kYCbCrFullRangeOffset+8];
$Lt_41_238082:
	.loc	20	527	0
	mov.f32 	%f110, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f111, %f106, %f110;
	fma.rn.ftz.f32 	%f4, %f111, %f107, %f4;
	fma.rn.ftz.f32 	%f5, %f111, %f108, %f5;
	fma.rn.ftz.f32 	%f6, %f111, %f109, %f6;
$Lt_41_253698:
$Lt_41_91650:
	.loc	20	525	0
	and.b32 	%r136, %r10, 12;
	and.b32 	%r137, %r12, 12;
	setp.eq.s32 	%p85, %r136, %r137;
	@%p85 bra 	$Lt_41_239106;
	.loc	20	532	0
	mov.u32 	%r138, 8;
	setp.ne.s32 	%p86, %r136, %r138;
	@%p86 bra 	$L_41_222466;
	mov.u32 	%r139, 12;
	setp.eq.s32 	%p87, %r137, %r139;
	@%p87 bra 	$Lt_41_254466;
$L_41_222466:
	mov.u32 	%r140, 12;
	setp.eq.s32 	%p88, %r136, %r140;
	@%p88 bra 	$Lt_41_254466;
	mov.u32 	%r141, 0;
	setp.ne.s32 	%p89, %r136, %r141;
	@%p89 bra 	$L_41_221698;
$Lt_41_254466:
$L_41_221954:
	.loc	20	57	0
	mov.u32 	%r142, 0;
	setp.ne.s32 	%p90, %r21, %r142;
	@%p90 bra 	$Lt_41_171778;
	.loc	20	59	0
	mov.f32 	%f112, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_25;
$Lt_41_171778:
	.loc	20	61	0
	mov.u32 	%r143, 64;
	setp.ne.s32 	%p91, %r21, %r143;
	@%p91 bra 	$Lt_41_172034;
	.loc	20	63	0
	mov.f32 	%f112, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_25;
$Lt_41_172034:
	.loc	20	65	0
	mov.u32 	%r144, 128;
	setp.ne.s32 	%p92, %r21, %r144;
	@%p92 bra 	$Lt_41_172290;
	.loc	20	68	0
	mov.f32 	%f112, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_25;
$Lt_41_172290:
	.loc	20	70	0
	mov.u32 	%r145, 192;
	setp.ne.s32 	%p93, %r21, %r145;
	@%p93 bra 	$Lt_41_172546;
	.loc	20	72	0
	mov.f32 	%f112, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_25;
$Lt_41_172546:
	.loc	20	76	0
	mov.f32 	%f112, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_25:
	.loc	20	536	0
	mov.f32 	%f2, %f112;
	bra.uni 	$Lt_41_239106;
$L_41_221698:
	.loc	20	540	0
	mov.s32 	%r146, 12;
	setp.eq.s32 	%p94, %r137, %r146;
	mov.s32 	%r147, 4;
	set.eq.u32.s32 	%r148, %r136, %r147;
	neg.s32 	%r149, %r148;
	selp.s32 	%r150, 1, 0, %p94;
	mov.s32 	%r151, 8;
	set.eq.u32.s32 	%r152, %r137, %r151;
	neg.s32 	%r153, %r152;
	or.b32 	%r154, %r150, %r153;
	and.b32 	%r155, %r149, %r154;
	mov.u32 	%r156, 0;
	setp.eq.s32 	%p95, %r155, %r156;
	@%p95 bra 	$Lt_41_239362;
	.loc	20	410	0
	mov.f32 	%f113, %f4;
	mov.f32 	%f114, %f113;
	mov.f32 	%f115, %f5;
	mov.f32 	%f116, %f115;
	mov.f32 	%f117, %f6;
	mov.f32 	%f118, %f117;
	.loc	20	57	0
	mov.s32 	%r157, 0;
	setp.eq.s32 	%p35, %r21, %r157;
	@!%p35 bra 	$Lt_41_173058;
	.loc	20	59	0
	mov.f32 	%f119, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_23;
$Lt_41_173058:
	.loc	20	61	0
	mov.u32 	%r158, 64;
	setp.ne.s32 	%p96, %r21, %r158;
	@%p96 bra 	$Lt_41_173314;
	.loc	20	63	0
	mov.f32 	%f119, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_23;
$Lt_41_173314:
	.loc	20	65	0
	mov.u32 	%r159, 128;
	setp.ne.s32 	%p97, %r21, %r159;
	@%p97 bra 	$Lt_41_173570;
	.loc	20	68	0
	mov.f32 	%f119, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_23;
$Lt_41_173570:
	.loc	20	70	0
	mov.u32 	%r160, 192;
	setp.ne.s32 	%p98, %r21, %r160;
	@%p98 bra 	$Lt_41_173826;
	.loc	20	72	0
	mov.f32 	%f119, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_23;
$Lt_41_173826:
	.loc	20	76	0
	mov.f32 	%f119, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_23:
	.loc	20	413	0
	mov.u32 	%r161, 0;
	setp.eq.s32 	%p99, %r30, %r161;
	@%p99 bra 	$Lt_41_239618;
	.loc	20	57	0
	@!%p35 bra 	$Lt_41_174338;
	.loc	20	59	0
	mov.f32 	%f120, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_21;
$Lt_41_174338:
	.loc	20	61	0
	mov.u32 	%r162, 64;
	setp.ne.s32 	%p100, %r21, %r162;
	@%p100 bra 	$Lt_41_174594;
	.loc	20	63	0
	mov.f32 	%f120, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_21;
$Lt_41_174594:
	.loc	20	65	0
	mov.u32 	%r163, 128;
	setp.ne.s32 	%p101, %r21, %r163;
	@%p101 bra 	$Lt_41_174850;
	.loc	20	68	0
	mov.f32 	%f120, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_21;
$Lt_41_174850:
	.loc	20	70	0
	mov.u32 	%r164, 192;
	setp.ne.s32 	%p102, %r21, %r164;
	@%p102 bra 	$Lt_41_175106;
	.loc	20	72	0
	mov.f32 	%f120, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_21;
$Lt_41_175106:
	.loc	20	76	0
	mov.f32 	%f120, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_21:
	.loc	20	118	0
	and.b32 	%r134, %r12, 2048;
	mov.s32 	%r165, 0;
	setp.ne.s32 	%p84, %r134, %r165;
	@!%p84 bra 	$Lt_41_240386;
	.loc	20	100	0
	ld.const.f32 	%f121, [kYCbCrOffset+0];
	bra.uni 	$Lt_41_240130;
$Lt_41_240386:
	ld.const.f32 	%f121, [kYCbCrFullRangeOffset+0];
$Lt_41_240130:
	.loc	20	118	0
	@!%p84 bra 	$Lt_41_240898;
	.loc	20	100	0
	ld.const.f32 	%f122, [kYCbCrOffset+4];
	bra.uni 	$Lt_41_240642;
$Lt_41_240898:
	ld.const.f32 	%f122, [kYCbCrFullRangeOffset+4];
$Lt_41_240642:
	.loc	20	118	0
	@!%p84 bra 	$Lt_41_241410;
	.loc	20	100	0
	ld.const.f32 	%f123, [kYCbCrOffset+8];
	bra.uni 	$Lt_41_241154;
$Lt_41_241410:
	ld.const.f32 	%f123, [kYCbCrFullRangeOffset+8];
$Lt_41_241154:
	.loc	20	415	0
	mov.f32 	%f124, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f125, %f120, %f124;
	mul.ftz.f32 	%f126, %f125, %f121;
	sub.ftz.f32 	%f114, %f113, %f126;
	mul.ftz.f32 	%f127, %f125, %f122;
	sub.ftz.f32 	%f116, %f115, %f127;
	mul.ftz.f32 	%f128, %f125, %f123;
	sub.ftz.f32 	%f118, %f117, %f128;
$Lt_41_239618:
	.loc	20	418	0
	rcp.approx.ftz.f32 	%f129, %f119;
	mul.ftz.f32 	%f130, %f129, %f2;
	mul.ftz.f32 	%f114, %f130, %f114;
	.loc	20	419	0
	mul.ftz.f32 	%f116, %f130, %f116;
	.loc	20	420	0
	mul.ftz.f32 	%f118, %f130, %f118;
	.loc	20	422	0
	mov.u32 	%r166, 0;
	setp.eq.s32 	%p103, %r30, %r166;
	@%p103 bra 	$Lt_41_241666;
	.loc	20	57	0
	@!%p35 bra 	$Lt_41_177410;
	.loc	20	59	0
	mov.f32 	%f131, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_17;
$Lt_41_177410:
	.loc	20	61	0
	mov.u32 	%r167, 64;
	setp.ne.s32 	%p104, %r21, %r167;
	@%p104 bra 	$Lt_41_177666;
	.loc	20	63	0
	mov.f32 	%f131, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_17;
$Lt_41_177666:
	.loc	20	65	0
	mov.u32 	%r168, 128;
	setp.ne.s32 	%p105, %r21, %r168;
	@%p105 bra 	$Lt_41_177922;
	.loc	20	68	0
	mov.f32 	%f131, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_17;
$Lt_41_177922:
	.loc	20	70	0
	mov.u32 	%r169, 192;
	setp.ne.s32 	%p106, %r21, %r169;
	@%p106 bra 	$Lt_41_178178;
	.loc	20	72	0
	mov.f32 	%f131, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_17;
$Lt_41_178178:
	.loc	20	76	0
	mov.f32 	%f131, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_17:
	.loc	20	107	0
	and.b32 	%r134, %r12, 2048;
	mov.s32 	%r170, 0;
	setp.ne.s32 	%p84, %r134, %r170;
	@!%p84 bra 	$Lt_41_242434;
	.loc	20	100	0
	ld.const.f32 	%f132, [kYCbCrOffset+0];
	bra.uni 	$Lt_41_242178;
$Lt_41_242434:
	ld.const.f32 	%f132, [kYCbCrFullRangeOffset+0];
$Lt_41_242178:
	.loc	20	107	0
	@!%p84 bra 	$Lt_41_242946;
	.loc	20	100	0
	ld.const.f32 	%f133, [kYCbCrOffset+4];
	bra.uni 	$Lt_41_242690;
$Lt_41_242946:
	ld.const.f32 	%f133, [kYCbCrFullRangeOffset+4];
$Lt_41_242690:
	.loc	20	107	0
	@!%p84 bra 	$Lt_41_243458;
	.loc	20	100	0
	ld.const.f32 	%f134, [kYCbCrOffset+8];
	bra.uni 	$Lt_41_243202;
$Lt_41_243458:
	ld.const.f32 	%f134, [kYCbCrFullRangeOffset+8];
$Lt_41_243202:
	.loc	20	424	0
	mov.f32 	%f135, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f136, %f131, %f135;
	fma.rn.ftz.f32 	%f114, %f136, %f132, %f114;
	fma.rn.ftz.f32 	%f116, %f136, %f133, %f116;
	fma.rn.ftz.f32 	%f118, %f136, %f134, %f118;
$Lt_41_241666:
	.loc	20	543	0
	mov.f32 	%f4, %f114;
	mov.f32 	%f5, %f116;
	mov.f32 	%f6, %f118;
	@!%p94 bra 	$Lt_41_239106;
	.loc	20	57	0
	@!%p35 bra 	$Lt_41_180482;
	.loc	20	59	0
	mov.f32 	%f112, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_13;
$Lt_41_180482:
	.loc	20	61	0
	mov.u32 	%r171, 64;
	setp.ne.s32 	%p107, %r21, %r171;
	@%p107 bra 	$Lt_41_180738;
	.loc	20	63	0
	mov.f32 	%f112, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_13;
$Lt_41_180738:
	.loc	20	65	0
	mov.u32 	%r172, 128;
	setp.ne.s32 	%p108, %r21, %r172;
	@%p108 bra 	$Lt_41_180994;
	.loc	20	68	0
	mov.f32 	%f112, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_13;
$Lt_41_180994:
	.loc	20	70	0
	mov.u32 	%r173, 192;
	setp.ne.s32 	%p109, %r21, %r173;
	@%p109 bra 	$Lt_41_181250;
	.loc	20	72	0
	mov.f32 	%f112, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_13;
$Lt_41_181250:
	.loc	20	76	0
	mov.f32 	%f112, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_13:
	.loc	20	546	0
	mov.f32 	%f2, %f112;
	bra.uni 	$Lt_41_239106;
$Lt_41_239362:
	.loc	20	433	0
	mov.f32 	%f137, %f2;
	mov.f32 	%f138, %f4;
	mov.f32 	%f139, %f138;
	mov.f32 	%f140, %f5;
	mov.f32 	%f141, %f140;
	mov.f32 	%f142, %f6;
	mov.f32 	%f143, %f142;
	.loc	20	435	0
	mov.u32 	%r174, 0;
	setp.eq.s32 	%p110, %r30, %r174;
	@%p110 bra 	$Lt_41_244226;
	.loc	20	57	0
	mov.u32 	%r175, 0;
	setp.ne.s32 	%p111, %r21, %r175;
	@%p111 bra 	$Lt_41_181762;
	.loc	20	59	0
	mov.f32 	%f144, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_11;
$Lt_41_181762:
	.loc	20	61	0
	mov.u32 	%r176, 64;
	setp.ne.s32 	%p112, %r21, %r176;
	@%p112 bra 	$Lt_41_182018;
	.loc	20	63	0
	mov.f32 	%f144, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_11;
$Lt_41_182018:
	.loc	20	65	0
	mov.u32 	%r177, 128;
	setp.ne.s32 	%p113, %r21, %r177;
	@%p113 bra 	$Lt_41_182274;
	.loc	20	68	0
	mov.f32 	%f144, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_11;
$Lt_41_182274:
	.loc	20	70	0
	mov.u32 	%r178, 192;
	setp.ne.s32 	%p114, %r21, %r178;
	@%p114 bra 	$Lt_41_182530;
	.loc	20	72	0
	mov.f32 	%f144, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_11;
$Lt_41_182530:
	.loc	20	76	0
	mov.f32 	%f144, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_11:
	.loc	20	118	0
	and.b32 	%r134, %r12, 2048;
	mov.s32 	%r179, 0;
	setp.ne.s32 	%p84, %r134, %r179;
	@!%p84 bra 	$Lt_41_244994;
	.loc	20	100	0
	ld.const.f32 	%f145, [kYCbCrOffset+0];
	bra.uni 	$Lt_41_244738;
$Lt_41_244994:
	ld.const.f32 	%f145, [kYCbCrFullRangeOffset+0];
$Lt_41_244738:
	.loc	20	118	0
	@!%p84 bra 	$Lt_41_245506;
	.loc	20	100	0
	ld.const.f32 	%f146, [kYCbCrOffset+4];
	bra.uni 	$Lt_41_245250;
$Lt_41_245506:
	ld.const.f32 	%f146, [kYCbCrFullRangeOffset+4];
$Lt_41_245250:
	.loc	20	118	0
	@!%p84 bra 	$Lt_41_246018;
	.loc	20	100	0
	ld.const.f32 	%f147, [kYCbCrOffset+8];
	bra.uni 	$Lt_41_245762;
$Lt_41_246018:
	ld.const.f32 	%f147, [kYCbCrFullRangeOffset+8];
$Lt_41_245762:
	.loc	20	437	0
	mov.f32 	%f148, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f149, %f144, %f148;
	mul.ftz.f32 	%f150, %f149, %f145;
	sub.ftz.f32 	%f139, %f138, %f150;
	mul.ftz.f32 	%f151, %f149, %f146;
	sub.ftz.f32 	%f141, %f140, %f151;
	mul.ftz.f32 	%f152, %f149, %f147;
	sub.ftz.f32 	%f143, %f142, %f152;
$Lt_41_244226:
	mov.f32 	%f153, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f154, %f2, %f153;
	mov.f32 	%f155, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p115, %f154, %f155;
	@!%p115 bra 	$Lt_41_246530;
	mov.f32 	%f143, 0f00000000;   	// 0
	mov.f32 	%f141, 0f00000000;   	// 0
	mov.f32 	%f139, 0f00000000;   	// 0
	mov.f32 	%f137, 0f00000000;   	// 0
	bra.uni 	$Lt_41_246274;
$Lt_41_246530:
	.loc	20	57	0
	mov.u32 	%r180, 0;
	setp.ne.s32 	%p116, %r21, %r180;
	@%p116 bra 	$Lt_41_184834;
	.loc	20	59	0
	mov.f32 	%f156, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_7;
$Lt_41_184834:
	.loc	20	61	0
	mov.u32 	%r181, 64;
	setp.ne.s32 	%p117, %r21, %r181;
	@%p117 bra 	$Lt_41_185090;
	.loc	20	63	0
	mov.f32 	%f156, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_7;
$Lt_41_185090:
	.loc	20	65	0
	mov.u32 	%r182, 128;
	setp.ne.s32 	%p118, %r21, %r182;
	@%p118 bra 	$Lt_41_185346;
	.loc	20	68	0
	mov.f32 	%f156, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_7;
$Lt_41_185346:
	.loc	20	70	0
	mov.u32 	%r183, 192;
	setp.ne.s32 	%p119, %r21, %r183;
	@%p119 bra 	$Lt_41_185602;
	.loc	20	72	0
	mov.f32 	%f156, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_7;
$Lt_41_185602:
	.loc	20	76	0
	mov.f32 	%f156, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_7:
	.loc	20	447	0
	div.approx.ftz.f32 	%f157, %f156, %f2;
	mul.ftz.f32 	%f139, %f157, %f139;
	.loc	20	448	0
	mul.ftz.f32 	%f141, %f157, %f141;
	.loc	20	449	0
	mul.ftz.f32 	%f143, %f157, %f143;
$Lt_41_246274:
	.loc	20	452	0
	mov.u32 	%r184, 0;
	setp.eq.s32 	%p120, %r30, %r184;
	@%p120 bra 	$Lt_41_246786;
	.loc	20	57	0
	mov.u32 	%r185, 0;
	setp.ne.s32 	%p121, %r21, %r185;
	@%p121 bra 	$Lt_41_186114;
	.loc	20	59	0
	mov.f32 	%f158, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_5;
$Lt_41_186114:
	.loc	20	61	0
	mov.u32 	%r186, 64;
	setp.ne.s32 	%p122, %r21, %r186;
	@%p122 bra 	$Lt_41_186370;
	.loc	20	63	0
	mov.f32 	%f158, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_5;
$Lt_41_186370:
	.loc	20	65	0
	mov.u32 	%r187, 128;
	setp.ne.s32 	%p123, %r21, %r187;
	@%p123 bra 	$Lt_41_186626;
	.loc	20	68	0
	mov.f32 	%f158, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_5;
$Lt_41_186626:
	.loc	20	70	0
	mov.u32 	%r188, 192;
	setp.ne.s32 	%p124, %r21, %r188;
	@%p124 bra 	$Lt_41_186882;
	.loc	20	72	0
	mov.f32 	%f158, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_5;
$Lt_41_186882:
	.loc	20	76	0
	mov.f32 	%f158, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_5:
	.loc	20	107	0
	and.b32 	%r134, %r12, 2048;
	mov.s32 	%r189, 0;
	setp.ne.s32 	%p84, %r134, %r189;
	@!%p84 bra 	$Lt_41_247554;
	.loc	20	100	0
	ld.const.f32 	%f159, [kYCbCrOffset+0];
	bra.uni 	$Lt_41_247298;
$Lt_41_247554:
	ld.const.f32 	%f159, [kYCbCrFullRangeOffset+0];
$Lt_41_247298:
	.loc	20	107	0
	@!%p84 bra 	$Lt_41_248066;
	.loc	20	100	0
	ld.const.f32 	%f160, [kYCbCrOffset+4];
	bra.uni 	$Lt_41_247810;
$Lt_41_248066:
	ld.const.f32 	%f160, [kYCbCrFullRangeOffset+4];
$Lt_41_247810:
	.loc	20	107	0
	@!%p84 bra 	$Lt_41_248578;
	.loc	20	100	0
	ld.const.f32 	%f161, [kYCbCrOffset+8];
	bra.uni 	$Lt_41_248322;
$Lt_41_248578:
	ld.const.f32 	%f161, [kYCbCrFullRangeOffset+8];
$Lt_41_248322:
	.loc	20	454	0
	mov.f32 	%f162, 0f437f0000;   	// 255
	div.approx.ftz.f32 	%f163, %f158, %f162;
	fma.rn.ftz.f32 	%f139, %f163, %f159, %f139;
	fma.rn.ftz.f32 	%f141, %f163, %f160, %f141;
	fma.rn.ftz.f32 	%f143, %f163, %f161, %f143;
$Lt_41_246786:
	.loc	20	551	0
	mov.f32 	%f2, %f137;
	mov.f32 	%f4, %f139;
	mov.f32 	%f5, %f141;
	mov.f32 	%f6, %f143;
$Lt_41_239106:
$L_41_221442:
$Lt_41_238594:
	.loc	20	540	0
	and.b32 	%r190, %r12, 4096;
	mov.u32 	%r191, 0;
	setp.ne.s32 	%p125, %r190, %r191;
	@%p125 bra 	$Lt_41_248834;
	.loc	21	268	0
	mov.f32 	%f164, %f4;
	.loc	21	269	0
	mov.f32 	%f165, %f2;
	.loc	20	558	0
	mov.f32 	%f2, %f6;
	mov.f32 	%f4, %f5;
	mov.f32 	%f5, %f164;
	mov.f32 	%f6, %f165;
$Lt_41_248834:
	@!%p3 bra 	$Lt_41_249346;
	.loc	20	57	0
	mov.u32 	%r192, 0;
	setp.ne.s32 	%p126, %r21, %r192;
	@%p126 bra 	$Lt_41_189442;
	.loc	20	59	0
	mov.f32 	%f166, 0f437f0000;   	// 255
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_1;
$Lt_41_189442:
	.loc	20	61	0
	mov.u32 	%r193, 64;
	setp.ne.s32 	%p127, %r21, %r193;
	@%p127 bra 	$Lt_41_189698;
	.loc	20	63	0
	mov.f32 	%f166, 0f447fc000;   	// 1023
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_1;
$Lt_41_189698:
	.loc	20	65	0
	mov.u32 	%r194, 128;
	setp.ne.s32 	%p128, %r21, %r194;
	@%p128 bra 	$Lt_41_189954;
	.loc	20	68	0
	mov.f32 	%f166, 0f47000000;   	// 32768
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_1;
$Lt_41_189954:
	.loc	20	70	0
	mov.u32 	%r195, 192;
	setp.ne.s32 	%p129, %r21, %r195;
	@%p129 bra 	$Lt_41_190210;
	.loc	20	72	0
	mov.f32 	%f166, 0fbf800000;   	// -1
	bra.uni 	$LDWendi__Z19MaxUnsignedBitValuei_219_1;
$Lt_41_190210:
	.loc	20	76	0
	mov.f32 	%f166, 0f3f800000;   	// 1
$LDWendi__Z19MaxUnsignedBitValuei_219_1:
	.loc	20	564	0
	mov.f32 	%f167, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f168, %f2, %f167;
	mov.f32 	%f169, 0f00000000;   	// 0
	mov.f32 	%f170, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p130, %f168, %f170;
	selp.f32 	%f171, %f168, %f169, %p130;
	min.ftz.f32 	%f2, %f171, %f166;
	mov.f32 	%f172, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f173, %f4, %f172;
	mov.f32 	%f174, 0f00000000;   	// 0
	mov.f32 	%f175, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p131, %f173, %f175;
	selp.f32 	%f176, %f173, %f174, %p131;
	min.ftz.f32 	%f4, %f176, %f166;
	mov.f32 	%f177, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f178, %f5, %f177;
	mov.f32 	%f179, 0f00000000;   	// 0
	mov.f32 	%f180, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p132, %f178, %f180;
	selp.f32 	%f181, %f178, %f179, %p132;
	min.ftz.f32 	%f5, %f181, %f166;
	mov.f32 	%f182, 0f3f000000;   	// 0.5
	add.ftz.f32 	%f183, %f6, %f182;
	mov.f32 	%f184, 0f00000000;   	// 0
	mov.f32 	%f185, 0f00000000;   	// 0
	setp.gt.ftz.f32 	%p133, %f183, %f185;
	selp.f32 	%f186, %f183, %f184, %p133;
	min.ftz.f32 	%f6, %f186, %f166;
$Lt_41_249346:
	.loc	22	73	0
	cvt.rzi.ftz.u32.f32 	%r196, %f4;
	cvt.u8.u32 	%r197, %r196;
	cvt.rzi.ftz.u32.f32 	%r198, %f5;
	cvt.u8.u32 	%r199, %r198;
	cvt.rzi.ftz.u32.f32 	%r200, %f6;
	cvt.u8.u32 	%r201, %r200;
	cvt.rzi.ftz.u32.f32 	%r202, %f2;
	cvt.u8.u32 	%r203, %r202;
	st.param.u8 	[__cudaretf__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_+0], %r203;
	mov.s32 	%r204, %r197;
	st.param.u8 	[__cudaretf__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_+1], %r204;
	mov.s32 	%r205, %r199;
	st.param.u8 	[__cudaretf__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_+2], %r205;
	mov.s32 	%r206, %r201;
	st.param.u8 	[__cudaretf__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_+3], %r206;
	ret;
$LDWend__Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_:
	} // _Z29ConvertPixel_444_8u_To_444_8u6uchar414IR_PixelFormatS0_

	// uchar4 ReadPixel<uchar4>(const uchar4*, int, DevicePixelFormat, int, int)
	// Returns the 4-byte element at index (parmf2 * parmf5 + parmf4) of the array
	// addressed by parmf1. parmf3 (the DevicePixelFormat) is declared but never read.
	// NOTE(review): parmf2/parmf4/parmf5 look like pitch-in-elements, x and y - confirm against callers.
	.visible .func (.param .align 4 .b8 __cudaretf__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii[4]) _Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii (.param .u64 __cudaparmf1__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii, .param .s32 __cudaparmf2__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii, .param .u32 __cudaparmf3__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii, .param .s32 __cudaparmf4__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii, .param .s32 __cudaparmf5__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii)
	{
	.reg .u64 %src_base, %byte_off, %elem_addr;
	.reg .u32 %stride, %col, %row, %elem_idx;
	.reg .u32 %ch0, %ch1, %ch2, %ch3;
	.loc	22	87	0
$LDWbegin__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii:
	ld.param.u64 	%src_base, [__cudaparmf1__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii];
	ld.param.u32 	%stride, [__cudaparmf2__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii];
	ld.param.u32 	%col, [__cudaparmf4__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii];
	ld.param.u32 	%row, [__cudaparmf5__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii];
	.loc	22	88	0
	// elem_idx = stride * row + col, in wrapping 32-bit arithmetic
	mad.lo.s32 	%elem_idx, %stride, %row, %col;
	// sign-extend to 64 bits while scaling by the 4-byte element size
	mul.wide.s32 	%byte_off, %elem_idx, 4;
	add.u64 	%elem_addr, %src_base, %byte_off;
	ld.v4.u8 	{%ch0,%ch1,%ch2,%ch3}, [%elem_addr];
	// hand the four bytes back through the return parameter, in memory order
	st.param.u8 	[__cudaretf__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii+0], %ch0;
	st.param.u8 	[__cudaretf__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii+1], %ch1;
	st.param.u8 	[__cudaretf__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii+2], %ch2;
	st.param.u8 	[__cudaretf__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii+3], %ch3;
	ret;
$LDWend__Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii:
	} // _Z9ReadPixelI6uchar4ET_PKS1_i17DevicePixelFormatii

	// ushort4 ReadPixel<ushort4>(const ushort4*, int, DevicePixelFormat, int, int)
	// Returns the 8-byte element at index (parmf2 * parmf5 + parmf4) of the array
	// addressed by parmf1. parmf3 (the DevicePixelFormat) is declared but never read.
	// NOTE(review): parmf2/parmf4/parmf5 look like pitch-in-elements, x and y - confirm against callers.
	.visible .func (.param .align 8 .b8 __cudaretf__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii[8]) _Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii (.param .u64 __cudaparmf1__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii, .param .s32 __cudaparmf2__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii, .param .u32 __cudaparmf3__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii, .param .s32 __cudaparmf4__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii, .param .s32 __cudaparmf5__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii)
	{
	.reg .u64 %src_base, %byte_off, %elem_addr;
	.reg .u32 %stride, %col, %row, %elem_idx;
	.reg .u32 %ch0, %ch1, %ch2, %ch3;
	.loc	22	87	0
$LDWbegin__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii:
	ld.param.u64 	%src_base, [__cudaparmf1__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii];
	ld.param.u32 	%stride, [__cudaparmf2__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii];
	ld.param.u32 	%col, [__cudaparmf4__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii];
	ld.param.u32 	%row, [__cudaparmf5__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii];
	.loc	22	88	0
	// elem_idx = stride * row + col, in wrapping 32-bit arithmetic
	mad.lo.s32 	%elem_idx, %stride, %row, %col;
	// sign-extend to 64 bits while scaling by the 8-byte element size
	mul.wide.s32 	%byte_off, %elem_idx, 8;
	add.u64 	%elem_addr, %src_base, %byte_off;
	ld.v4.u16 	{%ch0,%ch1,%ch2,%ch3}, [%elem_addr];
	// hand the four 16-bit values back through the return parameter, in memory order
	st.param.u16 	[__cudaretf__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii+0], %ch0;
	st.param.u16 	[__cudaretf__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii+2], %ch1;
	st.param.u16 	[__cudaretf__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii+4], %ch2;
	st.param.u16 	[__cudaretf__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii+6], %ch3;
	ret;
$LDWend__Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii:
	} // _Z9ReadPixelI7ushort4ET_PKS1_i17DevicePixelFormatii

	// void WritePixel<uchar4>(uchar4, uchar4*, int, DevicePixelFormat, int, int)
	// Stores the 4-byte value passed in parmf1 to element (parmf3 * parmf6 + parmf5)
	// of the array addressed by parmf2. parmf4 (the DevicePixelFormat) is never read.
	// NOTE(review): parmf3/parmf5/parmf6 look like pitch-in-elements, x and y - confirm against callers.
	.visible .func _Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii (.param .align 4 .b8 __cudaparmf1__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii[4], .param .u64 __cudaparmf2__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii, .param .s32 __cudaparmf3__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii, .param .u32 __cudaparmf4__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii, .param .s32 __cudaparmf5__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii, .param .s32 __cudaparmf6__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii)
	{
	.reg .u64 %dst_base, %byte_off, %elem_addr;
	.reg .u32 %stride, %col, %row, %elem_idx;
	.reg .u32 %ch0, %ch1, %ch2, %ch3;
	.loc	22	99	0
$LDWbegin__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii:
	// unpack the by-value pixel one byte at a time
	ld.param.u8 	%ch0, [__cudaparmf1__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii+0];
	ld.param.u8 	%ch1, [__cudaparmf1__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii+1];
	ld.param.u8 	%ch2, [__cudaparmf1__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii+2];
	ld.param.u8 	%ch3, [__cudaparmf1__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii+3];
	ld.param.u64 	%dst_base, [__cudaparmf2__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii];
	ld.param.u32 	%stride, [__cudaparmf3__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii];
	ld.param.u32 	%col, [__cudaparmf5__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii];
	ld.param.u32 	%row, [__cudaparmf6__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii];
	.loc	19	126	0
	// elem_idx = stride * row + col, in wrapping 32-bit arithmetic
	mad.lo.s32 	%elem_idx, %stride, %row, %col;
	// sign-extend to 64 bits while scaling by the 4-byte element size
	mul.wide.s32 	%byte_off, %elem_idx, 4;
	add.u64 	%elem_addr, %dst_base, %byte_off;
	st.v4.u8 	[%elem_addr], {%ch0,%ch1,%ch2,%ch3};
	.loc	22	101	0
	ret;
$LDWend__Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii:
	} // _Z10WritePixelI6uchar4EvT_PS1_i17DevicePixelFormatii

	// void WritePixel<ushort4>(ushort4, ushort4*, int, DevicePixelFormat, int, int)
	// Stores the 8-byte value passed in parmf1 to element (parmf3 * parmf6 + parmf5)
	// of the array addressed by parmf2. parmf4 (the DevicePixelFormat) is never read.
	// NOTE(review): parmf3/parmf5/parmf6 look like pitch-in-elements, x and y - confirm against callers.
	.visible .func _Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii (.param .align 8 .b8 __cudaparmf1__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii[8], .param .u64 __cudaparmf2__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii, .param .s32 __cudaparmf3__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii, .param .u32 __cudaparmf4__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii, .param .s32 __cudaparmf5__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii, .param .s32 __cudaparmf6__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii)
	{
	.reg .u64 %dst_base, %byte_off, %elem_addr;
	.reg .u32 %stride, %col, %row, %elem_idx;
	.reg .u32 %ch0, %ch1, %ch2, %ch3;
	.loc	22	99	0
$LDWbegin__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii:
	// unpack the by-value pixel one 16-bit lane at a time
	ld.param.u16 	%ch0, [__cudaparmf1__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii+0];
	ld.param.u16 	%ch1, [__cudaparmf1__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii+2];
	ld.param.u16 	%ch2, [__cudaparmf1__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii+4];
	ld.param.u16 	%ch3, [__cudaparmf1__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii+6];
	ld.param.u64 	%dst_base, [__cudaparmf2__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii];
	ld.param.u32 	%stride, [__cudaparmf3__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii];
	ld.param.u32 	%col, [__cudaparmf5__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii];
	ld.param.u32 	%row, [__cudaparmf6__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii];
	.loc	19	126	0
	// elem_idx = stride * row + col, in wrapping 32-bit arithmetic
	mad.lo.s32 	%elem_idx, %stride, %row, %col;
	// sign-extend to 64 bits while scaling by the 8-byte element size
	mul.wide.s32 	%byte_off, %elem_idx, 8;
	add.u64 	%elem_addr, %dst_base, %byte_off;
	st.v4.u16 	[%elem_addr], {%ch0,%ch1,%ch2,%ch3};
	.loc	22	101	0
	ret;
$LDWend__Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii:
	} // _Z10WritePixelI7ushort4EvT_PS1_i17DevicePixelFormatii

	// float4& operator+=<float4>(float4&, float)
	// Adds the scalar in parmf2 to each of the four floats stored at the address in
	// parmf1 (flush-to-zero adds), writes the sums back to the same address and
	// returns that address unchanged.
	.visible .func (.param .u64 __cudaretf__ZpLI6float4ERT_S2_f) _ZpLI6float4ERT_S2_f (.param .u64 __cudaparmf1__ZpLI6float4ERT_S2_f, .param .f32 __cudaparmf2__ZpLI6float4ERT_S2_f)
	{
	.reg .u64 %vec_ptr;
	.reg .f32 %addend, %e0, %e1, %e2, %e3;
	.loc	21	91	0
$LDWbegin__ZpLI6float4ERT_S2_f:
	ld.param.u64 	%vec_ptr, [__cudaparmf1__ZpLI6float4ERT_S2_f];
	ld.param.f32 	%addend, [__cudaparmf2__ZpLI6float4ERT_S2_f];
	ld.v4.f32 	{%e0,%e1,%e2,%e3}, [%vec_ptr];
	.loc	21	92	0
	add.ftz.f32 	%e0, %e0, %addend;
	.loc	21	93	0
	add.ftz.f32 	%e1, %e1, %addend;
	.loc	21	94	0
	add.ftz.f32 	%e2, %e2, %addend;
	.loc	21	95	0
	add.ftz.f32 	%e3, %e3, %addend;
	st.v4.f32 	[%vec_ptr], {%e0,%e1,%e2,%e3};
	.loc	21	96	0
	// the reference result is the incoming address itself
	st.param.u64 	[__cudaretf__ZpLI6float4ERT_S2_f], %vec_ptr;
	ret;
$LDWend__ZpLI6float4ERT_S2_f:
	} // _ZpLI6float4ERT_S2_f

	// uchar4 Read2D<uchar4>(const uchar4*, int, int, int)
	// Returns the 4-byte element at index (parmf2 * parmf4 + parmf3) of the array
	// addressed by parmf1.
	// NOTE(review): parmf2/parmf3/parmf4 look like pitch-in-elements, x and y - confirm against callers.
	.visible .func (.param .align 4 .b8 __cudaretf__Z6Read2DI6uchar4ET_PKS1_iii[4]) _Z6Read2DI6uchar4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI6uchar4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI6uchar4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI6uchar4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI6uchar4ET_PKS1_iii)
	{
	.reg .u64 %src_base, %byte_off, %elem_addr;
	.reg .u32 %stride, %col, %row, %elem_idx;
	.reg .u32 %ch0, %ch1, %ch2, %ch3;
	.loc	19	114	0
$LDWbegin__Z6Read2DI6uchar4ET_PKS1_iii:
	ld.param.u64 	%src_base, [__cudaparmf1__Z6Read2DI6uchar4ET_PKS1_iii];
	ld.param.u32 	%stride, [__cudaparmf2__Z6Read2DI6uchar4ET_PKS1_iii];
	ld.param.u32 	%col, [__cudaparmf3__Z6Read2DI6uchar4ET_PKS1_iii];
	ld.param.u32 	%row, [__cudaparmf4__Z6Read2DI6uchar4ET_PKS1_iii];
	.loc	19	115	0
	// elem_idx = stride * row + col, in wrapping 32-bit arithmetic
	mad.lo.s32 	%elem_idx, %stride, %row, %col;
	// sign-extend to 64 bits while scaling by the 4-byte element size
	mul.wide.s32 	%byte_off, %elem_idx, 4;
	add.u64 	%elem_addr, %src_base, %byte_off;
	ld.v4.u8 	{%ch0,%ch1,%ch2,%ch3}, [%elem_addr];
	// hand the four bytes back through the return parameter, in memory order
	st.param.u8 	[__cudaretf__Z6Read2DI6uchar4ET_PKS1_iii+0], %ch0;
	st.param.u8 	[__cudaretf__Z6Read2DI6uchar4ET_PKS1_iii+1], %ch1;
	st.param.u8 	[__cudaretf__Z6Read2DI6uchar4ET_PKS1_iii+2], %ch2;
	st.param.u8 	[__cudaretf__Z6Read2DI6uchar4ET_PKS1_iii+3], %ch3;
	ret;
$LDWend__Z6Read2DI6uchar4ET_PKS1_iii:
	} // _Z6Read2DI6uchar4ET_PKS1_iii

	// void Write2D<uchar4>(uchar4, uchar4*, int, int, int)
	// Stores the 4-byte value passed in parmf1 to element (parmf3 * parmf5 + parmf4)
	// of the array addressed by parmf2.
	// NOTE(review): parmf3/parmf4/parmf5 look like pitch-in-elements, x and y - confirm against callers.
	.visible .func _Z7Write2DI6uchar4EvT_PS1_iii (.param .align 4 .b8 __cudaparmf1__Z7Write2DI6uchar4EvT_PS1_iii[4], .param .u64 __cudaparmf2__Z7Write2DI6uchar4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI6uchar4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI6uchar4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI6uchar4EvT_PS1_iii)
	{
	.reg .u64 %dst_base, %byte_off, %elem_addr;
	.reg .u32 %stride, %col, %row, %elem_idx;
	.reg .u32 %ch0, %ch1, %ch2, %ch3;
	.loc	19	125	0
$LDWbegin__Z7Write2DI6uchar4EvT_PS1_iii:
	// unpack the by-value pixel one byte at a time
	ld.param.u8 	%ch0, [__cudaparmf1__Z7Write2DI6uchar4EvT_PS1_iii+0];
	ld.param.u8 	%ch1, [__cudaparmf1__Z7Write2DI6uchar4EvT_PS1_iii+1];
	ld.param.u8 	%ch2, [__cudaparmf1__Z7Write2DI6uchar4EvT_PS1_iii+2];
	ld.param.u8 	%ch3, [__cudaparmf1__Z7Write2DI6uchar4EvT_PS1_iii+3];
	ld.param.u64 	%dst_base, [__cudaparmf2__Z7Write2DI6uchar4EvT_PS1_iii];
	ld.param.u32 	%stride, [__cudaparmf3__Z7Write2DI6uchar4EvT_PS1_iii];
	ld.param.u32 	%col, [__cudaparmf4__Z7Write2DI6uchar4EvT_PS1_iii];
	ld.param.u32 	%row, [__cudaparmf5__Z7Write2DI6uchar4EvT_PS1_iii];
	.loc	19	126	0
	// elem_idx = stride * row + col, in wrapping 32-bit arithmetic
	mad.lo.s32 	%elem_idx, %stride, %row, %col;
	// sign-extend to 64 bits while scaling by the 4-byte element size
	mul.wide.s32 	%byte_off, %elem_idx, 4;
	add.u64 	%elem_addr, %dst_base, %byte_off;
	st.v4.u8 	[%elem_addr], {%ch0,%ch1,%ch2,%ch3};
	.loc	19	127	0
	ret;
$LDWend__Z7Write2DI6uchar4EvT_PS1_iii:
	} // _Z7Write2DI6uchar4EvT_PS1_iii

	// Kernel: each thread handles the pixel at (x, y) derived from its block and
	// thread ids. It reads a 4 x 8-bit pixel from inSrc, scales every channel by an
	// approximate 1/255 and writes the result to inDest either as four 16-bit
	// halves (inDestDeviceFormat == 0) or as four 32-bit floats (any other value).
	// Params:
	//   inSrc / inDest           - global-memory base addresses
	//   inSrcPitch / inDestPitch - row pitches counted in pixels (multiplied by y
	//                              before the per-pixel byte size is applied)
	//   inWidth / inHeight       - bounds for the x / y guard
	//   inSrcDeviceFormat        - declared but never read in this kernel
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<20>;
	.reg .pred %p<4>;
	.loc	22	157	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// x (%r8) = ctaid.x * ntid.x + tid.x ; y (%r10) = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: set.gt yields all-ones when true, neg.s32 turns that into 1,
	// so %r17 is 1 only when (inWidth > x) and (inHeight > y) as signed compares.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// threads outside the image skip straight to exit
	@%p1 bra 	$Lt_49_194306;
	.loc	19	115	0
	// source address = inSrc + (inSrcPitch * y + x) * 4 ; %rd2 is not read afterwards
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 4;
	add.u64 	%rd4, %rd1, %rd3;
	ld.global.v4.u8 	{%r22,%r23,%r24,%r25}, [%rd4+0];
	.loc	22	157	0
	// the four source bytes as floats, still in the 0..255 range
	cvt.rn.f32.u32 	%f1, %r22;
	cvt.rn.f32.u32 	%f2, %r23;
	cvt.rn.f32.u32 	%f3, %r24;
	cvt.rn.f32.u32 	%f4, %r25;
	// destination pixel index (%rd5) = inDestPitch * y + x, sign-extended to 64 bits
	ld.param.s32 	%r26, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r27, %r26, %r10;
	add.s32 	%r28, %r8, %r27;
	cvt.s64.s32 	%rd5, %r28;
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	ld.param.s32 	%r29, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r30, 0;
	setp.ne.s32 	%p2, %r29, %r30;
	// a non-zero inDestDeviceFormat selects the 32-bit float store path below
	@%p2 bra 	$Lt_49_195074;
	.loc	19	126	0
	// inDestDeviceFormat == 0: scale by approx 1/255, convert each channel to f16
	// (round-to-nearest, flush-to-zero) and store 8 bytes per pixel
	mov.f32 	%f5, 0f3f800000;     	// 1
	mov.f32 	%f6, 0f437f0000;     	// 255
	div.approx.ftz.f32 	%f7, %f5, %f6;
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	mul.ftz.f32 	%f8, %f1, %f7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f8;
	mov.b32		%r31, %b1; }
	mul.ftz.f32 	%f9, %f2, %f7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r32, %b1; }
	mul.ftz.f32 	%f10, %f3, %f7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r33, %b1; }
	mul.ftz.f32 	%f11, %f4, %f7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r34, %b1; }
	st.global.v4.u16 	[%rd8+0], {%r31,%r32,%r33,%r34};
	.loc	22	157	0
	bra.uni 	$Lt_49_194818;
$Lt_49_195074:
	.loc	19	126	0
	// inDestDeviceFormat != 0: same approx 1/255 scale, stored as four f32
	// values, 16 bytes per pixel
	mov.f32 	%f12, 0f3f800000;    	// 1
	mov.f32 	%f13, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f14, %f12, %f13;
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	mul.ftz.f32 	%f15, %f1, %f14;
	mul.ftz.f32 	%f16, %f2, %f14;
	mul.ftz.f32 	%f17, %f3, %f14;
	mul.ftz.f32 	%f18, %f4, %f14;
	st.global.v4.f32 	[%rd10+0], {%f15,%f16,%f17,%f18};
$Lt_49_194818:
$Lt_49_194306:
	.loc	22	157	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Kernel: each thread handles the pixel at (x, y) derived from its block and
	// thread ids. It reads a 4 x 8-bit pixel from inSrc, normalises the fourth
	// channel by an approximate 1/255 and divides the other three normalised
	// channels by it; when the normalised fourth channel is at most 8e-6 all four
	// outputs are zero instead. The result is written to inDest as four 16-bit
	// halves (inDestDeviceFormat == 0) or four 32-bit floats (any other value).
	// NOTE(review): the "BGRP" in the name presumably means premultiplied alpha,
	// which would make this an un-premultiply step - confirm against the CUDA source.
	// Params:
	//   inSrc / inDest           - global-memory base addresses
	//   inSrcPitch / inDestPitch - row pitches counted in pixels (multiplied by y
	//                              before the per-pixel byte size is applied)
	//   inWidth / inHeight       - bounds for the x / y guard
	//   inSrcDeviceFormat        - declared but never read in this kernel
	.entry PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<22>;
	.reg .pred %p<5>;
	.loc	22	158	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// x (%r8) = ctaid.x * ntid.x + tid.x ; y (%r10) = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: set.gt yields all-ones when true, neg.s32 turns that into 1,
	// so %r17 is 1 only when (inWidth > x) and (inHeight > y) as signed compares.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// threads outside the image skip straight to exit
	@%p1 bra 	$Lt_50_194562;
	.loc	19	115	0
	// source address (%rd4) = inSrc + (inSrcPitch * y + x) * 4 ; %rd2 is not read afterwards
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 4;
	add.u64 	%rd4, %rd1, %rd3;
	// only the fourth byte of the pixel is fetched at this point
	ld.global.u8 	%r22, [%rd4+3];
	.loc	20	433	0
	// %f4 = approx 1/255 ; %f5 = fourth channel scaled by %f4 ; %f6 is the copy
	// that ends up as the fourth output channel
	cvt.rn.f32.u32 	%f1, %r22;
	mov.f32 	%f2, 0f3f800000;     	// 1
	mov.f32 	%f3, 0f437f0000;     	// 255
	div.approx.ftz.f32 	%f4, %f2, %f3;
	mul.ftz.f32 	%f5, %f1, %f4;
	mov.f32 	%f6, %f5;
	.loc	20	435	0
	// %p2 = (%f5 - 8e-6 <= 0); when it is false, jump to the division path
	mov.f32 	%f7, 0fb70637bd;     	// -8e-006
	add.ftz.f32 	%f8, %f5, %f7;
	mov.f32 	%f9, 0f00000000;     	// 0
	setp.le.ftz.f32 	%p2, %f8, %f9;
	@!%p2 bra 	$LDWendi__Z19MaxUnsignedBitValuei_227_7;
	// fourth channel is (nearly) zero: emit an all-zero pixel and skip the division
	mov.f32 	%f10, 0f00000000;    	// 0
	mov.f32 	%f11, 0f00000000;    	// 0
	mov.f32 	%f12, 0f00000000;    	// 0
	mov.f32 	%f6, 0f00000000;     	// 0
	bra.uni 	$Lt_50_195074;
$LDWendi__Z19MaxUnsignedBitValuei_227_7:
	.loc	20	447	0
	// %f14 = approx 1 / %f5. Reload the first three bytes (the fourth is discarded
	// via `_`); each output is (byte * approx 1/255) * %f14.
	// %f12 comes from byte 2, %f11 from byte 1, %f10 from byte 0.
	mov.f32 	%f13, 0f3f800000;    	// 1
	div.approx.ftz.f32 	%f14, %f13, %f5;
	ld.global.v4.u8 	{%r23,%r24,%r25,_}, [%rd4+0];
	cvt.rn.f32.u32 	%f15, %r25;
	mul.ftz.f32 	%f16, %f15, %f4;
	mul.ftz.f32 	%f12, %f14, %f16;
	.loc	20	448	0
	cvt.rn.f32.u32 	%f17, %r24;
	mul.ftz.f32 	%f18, %f17, %f4;
	mul.ftz.f32 	%f11, %f14, %f18;
	.loc	20	449	0
	cvt.rn.f32.u32 	%f19, %r23;
	mul.ftz.f32 	%f20, %f19, %f4;
	mul.ftz.f32 	%f10, %f14, %f20;
$Lt_50_195074:
	.loc	22	158	0
	// destination pixel index (%rd5) = inDestPitch * y + x, sign-extended to 64 bits
	ld.param.s32 	%r26, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r27, %r26, %r10;
	add.s32 	%r28, %r8, %r27;
	cvt.s64.s32 	%rd5, %r28;
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	ld.param.s32 	%r29, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r30, 0;
	setp.ne.s32 	%p3, %r29, %r30;
	// a non-zero inDestDeviceFormat selects the 32-bit float store path below
	@%p3 bra 	$Lt_50_195842;
	.loc	19	126	0
	// inDestDeviceFormat == 0: convert each channel to f16 (round-to-nearest,
	// flush-to-zero) and store 8 bytes per pixel in the order {%f10,%f11,%f12,%f6}
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r31, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r32, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f12;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f6;
	mov.b32		%r34, %b1; }
	st.global.v4.u16 	[%rd8+0], {%r31,%r32,%r33,%r34};
	.loc	22	158	0
	bra.uni 	$Lt_50_195586;
$Lt_50_195842:
	.loc	19	126	0
	// inDestDeviceFormat != 0: store the four f32 values directly, 16 bytes per pixel
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	st.global.v4.f32 	[%rd10+0], {%f10,%f11,%f12,%f6};
$Lt_50_195586:
$Lt_50_194562:
	.loc	22	158	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Pixel-format conversion kernel: BGRX_4444_8u -> BGRA_4444_32f (names taken from the entry name).
	// Each thread handles the pixel at x = ctaid.x*ntid.x + tid.x, y = ctaid.y*ntid.y + tid.y.
	// Threads for which inWidth > x and inHeight > y (signed compares) do not both hold
	// exit without touching memory.
	// The 4-byte source pixel is read, bytes 0..2 are scaled by an approximate 1/255 and
	// byte 3 is discarded; the fourth output component is the constant 1.0.
	//   inDestDeviceFormat == 0 : output pixel is 4 x f16 (8 bytes)
	//   inDestDeviceFormat != 0 : output pixel is 4 x f32 (16 bytes)
	// Both pitches are applied in pixel units: (x + pitch*y) is scaled by the pixel size afterwards.
	// inSrcDeviceFormat is declared but never read by this kernel.
	.entry PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<35>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<19>;
	.reg .pred %p<4>;
	.loc	22	159	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = ctaid.x*ntid.x + tid.x ; %r10 = y = ctaid.y*ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set.gt yields an all-ones mask when true, neg turns it into 1.
	// %r17 = (inWidth > x) & (inHeight > y); zero means this thread has no pixel.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_51_194050;
	.loc	19	115	0
	// Source address: inSrc + (x + inSrcPitch*y) * 4 bytes.
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	// %rd2 is written here but not read again in this kernel.
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 4;
	add.u64 	%rd4, %rd1, %rd3;
	// Load bytes 0..2 of the pixel; byte 3 goes to the sink operand '_'
	// (presumably the unused X channel, per the kernel name).
	ld.global.v4.u8 	{%r22,%r23,%r24,_}, [%rd4+0];
	.loc	22	159	0
	// %f1,%f2,%f3 = bytes 0,1,2 as float (0..255, not yet normalized).
	cvt.rn.f32.u32 	%f1, %r22;
	cvt.rn.f32.u32 	%f2, %r23;
	cvt.rn.f32.u32 	%f3, %r24;
	// %rd5 = destination pixel index = x + inDestPitch*y.
	ld.param.s32 	%r25, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r26, %r25, %r10;
	add.s32 	%r27, %r8, %r26;
	cvt.s64.s32 	%rd5, %r27;
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// Non-zero inDestDeviceFormat selects the f32 store path below.
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r29, 0;
	setp.ne.s32 	%p2, %r28, %r29;
	@%p2 bra 	$Lt_51_194818;
	.loc	19	126	0
	// --- f16 path: 8 bytes per destination pixel ---
	// %f6 = approximate 1/255 used to normalize each byte.
	mov.f32 	%f4, 0f3f800000;     	// 1
	mov.f32 	%f5, 0f437f0000;     	// 255
	div.approx.ftz.f32 	%f6, %f4, %f5;
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	// Each component: normalize, convert to f16, move the bits into a u32 register.
	mul.ftz.f32 	%f7, %f1, %f6;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f7;
	mov.b32		%r30, %b1; }
	mul.ftz.f32 	%f8, %f2, %f6;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f8;
	mov.b32		%r31, %b1; }
	mul.ftz.f32 	%f9, %f3, %f6;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r32, %b1; }
	// Fourth component is the constant 1.0.
	mov.f32 	%f10, 0f3f800000;    	// 1
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r33, %b1; }
	// Output order: byte 0, byte 1, byte 2, 1.0.
	st.global.v4.u16 	[%rd8+0], {%r30,%r31,%r32,%r33};
	.loc	22	159	0
	bra.uni 	$Lt_51_194562;
$Lt_51_194818:
	.loc	19	126	0
	// --- f32 path: 16 bytes per destination pixel ---
	// %f13 = approximate 1/255 (recomputed in this branch).
	mov.f32 	%f11, 0f3f800000;    	// 1
	mov.f32 	%f12, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f13, %f11, %f12;
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	mul.ftz.f32 	%f14, %f1, %f13;
	mul.ftz.f32 	%f15, %f2, %f13;
	mul.ftz.f32 	%f16, %f3, %f13;
	mov.f32 	%f17, 0f3f800000;    	// 1
	// Output order: byte 0, byte 1, byte 2, 1.0.
	st.global.v4.f32 	[%rd10+0], {%f14,%f15,%f16,%f17};
$Lt_51_194562:
$Lt_51_194050:
	// Common exit for both store paths and for out-of-bounds threads.
	.loc	22	159	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Pixel-format conversion kernel: ARGB_4444_8u -> BGRA_4444_32f (names taken from the entry name).
	// Each thread handles the pixel at x = ctaid.x*ntid.x + tid.x, y = ctaid.y*ntid.y + tid.y.
	// Threads for which inWidth > x and inHeight > y (signed compares) do not both hold
	// exit without touching memory.
	// All four source bytes are read, scaled by an approximate 1/255 and written in
	// reversed byte order (byte 3, byte 2, byte 1, byte 0).
	//   inDestDeviceFormat == 0 : output pixel is 4 x f16 (8 bytes)
	//   inDestDeviceFormat != 0 : output pixel is 4 x f32 (16 bytes)
	// Both pitches are applied in pixel units: (x + pitch*y) is scaled by the pixel size afterwards.
	// inSrcDeviceFormat is declared but never read by this kernel.
	.entry PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<20>;
	.reg .pred %p<4>;
	.loc	22	160	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = ctaid.x*ntid.x + tid.x ; %r10 = y = ctaid.y*ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set.gt yields an all-ones mask when true, neg turns it into 1.
	// %r17 = (inWidth > x) & (inHeight > y); zero means this thread has no pixel.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_52_194562;
	.loc	19	115	0
	// Source address: inSrc + (x + inSrcPitch*y) * 4 bytes.
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	// %rd2 is written here but not read again in this kernel.
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 4;
	add.u64 	%rd4, %rd1, %rd3;
	// Load all four bytes of the pixel (presumably A,R,G,B in memory order, per the kernel name).
	ld.global.v4.u8 	{%r22,%r23,%r24,%r25}, [%rd4+0];
	.loc	22	160	0
	// Reverse the byte order while converting to float: %f1..%f4 = bytes 3,2,1,0.
	cvt.rn.f32.u32 	%f1, %r25;
	cvt.rn.f32.u32 	%f2, %r24;
	cvt.rn.f32.u32 	%f3, %r23;
	cvt.rn.f32.u32 	%f4, %r22;
	// %rd5 = destination pixel index = x + inDestPitch*y.
	ld.param.s32 	%r26, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r27, %r26, %r10;
	add.s32 	%r28, %r8, %r27;
	cvt.s64.s32 	%rd5, %r28;
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// Non-zero inDestDeviceFormat selects the f32 store path below.
	ld.param.s32 	%r29, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r30, 0;
	setp.ne.s32 	%p2, %r29, %r30;
	@%p2 bra 	$Lt_52_195330;
	.loc	19	126	0
	// --- f16 path: 8 bytes per destination pixel ---
	// %f7 = approximate 1/255 used to normalize each byte.
	mov.f32 	%f5, 0f3f800000;     	// 1
	mov.f32 	%f6, 0f437f0000;     	// 255
	div.approx.ftz.f32 	%f7, %f5, %f6;
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	// Each component: normalize, convert to f16, move the bits into a u32 register.
	mul.ftz.f32 	%f8, %f1, %f7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f8;
	mov.b32		%r31, %b1; }
	mul.ftz.f32 	%f9, %f2, %f7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r32, %b1; }
	mul.ftz.f32 	%f10, %f3, %f7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r33, %b1; }
	mul.ftz.f32 	%f11, %f4, %f7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r34, %b1; }
	// Output order: byte 3, byte 2, byte 1, byte 0.
	st.global.v4.u16 	[%rd8+0], {%r31,%r32,%r33,%r34};
	.loc	22	160	0
	bra.uni 	$Lt_52_195074;
$Lt_52_195330:
	.loc	19	126	0
	// --- f32 path: 16 bytes per destination pixel ---
	// %f14 = approximate 1/255 (recomputed in this branch).
	mov.f32 	%f12, 0f3f800000;    	// 1
	mov.f32 	%f13, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f14, %f12, %f13;
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	mul.ftz.f32 	%f15, %f1, %f14;
	mul.ftz.f32 	%f16, %f2, %f14;
	mul.ftz.f32 	%f17, %f3, %f14;
	mul.ftz.f32 	%f18, %f4, %f14;
	// Output order: byte 3, byte 2, byte 1, byte 0.
	st.global.v4.f32 	[%rd10+0], {%f15,%f16,%f17,%f18};
$Lt_52_195074:
$Lt_52_194562:
	// Common exit for both store paths and for out-of-bounds threads.
	.loc	22	160	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_ARGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Pixel-format conversion kernel: PRGB_4444_8u -> BGRA_4444_32f (names taken from the entry name).
	// Each thread handles the pixel at x = ctaid.x*ntid.x + tid.x, y = ctaid.y*ntid.y + tid.y.
	// Threads for which inWidth > x and inHeight > y (signed compares) do not both hold
	// exit without touching memory.
	// Byte 0 of the source pixel is treated as alpha (alpha = byte0 * ~1/255):
	//   - if alpha - 8e-6 <= 0, all four output components are 0;
	//   - otherwise bytes 1..3 are normalized by ~1/255 and divided by alpha
	//     (presumably undoing premultiplied alpha, per the 'P' in the kernel name).
	// Output component order is byte 3, byte 2, byte 1, alpha.
	//   inDestDeviceFormat == 0 : output pixel is 4 x f16 (8 bytes)
	//   inDestDeviceFormat != 0 : output pixel is 4 x f32 (16 bytes)
	// Both pitches are applied in pixel units: (x + pitch*y) is scaled by the pixel size afterwards.
	// inSrcDeviceFormat is declared but never read by this kernel.
	.entry PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<22>;
	.reg .pred %p<5>;
	.loc	22	161	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = ctaid.x*ntid.x + tid.x ; %r10 = y = ctaid.y*ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set.gt yields an all-ones mask when true, neg turns it into 1.
	// %r17 = (inWidth > x) & (inHeight > y); zero means this thread has no pixel.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_53_194818;
	.loc	19	115	0
	// Source address: inSrc + (x + inSrcPitch*y) * 4 bytes.
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	// %rd2 is written here but not read again in this kernel.
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 4;
	add.u64 	%rd4, %rd1, %rd3;
	// Only byte 0 (used as alpha) is loaded at first; bytes 1..3 are loaded later
	// on the non-zero-alpha path.
	ld.global.u8 	%r22, [%rd4+0];
	.loc	20	433	0
	// %f4 = approximate 1/255; %f5 = alpha = byte0 * %f4.
	cvt.rn.f32.u32 	%f1, %r22;
	mov.f32 	%f2, 0f3f800000;     	// 1
	mov.f32 	%f3, 0f437f0000;     	// 255
	div.approx.ftz.f32 	%f4, %f2, %f3;
	mul.ftz.f32 	%f5, %f1, %f4;
	// %f6 is the alpha value that gets stored; it defaults to the computed alpha.
	mov.f32 	%f6, %f5;
	.loc	20	435	0
	// Near-zero alpha test: %p2 = (alpha - 8e-6 <= 0).
	mov.f32 	%f7, 0fb70637bd;     	// -8e-006
	add.ftz.f32 	%f8, %f5, %f7;
	mov.f32 	%f9, 0f00000000;     	// 0
	setp.le.ftz.f32 	%p2, %f8, %f9;
	@!%p2 bra 	$LDWendi__Z19MaxUnsignedBitValuei_230_7;
	// Alpha is (effectively) zero: emit an all-zero pixel and skip the division.
	mov.f32 	%f10, 0f00000000;    	// 0
	mov.f32 	%f11, 0f00000000;    	// 0
	mov.f32 	%f12, 0f00000000;    	// 0
	mov.f32 	%f6, 0f00000000;     	// 0
	bra.uni 	$Lt_53_195330;
$LDWendi__Z19MaxUnsignedBitValuei_230_7:
	.loc	20	447	0
	// %f14 = approximate 1/alpha.
	mov.f32 	%f13, 0f3f800000;    	// 1
	div.approx.ftz.f32 	%f14, %f13, %f5;
	// Reload the pixel, this time keeping bytes 1..3 and discarding byte 0.
	ld.global.v4.u8 	{_,%r23,%r24,%r25}, [%rd4+0];
	// %f12 = (byte1 / 255) / alpha
	cvt.rn.f32.u32 	%f15, %r23;
	mul.ftz.f32 	%f16, %f15, %f4;
	mul.ftz.f32 	%f12, %f14, %f16;
	.loc	20	448	0
	// %f11 = (byte2 / 255) / alpha
	cvt.rn.f32.u32 	%f17, %r24;
	mul.ftz.f32 	%f18, %f17, %f4;
	mul.ftz.f32 	%f11, %f14, %f18;
	.loc	20	449	0
	// %f10 = (byte3 / 255) / alpha
	cvt.rn.f32.u32 	%f19, %r25;
	mul.ftz.f32 	%f20, %f19, %f4;
	mul.ftz.f32 	%f10, %f14, %f20;
$Lt_53_195330:
	.loc	22	161	0
	// %rd5 = destination pixel index = x + inDestPitch*y.
	ld.param.s32 	%r26, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r27, %r26, %r10;
	add.s32 	%r28, %r8, %r27;
	cvt.s64.s32 	%rd5, %r28;
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// Non-zero inDestDeviceFormat selects the f32 store path below.
	ld.param.s32 	%r29, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r30, 0;
	setp.ne.s32 	%p3, %r29, %r30;
	@%p3 bra 	$Lt_53_196098;
	.loc	19	126	0
	// --- f16 path: 8 bytes per destination pixel ---
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	// Convert each component to f16 and move the bits into a u32 register.
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r31, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r32, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f12;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f6;
	mov.b32		%r34, %b1; }
	// Output order: byte 3, byte 2, byte 1 (each divided by alpha), then alpha.
	st.global.v4.u16 	[%rd8+0], {%r31,%r32,%r33,%r34};
	.loc	22	161	0
	bra.uni 	$Lt_53_195842;
$Lt_53_196098:
	.loc	19	126	0
	// --- f32 path: 16 bytes per destination pixel, same component order ---
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	st.global.v4.f32 	[%rd10+0], {%f10,%f11,%f12,%f6};
$Lt_53_195842:
$Lt_53_194818:
	// Common exit for both store paths and for out-of-bounds threads.
	.loc	22	161	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_PRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Pixel-format conversion kernel: XRGB_4444_8u -> BGRA_4444_32f (names taken from the entry name).
	// Each thread handles the pixel at x = ctaid.x*ntid.x + tid.x, y = ctaid.y*ntid.y + tid.y.
	// Threads for which inWidth > x and inHeight > y (signed compares) do not both hold
	// exit without touching memory.
	// Byte 0 of the source pixel is discarded; bytes 1..3 are scaled by an approximate
	// 1/255 and written in reversed order (byte 3, byte 2, byte 1), followed by the constant 1.0.
	//   inDestDeviceFormat == 0 : output pixel is 4 x f16 (8 bytes)
	//   inDestDeviceFormat != 0 : output pixel is 4 x f32 (16 bytes)
	// Both pitches are applied in pixel units: (x + pitch*y) is scaled by the pixel size afterwards.
	// inSrcDeviceFormat is declared but never read by this kernel.
	.entry PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<35>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<19>;
	.reg .pred %p<4>;
	.loc	22	162	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = ctaid.x*ntid.x + tid.x ; %r10 = y = ctaid.y*ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set.gt yields an all-ones mask when true, neg turns it into 1.
	// %r17 = (inWidth > x) & (inHeight > y); zero means this thread has no pixel.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_54_194306;
	.loc	19	115	0
	// Source address: inSrc + (x + inSrcPitch*y) * 4 bytes.
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	// %rd2 is written here but not read again in this kernel.
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 4;
	add.u64 	%rd4, %rd1, %rd3;
	// Load bytes 1..3 of the pixel; byte 0 goes to the sink operand '_'
	// (presumably the unused X channel, per the kernel name).
	ld.global.v4.u8 	{_,%r22,%r23,%r24}, [%rd4+0];
	.loc	22	162	0
	// Reverse the byte order while converting to float: %f1,%f2,%f3 = bytes 3,2,1.
	cvt.rn.f32.u32 	%f1, %r24;
	cvt.rn.f32.u32 	%f2, %r23;
	cvt.rn.f32.u32 	%f3, %r22;
	// %rd5 = destination pixel index = x + inDestPitch*y.
	ld.param.s32 	%r25, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r26, %r25, %r10;
	add.s32 	%r27, %r8, %r26;
	cvt.s64.s32 	%rd5, %r27;
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// Non-zero inDestDeviceFormat selects the f32 store path below.
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r29, 0;
	setp.ne.s32 	%p2, %r28, %r29;
	@%p2 bra 	$Lt_54_195074;
	.loc	19	126	0
	// --- f16 path: 8 bytes per destination pixel ---
	// %f6 = approximate 1/255 used to normalize each byte.
	mov.f32 	%f4, 0f3f800000;     	// 1
	mov.f32 	%f5, 0f437f0000;     	// 255
	div.approx.ftz.f32 	%f6, %f4, %f5;
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	// Each component: normalize, convert to f16, move the bits into a u32 register.
	mul.ftz.f32 	%f7, %f1, %f6;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f7;
	mov.b32		%r30, %b1; }
	mul.ftz.f32 	%f8, %f2, %f6;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f8;
	mov.b32		%r31, %b1; }
	mul.ftz.f32 	%f9, %f3, %f6;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r32, %b1; }
	// Fourth component is the constant 1.0.
	mov.f32 	%f10, 0f3f800000;    	// 1
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r33, %b1; }
	// Output order: byte 3, byte 2, byte 1, 1.0.
	st.global.v4.u16 	[%rd8+0], {%r30,%r31,%r32,%r33};
	.loc	22	162	0
	bra.uni 	$Lt_54_194818;
$Lt_54_195074:
	.loc	19	126	0
	// --- f32 path: 16 bytes per destination pixel ---
	// %f13 = approximate 1/255 (recomputed in this branch).
	mov.f32 	%f11, 0f3f800000;    	// 1
	mov.f32 	%f12, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f13, %f11, %f12;
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	mul.ftz.f32 	%f14, %f1, %f13;
	mul.ftz.f32 	%f15, %f2, %f13;
	mul.ftz.f32 	%f16, %f3, %f13;
	mov.f32 	%f17, 0f3f800000;    	// 1
	// Output order: byte 3, byte 2, byte 1, 1.0.
	st.global.v4.f32 	[%rd10+0], {%f14,%f15,%f16,%f17};
$Lt_54_194818:
$Lt_54_194306:
	// Common exit for both store paths and for out-of-bounds threads.
	.loc	22	162	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_XRGB_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Pixel-format conversion kernel: VUYA_4444_8u -> BGRA_4444_32f (names taken from the entry name).
	// Each thread handles the pixel at x = ctaid.x*ntid.x + tid.x, y = ctaid.y*ntid.y + tid.y.
	// Threads for which inWidth > x and inHeight > y (signed compares) do not both hold
	// exit without touching memory.
	// Bytes 0..2 of the source pixel have the kYCbCrOffset constants subtracted and are then
	// multiplied by the 3x3 float matrix k601YCbCr_To_RGB32f (read from global memory):
	//   v = ( byte2 - kYCbCrOffset[+0], byte1 - kYCbCrOffset[+4], byte0 - kYCbCrOffset[+8] )
	//   out0 = matrix row at +24..+32 . v
	//   out1 = matrix row at +12..+20 . v
	//   out2 = matrix row at +0..+8   . v
	//   out3 = byte3 * ~1/255
	// The color bytes are not scaled by 1/255 in this kernel; any normalization would have
	// to come from the matrix/offset constants, whose values are not visible here.
	//   inDestDeviceFormat == 0 : output pixel is 4 x f16 (8 bytes)
	//   inDestDeviceFormat != 0 : output pixel is 4 x f32 (16 bytes)
	// Both pitches are applied in pixel units: (x + pitch*y) is scaled by the pixel size afterwards.
	// inSrcDeviceFormat is declared but never read by this kernel.
	.entry PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<68>;
	.reg .pred %p<4>;
	.loc	22	163	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = ctaid.x*ntid.x + tid.x ; %r10 = y = ctaid.y*ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set.gt yields an all-ones mask when true, neg turns it into 1.
	// %r17 = (inWidth > x) & (inHeight > y); zero means this thread has no pixel.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_55_195074;
	.loc	19	115	0
	// Source address: inSrc + (x + inSrcPitch*y) * 4 bytes.
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	// %rd2 is written here but not read again in this kernel.
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 4;
	add.u64 	%rd4, %rd1, %rd3;
	// Load all four bytes of the pixel (presumably V,U,Y,A in memory order, per the kernel name).
	ld.global.v4.u8 	{%r22,%r23,%r24,%r25}, [%rd4+0];
	.loc	22	163	0
	// %f1 = byte 1, %f2 = byte 2, %f3 = byte 0, %f4 = byte 3, as float (0..255).
	cvt.rn.f32.u32 	%f1, %r23;
	cvt.rn.f32.u32 	%f2, %r24;
	cvt.rn.f32.u32 	%f3, %r22;
	cvt.rn.f32.u32 	%f4, %r25;
	// %rd5 = destination pixel index = x + inDestPitch*y.
	ld.param.s32 	%r26, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r27, %r26, %r10;
	add.s32 	%r28, %r8, %r27;
	cvt.s64.s32 	%rd5, %r28;
	// Matrix row at byte offsets +24/+28/+32 (used for the first output component)
	// is loaded before the branch; the other two rows are loaded inside each path.
	ld.global.f32 	%f5, [k601YCbCr_To_RGB32f+32];
	ld.global.f32 	%f6, [k601YCbCr_To_RGB32f+24];
	ld.global.f32 	%f7, [k601YCbCr_To_RGB32f+28];
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// Per-channel offsets from constant memory: %f9 = [+0], %f10 = [+4], %f8 = [+8].
	ld.const.f32 	%f8, [kYCbCrOffset+8];
	ld.const.f32 	%f9, [kYCbCrOffset+0];
	ld.const.f32 	%f10, [kYCbCrOffset+4];
	// Non-zero inDestDeviceFormat selects the f32 store path below.
	ld.param.s32 	%r29, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r30, 0;
	setp.ne.s32 	%p2, %r29, %r30;
	@%p2 bra 	$Lt_55_195842;
	.loc	19	126	0
	// --- f16 path: 8 bytes per destination pixel ---
	// %f13 = 255/255 (approximate divide), i.e. the scale applied to the offsets.
	mov.f32 	%f11, 0f437f0000;    	// 255
	mov.f32 	%f12, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f13, %f11, %f12;
	// %f15 = byte1 - scale*offset[+4]; %f17 = byte2 - scale*offset[+0]; %f19 = byte0 - scale*offset[+8].
	mul.ftz.f32 	%f14, %f13, %f10;
	sub.ftz.f32 	%f15, %f1, %f14;
	mul.ftz.f32 	%f16, %f13, %f9;
	sub.ftz.f32 	%f17, %f2, %f16;
	mul.ftz.f32 	%f18, %f13, %f8;
	sub.ftz.f32 	%f19, %f3, %f18;
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	// out0 = M[+24]*%f17 + M[+28]*%f15 + M[+32]*%f19
	mul.ftz.f32 	%f20, %f15, %f7;
	fma.rn.ftz.f32 	%f21, %f6, %f17, %f20;
	fma.rn.ftz.f32 	%f22, %f5, %f19, %f21;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f22;
	mov.b32		%r31, %b1; }
	// out1 = M[+12]*%f17 + M[+16]*%f15 + M[+20]*%f19
	ld.global.f32 	%f23, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f24, %f23, %f15;
	ld.global.f32 	%f25, [k601YCbCr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f26, %f25, %f17, %f24;
	ld.global.f32 	%f27, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f28, %f27, %f19, %f26;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f28;
	mov.b32		%r32, %b1; }
	// out2 = M[+0]*%f17 + M[+4]*%f15 + M[+8]*%f19
	ld.global.f32 	%f29, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f30, %f29, %f15;
	ld.global.f32 	%f31, [k601YCbCr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f32, %f31, %f17, %f30;
	ld.global.f32 	%f33, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f34, %f33, %f19, %f32;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f34;
	mov.b32		%r33, %b1; }
	// out3 = byte3 * ~1/255
	mov.f32 	%f35, 0f3f800000;    	// 1
	mov.f32 	%f36, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f37, %f35, %f36;
	mul.ftz.f32 	%f38, %f4, %f37;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f38;
	mov.b32		%r34, %b1; }
	st.global.v4.u16 	[%rd8+0], {%r31,%r32,%r33,%r34};
	.loc	22	163	0
	bra.uni 	$Lt_55_195586;
$Lt_55_195842:
	.loc	19	126	0
	// --- f32 path: 16 bytes per destination pixel; same arithmetic as the f16 path ---
	// %f41 = 255/255 (approximate divide), i.e. the scale applied to the offsets.
	mov.f32 	%f39, 0f437f0000;    	// 255
	mov.f32 	%f40, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f41, %f39, %f40;
	// %f43 = byte1 - scale*offset[+4]; %f45 = byte2 - scale*offset[+0]; %f47 = byte0 - scale*offset[+8].
	mul.ftz.f32 	%f42, %f41, %f10;
	sub.ftz.f32 	%f43, %f1, %f42;
	mul.ftz.f32 	%f44, %f41, %f9;
	sub.ftz.f32 	%f45, %f2, %f44;
	mul.ftz.f32 	%f46, %f41, %f8;
	sub.ftz.f32 	%f47, %f3, %f46;
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	// out0 = M[+24]*%f45 + M[+28]*%f43 + M[+32]*%f47
	mul.ftz.f32 	%f48, %f43, %f7;
	fma.rn.ftz.f32 	%f49, %f6, %f45, %f48;
	fma.rn.ftz.f32 	%f50, %f5, %f47, %f49;
	// out1 = M[+12]*%f45 + M[+16]*%f43 + M[+20]*%f47
	ld.global.f32 	%f51, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f52, %f51, %f43;
	ld.global.f32 	%f53, [k601YCbCr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f54, %f53, %f45, %f52;
	ld.global.f32 	%f55, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f56, %f55, %f47, %f54;
	// out2 = M[+0]*%f45 + M[+4]*%f43 + M[+8]*%f47
	ld.global.f32 	%f57, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f58, %f57, %f43;
	ld.global.f32 	%f59, [k601YCbCr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f60, %f59, %f45, %f58;
	ld.global.f32 	%f61, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f62, %f61, %f47, %f60;
	// out3 = byte3 * ~1/255
	mov.f32 	%f63, 0f3f800000;    	// 1
	mov.f32 	%f64, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f65, %f63, %f64;
	mul.ftz.f32 	%f66, %f4, %f65;
	st.global.v4.f32 	[%rd10+0], {%f50,%f56,%f62,%f66};
$Lt_55_195586:
$Lt_55_195074:
	// Common exit for both store paths and for out-of-bounds threads.
	.loc	22	163	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Pixel-format conversion kernel: VUYP_4444_8u -> BGRA_4444_32f (names taken from the entry name).
	// Each thread handles the pixel at x = ctaid.x*ntid.x + tid.x, y = ctaid.y*ntid.y + tid.y.
	// Threads for which inWidth > x and inHeight > y (signed compares) do not both hold
	// exit without touching memory.
	// Byte 3 of the source pixel is treated as alpha (alpha = byte3 * ~1/255):
	//   - if alpha - 8e-6 <= 0, all four output components are 0;
	//   - otherwise bytes 0..2 have the kYCbCrOffset constants subtracted, are multiplied by
	//     the 3x3 float matrix k601YCbCr_To_RGB32f, and each result is divided by alpha
	//     (presumably undoing premultiplied alpha, per the 'P' in the kernel name).
	//   v = ( byte2 - kYCbCrOffset[+0], byte1 - kYCbCrOffset[+4], byte0 - kYCbCrOffset[+8] )
	//   out0 = (matrix row at +24..+32 . v) / alpha
	//   out1 = (matrix row at +12..+20 . v) / alpha
	//   out2 = (matrix row at +0..+8   . v) / alpha
	//   out3 = alpha
	//   inDestDeviceFormat == 0 : output pixel is 4 x f16 (8 bytes)
	//   inDestDeviceFormat != 0 : output pixel is 4 x f32 (16 bytes)
	// Both pitches are applied in pixel units: (x + pitch*y) is scaled by the pixel size afterwards.
	// inSrcDeviceFormat is declared but never read by this kernel.
	.entry PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<49>;
	.reg .pred %p<5>;
	.loc	22	164	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = ctaid.x*ntid.x + tid.x ; %r10 = y = ctaid.y*ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set.gt yields an all-ones mask when true, neg turns it into 1.
	// %r17 = (inWidth > x) & (inHeight > y); zero means this thread has no pixel.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_56_195330;
	.loc	19	115	0
	// Source address: inSrc + (x + inSrcPitch*y) * 4 bytes.
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	// %rd2 is written here but not read again in this kernel.
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 4;
	add.u64 	%rd4, %rd1, %rd3;
	// Load all four bytes of the pixel (presumably V,U,Y,P in memory order, per the kernel name).
	ld.global.v4.u8 	{%r22,%r23,%r24,%r25}, [%rd4+0];
	.loc	20	433	0
	// %f4 = approximate 1/255; %f5 = alpha = byte3 * %f4.
	cvt.rn.f32.u32 	%f1, %r25;
	mov.f32 	%f2, 0f3f800000;     	// 1
	mov.f32 	%f3, 0f437f0000;     	// 255
	div.approx.ftz.f32 	%f4, %f2, %f3;
	mul.ftz.f32 	%f5, %f1, %f4;
	// %f6 is the alpha value that gets stored; it defaults to the computed alpha.
	mov.f32 	%f6, %f5;
	.loc	20	435	0
	// Near-zero alpha test: %p2 = (alpha - 8e-6 <= 0).
	mov.f32 	%f7, 0fb70637bd;     	// -8e-006
	add.ftz.f32 	%f8, %f5, %f7;
	mov.f32 	%f9, 0f00000000;     	// 0
	setp.le.ftz.f32 	%p2, %f8, %f9;
	@!%p2 bra 	$LDWendi__Z19MaxUnsignedBitValuei_233_7;
	// Alpha is (effectively) zero: emit an all-zero pixel and skip the conversion.
	mov.f32 	%f10, 0f00000000;    	// 0
	mov.f32 	%f11, 0f00000000;    	// 0
	mov.f32 	%f12, 0f00000000;    	// 0
	mov.f32 	%f6, 0f00000000;     	// 0
	bra.uni 	$Lt_56_195842;
$LDWendi__Z19MaxUnsignedBitValuei_233_7:
	.loc	20	447	0
	// %f16 = 255/255 (approximate divide), i.e. the scale applied to the offsets.
	cvt.rn.f32.u32 	%f13, %r23;
	mov.f32 	%f14, 0f437f0000;    	// 255
	mov.f32 	%f15, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f16, %f14, %f15;
	cvt.rn.f32.u32 	%f17, %r24;
	cvt.rn.f32.u32 	%f18, %r22;
	// %f21 = byte1 - scale*offset[+4]; %f24 = byte2 - scale*offset[+0]; %f27 = byte0 - scale*offset[+8].
	ld.const.f32 	%f19, [kYCbCrOffset+4];
	mul.ftz.f32 	%f20, %f16, %f19;
	sub.ftz.f32 	%f21, %f13, %f20;
	ld.const.f32 	%f22, [kYCbCrOffset+0];
	mul.ftz.f32 	%f23, %f16, %f22;
	sub.ftz.f32 	%f24, %f17, %f23;
	ld.const.f32 	%f25, [kYCbCrOffset+8];
	mul.ftz.f32 	%f26, %f16, %f25;
	sub.ftz.f32 	%f27, %f18, %f26;
	// %f29 = approximate 1/alpha.
	mov.f32 	%f28, 0f3f800000;    	// 1
	div.approx.ftz.f32 	%f29, %f28, %f5;
	// %f12 = (M[+0]*%f24 + M[+4]*%f21 + M[+8]*%f27) / alpha   (third output component)
	ld.global.f32 	%f30, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f31, %f30, %f21;
	ld.global.f32 	%f32, [k601YCbCr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f33, %f32, %f24, %f31;
	ld.global.f32 	%f34, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f35, %f34, %f27, %f33;
	mul.ftz.f32 	%f12, %f29, %f35;
	.loc	20	448	0
	// %f11 = (M[+12]*%f24 + M[+16]*%f21 + M[+20]*%f27) / alpha   (second output component)
	ld.global.f32 	%f36, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f37, %f36, %f21;
	ld.global.f32 	%f38, [k601YCbCr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f39, %f38, %f24, %f37;
	ld.global.f32 	%f40, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f41, %f40, %f27, %f39;
	mul.ftz.f32 	%f11, %f29, %f41;
	.loc	20	449	0
	// %f10 = (M[+24]*%f24 + M[+28]*%f21 + M[+32]*%f27) / alpha   (first output component)
	ld.global.f32 	%f42, [k601YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f43, %f42, %f21;
	ld.global.f32 	%f44, [k601YCbCr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f45, %f44, %f24, %f43;
	ld.global.f32 	%f46, [k601YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f47, %f46, %f27, %f45;
	mul.ftz.f32 	%f10, %f29, %f47;
$Lt_56_195842:
	.loc	22	164	0
	// %rd5 = destination pixel index = x + inDestPitch*y.
	ld.param.s32 	%r26, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r27, %r26, %r10;
	add.s32 	%r28, %r8, %r27;
	cvt.s64.s32 	%rd5, %r28;
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// Non-zero inDestDeviceFormat selects the f32 store path below.
	ld.param.s32 	%r29, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r30, 0;
	setp.ne.s32 	%p3, %r29, %r30;
	@%p3 bra 	$Lt_56_196610;
	.loc	19	126	0
	// --- f16 path: 8 bytes per destination pixel ---
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	// Convert each component to f16 and move the bits into a u32 register.
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r31, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r32, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f12;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f6;
	mov.b32		%r34, %b1; }
	// Output order: %f10, %f11, %f12, alpha.
	st.global.v4.u16 	[%rd8+0], {%r31,%r32,%r33,%r34};
	.loc	22	164	0
	bra.uni 	$Lt_56_196354;
$Lt_56_196610:
	.loc	19	126	0
	// --- f32 path: 16 bytes per destination pixel, same component order ---
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	st.global.v4.f32 	[%rd10+0], {%f10,%f11,%f12,%f6};
$Lt_56_196354:
$Lt_56_195330:
	// Common exit for both store paths and for out-of-bounds threads.
	.loc	22	164	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// One thread converts one pixel: it reads a 4-byte source pixel (the fourth byte is
	// discarded), subtracts kYCbCrOffset from the other three bytes, multiplies by the nine
	// f32 entries of k601YCbCr_To_RGB32f, and stores four channels whose last one is the
	// constant 1.0. inDestDeviceFormat == 0 selects an 8-byte (4 x f16) store, any other
	// value a 16-byte (4 x f32) store. inSrcDeviceFormat is declared but never read here.
	.entry PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<35>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<61>;
	.reg .pred %p<4>;
	.loc	22	165	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: continue only when inWidth > x AND inHeight > y (signed compares).
	// set.gt produces all-ones for true; neg turns that into 1 so the two tests can be ANDed.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_57_194818;
	.loc	19	115	0
	// Source address = inSrc + (inSrcPitch * y + x) * 4, so the pitch is counted in
	// 4-byte pixels. The index is formed in 32-bit arithmetic before being widened.
	// NOTE(review): a very large inSrcPitch * y would wrap in 32 bits - confirm size limits.
	// %rd2 is written below but never read afterwards.
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 4;
	add.u64 	%rd4, %rd1, %rd3;
	// Load bytes 0..2 of the pixel; byte 3 goes to the sink operand "_".
	// Presumably byte0 = V, byte1 = U, byte2 = Y as the kernel name suggests - TODO confirm.
	ld.global.v4.u8 	{%r22,%r23,%r24,_}, [%rd4+0];
	.loc	22	165	0
	// %f1 = byte1, %f2 = byte2, %f3 = byte0 as floats.
	cvt.rn.f32.u32 	%f1, %r23;
	cvt.rn.f32.u32 	%f2, %r24;
	cvt.rn.f32.u32 	%f3, %r22;
	// %rd5 = destination pixel index = inDestPitch * y + x (32-bit, then sign-extended).
	ld.param.s32 	%r25, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r26, %r25, %r10;
	add.s32 	%r27, %r8, %r26;
	cvt.s64.s32 	%rd5, %r27;
	// Table entries at byte offsets 24..32 (used for the first stored channel) and the
	// three kYCbCrOffset values are fetched once here and shared by both branches.
	ld.global.f32 	%f4, [k601YCbCr_To_RGB32f+32];
	ld.global.f32 	%f5, [k601YCbCr_To_RGB32f+24];
	ld.global.f32 	%f6, [k601YCbCr_To_RGB32f+28];
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	ld.const.f32 	%f7, [kYCbCrOffset+8];
	ld.const.f32 	%f8, [kYCbCrOffset+0];
	ld.const.f32 	%f9, [kYCbCrOffset+4];
	// inDestDeviceFormat != 0 -> f32 store path; == 0 -> f16 store path (fall through).
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r29, 0;
	setp.ne.s32 	%p2, %r28, %r29;
	@%p2 bra 	$Lt_57_195586;
	.loc	19	126	0
	// ---- f16 path ----
	// %f12 = 255 / 255 (approximate divide), the factor applied to each offset.
	mov.f32 	%f10, 0f437f0000;    	// 255
	mov.f32 	%f11, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f12, %f10, %f11;
	// %f14 = byte1 - f12*offset[+4]; %f16 = byte2 - f12*offset[+0]; %f18 = byte0 - f12*offset[+8]
	mul.ftz.f32 	%f13, %f12, %f9;
	sub.ftz.f32 	%f14, %f1, %f13;
	mul.ftz.f32 	%f15, %f12, %f8;
	sub.ftz.f32 	%f16, %f2, %f15;
	mul.ftz.f32 	%f17, %f12, %f7;
	sub.ftz.f32 	%f18, %f3, %f17;
	// Destination address for 8-byte pixels.
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	// Channel 0 = T[+24]*f16 + T[+28]*f14 + T[+32]*f18, then narrowed to f16 bits in %r30.
	mul.ftz.f32 	%f19, %f14, %f6;
	fma.rn.ftz.f32 	%f20, %f5, %f16, %f19;
	fma.rn.ftz.f32 	%f21, %f4, %f18, %f20;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f21;
	mov.b32		%r30, %b1; }
	// Channel 1 = T[+12]*f16 + T[+16]*f14 + T[+20]*f18.
	ld.global.f32 	%f22, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f23, %f22, %f14;
	ld.global.f32 	%f24, [k601YCbCr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f25, %f24, %f16, %f23;
	ld.global.f32 	%f26, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f27, %f26, %f18, %f25;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f27;
	mov.b32		%r31, %b1; }
	// Channel 2 = T[+0]*f16 + T[+4]*f14 + T[+8]*f18.
	ld.global.f32 	%f28, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f29, %f28, %f14;
	ld.global.f32 	%f30, [k601YCbCr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f31, %f30, %f16, %f29;
	ld.global.f32 	%f32, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f33, %f32, %f18, %f31;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f33;
	mov.b32		%r32, %b1; }
	// Channel 3 is the constant 1.0 (the source's fourth byte is ignored).
	mov.f32 	%f34, 0f3f800000;    	// 1
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f34;
	mov.b32		%r33, %b1; }
	st.global.v4.u16 	[%rd8+0], {%r30,%r31,%r32,%r33};
	.loc	22	165	0
	bra.uni 	$Lt_57_195330;
$Lt_57_195586:
	.loc	19	126	0
	// ---- f32 path: same arithmetic as above, stored as 16-byte pixels without narrowing ----
	mov.f32 	%f35, 0f437f0000;    	// 255
	mov.f32 	%f36, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f37, %f35, %f36;
	mul.ftz.f32 	%f38, %f37, %f9;
	sub.ftz.f32 	%f39, %f1, %f38;
	mul.ftz.f32 	%f40, %f37, %f8;
	sub.ftz.f32 	%f41, %f2, %f40;
	mul.ftz.f32 	%f42, %f37, %f7;
	sub.ftz.f32 	%f43, %f3, %f42;
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	// Channel 0 (table offsets 24..32).
	mul.ftz.f32 	%f44, %f39, %f6;
	fma.rn.ftz.f32 	%f45, %f5, %f41, %f44;
	fma.rn.ftz.f32 	%f46, %f4, %f43, %f45;
	// Channel 1 (table offsets 12..20).
	ld.global.f32 	%f47, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f48, %f47, %f39;
	ld.global.f32 	%f49, [k601YCbCr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f50, %f49, %f41, %f48;
	ld.global.f32 	%f51, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f52, %f51, %f43, %f50;
	// Channel 2 (table offsets 0..8).
	ld.global.f32 	%f53, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f54, %f53, %f39;
	ld.global.f32 	%f55, [k601YCbCr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f56, %f55, %f41, %f54;
	ld.global.f32 	%f57, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f58, %f57, %f43, %f56;
	// Channel 3 = 1.0.
	mov.f32 	%f59, 0f3f800000;    	// 1
	st.global.v4.f32 	[%rd10+0], {%f46,%f52,%f58,%f59};
$Lt_57_195330:
$Lt_57_194818:
	.loc	22	165	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// One thread converts one pixel: it reads a 4-byte source pixel, subtracts kYCbCrOffset
	// from bytes 0..2, multiplies by the nine f32 entries of k709YCbCr_To_RGB32f, and scales
	// byte 3 by 1/255 to form the fourth channel. inDestDeviceFormat == 0 selects an 8-byte
	// (4 x f16) store, any other value a 16-byte (4 x f32) store. inSrcDeviceFormat is
	// declared but never read here.
	.entry PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<68>;
	.reg .pred %p<4>;
	.loc	22	166	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: continue only when inWidth > x AND inHeight > y (signed compares).
	// set.gt produces all-ones for true; neg turns that into 1 so the two tests can be ANDed.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_58_195330;
	.loc	19	115	0
	// Source address = inSrc + (inSrcPitch * y + x) * 4, so the pitch is counted in
	// 4-byte pixels. The index is formed in 32-bit arithmetic before being widened.
	// NOTE(review): a very large inSrcPitch * y would wrap in 32 bits - confirm size limits.
	// %rd2 is written below but never read afterwards.
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 4;
	add.u64 	%rd4, %rd1, %rd3;
	// Load all four bytes of the pixel.
	// Presumably byte0 = V, byte1 = U, byte2 = Y, byte3 = A per the kernel name - TODO confirm.
	ld.global.v4.u8 	{%r22,%r23,%r24,%r25}, [%rd4+0];
	.loc	22	166	0
	// %f1 = byte1, %f2 = byte2, %f3 = byte0, %f4 = byte3 as floats.
	cvt.rn.f32.u32 	%f1, %r23;
	cvt.rn.f32.u32 	%f2, %r24;
	cvt.rn.f32.u32 	%f3, %r22;
	cvt.rn.f32.u32 	%f4, %r25;
	// %rd5 = destination pixel index = inDestPitch * y + x (32-bit, then sign-extended).
	ld.param.s32 	%r26, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r27, %r26, %r10;
	add.s32 	%r28, %r8, %r27;
	cvt.s64.s32 	%rd5, %r28;
	// Table entries at byte offsets 24..32 (used for the first stored channel) and the
	// three kYCbCrOffset values are fetched once here and shared by both branches.
	ld.global.f32 	%f5, [k709YCbCr_To_RGB32f+32];
	ld.global.f32 	%f6, [k709YCbCr_To_RGB32f+24];
	ld.global.f32 	%f7, [k709YCbCr_To_RGB32f+28];
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	ld.const.f32 	%f8, [kYCbCrOffset+8];
	ld.const.f32 	%f9, [kYCbCrOffset+0];
	ld.const.f32 	%f10, [kYCbCrOffset+4];
	// inDestDeviceFormat != 0 -> f32 store path; == 0 -> f16 store path (fall through).
	ld.param.s32 	%r29, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r30, 0;
	setp.ne.s32 	%p2, %r29, %r30;
	@%p2 bra 	$Lt_58_196098;
	.loc	19	126	0
	// ---- f16 path ----
	// %f13 = 255 / 255 (approximate divide), the factor applied to each offset.
	mov.f32 	%f11, 0f437f0000;    	// 255
	mov.f32 	%f12, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f13, %f11, %f12;
	// %f15 = byte1 - f13*offset[+4]; %f17 = byte2 - f13*offset[+0]; %f19 = byte0 - f13*offset[+8]
	mul.ftz.f32 	%f14, %f13, %f10;
	sub.ftz.f32 	%f15, %f1, %f14;
	mul.ftz.f32 	%f16, %f13, %f9;
	sub.ftz.f32 	%f17, %f2, %f16;
	mul.ftz.f32 	%f18, %f13, %f8;
	sub.ftz.f32 	%f19, %f3, %f18;
	// Destination address for 8-byte pixels.
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	// Channel 0 = T[+24]*f17 + T[+28]*f15 + T[+32]*f19, then narrowed to f16 bits in %r31.
	mul.ftz.f32 	%f20, %f15, %f7;
	fma.rn.ftz.f32 	%f21, %f6, %f17, %f20;
	fma.rn.ftz.f32 	%f22, %f5, %f19, %f21;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f22;
	mov.b32		%r31, %b1; }
	// Channel 1 = T[+12]*f17 + T[+16]*f15 + T[+20]*f19.
	ld.global.f32 	%f23, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f24, %f23, %f15;
	ld.global.f32 	%f25, [k709YCbCr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f26, %f25, %f17, %f24;
	ld.global.f32 	%f27, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f28, %f27, %f19, %f26;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f28;
	mov.b32		%r32, %b1; }
	// Channel 2 = T[+0]*f17 + T[+4]*f15 + T[+8]*f19.
	ld.global.f32 	%f29, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f30, %f29, %f15;
	ld.global.f32 	%f31, [k709YCbCr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f32, %f31, %f17, %f30;
	ld.global.f32 	%f33, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f34, %f33, %f19, %f32;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f34;
	mov.b32		%r33, %b1; }
	// Channel 3 = byte3 * (1 / 255), with the reciprocal taken by approximate divide.
	mov.f32 	%f35, 0f3f800000;    	// 1
	mov.f32 	%f36, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f37, %f35, %f36;
	mul.ftz.f32 	%f38, %f4, %f37;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f38;
	mov.b32		%r34, %b1; }
	st.global.v4.u16 	[%rd8+0], {%r31,%r32,%r33,%r34};
	.loc	22	166	0
	bra.uni 	$Lt_58_195842;
$Lt_58_196098:
	.loc	19	126	0
	// ---- f32 path: same arithmetic as above, stored as 16-byte pixels without narrowing ----
	mov.f32 	%f39, 0f437f0000;    	// 255
	mov.f32 	%f40, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f41, %f39, %f40;
	mul.ftz.f32 	%f42, %f41, %f10;
	sub.ftz.f32 	%f43, %f1, %f42;
	mul.ftz.f32 	%f44, %f41, %f9;
	sub.ftz.f32 	%f45, %f2, %f44;
	mul.ftz.f32 	%f46, %f41, %f8;
	sub.ftz.f32 	%f47, %f3, %f46;
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	// Channel 0 (table offsets 24..32).
	mul.ftz.f32 	%f48, %f43, %f7;
	fma.rn.ftz.f32 	%f49, %f6, %f45, %f48;
	fma.rn.ftz.f32 	%f50, %f5, %f47, %f49;
	// Channel 1 (table offsets 12..20).
	ld.global.f32 	%f51, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f52, %f51, %f43;
	ld.global.f32 	%f53, [k709YCbCr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f54, %f53, %f45, %f52;
	ld.global.f32 	%f55, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f56, %f55, %f47, %f54;
	// Channel 2 (table offsets 0..8).
	ld.global.f32 	%f57, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f58, %f57, %f43;
	ld.global.f32 	%f59, [k709YCbCr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f60, %f59, %f45, %f58;
	ld.global.f32 	%f61, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f62, %f61, %f47, %f60;
	// Channel 3 = byte3 * (1 / 255).
	mov.f32 	%f63, 0f3f800000;    	// 1
	mov.f32 	%f64, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f65, %f63, %f64;
	mul.ftz.f32 	%f66, %f4, %f65;
	st.global.v4.f32 	[%rd10+0], {%f50,%f56,%f62,%f66};
$Lt_58_195842:
$Lt_58_195330:
	.loc	22	166	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// One thread converts one pixel. The fourth source byte times 1/255 becomes the fourth
	// output channel (%f6). When that value minus 8e-6 is <= 0, all four outputs are zero.
	// Otherwise bytes 0..2 have kYCbCrOffset subtracted, are multiplied by the nine f32
	// entries of k709YCbCr_To_RGB32f, and each result is multiplied by 1 / (fourth channel).
	// inDestDeviceFormat == 0 selects an 8-byte (4 x f16) store, any other value a 16-byte
	// (4 x f32) store. inSrcDeviceFormat is declared but never read here.
	.entry PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<49>;
	.reg .pred %p<5>;
	.loc	22	167	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: continue only when inWidth > x AND inHeight > y (signed compares).
	// set.gt produces all-ones for true; neg turns that into 1 so the two tests can be ANDed.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_59_195586;
	.loc	19	115	0
	// Source address = inSrc + (inSrcPitch * y + x) * 4, so the pitch is counted in
	// 4-byte pixels. The index is formed in 32-bit arithmetic before being widened.
	// NOTE(review): a very large inSrcPitch * y would wrap in 32 bits - confirm size limits.
	// %rd2 is written below but never read afterwards.
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 4;
	add.u64 	%rd4, %rd1, %rd3;
	// Load all four bytes of the pixel.
	// Presumably byte0 = V, byte1 = U, byte2 = Y, byte3 = premultiplied alpha - TODO confirm.
	ld.global.v4.u8 	{%r22,%r23,%r24,%r25}, [%rd4+0];
	.loc	20	433	0
	// %f5 = byte3 * (1 / 255); %f6 keeps a copy that is stored as the fourth channel.
	cvt.rn.f32.u32 	%f1, %r25;
	mov.f32 	%f2, 0f3f800000;     	// 1
	mov.f32 	%f3, 0f437f0000;     	// 255
	div.approx.ftz.f32 	%f4, %f2, %f3;
	mul.ftz.f32 	%f5, %f1, %f4;
	mov.f32 	%f6, %f5;
	.loc	20	435	0
	// If %f5 - 8e-6 <= 0, fall through and zero every output; this also avoids the
	// 1 / %f5 division below. Otherwise jump to the conversion path.
	mov.f32 	%f7, 0fb70637bd;     	// -8e-006
	add.ftz.f32 	%f8, %f5, %f7;
	mov.f32 	%f9, 0f00000000;     	// 0
	setp.le.ftz.f32 	%p2, %f8, %f9;
	@!%p2 bra 	$LDWendi__Z19MaxUnsignedBitValuei_236_7;
	mov.f32 	%f10, 0f00000000;    	// 0
	mov.f32 	%f11, 0f00000000;    	// 0
	mov.f32 	%f12, 0f00000000;    	// 0
	mov.f32 	%f6, 0f00000000;     	// 0
	bra.uni 	$Lt_59_196098;
$LDWendi__Z19MaxUnsignedBitValuei_236_7:
	.loc	20	447	0
	// Conversion path. %f16 = 255 / 255 (approximate divide) scales each offset:
	// %f21 = byte1 - f16*offset[+4]; %f24 = byte2 - f16*offset[+0]; %f27 = byte0 - f16*offset[+8]
	cvt.rn.f32.u32 	%f13, %r23;
	mov.f32 	%f14, 0f437f0000;    	// 255
	mov.f32 	%f15, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f16, %f14, %f15;
	cvt.rn.f32.u32 	%f17, %r24;
	cvt.rn.f32.u32 	%f18, %r22;
	ld.const.f32 	%f19, [kYCbCrOffset+4];
	mul.ftz.f32 	%f20, %f16, %f19;
	sub.ftz.f32 	%f21, %f13, %f20;
	ld.const.f32 	%f22, [kYCbCrOffset+0];
	mul.ftz.f32 	%f23, %f16, %f22;
	sub.ftz.f32 	%f24, %f17, %f23;
	ld.const.f32 	%f25, [kYCbCrOffset+8];
	mul.ftz.f32 	%f26, %f16, %f25;
	sub.ftz.f32 	%f27, %f18, %f26;
	// %f29 = 1 / %f5, the factor applied to every colour result below.
	mov.f32 	%f28, 0f3f800000;    	// 1
	div.approx.ftz.f32 	%f29, %f28, %f5;
	// %f12 (third stored channel) = f29 * (T[+0]*f24 + T[+4]*f21 + T[+8]*f27).
	ld.global.f32 	%f30, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f31, %f30, %f21;
	ld.global.f32 	%f32, [k709YCbCr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f33, %f32, %f24, %f31;
	ld.global.f32 	%f34, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f35, %f34, %f27, %f33;
	mul.ftz.f32 	%f12, %f29, %f35;
	.loc	20	448	0
	// %f11 (second stored channel) = f29 * (T[+12]*f24 + T[+16]*f21 + T[+20]*f27).
	ld.global.f32 	%f36, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f37, %f36, %f21;
	ld.global.f32 	%f38, [k709YCbCr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f39, %f38, %f24, %f37;
	ld.global.f32 	%f40, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f41, %f40, %f27, %f39;
	mul.ftz.f32 	%f11, %f29, %f41;
	.loc	20	449	0
	// %f10 (first stored channel) = f29 * (T[+24]*f24 + T[+28]*f21 + T[+32]*f27).
	ld.global.f32 	%f42, [k709YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f43, %f42, %f21;
	ld.global.f32 	%f44, [k709YCbCr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f45, %f44, %f24, %f43;
	ld.global.f32 	%f46, [k709YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f47, %f46, %f27, %f45;
	mul.ftz.f32 	%f10, %f29, %f47;
$Lt_59_196098:
	.loc	22	167	0
	// Both paths join here with the four output channels in %f10, %f11, %f12, %f6.
	// %rd5 = destination pixel index = inDestPitch * y + x (32-bit, then sign-extended).
	ld.param.s32 	%r26, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r27, %r26, %r10;
	add.s32 	%r28, %r8, %r27;
	cvt.s64.s32 	%rd5, %r28;
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 -> f32 store path; == 0 -> f16 store path (fall through).
	ld.param.s32 	%r29, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r30, 0;
	setp.ne.s32 	%p3, %r29, %r30;
	@%p3 bra 	$Lt_59_196866;
	.loc	19	126	0
	// f16 path: 8 bytes per destination pixel; each channel is narrowed with round-to-nearest.
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r31, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r32, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f12;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f6;
	mov.b32		%r34, %b1; }
	st.global.v4.u16 	[%rd8+0], {%r31,%r32,%r33,%r34};
	.loc	22	167	0
	bra.uni 	$Lt_59_196610;
$Lt_59_196866:
	.loc	19	126	0
	// f32 path: 16 bytes per destination pixel, channels stored unmodified.
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	st.global.v4.f32 	[%rd10+0], {%f10,%f11,%f12,%f6};
$Lt_59_196610:
$Lt_59_195586:
	.loc	22	167	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_VUYP_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// One thread converts one pixel: it reads a 4-byte source pixel (the fourth byte is
	// discarded), subtracts kYCbCrOffset from the other three bytes, multiplies by the nine
	// f32 entries of k709YCbCr_To_RGB32f, and stores four channels whose last one is the
	// constant 1.0. inDestDeviceFormat == 0 selects an 8-byte (4 x f16) store, any other
	// value a 16-byte (4 x f32) store. inSrcDeviceFormat is declared but never read here.
	.entry PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<35>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<61>;
	.reg .pred %p<4>;
	.loc	22	168	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: continue only when inWidth > x AND inHeight > y (signed compares).
	// set.gt produces all-ones for true; neg turns that into 1 so the two tests can be ANDed.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_60_195074;
	.loc	19	115	0
	// Source address = inSrc + (inSrcPitch * y + x) * 4, so the pitch is counted in
	// 4-byte pixels. The index is formed in 32-bit arithmetic before being widened.
	// NOTE(review): a very large inSrcPitch * y would wrap in 32 bits - confirm size limits.
	// %rd2 is written below but never read afterwards.
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 4;
	add.u64 	%rd4, %rd1, %rd3;
	// Load bytes 0..2 of the pixel; byte 3 goes to the sink operand "_".
	// Presumably byte0 = V, byte1 = U, byte2 = Y as the kernel name suggests - TODO confirm.
	ld.global.v4.u8 	{%r22,%r23,%r24,_}, [%rd4+0];
	.loc	22	168	0
	// %f1 = byte1, %f2 = byte2, %f3 = byte0 as floats.
	cvt.rn.f32.u32 	%f1, %r23;
	cvt.rn.f32.u32 	%f2, %r24;
	cvt.rn.f32.u32 	%f3, %r22;
	// %rd5 = destination pixel index = inDestPitch * y + x (32-bit, then sign-extended).
	ld.param.s32 	%r25, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r26, %r25, %r10;
	add.s32 	%r27, %r8, %r26;
	cvt.s64.s32 	%rd5, %r27;
	// Table entries at byte offsets 24..32 (used for the first stored channel) and the
	// three kYCbCrOffset values are fetched once here and shared by both branches.
	ld.global.f32 	%f4, [k709YCbCr_To_RGB32f+32];
	ld.global.f32 	%f5, [k709YCbCr_To_RGB32f+24];
	ld.global.f32 	%f6, [k709YCbCr_To_RGB32f+28];
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	ld.const.f32 	%f7, [kYCbCrOffset+8];
	ld.const.f32 	%f8, [kYCbCrOffset+0];
	ld.const.f32 	%f9, [kYCbCrOffset+4];
	// inDestDeviceFormat != 0 -> f32 store path; == 0 -> f16 store path (fall through).
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r29, 0;
	setp.ne.s32 	%p2, %r28, %r29;
	@%p2 bra 	$Lt_60_195842;
	.loc	19	126	0
	// ---- f16 path ----
	// %f12 = 255 / 255 (approximate divide), the factor applied to each offset.
	mov.f32 	%f10, 0f437f0000;    	// 255
	mov.f32 	%f11, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f12, %f10, %f11;
	// %f14 = byte1 - f12*offset[+4]; %f16 = byte2 - f12*offset[+0]; %f18 = byte0 - f12*offset[+8]
	mul.ftz.f32 	%f13, %f12, %f9;
	sub.ftz.f32 	%f14, %f1, %f13;
	mul.ftz.f32 	%f15, %f12, %f8;
	sub.ftz.f32 	%f16, %f2, %f15;
	mul.ftz.f32 	%f17, %f12, %f7;
	sub.ftz.f32 	%f18, %f3, %f17;
	// Destination address for 8-byte pixels.
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	// Channel 0 = T[+24]*f16 + T[+28]*f14 + T[+32]*f18, then narrowed to f16 bits in %r30.
	mul.ftz.f32 	%f19, %f14, %f6;
	fma.rn.ftz.f32 	%f20, %f5, %f16, %f19;
	fma.rn.ftz.f32 	%f21, %f4, %f18, %f20;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f21;
	mov.b32		%r30, %b1; }
	// Channel 1 = T[+12]*f16 + T[+16]*f14 + T[+20]*f18.
	ld.global.f32 	%f22, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f23, %f22, %f14;
	ld.global.f32 	%f24, [k709YCbCr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f25, %f24, %f16, %f23;
	ld.global.f32 	%f26, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f27, %f26, %f18, %f25;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f27;
	mov.b32		%r31, %b1; }
	// Channel 2 = T[+0]*f16 + T[+4]*f14 + T[+8]*f18.
	ld.global.f32 	%f28, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f29, %f28, %f14;
	ld.global.f32 	%f30, [k709YCbCr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f31, %f30, %f16, %f29;
	ld.global.f32 	%f32, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f33, %f32, %f18, %f31;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f33;
	mov.b32		%r32, %b1; }
	// Channel 3 is the constant 1.0 (the source's fourth byte is ignored).
	mov.f32 	%f34, 0f3f800000;    	// 1
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f34;
	mov.b32		%r33, %b1; }
	st.global.v4.u16 	[%rd8+0], {%r30,%r31,%r32,%r33};
	.loc	22	168	0
	bra.uni 	$Lt_60_195586;
$Lt_60_195842:
	.loc	19	126	0
	// ---- f32 path: same arithmetic as above, stored as 16-byte pixels without narrowing ----
	mov.f32 	%f35, 0f437f0000;    	// 255
	mov.f32 	%f36, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f37, %f35, %f36;
	mul.ftz.f32 	%f38, %f37, %f9;
	sub.ftz.f32 	%f39, %f1, %f38;
	mul.ftz.f32 	%f40, %f37, %f8;
	sub.ftz.f32 	%f41, %f2, %f40;
	mul.ftz.f32 	%f42, %f37, %f7;
	sub.ftz.f32 	%f43, %f3, %f42;
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	// Channel 0 (table offsets 24..32).
	mul.ftz.f32 	%f44, %f39, %f6;
	fma.rn.ftz.f32 	%f45, %f5, %f41, %f44;
	fma.rn.ftz.f32 	%f46, %f4, %f43, %f45;
	// Channel 1 (table offsets 12..20).
	ld.global.f32 	%f47, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f48, %f47, %f39;
	ld.global.f32 	%f49, [k709YCbCr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f50, %f49, %f41, %f48;
	ld.global.f32 	%f51, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f52, %f51, %f43, %f50;
	// Channel 2 (table offsets 0..8).
	ld.global.f32 	%f53, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f54, %f53, %f39;
	ld.global.f32 	%f55, [k709YCbCr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f56, %f55, %f41, %f54;
	ld.global.f32 	%f57, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f58, %f57, %f43, %f56;
	// Channel 3 = 1.0.
	mov.f32 	%f59, 0f3f800000;    	// 1
	st.global.v4.f32 	[%rd10+0], {%f46,%f52,%f58,%f59};
$Lt_60_195586:
$Lt_60_195074:
	.loc	22	168	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_VUYX_4444_8u_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// One thread converts one pixel: four 16-bit source channels are each multiplied by
	// 1/32768 and written in the same order, as four f16 values (inDestDeviceFormat == 0,
	// 8 bytes per pixel) or four f32 values (any other format value, 16 bytes per pixel).
	// Both pitches are counted in pixels. inSrcDeviceFormat is declared but never read.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %blkX, %blkY, %dimX, %dimY, %thrX, %thrY;
	.reg .u32 %col, %row, %width, %height;
	.reg .u32 %srcPitch, %srcIdx, %dstPitch, %dstIdx, %dstFmt;
	.reg .u32 %raw<4>, %hbits<4>;
	.reg .u64 %srcBase, %srcOff, %srcPtr, %dstBase, %dstOff, %dstPtr;
	.reg .f32 %chan<4>, %norm<4>, %one, %range, %scale;
	.reg .pred %inX, %inY, %inside, %wantF32;
	.loc	22	170	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// Pixel coordinates handled by this thread: block index * block size + thread index.
	mov.u32 	%blkX, %ctaid.x;
	mov.u32 	%dimX, %ntid.x;
	mov.u32 	%thrX, %tid.x;
	mad.lo.s32 	%col, %blkX, %dimX, %thrX;
	mov.u32 	%blkY, %ctaid.y;
	mov.u32 	%dimY, %ntid.y;
	mov.u32 	%thrY, %tid.y;
	mad.lo.s32 	%row, %blkY, %dimY, %thrY;
	// Threads outside the inWidth x inHeight rectangle (signed compares) do nothing.
	ld.param.s32 	%width, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	ld.param.s32 	%height, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	setp.gt.s32 	%inX, %width, %col;
	setp.gt.s32 	%inY, %height, %row;
	and.pred 	%inside, %inX, %inY;
	@!%inside bra 	$Lt_61_Done;
	.loc	19	115	0
	// Source pixel: inSrc + (inSrcPitch * row + col) * 8 bytes, read as four u16 channels.
	ld.param.u64 	%srcBase, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%srcPitch, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mad.lo.s32 	%srcIdx, %srcPitch, %row, %col;
	mul.wide.s32 	%srcOff, %srcIdx, 8;
	add.u64 	%srcPtr, %srcBase, %srcOff;
	ld.global.v4.u16 	{%raw0,%raw1,%raw2,%raw3}, [%srcPtr+0];
	.loc	22	170	0
	cvt.rn.f32.u32 	%chan0, %raw0;
	cvt.rn.f32.u32 	%chan1, %raw1;
	cvt.rn.f32.u32 	%chan2, %raw2;
	cvt.rn.f32.u32 	%chan3, %raw3;
	// Destination pixel index (32-bit, sign-extended when scaled to a byte offset below).
	ld.param.s32 	%dstPitch, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mad.lo.s32 	%dstIdx, %dstPitch, %row, %col;
	ld.param.u64 	%dstBase, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	ld.param.s32 	%dstFmt, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	.loc	19	126	0
	// scale = 1 / 32768 via approximate divide; identical for both output formats, so the
	// four products are formed once ahead of the format branch.
	mov.f32 	%one, 0f3f800000;     	// 1
	mov.f32 	%range, 0f47000000;   	// 32768
	div.approx.ftz.f32 	%scale, %one, %range;
	mul.ftz.f32 	%norm0, %chan0, %scale;
	mul.ftz.f32 	%norm1, %chan1, %scale;
	mul.ftz.f32 	%norm2, %chan2, %scale;
	mul.ftz.f32 	%norm3, %chan3, %scale;
	setp.ne.s32 	%wantF32, %dstFmt, 0;
	@%wantF32 bra 	$Lt_61_StoreF32;
	// f16 destination: 8 bytes per pixel, each channel rounded to nearest half.
	mul.wide.s32 	%dstOff, %dstIdx, 8;
	add.u64 	%dstPtr, %dstBase, %dstOff;
	{ .reg .b32 %bits;
	cvt.rn.ftz.f16.f32	%bits, %norm0;
	mov.b32		%hbits0, %bits; }
	{ .reg .b32 %bits;
	cvt.rn.ftz.f16.f32	%bits, %norm1;
	mov.b32		%hbits1, %bits; }
	{ .reg .b32 %bits;
	cvt.rn.ftz.f16.f32	%bits, %norm2;
	mov.b32		%hbits2, %bits; }
	{ .reg .b32 %bits;
	cvt.rn.ftz.f16.f32	%bits, %norm3;
	mov.b32		%hbits3, %bits; }
	st.global.v4.u16 	[%dstPtr+0], {%hbits0,%hbits1,%hbits2,%hbits3};
	.loc	22	170	0
	bra.uni 	$Lt_61_Done;
$Lt_61_StoreF32:
	.loc	19	126	0
	// f32 destination: 16 bytes per pixel, channels stored as computed.
	mul.wide.s32 	%dstOff, %dstIdx, 16;
	add.u64 	%dstPtr, %dstBase, %dstOff;
	st.global.v4.f32 	[%dstPtr+0], {%norm0,%norm1,%norm2,%norm3};
$Lt_61_Done:
	.loc	22	170	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// One thread converts one pixel of four 16-bit channels. The fourth channel times
	// 1/32768 becomes the fourth output (%f6). When that value minus 8e-6 is <= 0, all four
	// outputs are zero. Otherwise the first three channels are each scaled by 1/32768 and
	// then multiplied by 1 / (fourth channel). inDestDeviceFormat == 0 selects an 8-byte
	// (4 x f16) store, any other value a 16-byte (4 x f32) store. inSrcDeviceFormat is
	// declared but never read here.
	.entry PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<22>;
	.reg .pred %p<5>;
	.loc	22	171	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: continue only when inWidth > x AND inHeight > y (signed compares).
	// set.gt produces all-ones for true; neg turns that into 1 so the two tests can be ANDed.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_62_194562;
	.loc	19	115	0
	// Source address = inSrc + (inSrcPitch * y + x) * 8, so the pitch is counted in
	// 8-byte pixels. The index is formed in 32-bit arithmetic before being widened.
	// NOTE(review): a very large inSrcPitch * y would wrap in 32 bits - confirm size limits.
	// %rd2 is written below but never read afterwards.
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 8;
	add.u64 	%rd4, %rd1, %rd3;
	// Only the fourth 16-bit channel (byte offset 6) is fetched at this point.
	ld.global.u16 	%r22, [%rd4+6];
	.loc	20	433	0
	// %f4 = 1 / 32768 (approximate divide); %f5 = channel3 * %f4; %f6 keeps a copy that is
	// stored as the fourth output channel.
	cvt.rn.f32.u32 	%f1, %r22;
	mov.f32 	%f2, 0f3f800000;     	// 1
	mov.f32 	%f3, 0f47000000;     	// 32768
	div.approx.ftz.f32 	%f4, %f2, %f3;
	mul.ftz.f32 	%f5, %f1, %f4;
	mov.f32 	%f6, %f5;
	.loc	20	435	0
	// If %f5 - 8e-6 <= 0, fall through and zero every output; this also avoids the
	// 1 / %f5 division below. Otherwise jump to the un-premultiply path.
	mov.f32 	%f7, 0fb70637bd;     	// -8e-006
	add.ftz.f32 	%f8, %f5, %f7;
	mov.f32 	%f9, 0f00000000;     	// 0
	setp.le.ftz.f32 	%p2, %f8, %f9;
	@!%p2 bra 	$LDWendi__Z19MaxUnsignedBitValuei_239_7;
	mov.f32 	%f10, 0f00000000;    	// 0
	mov.f32 	%f11, 0f00000000;    	// 0
	mov.f32 	%f12, 0f00000000;    	// 0
	mov.f32 	%f6, 0f00000000;     	// 0
	bra.uni 	$Lt_62_195074;
$LDWendi__Z19MaxUnsignedBitValuei_239_7:
	.loc	20	447	0
	// %f14 = 1 / %f5. The pixel is re-read as a vector; its fourth element is discarded.
	mov.f32 	%f13, 0f3f800000;    	// 1
	div.approx.ftz.f32 	%f14, %f13, %f5;
	ld.global.v4.u16 	{%r23,%r24,%r25,_}, [%rd4+0];
	// %f12 (third stored channel) = f14 * (channel2 * f4).
	cvt.rn.f32.u32 	%f15, %r25;
	mul.ftz.f32 	%f16, %f15, %f4;
	mul.ftz.f32 	%f12, %f14, %f16;
	.loc	20	448	0
	// %f11 (second stored channel) = f14 * (channel1 * f4).
	cvt.rn.f32.u32 	%f17, %r24;
	mul.ftz.f32 	%f18, %f17, %f4;
	mul.ftz.f32 	%f11, %f14, %f18;
	.loc	20	449	0
	// %f10 (first stored channel) = f14 * (channel0 * f4).
	cvt.rn.f32.u32 	%f19, %r23;
	mul.ftz.f32 	%f20, %f19, %f4;
	mul.ftz.f32 	%f10, %f14, %f20;
$Lt_62_195074:
	.loc	22	171	0
	// Both paths join here with the four output channels in %f10, %f11, %f12, %f6.
	// %rd5 = destination pixel index = inDestPitch * y + x (32-bit, then sign-extended).
	ld.param.s32 	%r26, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r27, %r26, %r10;
	add.s32 	%r28, %r8, %r27;
	cvt.s64.s32 	%rd5, %r28;
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 -> f32 store path; == 0 -> f16 store path (fall through).
	ld.param.s32 	%r29, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r30, 0;
	setp.ne.s32 	%p3, %r29, %r30;
	@%p3 bra 	$Lt_62_195842;
	.loc	19	126	0
	// f16 path: 8 bytes per destination pixel; each channel is narrowed with round-to-nearest.
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r31, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r32, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f12;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f6;
	mov.b32		%r34, %b1; }
	st.global.v4.u16 	[%rd8+0], {%r31,%r32,%r33,%r34};
	.loc	22	171	0
	bra.uni 	$Lt_62_195586;
$Lt_62_195842:
	.loc	19	126	0
	// f32 path: 16 bytes per destination pixel, channels stored unmodified.
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	st.global.v4.f32 	[%rd10+0], {%f10,%f11,%f12,%f6};
$Lt_62_195586:
$Lt_62_194562:
	.loc	22	171	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRP_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Kernel: one thread handles one pixel at (x, y), where
	//   x = %ctaid.x * %ntid.x + %tid.x,  y = %ctaid.y * %ntid.y + %tid.y.
	// Threads for which (inWidth > x && inHeight > y) is false branch straight
	// to exit (the comparison is signed).
	// Source: four u16 values at inSrc + (inSrcPitch * y + x) * 8; the fourth is
	// discarded. The first three are converted to f32 and scaled by 1/32768.
	// Destination: pixel index inDestPitch * y + x, written as four f16 values
	// (8 bytes/pixel) when inDestDeviceFormat == 0, otherwise as four f32 values
	// (16 bytes/pixel). The fourth output component is the constant 1.0.
	// inSrcDeviceFormat is declared but never read by this kernel.
	// NOTE(review): channel meaning (B, G, R, X -> B, G, R, A) is inferred from
	// the kernel name only -- confirm against the CUDA source.
	.entry PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<35>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<19>;
	.reg .pred %p<4>;
	.loc	22	172	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = %ctaid.x * %ntid.x + %tid.x; %r10 = y computed the same way.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set.gt yields all-ones when true and neg turns that into 1,
	// so %r17 = (inWidth > x) & (inHeight > y). Zero means out of range -> exit.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_63_194050;
	.loc	19	115	0
	// Source address %rd4 = inSrc + (inSrcPitch * y + x) * 8 bytes.
	// %rd2 is written here but not referenced again in this kernel.
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 8;
	add.u64 	%rd4, %rd1, %rd3;
	// Load elements 0..2 of the pixel; element 3 goes to the '_' sink.
	ld.global.v4.u16 	{%r22,%r23,%r24,_}, [%rd4+0];
	.loc	22	172	0
	cvt.rn.f32.u32 	%f1, %r22;
	cvt.rn.f32.u32 	%f2, %r23;
	cvt.rn.f32.u32 	%f3, %r24;
	// Destination pixel index %rd5 = inDestPitch * y + x.
	ld.param.s32 	%r25, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r26, %r25, %r10;
	add.s32 	%r27, %r8, %r26;
	cvt.s64.s32 	%rd5, %r27;
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 selects the f32 store path at $Lt_63_194818.
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r29, 0;
	setp.ne.s32 	%p2, %r28, %r29;
	@%p2 bra 	$Lt_63_194818;
	.loc	19	126	0
	// f16 store path: %f6 = 1/32768 (approximate divide); 8 bytes per
	// destination pixel. Each { } scope converts one f32 to f16.
	mov.f32 	%f4, 0f3f800000;     	// 1
	mov.f32 	%f5, 0f47000000;     	// 32768
	div.approx.ftz.f32 	%f6, %f4, %f5;
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	mul.ftz.f32 	%f7, %f1, %f6;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f7;
	mov.b32		%r30, %b1; }
	mul.ftz.f32 	%f8, %f2, %f6;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f8;
	mov.b32		%r31, %b1; }
	mul.ftz.f32 	%f9, %f3, %f6;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r32, %b1; }
	// Fourth output component is the constant 1.0, converted to f16.
	mov.f32 	%f10, 0f3f800000;    	// 1
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r33, %b1; }
	st.global.v4.u16 	[%rd8+0], {%r30,%r31,%r32,%r33};
	.loc	22	172	0
	bra.uni 	$Lt_63_194562;
$Lt_63_194818:
	.loc	19	126	0
	// f32 store path: the same 1/32768 scaling is recomputed here; 16 bytes per
	// destination pixel; fourth output component is the constant 1.0.
	mov.f32 	%f11, 0f3f800000;    	// 1
	mov.f32 	%f12, 0f47000000;    	// 32768
	div.approx.ftz.f32 	%f13, %f11, %f12;
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	mul.ftz.f32 	%f14, %f1, %f13;
	mul.ftz.f32 	%f15, %f2, %f13;
	mul.ftz.f32 	%f16, %f3, %f13;
	mov.f32 	%f17, 0f3f800000;    	// 1
	st.global.v4.f32 	[%rd10+0], {%f14,%f15,%f16,%f17};
$Lt_63_194562:
$Lt_63_194050:
	.loc	22	172	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRX_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Kernel: one thread handles one pixel at (x, y), where
	//   x = %ctaid.x * %ntid.x + %tid.x,  y = %ctaid.y * %ntid.y + %tid.y.
	// Threads for which (inWidth > x && inHeight > y) is false branch straight
	// to exit (the comparison is signed).
	// Source: four u16 values at inSrc + (inSrcPitch * y + x) * 8. All four are
	// converted to f32, scaled by 1/32768 and written in REVERSED element order
	// (source element 3 becomes output element 0, and so on).
	// Destination: pixel index inDestPitch * y + x, written as four f16 values
	// (8 bytes/pixel) when inDestDeviceFormat == 0, otherwise as four f32 values
	// (16 bytes/pixel).
	// inSrcDeviceFormat is declared but never read by this kernel.
	// NOTE(review): channel meaning (A, R, G, B -> B, G, R, A) is inferred from
	// the kernel name only -- confirm against the CUDA source.
	.entry PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<20>;
	.reg .pred %p<4>;
	.loc	22	173	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = %ctaid.x * %ntid.x + %tid.x; %r10 = y computed the same way.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set.gt yields all-ones when true and neg turns that into 1,
	// so %r17 = (inWidth > x) & (inHeight > y). Zero means out of range -> exit.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_64_194562;
	.loc	19	115	0
	// Source address %rd4 = inSrc + (inSrcPitch * y + x) * 8 bytes.
	// %rd2 is written here but not referenced again in this kernel.
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 8;
	add.u64 	%rd4, %rd1, %rd3;
	ld.global.v4.u16 	{%r22,%r23,%r24,%r25}, [%rd4+0];
	.loc	22	173	0
	// Element order is reversed here: %f1 <- element 3 ... %f4 <- element 0.
	cvt.rn.f32.u32 	%f1, %r25;
	cvt.rn.f32.u32 	%f2, %r24;
	cvt.rn.f32.u32 	%f3, %r23;
	cvt.rn.f32.u32 	%f4, %r22;
	// Destination pixel index %rd5 = inDestPitch * y + x.
	ld.param.s32 	%r26, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r27, %r26, %r10;
	add.s32 	%r28, %r8, %r27;
	cvt.s64.s32 	%rd5, %r28;
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 selects the f32 store path at $Lt_64_195330.
	ld.param.s32 	%r29, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r30, 0;
	setp.ne.s32 	%p2, %r29, %r30;
	@%p2 bra 	$Lt_64_195330;
	.loc	19	126	0
	// f16 store path: %f7 = 1/32768 (approximate divide); 8 bytes per
	// destination pixel. Each { } scope converts one scaled f32 to f16.
	mov.f32 	%f5, 0f3f800000;     	// 1
	mov.f32 	%f6, 0f47000000;     	// 32768
	div.approx.ftz.f32 	%f7, %f5, %f6;
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	mul.ftz.f32 	%f8, %f1, %f7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f8;
	mov.b32		%r31, %b1; }
	mul.ftz.f32 	%f9, %f2, %f7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r32, %b1; }
	mul.ftz.f32 	%f10, %f3, %f7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r33, %b1; }
	mul.ftz.f32 	%f11, %f4, %f7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r34, %b1; }
	st.global.v4.u16 	[%rd8+0], {%r31,%r32,%r33,%r34};
	.loc	22	173	0
	bra.uni 	$Lt_64_195074;
$Lt_64_195330:
	.loc	19	126	0
	// f32 store path: the same 1/32768 scaling is recomputed here; 16 bytes per
	// destination pixel.
	mov.f32 	%f12, 0f3f800000;    	// 1
	mov.f32 	%f13, 0f47000000;    	// 32768
	div.approx.ftz.f32 	%f14, %f12, %f13;
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	mul.ftz.f32 	%f15, %f1, %f14;
	mul.ftz.f32 	%f16, %f2, %f14;
	mul.ftz.f32 	%f17, %f3, %f14;
	mul.ftz.f32 	%f18, %f4, %f14;
	st.global.v4.f32 	[%rd10+0], {%f15,%f16,%f17,%f18};
$Lt_64_195074:
$Lt_64_194562:
	.loc	22	173	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_ARGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Kernel: one thread handles one pixel at (x, y), where
	//   x = %ctaid.x * %ntid.x + %tid.x,  y = %ctaid.y * %ntid.y + %tid.y.
	// Threads for which (inWidth > x && inHeight > y) is false branch straight
	// to exit (the comparison is signed).
	// Source: four u16 values at inSrc + (inSrcPitch * y + x) * 8.
	//   d = element0 / 32768.
	//   If d - 8e-6 <= 0, all four outputs are 0.
	//   Otherwise outputs are { e3, e2, e1 } / 32768 * (1 / d), followed by d,
	//   i.e. elements 1..3 are divided by d and written in reversed order.
	// Destination: pixel index inDestPitch * y + x, written as four f16 values
	// (8 bytes/pixel) when inDestDeviceFormat == 0, otherwise as four f32 values
	// (16 bytes/pixel).
	// inSrcDeviceFormat is declared but never read by this kernel.
	// NOTE(review): reading element 0 as premultiplied alpha and this as an
	// un-premultiply step is inferred from the kernel name only -- confirm.
	.entry PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<22>;
	.reg .pred %p<5>;
	.loc	22	174	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = %ctaid.x * %ntid.x + %tid.x; %r10 = y computed the same way.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set.gt yields all-ones when true and neg turns that into 1,
	// so %r17 = (inWidth > x) & (inHeight > y). Zero means out of range -> exit.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_65_194818;
	.loc	19	115	0
	// Source address %rd4 = inSrc + (inSrcPitch * y + x) * 8 bytes.
	// %rd2 is written here but not referenced again in this kernel.
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 8;
	add.u64 	%rd4, %rd1, %rd3;
	// Only element 0 of the pixel is loaded at this point.
	ld.global.u16 	%r22, [%rd4+0];
	.loc	20	433	0
	// %f4 = 1/32768 (approximate divide); %f5 = element0 * %f4 is the divisor d.
	// %f6 carries the fourth output component and starts as a copy of d.
	cvt.rn.f32.u32 	%f1, %r22;
	mov.f32 	%f2, 0f3f800000;     	// 1
	mov.f32 	%f3, 0f47000000;     	// 32768
	div.approx.ftz.f32 	%f4, %f2, %f3;
	mul.ftz.f32 	%f5, %f1, %f4;
	mov.f32 	%f6, %f5;
	.loc	20	435	0
	// If d - 8e-6 <= 0, fall through: zero all four outputs and skip the divide.
	mov.f32 	%f7, 0fb70637bd;     	// -8e-006
	add.ftz.f32 	%f8, %f5, %f7;
	mov.f32 	%f9, 0f00000000;     	// 0
	setp.le.ftz.f32 	%p2, %f8, %f9;
	@!%p2 bra 	$LDWendi__Z19MaxUnsignedBitValuei_242_7;
	mov.f32 	%f10, 0f00000000;    	// 0
	mov.f32 	%f11, 0f00000000;    	// 0
	mov.f32 	%f12, 0f00000000;    	// 0
	mov.f32 	%f6, 0f00000000;     	// 0
	bra.uni 	$Lt_65_195330;
	// Compiler-generated label: target of the branch taken when d is above the
	// threshold.
$LDWendi__Z19MaxUnsignedBitValuei_242_7:
	.loc	20	447	0
	// %f14 = 1 / d (approximate). The pixel is re-read as a vector with element
	// 0 sent to the '_' sink; elements 1..3 are scaled by 1/32768 and by %f14.
	// Outputs are assigned in reverse: %f12 <- e1, %f11 <- e2, %f10 <- e3.
	mov.f32 	%f13, 0f3f800000;    	// 1
	div.approx.ftz.f32 	%f14, %f13, %f5;
	ld.global.v4.u16 	{_,%r23,%r24,%r25}, [%rd4+0];
	cvt.rn.f32.u32 	%f15, %r23;
	mul.ftz.f32 	%f16, %f15, %f4;
	mul.ftz.f32 	%f12, %f14, %f16;
	.loc	20	448	0
	cvt.rn.f32.u32 	%f17, %r24;
	mul.ftz.f32 	%f18, %f17, %f4;
	mul.ftz.f32 	%f11, %f14, %f18;
	.loc	20	449	0
	cvt.rn.f32.u32 	%f19, %r25;
	mul.ftz.f32 	%f20, %f19, %f4;
	mul.ftz.f32 	%f10, %f14, %f20;
$Lt_65_195330:
	.loc	22	174	0
	// Both branches join here with the result in { %f10, %f11, %f12, %f6 }.
	// Destination pixel index %rd5 = inDestPitch * y + x.
	ld.param.s32 	%r26, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r27, %r26, %r10;
	add.s32 	%r28, %r8, %r27;
	cvt.s64.s32 	%rd5, %r28;
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 selects the f32 store path at $Lt_65_196098.
	ld.param.s32 	%r29, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r30, 0;
	setp.ne.s32 	%p3, %r29, %r30;
	@%p3 bra 	$Lt_65_196098;
	.loc	19	126	0
	// f16 store path: 8 bytes per destination pixel; each { } scope converts
	// one f32 result to f16.
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r31, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r32, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f12;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f6;
	mov.b32		%r34, %b1; }
	st.global.v4.u16 	[%rd8+0], {%r31,%r32,%r33,%r34};
	.loc	22	174	0
	bra.uni 	$Lt_65_195842;
$Lt_65_196098:
	.loc	19	126	0
	// f32 store path: 16 bytes per destination pixel.
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	st.global.v4.f32 	[%rd10+0], {%f10,%f11,%f12,%f6};
$Lt_65_195842:
$Lt_65_194818:
	.loc	22	174	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_PRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Kernel: one thread handles one pixel at (x, y), where
	//   x = %ctaid.x * %ntid.x + %tid.x,  y = %ctaid.y * %ntid.y + %tid.y.
	// Threads for which (inWidth > x && inHeight > y) is false branch straight
	// to exit (the comparison is signed).
	// Source: four u16 values at inSrc + (inSrcPitch * y + x) * 8; element 0 is
	// discarded. Elements 1..3 are converted to f32, scaled by 1/32768 and
	// written in reversed order (element 3 first).
	// Destination: pixel index inDestPitch * y + x, written as four f16 values
	// (8 bytes/pixel) when inDestDeviceFormat == 0, otherwise as four f32 values
	// (16 bytes/pixel). The fourth output component is the constant 1.0.
	// inSrcDeviceFormat is declared but never read by this kernel.
	// NOTE(review): channel meaning (X, R, G, B -> B, G, R, A) is inferred from
	// the kernel name only -- confirm against the CUDA source.
	.entry PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<35>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<19>;
	.reg .pred %p<4>;
	.loc	22	175	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = %ctaid.x * %ntid.x + %tid.x; %r10 = y computed the same way.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set.gt yields all-ones when true and neg turns that into 1,
	// so %r17 = (inWidth > x) & (inHeight > y). Zero means out of range -> exit.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_66_194306;
	.loc	19	115	0
	// Source address %rd4 = inSrc + (inSrcPitch * y + x) * 8 bytes.
	// %rd2 is written here but not referenced again in this kernel.
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 8;
	add.u64 	%rd4, %rd1, %rd3;
	// Element 0 goes to the '_' sink; elements 1..3 are kept.
	ld.global.v4.u16 	{_,%r22,%r23,%r24}, [%rd4+0];
	.loc	22	175	0
	// Element order is reversed here: %f1 <- element 3, %f2 <- 2, %f3 <- 1.
	cvt.rn.f32.u32 	%f1, %r24;
	cvt.rn.f32.u32 	%f2, %r23;
	cvt.rn.f32.u32 	%f3, %r22;
	// Destination pixel index %rd5 = inDestPitch * y + x.
	ld.param.s32 	%r25, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r26, %r25, %r10;
	add.s32 	%r27, %r8, %r26;
	cvt.s64.s32 	%rd5, %r27;
	ld.param.u64 	%rd6, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 selects the f32 store path at $Lt_66_195074.
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r29, 0;
	setp.ne.s32 	%p2, %r28, %r29;
	@%p2 bra 	$Lt_66_195074;
	.loc	19	126	0
	// f16 store path: %f6 = 1/32768 (approximate divide); 8 bytes per
	// destination pixel. Each { } scope converts one f32 to f16.
	mov.f32 	%f4, 0f3f800000;     	// 1
	mov.f32 	%f5, 0f47000000;     	// 32768
	div.approx.ftz.f32 	%f6, %f4, %f5;
	mul.lo.u64 	%rd7, %rd5, 8;
	add.u64 	%rd8, %rd6, %rd7;
	mul.ftz.f32 	%f7, %f1, %f6;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f7;
	mov.b32		%r30, %b1; }
	mul.ftz.f32 	%f8, %f2, %f6;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f8;
	mov.b32		%r31, %b1; }
	mul.ftz.f32 	%f9, %f3, %f6;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r32, %b1; }
	// Fourth output component is the constant 1.0, converted to f16.
	mov.f32 	%f10, 0f3f800000;    	// 1
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r33, %b1; }
	st.global.v4.u16 	[%rd8+0], {%r30,%r31,%r32,%r33};
	.loc	22	175	0
	bra.uni 	$Lt_66_194818;
$Lt_66_195074:
	.loc	19	126	0
	// f32 store path: the same 1/32768 scaling is recomputed here; 16 bytes per
	// destination pixel; fourth output component is the constant 1.0.
	mov.f32 	%f11, 0f3f800000;    	// 1
	mov.f32 	%f12, 0f47000000;    	// 32768
	div.approx.ftz.f32 	%f13, %f11, %f12;
	mul.lo.u64 	%rd9, %rd5, 16;
	add.u64 	%rd10, %rd6, %rd9;
	mul.ftz.f32 	%f14, %f1, %f13;
	mul.ftz.f32 	%f15, %f2, %f13;
	mul.ftz.f32 	%f16, %f3, %f13;
	mov.f32 	%f17, 0f3f800000;    	// 1
	st.global.v4.f32 	[%rd10+0], {%f14,%f15,%f16,%f17};
$Lt_66_194818:
$Lt_66_194306:
	.loc	22	175	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_XRGB_4444_15u_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Kernel: one thread handles one pixel at (x, y), where
	//   x = %ctaid.x * %ntid.x + %tid.x,  y = %ctaid.y * %ntid.y + %tid.y.
	// Threads for which (inWidth > x && inHeight > y) is false branch straight
	// to exit (the comparison is signed).
	// Source: pixel index inSrcPitch * y + x. When inSrcDeviceFormat == 0 the
	// pixel is read as four f16 values (8 bytes/pixel) and widened to f32;
	// otherwise it is read as four f32 values (16 bytes/pixel).
	// The four components are passed through unchanged and in the same order.
	// Destination: pixel index inDestPitch * y + x, written as four f16 values
	// (8 bytes/pixel) when inDestDeviceFormat == 0, otherwise as four f32 values
	// (16 bytes/pixel).
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<6>;
	.reg .pred %p<5>;
	.loc	22	177	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = %ctaid.x * %ntid.x + %tid.x; %r10 = y computed the same way.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set.gt yields all-ones when true and neg turns that into 1,
	// so %r17 = (inWidth > x) & (inHeight > y). Zero means out of range -> exit.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_67_194818;
	// Source pixel index %rd1 = inSrcPitch * y + x.
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	// inSrcDeviceFormat != 0 selects the f32 load path at $Lt_67_195586.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_67_195586;
	.loc	19	115	0
	// f16 load path: 8 bytes per source pixel; each { } scope widens one
	// 16-bit value to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	177	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_67_195330;
$Lt_67_195586:
	// f32 load path: 16 bytes per source pixel.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_67_195330:
	// Both load paths join here with the pixel in %f1..%f4.
	// Destination pixel index %rd7 = inDestPitch * y + x.
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 selects the f32 store path at $Lt_67_196098.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p3, %r31, %r32;
	@%p3 bra 	$Lt_67_196098;
	.loc	19	126	0
	// f16 store path: 8 bytes per destination pixel; each { } scope narrows
	// one f32 component to f16.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f1;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f2;
	mov.b32		%r34, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f3;
	mov.b32		%r35, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	177	0
	bra.uni 	$Lt_67_195842;
$Lt_67_196098:
	.loc	19	126	0
	// f32 store path: 16 bytes per destination pixel.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	st.global.v4.f32 	[%rd12+0], {%f1,%f2,%f3,%f4};
$Lt_67_195842:
$Lt_67_194818:
	.loc	22	177	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Kernel: one thread handles one pixel at (x, y), where
	//   x = %ctaid.x * %ntid.x + %tid.x,  y = %ctaid.y * %ntid.y + %tid.y.
	// Threads for which (inWidth > x && inHeight > y) is false branch straight
	// to exit (the comparison is signed).
	// Source: pixel index inSrcPitch * y + x. When inSrcDeviceFormat == 0 the
	// pixel is read as four f16 values (8 bytes/pixel) and widened to f32;
	// otherwise it is read as four f32 values (16 bytes/pixel).
	//   d = component 3.
	//   If d - 8e-6 <= 0, all four outputs are 0.
	//   Otherwise outputs are components 0..2 each multiplied by (1 / d),
	//   followed by d itself; component order is preserved.
	// Destination: pixel index inDestPitch * y + x, written as four f16 values
	// (8 bytes/pixel) when inDestDeviceFormat == 0, otherwise as four f32 values
	// (16 bytes/pixel).
	// NOTE(review): reading component 3 as premultiplied alpha and this as an
	// un-premultiply step is inferred from the kernel name only -- confirm.
	.entry PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<15>;
	.reg .pred %p<6>;
	.loc	22	178	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = %ctaid.x * %ntid.x + %tid.x; %r10 = y computed the same way.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set.gt yields all-ones when true and neg turns that into 1,
	// so %r17 = (inWidth > x) & (inHeight > y). Zero means out of range -> exit.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_68_195074;
	// Source pixel index %rd1 = inSrcPitch * y + x.
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	// inSrcDeviceFormat != 0 selects the f32 load path at $Lt_68_195842.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_68_195842;
	.loc	19	115	0
	// f16 load path: 8 bytes per source pixel; each { } scope widens one
	// 16-bit value to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	178	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_68_195586;
$Lt_68_195842:
	// f32 load path: 16 bytes per source pixel.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_68_195586:
	.loc	20	433	0
	// Both load paths join here with the pixel in %f1..%f4.
	// %f5 carries the fourth output component; it starts as a copy of %f4 (d).
	mov.f32 	%f5, %f4;
	.loc	20	435	0
	// If d - 8e-6 <= 0, fall through: zero all four outputs and skip the divide.
	mov.f32 	%f6, 0fb70637bd;     	// -8e-006
	add.ftz.f32 	%f7, %f4, %f6;
	mov.f32 	%f8, 0f00000000;     	// 0
	setp.le.ftz.f32 	%p3, %f7, %f8;
	@!%p3 bra 	$LDWendi__Z19MaxUnsignedBitValuei_245_7;
	mov.f32 	%f9, 0f00000000;     	// 0
	mov.f32 	%f10, 0f00000000;    	// 0
	mov.f32 	%f11, 0f00000000;    	// 0
	mov.f32 	%f5, 0f00000000;     	// 0
	bra.uni 	$Lt_68_196098;
	// Compiler-generated label: target of the branch taken when d is above the
	// threshold.
$LDWendi__Z19MaxUnsignedBitValuei_245_7:
	.loc	20	447	0
	// %f13 = 1 / d (approximate); %f11 <- c2 / d, %f10 <- c1 / d, %f9 <- c0 / d.
	mov.f32 	%f12, 0f3f800000;    	// 1
	div.approx.ftz.f32 	%f13, %f12, %f4;
	mul.ftz.f32 	%f11, %f13, %f3;
	.loc	20	448	0
	mul.ftz.f32 	%f10, %f13, %f2;
	.loc	20	449	0
	mul.ftz.f32 	%f9, %f13, %f1;
$Lt_68_196098:
	.loc	22	178	0
	// Both branches join here with the result in { %f9, %f10, %f11, %f5 }.
	// Destination pixel index %rd7 = inDestPitch * y + x.
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 selects the f32 store path at $Lt_68_196866.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p4, %r31, %r32;
	@%p4 bra 	$Lt_68_196866;
	.loc	19	126	0
	// f16 store path: 8 bytes per destination pixel; each { } scope narrows
	// one f32 result to f16.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r34, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r35, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f5;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	178	0
	bra.uni 	$Lt_68_196610;
$Lt_68_196866:
	.loc	19	126	0
	// f32 store path: 16 bytes per destination pixel.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	st.global.v4.f32 	[%rd12+0], {%f9,%f10,%f11,%f5};
$Lt_68_196610:
$Lt_68_195074:
	.loc	22	178	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Kernel: one thread handles one pixel at (x, y), where
	//   x = %ctaid.x * %ntid.x + %tid.x,  y = %ctaid.y * %ntid.y + %tid.y.
	// Threads for which (inWidth > x && inHeight > y) is false branch straight
	// to exit (the comparison is signed).
	// Source: pixel index inSrcPitch * y + x. When inSrcDeviceFormat == 0 the
	// pixel is read as f16 values (8 bytes/pixel) and widened to f32; otherwise
	// it is read as f32 values (16 bytes/pixel). In both cases the fourth
	// component is discarded.
	// Components 0..2 are passed through unchanged; the fourth output component
	// is the constant 1.0.
	// Destination: pixel index inDestPitch * y + x, written as four f16 values
	// (8 bytes/pixel) when inDestDeviceFormat == 0, otherwise as four f32 values
	// (16 bytes/pixel).
	// NOTE(review): channel meaning (B, G, R, X -> B, G, R, A) is inferred from
	// the kernel name only -- confirm against the CUDA source.
	.entry PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<37>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<7>;
	.reg .pred %p<5>;
	.loc	22	179	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x = %ctaid.x * %ntid.x + %tid.x; %r10 = y computed the same way.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set.gt yields all-ones when true and neg turns that into 1,
	// so %r17 = (inWidth > x) & (inHeight > y). Zero means out of range -> exit.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_69_194562;
	// Source pixel index %rd1 = inSrcPitch * y + x.
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	// inSrcDeviceFormat != 0 selects the f32 load path at $Lt_69_195330.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_69_195330;
	.loc	19	115	0
	// f16 load path: 8 bytes per source pixel; element 3 goes to the '_' sink.
	// Each { } scope widens one 16-bit value to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,_}, [%rd4+0];
	.loc	22	179	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	bra.uni 	$Lt_69_195074;
$Lt_69_195330:
	// f32 load path: 16 bytes per source pixel; element 3 goes to the '_' sink.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,_}, [%rd6+0];
$Lt_69_195074:
	// Both load paths join here with components 0..2 in %f1..%f3.
	// Destination pixel index %rd7 = inDestPitch * y + x.
	ld.param.s32 	%r27, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r28, %r27, %r10;
	add.s32 	%r29, %r8, %r28;
	cvt.s64.s32 	%rd7, %r29;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 selects the f32 store path at $Lt_69_195842.
	ld.param.s32 	%r30, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r31, 0;
	setp.ne.s32 	%p3, %r30, %r31;
	@%p3 bra 	$Lt_69_195842;
	.loc	19	126	0
	// f16 store path: 8 bytes per destination pixel; each { } scope narrows
	// one f32 component to f16.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f1;
	mov.b32		%r32, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f2;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f3;
	mov.b32		%r34, %b1; }
	// Fourth output component is the constant 1.0, converted to f16.
	mov.f32 	%f4, 0f3f800000;     	// 1
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r35, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r32,%r33,%r34,%r35};
	.loc	22	179	0
	bra.uni 	$Lt_69_195586;
$Lt_69_195842:
	.loc	19	126	0
	// f32 store path: 16 bytes per destination pixel; fourth component = 1.0.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	mov.f32 	%f5, 0f3f800000;     	// 1
	st.global.v4.f32 	[%rd12+0], {%f1,%f2,%f3,%f5};
$Lt_69_195586:
$Lt_69_194562:
	.loc	22	179	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Per-pixel conversion kernel: each thread handles the pixel at
	//   x = %ctaid.x * %ntid.x + %tid.x,   y = %ctaid.y * %ntid.y + %tid.y
	// and exits without touching memory unless inWidth > x and inHeight > y.
	// The four source components (c0,c1,c2,c3) are stored reversed as (c3,c2,c1,c0);
	// going by the kernel name this is presumably A,R,G,B -> B,G,R,A.
	// in*DeviceFormat == 0 selects 4 x 16-bit half floats per pixel (8 bytes),
	// any other value selects 4 x f32 per pixel (16 bytes).
	// inSrcPitch / inDestPitch are multiplied by y and added to x before the result
	// is scaled by the pixel size, so they are row strides counted in pixels.
	.entry PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<6>;
	.reg .pred %p<5>;
	.loc	22	180	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x and %r10 = y: the pixel coordinates owned by this thread.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check with signed compares: %r17 is non-zero only when
	// inWidth > x and inHeight > y; otherwise branch straight to exit.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_70_195074;
	// %rd1 = inSrcPitch * y + x, evaluated in 32 bits and then sign-extended,
	// so the pixel index itself has to fit in a signed 32-bit value.
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	// inSrcDeviceFormat != 0 -> take the 32-bit float load path.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_70_195842;
	.loc	19	115	0
	// Half-float source: load four u16 from inSrc + 8 * index and widen each
	// one to f32 (%f1..%f4 keep the source component order).
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	180	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_70_195586;
$Lt_70_195842:
	// Float source: load four f32 from inSrc + 16 * index.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_70_195586:
	// %rd7 = inDestPitch * y + x (same 32-bit index arithmetic as the source side).
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 -> take the 32-bit float store path.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p3, %r31, %r32;
	@%p3 bra 	$Lt_70_196354;
	.loc	19	126	0
	// Half-float destination: round each component to f16 (round-to-nearest)
	// and store them reversed, {%f4,%f3,%f2,%f1}, at inDest + 8 * index.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f3;
	mov.b32		%r34, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f2;
	mov.b32		%r35, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f1;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	180	0
	bra.uni 	$Lt_70_196098;
$Lt_70_196354:
	.loc	19	126	0
	// Float destination: store the components reversed at inDest + 16 * index.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	st.global.v4.f32 	[%rd12+0], {%f4,%f3,%f2,%f1};
$Lt_70_196098:
$Lt_70_195074:
	.loc	22	180	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Per-pixel conversion kernel: each thread handles the pixel at
	//   x = %ctaid.x * %ntid.x + %tid.x,   y = %ctaid.y * %ntid.y + %tid.y
	// and exits without touching memory unless inWidth > x and inHeight > y.
	// Source components (c0,c1,c2,c3) are loaded; c1..c3 are divided by c0 (using an
	// approximate reciprocal) and the result is stored as (c3/c0, c2/c0, c1/c0, c0).
	// When c0 - 8e-6 <= 0 all four outputs are written as 0 instead.
	// Going by the kernel name, c0 is presumably a premultiplied alpha followed by
	// R,G,B, and the output order is B,G,R,A - confirm against the CUDA source.
	// in*DeviceFormat == 0 selects 4 x 16-bit half floats per pixel (8 bytes),
	// any other value selects 4 x f32 per pixel (16 bytes).
	// inSrcPitch / inDestPitch are row strides counted in pixels (they are scaled
	// by the pixel size only after pitch * y + x has been formed).
	.entry PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<15>;
	.reg .pred %p<6>;
	.loc	22	181	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x and %r10 = y: the pixel coordinates owned by this thread.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check with signed compares: %r17 is non-zero only when
	// inWidth > x and inHeight > y; otherwise branch straight to exit.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_71_195330;
	// %rd1 = inSrcPitch * y + x, evaluated in 32 bits and then sign-extended,
	// so the pixel index itself has to fit in a signed 32-bit value.
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	// inSrcDeviceFormat != 0 -> take the 32-bit float load path.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_71_196098;
	.loc	19	115	0
	// Half-float source: load four u16 from inSrc + 8 * index and widen each
	// one to f32 (%f1..%f4 keep the source component order).
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	181	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_71_195842;
$Lt_71_196098:
	// Float source: load four f32 from inSrc + 16 * index.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_71_195842:
	.loc	20	433	0
	// %f5 carries the first component (%f1) through to the fourth output slot.
	mov.f32 	%f5, %f1;
	.loc	20	435	0
	// If %f1 - 8e-6 <= 0 the divide is skipped and all four outputs become 0.
	mov.f32 	%f6, 0fb70637bd;     	// -8e-006
	add.ftz.f32 	%f7, %f1, %f6;
	mov.f32 	%f8, 0f00000000;     	// 0
	setp.le.ftz.f32 	%p3, %f7, %f8;
	@!%p3 bra 	$LDWendi__Z19MaxUnsignedBitValuei_248_7;
	mov.f32 	%f9, 0f00000000;     	// 0
	mov.f32 	%f10, 0f00000000;    	// 0
	mov.f32 	%f11, 0f00000000;    	// 0
	mov.f32 	%f5, 0f00000000;     	// 0
	bra.uni 	$Lt_71_196354;
	// Compiler-generated label; here it is only the target of the branch above
	// that is taken when the threshold test fails (the divide path).
$LDWendi__Z19MaxUnsignedBitValuei_248_7:
	.loc	20	447	0
	// %f13 = approximate 1 / %f1; scale the remaining three components by it:
	// %f11 = %f2 / %f1, %f10 = %f3 / %f1, %f9 = %f4 / %f1.
	mov.f32 	%f12, 0f3f800000;    	// 1
	div.approx.ftz.f32 	%f13, %f12, %f1;
	mul.ftz.f32 	%f11, %f13, %f2;
	.loc	20	448	0
	mul.ftz.f32 	%f10, %f13, %f3;
	.loc	20	449	0
	mul.ftz.f32 	%f9, %f13, %f4;
$Lt_71_196354:
	.loc	22	181	0
	// %rd7 = inDestPitch * y + x (same 32-bit index arithmetic as the source side).
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 -> take the 32-bit float store path.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p4, %r31, %r32;
	@%p4 bra 	$Lt_71_197122;
	.loc	19	126	0
	// Half-float destination: round {%f9,%f10,%f11,%f5} to f16 and store them
	// at inDest + 8 * index.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r34, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r35, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f5;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	181	0
	bra.uni 	$Lt_71_196866;
$Lt_71_197122:
	.loc	19	126	0
	// Float destination: store {%f9,%f10,%f11,%f5} at inDest + 16 * index.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	st.global.v4.f32 	[%rd12+0], {%f9,%f10,%f11,%f5};
$Lt_71_196866:
$Lt_71_195330:
	.loc	22	181	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Per-pixel conversion kernel: each thread handles the pixel at
	//   x = %ctaid.x * %ntid.x + %tid.x,   y = %ctaid.y * %ntid.y + %tid.y
	// and exits without touching memory unless inWidth > x and inHeight > y.
	// The first source component is discarded; the remaining three (c1,c2,c3) are
	// stored reversed with a constant 1.0 appended: (c3, c2, c1, 1.0).
	// Going by the kernel name this is presumably X,R,G,B -> B,G,R,A with opaque alpha.
	// in*DeviceFormat == 0 selects 4 x 16-bit half floats per pixel (8 bytes),
	// any other value selects 4 x f32 per pixel (16 bytes).
	// inSrcPitch / inDestPitch are row strides counted in pixels (they are scaled
	// by the pixel size only after pitch * y + x has been formed).
	.entry PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<37>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<7>;
	.reg .pred %p<5>;
	.loc	22	182	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x and %r10 = y: the pixel coordinates owned by this thread.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check with signed compares: %r17 is non-zero only when
	// inWidth > x and inHeight > y; otherwise branch straight to exit.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_72_194818;
	// %rd1 = inSrcPitch * y + x, evaluated in 32 bits and then sign-extended,
	// so the pixel index itself has to fit in a signed 32-bit value.
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	// inSrcDeviceFormat != 0 -> take the 32-bit float load path.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_72_195586;
	.loc	19	115	0
	// Half-float source: load four u16 from inSrc + 8 * index, drop the first
	// one ("_" sink) and widen the other three to f32 in %f1..%f3.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{_,%r24,%r25,%r26}, [%rd4+0];
	.loc	22	182	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	bra.uni 	$Lt_72_195330;
$Lt_72_195586:
	// Float source: load four f32 from inSrc + 16 * index, dropping the first.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{_,%f1,%f2,%f3}, [%rd6+0];
$Lt_72_195330:
	// %rd7 = inDestPitch * y + x (same 32-bit index arithmetic as the source side).
	ld.param.s32 	%r27, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r28, %r27, %r10;
	add.s32 	%r29, %r8, %r28;
	cvt.s64.s32 	%rd7, %r29;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 -> take the 32-bit float store path.
	ld.param.s32 	%r30, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r31, 0;
	setp.ne.s32 	%p3, %r30, %r31;
	@%p3 bra 	$Lt_72_196098;
	.loc	19	126	0
	// Half-float destination: round {%f3,%f2,%f1} and the constant 1.0 to f16
	// and store them at inDest + 8 * index.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f3;
	mov.b32		%r32, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f2;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f1;
	mov.b32		%r34, %b1; }
	mov.f32 	%f4, 0f3f800000;     	// 1
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r35, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r32,%r33,%r34,%r35};
	.loc	22	182	0
	bra.uni 	$Lt_72_195842;
$Lt_72_196098:
	.loc	19	126	0
	// Float destination: store {%f3,%f2,%f1,1.0} at inDest + 16 * index.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	mov.f32 	%f5, 0f3f800000;     	// 1
	st.global.v4.f32 	[%rd12+0], {%f3,%f2,%f1,%f5};
$Lt_72_195842:
$Lt_72_194818:
	.loc	22	182	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Per-pixel conversion kernel: each thread handles the pixel at
	//   x = %ctaid.x * %ntid.x + %tid.x,   y = %ctaid.y * %ntid.y + %tid.y
	// and exits without touching memory unless inWidth > x and inHeight > y.
	// With m[i] the f32 at byte offset 4*i of the global k601YPbPr_To_RGB32f and
	// (c0,c1,c2,c3) the source components, the stored pixel is
	//   out0 = m[6]*c2 + m[7]*c1 + m[8]*c0
	//   out1 = m[3]*c2 + m[4]*c1 + m[5]*c0
	//   out2 = m[0]*c2 + m[1]*c1 + m[2]*c0
	//   out3 = c3 (passed through unchanged)
	// Going by the names, m is presumably a row-major 3x3 BT.601 YPbPr->RGB matrix,
	// the source is V,U,Y,A and the output is B,G,R,A - confirm against the host code.
	// in*DeviceFormat == 0 selects 4 x 16-bit half floats per pixel (8 bytes),
	// any other value selects 4 x f32 per pixel (16 bytes).
	// inSrcPitch / inDestPitch are row strides counted in pixels (they are scaled
	// by the pixel size only after pitch * y + x has been formed).
	.entry PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<36>;
	.reg .pred %p<5>;
	.loc	22	183	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x and %r10 = y: the pixel coordinates owned by this thread.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check with signed compares: %r17 is non-zero only when
	// inWidth > x and inHeight > y; otherwise branch straight to exit.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_73_195586;
	// %rd1 = inSrcPitch * y + x, evaluated in 32 bits and then sign-extended,
	// so the pixel index itself has to fit in a signed 32-bit value.
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	// inSrcDeviceFormat != 0 -> take the 32-bit float load path.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_73_196354;
	.loc	19	115	0
	// Half-float source: load four u16 from inSrc + 8 * index and widen each
	// one to f32 (%f1..%f4 keep the source component order).
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	183	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_73_196098;
$Lt_73_196354:
	// Float source: load four f32 from inSrc + 16 * index.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_73_196098:
	// %f10 = m[7]*%f2 + m[6]*%f3 + m[8]*%f1: the first output component, computed
	// once here because both store paths below use it.
	ld.global.f32 	%f5, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f6, %f5, %f2;
	ld.global.f32 	%f7, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f8, %f7, %f3, %f6;
	ld.global.f32 	%f9, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f10, %f9, %f1, %f8;
	// %rd7 = inDestPitch * y + x (same 32-bit index arithmetic as the source side).
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 -> take the 32-bit float store path.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p3, %r31, %r32;
	@%p3 bra 	$Lt_73_196866;
	.loc	19	126	0
	// Half-float destination at inDest + 8 * index. Each component is rounded
	// to f16 as soon as it is available.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r33, %b1; }
	// Second output component: %f16 = m[4]*%f2 + m[3]*%f3 + m[5]*%f1.
	ld.global.f32 	%f11, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f12, %f11, %f2;
	ld.global.f32 	%f13, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f14, %f13, %f3, %f12;
	ld.global.f32 	%f15, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f16, %f15, %f1, %f14;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f16;
	mov.b32		%r34, %b1; }
	// Third output component: %f22 = m[1]*%f2 + m[0]*%f3 + m[2]*%f1.
	ld.global.f32 	%f17, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f18, %f17, %f2;
	ld.global.f32 	%f19, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f20, %f19, %f3, %f18;
	ld.global.f32 	%f21, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f22, %f21, %f1, %f20;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f22;
	mov.b32		%r35, %b1; }
	// Fourth output component is the fourth source component, unchanged.
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	183	0
	bra.uni 	$Lt_73_196610;
$Lt_73_196866:
	.loc	19	126	0
	// Float destination at inDest + 16 * index; same two dot products as the
	// half-float path (%f28 = second component, %f34 = third component).
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.f32 	%f23, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f24, %f23, %f2;
	ld.global.f32 	%f25, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f26, %f25, %f3, %f24;
	ld.global.f32 	%f27, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f28, %f27, %f1, %f26;
	ld.global.f32 	%f29, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f30, %f29, %f2;
	ld.global.f32 	%f31, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f32, %f31, %f3, %f30;
	ld.global.f32 	%f33, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f34, %f33, %f1, %f32;
	st.global.v4.f32 	[%rd12+0], {%f10,%f28,%f34,%f4};
$Lt_73_196610:
$Lt_73_195586:
	.loc	22	183	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Per-pixel conversion kernel: each thread handles the pixel at
	//   x = %ctaid.x * %ntid.x + %tid.x,   y = %ctaid.y * %ntid.y + %tid.y
	// and exits without touching memory unless inWidth > x and inHeight > y.
	// With m[i] the f32 at byte offset 4*i of the global k601YPbPr_To_RGB32f,
	// (c0,c1,c2,c3) the source components and s = approximate 1 / c3:
	//   out0 = s * (m[6]*c2 + m[7]*c1 + m[8]*c0)
	//   out1 = s * (m[3]*c2 + m[4]*c1 + m[5]*c0)
	//   out2 = s * (m[0]*c2 + m[1]*c1 + m[2]*c0)
	//   out3 = c3
	// When c3 - 8e-6 <= 0 all four outputs are written as 0 instead.
	// Going by the names, m is presumably a row-major 3x3 BT.601 YPbPr->RGB matrix,
	// the source is V,U,Y plus a premultiplied alpha and the output is B,G,R,A -
	// confirm against the host code.
	// in*DeviceFormat == 0 selects 4 x 16-bit half floats per pixel (8 bytes),
	// any other value selects 4 x f32 per pixel (16 bytes).
	// inSrcPitch / inDestPitch are row strides counted in pixels (they are scaled
	// by the pixel size only after pitch * y + x has been formed).
	.entry PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<33>;
	.reg .pred %p<6>;
	.loc	22	184	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x and %r10 = y: the pixel coordinates owned by this thread.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check with signed compares: %r17 is non-zero only when
	// inWidth > x and inHeight > y; otherwise branch straight to exit.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_74_195842;
	// %rd1 = inSrcPitch * y + x, evaluated in 32 bits and then sign-extended,
	// so the pixel index itself has to fit in a signed 32-bit value.
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	// inSrcDeviceFormat != 0 -> take the 32-bit float load path.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_74_196610;
	.loc	19	115	0
	// Half-float source: load four u16 from inSrc + 8 * index and widen each
	// one to f32 (%f1..%f4 keep the source component order).
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	184	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_74_196354;
$Lt_74_196610:
	// Float source: load four f32 from inSrc + 16 * index.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_74_196354:
	.loc	20	433	0
	// %f5 carries the fourth component (%f4) through to the fourth output slot.
	mov.f32 	%f5, %f4;
	.loc	20	435	0
	// If %f4 - 8e-6 <= 0 the divide is skipped and all four outputs become 0.
	mov.f32 	%f6, 0fb70637bd;     	// -8e-006
	add.ftz.f32 	%f7, %f4, %f6;
	mov.f32 	%f8, 0f00000000;     	// 0
	setp.le.ftz.f32 	%p3, %f7, %f8;
	@!%p3 bra 	$LDWendi__Z19MaxUnsignedBitValuei_251_7;
	mov.f32 	%f9, 0f00000000;     	// 0
	mov.f32 	%f10, 0f00000000;    	// 0
	mov.f32 	%f11, 0f00000000;    	// 0
	mov.f32 	%f5, 0f00000000;     	// 0
	bra.uni 	$Lt_74_196866;
	// Compiler-generated label; here it is only the target of the branch above
	// that is taken when the threshold test fails (the divide path).
$LDWendi__Z19MaxUnsignedBitValuei_251_7:
	.loc	20	447	0
	// %f13 = approximate 1 / %f4, applied to each matrix product below.
	mov.f32 	%f12, 0f3f800000;    	// 1
	div.approx.ftz.f32 	%f13, %f12, %f4;
	// %f11 = %f13 * (m[1]*%f2 + m[0]*%f3 + m[2]*%f1)  -> third output component.
	ld.global.f32 	%f14, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f15, %f14, %f2;
	ld.global.f32 	%f16, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f17, %f16, %f3, %f15;
	ld.global.f32 	%f18, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f19, %f18, %f1, %f17;
	mul.ftz.f32 	%f11, %f13, %f19;
	.loc	20	448	0
	// %f10 = %f13 * (m[4]*%f2 + m[3]*%f3 + m[5]*%f1)  -> second output component.
	ld.global.f32 	%f20, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f21, %f20, %f2;
	ld.global.f32 	%f22, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f23, %f22, %f3, %f21;
	ld.global.f32 	%f24, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f25, %f24, %f1, %f23;
	mul.ftz.f32 	%f10, %f13, %f25;
	.loc	20	449	0
	// %f9 = %f13 * (m[7]*%f2 + m[6]*%f3 + m[8]*%f1)  -> first output component.
	ld.global.f32 	%f26, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f27, %f26, %f2;
	ld.global.f32 	%f28, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f29, %f28, %f3, %f27;
	ld.global.f32 	%f30, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f31, %f30, %f1, %f29;
	mul.ftz.f32 	%f9, %f13, %f31;
$Lt_74_196866:
	.loc	22	184	0
	// %rd7 = inDestPitch * y + x (same 32-bit index arithmetic as the source side).
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 -> take the 32-bit float store path.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p4, %r31, %r32;
	@%p4 bra 	$Lt_74_197634;
	.loc	19	126	0
	// Half-float destination: round {%f9,%f10,%f11,%f5} to f16 and store them
	// at inDest + 8 * index.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r34, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r35, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f5;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	184	0
	bra.uni 	$Lt_74_197378;
$Lt_74_197634:
	.loc	19	126	0
	// Float destination: store {%f9,%f10,%f11,%f5} at inDest + 16 * index.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	st.global.v4.f32 	[%rd12+0], {%f9,%f10,%f11,%f5};
$Lt_74_197378:
$Lt_74_195842:
	.loc	22	184	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// Per-pixel conversion kernel: each thread handles the pixel at
	//   x = %ctaid.x * %ntid.x + %tid.x,   y = %ctaid.y * %ntid.y + %tid.y
	// and exits without touching memory unless inWidth > x and inHeight > y.
	// With m[i] the f32 at byte offset 4*i of the global k601YPbPr_To_RGB32f and
	// (c0,c1,c2) the first three source components (the fourth is discarded):
	//   out0 = m[6]*c2 + m[7]*c1 + m[8]*c0
	//   out1 = m[3]*c2 + m[4]*c1 + m[5]*c0
	//   out2 = m[0]*c2 + m[1]*c1 + m[2]*c0
	//   out3 = 1.0
	// Going by the names, m is presumably a row-major 3x3 BT.601 YPbPr->RGB matrix,
	// the source is V,U,Y,X and the output is B,G,R,A - confirm against the host code.
	// in*DeviceFormat == 0 selects 4 x 16-bit half floats per pixel (8 bytes),
	// any other value selects 4 x f32 per pixel (16 bytes).
	// inSrcPitch / inDestPitch are row strides counted in pixels (they are scaled
	// by the pixel size only after pitch * y + x has been formed).
	.entry PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<37>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<37>;
	.reg .pred %p<5>;
	.loc	22	185	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// %r8 = x and %r10 = y: the pixel coordinates owned by this thread.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check with signed compares: %r17 is non-zero only when
	// inWidth > x and inHeight > y; otherwise branch straight to exit.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_75_195330;
	// %rd1 = inSrcPitch * y + x, evaluated in 32 bits and then sign-extended,
	// so the pixel index itself has to fit in a signed 32-bit value.
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	// inSrcDeviceFormat != 0 -> take the 32-bit float load path.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_75_196098;
	.loc	19	115	0
	// Half-float source: load four u16 from inSrc + 8 * index, drop the last
	// one ("_" sink) and widen the first three to f32 in %f1..%f3.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,_}, [%rd4+0];
	.loc	22	185	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	bra.uni 	$Lt_75_195842;
$Lt_75_196098:
	// Float source: load four f32 from inSrc + 16 * index, dropping the last.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,_}, [%rd6+0];
$Lt_75_195842:
	// %f9 = m[7]*%f2 + m[6]*%f3 + m[8]*%f1: the first output component, computed
	// once here because both store paths below use it.
	ld.global.f32 	%f4, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f5, %f4, %f2;
	ld.global.f32 	%f6, [k601YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f7, %f6, %f3, %f5;
	ld.global.f32 	%f8, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f9, %f8, %f1, %f7;
	// %rd7 = inDestPitch * y + x (same 32-bit index arithmetic as the source side).
	ld.param.s32 	%r27, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r28, %r27, %r10;
	add.s32 	%r29, %r8, %r28;
	cvt.s64.s32 	%rd7, %r29;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 -> take the 32-bit float store path.
	ld.param.s32 	%r30, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r31, 0;
	setp.ne.s32 	%p3, %r30, %r31;
	@%p3 bra 	$Lt_75_196610;
	.loc	19	126	0
	// Half-float destination at inDest + 8 * index. Each component is rounded
	// to f16 as soon as it is available.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r32, %b1; }
	// Second output component: %f15 = m[4]*%f2 + m[3]*%f3 + m[5]*%f1.
	ld.global.f32 	%f10, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f11, %f10, %f2;
	ld.global.f32 	%f12, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f13, %f12, %f3, %f11;
	ld.global.f32 	%f14, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f15, %f14, %f1, %f13;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f15;
	mov.b32		%r33, %b1; }
	// Third output component: %f21 = m[1]*%f2 + m[0]*%f3 + m[2]*%f1.
	ld.global.f32 	%f16, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f17, %f16, %f2;
	ld.global.f32 	%f18, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f19, %f18, %f3, %f17;
	ld.global.f32 	%f20, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f21, %f20, %f1, %f19;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f21;
	mov.b32		%r34, %b1; }
	// Fourth output component is the constant 1.0.
	mov.f32 	%f22, 0f3f800000;    	// 1
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f22;
	mov.b32		%r35, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r32,%r33,%r34,%r35};
	.loc	22	185	0
	bra.uni 	$Lt_75_196354;
$Lt_75_196610:
	.loc	19	126	0
	// Float destination at inDest + 16 * index; same two dot products as the
	// half-float path (%f28 = second component, %f34 = third component).
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.f32 	%f23, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f24, %f23, %f2;
	ld.global.f32 	%f25, [k601YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f26, %f25, %f3, %f24;
	ld.global.f32 	%f27, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f28, %f27, %f1, %f26;
	ld.global.f32 	%f29, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f30, %f29, %f2;
	ld.global.f32 	%f31, [k601YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f32, %f31, %f3, %f30;
	ld.global.f32 	%f33, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f34, %f33, %f1, %f32;
	mov.f32 	%f35, 0f3f800000;    	// 1
	st.global.v4.f32 	[%rd12+0], {%f9,%f28,%f34,%f35};
$Lt_75_196354:
$Lt_75_195330:
	.loc	22	185	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// -------------------------------------------------------------------------
	// Entry kernel (compiler-generated PTX): one thread converts one pixel.
	//
	// Thread coordinates:
	//   x = ctaid.x * ntid.x + tid.x        (%r8)
	//   y = ctaid.y * ntid.y + tid.y        (%r10)
	// The thread exits without touching memory unless inWidth > x and
	// inHeight > y (both compares are signed).
	//
	// Parameters:
	//   inSrc / inDest                 64-bit base addresses used for the
	//                                  ld.global / st.global accesses.
	//   inSrcPitch / inDestPitch       row pitch in pixels (the index
	//                                  pitch * y + x is scaled by the pixel
	//                                  size afterwards, not before).
	//   inSrcDeviceFormat /
	//   inDestDeviceFormat             0     -> 4 x 16-bit half-float components
	//                                           (8 bytes per pixel)
	//                                  != 0  -> 4 x f32 components
	//                                           (16 bytes per pixel)
	//   inWidth / inHeight             bounds for the x / y check above.
	//
	// Computation, with c0..c3 the four source components in memory order
	// (%f1..%f4) and k[i] the f32 at byte offset 4*i of the global symbol
	// k709YPbPr_To_RGB32f (declared outside this block):
	//   out0 = k[6]*c2 + k[7]*c1 + k[8]*c0
	//   out1 = k[3]*c2 + k[4]*c1 + k[5]*c0
	//   out2 = k[0]*c2 + k[1]*c1 + k[2]*c0
	//   out3 = c3                            (passed through unchanged)
	// Presumably c0..c3 are V,U,Y,A and out0..out3 are B,G,R,A, going by the
	// kernel name -- TODO confirm against the original source.
	// -------------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<36>;
	.reg .pred %p<5>;
	.loc	22	186	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// --- pixel coordinates: %r8 = x, %r10 = y ---
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// --- bounds check: %r17 is non-zero only when inWidth > x AND inHeight > y ---
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// out-of-range threads jump straight to exit
	@%p1 bra 	$Lt_76_195842;
	// --- source pixel index: %rd1 = sign-extended (inSrcPitch * y + x), computed in 32 bits ---
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	// inSrcDeviceFormat != 0 -> take the f32 load path
	@%p2 bra 	$Lt_76_196610;
	.loc	19	115	0
	// --- half-float source: 8 bytes per pixel, each component widened to f32 ---
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	186	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_76_196354;
$Lt_76_196610:
	// --- f32 source: 16 bytes per pixel, loaded directly into %f1..%f4 ---
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_76_196354:
	// --- out0 (%f10) = k[7]*c1 + k[6]*c2 + k[8]*c0; computed before the format branch, used by both store paths ---
	ld.global.f32 	%f5, [k709YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f6, %f5, %f2;
	ld.global.f32 	%f7, [k709YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f8, %f7, %f3, %f6;
	ld.global.f32 	%f9, [k709YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f10, %f9, %f1, %f8;
	// --- destination pixel index: %rd7 = sign-extended (inDestPitch * y + x) ---
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p3, %r31, %r32;
	// inDestDeviceFormat != 0 -> take the f32 store path
	@%p3 bra 	$Lt_76_197122;
	.loc	19	126	0
	// --- half-float destination: compute out1/out2, round each result to f16, store 8 bytes ---
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r33, %b1; }
	// out1 (%f16) = k[4]*c1 + k[3]*c2 + k[5]*c0
	ld.global.f32 	%f11, [k709YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f12, %f11, %f2;
	ld.global.f32 	%f13, [k709YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f14, %f13, %f3, %f12;
	ld.global.f32 	%f15, [k709YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f16, %f15, %f1, %f14;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f16;
	mov.b32		%r34, %b1; }
	// out2 (%f22) = k[1]*c1 + k[0]*c2 + k[2]*c0
	ld.global.f32 	%f17, [k709YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f18, %f17, %f2;
	ld.global.f32 	%f19, [k709YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f20, %f19, %f3, %f18;
	ld.global.f32 	%f21, [k709YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f22, %f21, %f1, %f20;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f22;
	mov.b32		%r35, %b1; }
	// out3 = c3, converted to f16 without any other change
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	186	0
	bra.uni 	$Lt_76_196866;
$Lt_76_197122:
	.loc	19	126	0
	// --- f32 destination: same out1/out2 arithmetic as above, stored as 16 bytes ---
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	// out1 (%f28) = k[4]*c1 + k[3]*c2 + k[5]*c0
	ld.global.f32 	%f23, [k709YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f24, %f23, %f2;
	ld.global.f32 	%f25, [k709YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f26, %f25, %f3, %f24;
	ld.global.f32 	%f27, [k709YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f28, %f27, %f1, %f26;
	// out2 (%f34) = k[1]*c1 + k[0]*c2 + k[2]*c0
	ld.global.f32 	%f29, [k709YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f30, %f29, %f2;
	ld.global.f32 	%f31, [k709YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f32, %f31, %f3, %f30;
	ld.global.f32 	%f33, [k709YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f34, %f33, %f1, %f32;
	st.global.v4.f32 	[%rd12+0], {%f10,%f28,%f34,%f4};
$Lt_76_196866:
$Lt_76_195842:
	.loc	22	186	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_VUYA_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// -------------------------------------------------------------------------
	// Entry kernel (compiler-generated PTX): one thread converts one pixel.
	//
	// Thread coordinates:
	//   x = ctaid.x * ntid.x + tid.x        (%r8)
	//   y = ctaid.y * ntid.y + tid.y        (%r10)
	// The thread exits without touching memory unless inWidth > x and
	// inHeight > y (both compares are signed).
	//
	// Parameters:
	//   inSrc / inDest                 64-bit base addresses used for the
	//                                  ld.global / st.global accesses.
	//   inSrcPitch / inDestPitch       row pitch in pixels (index = pitch*y + x,
	//                                  scaled by the pixel size afterwards).
	//   inSrcDeviceFormat /
	//   inDestDeviceFormat             0     -> 4 x 16-bit half-float components
	//                                  != 0  -> 4 x f32 components
	//   inWidth / inHeight             bounds for the x / y check above.
	//
	// Computation, with c0..c3 the four source components in memory order
	// (%f1..%f4) and k[i] the f32 at byte offset 4*i of the global symbol
	// k709YPbPr_To_RGB32f (declared outside this block):
	//   if (c3 - 8e-6 <= 0):   out0 = out1 = out2 = out3 = 0
	//   else, with inv = 1 / c3 (approximate divide):
	//     out0 = inv * (k[6]*c2 + k[7]*c1 + k[8]*c0)
	//     out1 = inv * (k[3]*c2 + k[4]*c1 + k[5]*c0)
	//     out2 = inv * (k[0]*c2 + k[1]*c1 + k[2]*c0)
	//     out3 = c3
	// Presumably c3 is a premultiplied alpha that is being divided back out
	// (the "P" in the kernel name) -- TODO confirm against the original source.
	// -------------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<33>;
	.reg .pred %p<6>;
	.loc	22	187	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// --- pixel coordinates: %r8 = x, %r10 = y ---
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// --- bounds check: %r17 is non-zero only when inWidth > x AND inHeight > y ---
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// out-of-range threads jump straight to exit
	@%p1 bra 	$Lt_77_196098;
	// --- source pixel index: %rd1 = sign-extended (inSrcPitch * y + x), computed in 32 bits ---
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	// inSrcDeviceFormat != 0 -> take the f32 load path
	@%p2 bra 	$Lt_77_196866;
	.loc	19	115	0
	// --- half-float source: 8 bytes per pixel, each component widened to f32 ---
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	187	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_77_196610;
$Lt_77_196866:
	// --- f32 source: 16 bytes per pixel, loaded directly into %f1..%f4 ---
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_77_196610:
	.loc	20	433	0
	// %f5 is the fourth output component; it starts as c3 and is zeroed below when c3 is too small
	mov.f32 	%f5, %f4;
	.loc	20	435	0
	// --- threshold test: (c3 - 8e-6) <= 0 selects the all-zero result ---
	mov.f32 	%f6, 0fb70637bd;     	// -8e-006
	add.ftz.f32 	%f7, %f4, %f6;
	mov.f32 	%f8, 0f00000000;     	// 0
	setp.le.ftz.f32 	%p3, %f7, %f8;
	@!%p3 bra 	$LDWendi__Z19MaxUnsignedBitValuei_254_7;
	// c3 at or below the threshold: every output component becomes 0 and the divide is skipped
	mov.f32 	%f9, 0f00000000;     	// 0
	mov.f32 	%f10, 0f00000000;    	// 0
	mov.f32 	%f11, 0f00000000;    	// 0
	mov.f32 	%f5, 0f00000000;     	// 0
	bra.uni 	$Lt_77_197122;
$LDWendi__Z19MaxUnsignedBitValuei_254_7:
	// Compiler-generated label name; the code that follows is the c3-above-threshold path.
	.loc	20	447	0
	// %f13 = 1 / c3 (approximate), shared by the three products below
	mov.f32 	%f12, 0f3f800000;    	// 1
	div.approx.ftz.f32 	%f13, %f12, %f4;
	// out2 (%f11) = inv * (k[1]*c1 + k[0]*c2 + k[2]*c0)
	ld.global.f32 	%f14, [k709YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f15, %f14, %f2;
	ld.global.f32 	%f16, [k709YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f17, %f16, %f3, %f15;
	ld.global.f32 	%f18, [k709YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f19, %f18, %f1, %f17;
	mul.ftz.f32 	%f11, %f13, %f19;
	.loc	20	448	0
	// out1 (%f10) = inv * (k[4]*c1 + k[3]*c2 + k[5]*c0)
	ld.global.f32 	%f20, [k709YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f21, %f20, %f2;
	ld.global.f32 	%f22, [k709YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f23, %f22, %f3, %f21;
	ld.global.f32 	%f24, [k709YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f25, %f24, %f1, %f23;
	mul.ftz.f32 	%f10, %f13, %f25;
	.loc	20	449	0
	// out0 (%f9) = inv * (k[7]*c1 + k[6]*c2 + k[8]*c0)
	ld.global.f32 	%f26, [k709YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f27, %f26, %f2;
	ld.global.f32 	%f28, [k709YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f29, %f28, %f3, %f27;
	ld.global.f32 	%f30, [k709YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f31, %f30, %f1, %f29;
	mul.ftz.f32 	%f9, %f13, %f31;
$Lt_77_197122:
	.loc	22	187	0
	// --- destination pixel index: %rd7 = sign-extended (inDestPitch * y + x) ---
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p4, %r31, %r32;
	// inDestDeviceFormat != 0 -> take the f32 store path
	@%p4 bra 	$Lt_77_197890;
	.loc	19	126	0
	// --- half-float destination: round {out0,out1,out2,out3} to f16 and store 8 bytes ---
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r34, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r35, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f5;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	187	0
	bra.uni 	$Lt_77_197634;
$Lt_77_197890:
	.loc	19	126	0
	// --- f32 destination: store {out0,out1,out2,out3} as 16 bytes ---
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	st.global.v4.f32 	[%rd12+0], {%f9,%f10,%f11,%f5};
$Lt_77_197634:
$Lt_77_196098:
	.loc	22	187	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_VUYP_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// -------------------------------------------------------------------------
	// Entry kernel (compiler-generated PTX): one thread converts one pixel.
	//
	// Thread coordinates:
	//   x = ctaid.x * ntid.x + tid.x        (%r8)
	//   y = ctaid.y * ntid.y + tid.y        (%r10)
	// The thread exits without touching memory unless inWidth > x and
	// inHeight > y (both compares are signed).
	//
	// Parameters:
	//   inSrc / inDest                 64-bit base addresses used for the
	//                                  ld.global / st.global accesses.
	//   inSrcPitch / inDestPitch       row pitch in pixels (index = pitch*y + x,
	//                                  scaled by the pixel size afterwards).
	//   inSrcDeviceFormat /
	//   inDestDeviceFormat             0     -> 4 x 16-bit half-float components
	//                                  != 0  -> 4 x f32 components
	//   inWidth / inHeight             bounds for the x / y check above.
	//
	// Computation, with c0..c2 the first three source components in memory
	// order (%f1..%f3; the fourth is loaded into the `_` sink and ignored) and
	// k[i] the f32 at byte offset 4*i of the global symbol k709YPbPr_To_RGB32f
	// (declared outside this block):
	//   out0 = k[6]*c2 + k[7]*c1 + k[8]*c0
	//   out1 = k[3]*c2 + k[4]*c1 + k[5]*c0
	//   out2 = k[0]*c2 + k[1]*c1 + k[2]*c0
	//   out3 = 1.0                           (constant)
	// Presumably c0..c2 are V,U,Y and out0..out3 are B,G,R,A, going by the
	// kernel name -- TODO confirm against the original source.
	// -------------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<37>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<37>;
	.reg .pred %p<5>;
	.loc	22	188	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// --- pixel coordinates: %r8 = x, %r10 = y ---
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// --- bounds check: %r17 is non-zero only when inWidth > x AND inHeight > y ---
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// out-of-range threads jump straight to exit
	@%p1 bra 	$Lt_78_195586;
	// --- source pixel index: %rd1 = sign-extended (inSrcPitch * y + x), computed in 32 bits ---
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	// inSrcDeviceFormat != 0 -> take the f32 load path
	@%p2 bra 	$Lt_78_196354;
	.loc	19	115	0
	// --- half-float source: 8 bytes per pixel; the fourth component is discarded via `_` ---
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,_}, [%rd4+0];
	.loc	22	188	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	bra.uni 	$Lt_78_196098;
$Lt_78_196354:
	// --- f32 source: 16 bytes per pixel; the fourth component is discarded via `_` ---
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,_}, [%rd6+0];
$Lt_78_196098:
	// --- out0 (%f9) = k[7]*c1 + k[6]*c2 + k[8]*c0; computed before the format branch, used by both store paths ---
	ld.global.f32 	%f4, [k709YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f5, %f4, %f2;
	ld.global.f32 	%f6, [k709YPbPr_To_RGB32f+24];
	fma.rn.ftz.f32 	%f7, %f6, %f3, %f5;
	ld.global.f32 	%f8, [k709YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f9, %f8, %f1, %f7;
	// --- destination pixel index: %rd7 = sign-extended (inDestPitch * y + x) ---
	ld.param.s32 	%r27, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r28, %r27, %r10;
	add.s32 	%r29, %r8, %r28;
	cvt.s64.s32 	%rd7, %r29;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	ld.param.s32 	%r30, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r31, 0;
	setp.ne.s32 	%p3, %r30, %r31;
	// inDestDeviceFormat != 0 -> take the f32 store path
	@%p3 bra 	$Lt_78_196866;
	.loc	19	126	0
	// --- half-float destination: compute out1/out2, round each result to f16, store 8 bytes ---
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r32, %b1; }
	// out1 (%f15) = k[4]*c1 + k[3]*c2 + k[5]*c0
	ld.global.f32 	%f10, [k709YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f11, %f10, %f2;
	ld.global.f32 	%f12, [k709YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f13, %f12, %f3, %f11;
	ld.global.f32 	%f14, [k709YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f15, %f14, %f1, %f13;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f15;
	mov.b32		%r33, %b1; }
	// out2 (%f21) = k[1]*c1 + k[0]*c2 + k[2]*c0
	ld.global.f32 	%f16, [k709YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f17, %f16, %f2;
	ld.global.f32 	%f18, [k709YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f19, %f18, %f3, %f17;
	ld.global.f32 	%f20, [k709YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f21, %f20, %f1, %f19;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f21;
	mov.b32		%r34, %b1; }
	// out3 = constant 1.0, converted to f16
	mov.f32 	%f22, 0f3f800000;    	// 1
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f22;
	mov.b32		%r35, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r32,%r33,%r34,%r35};
	.loc	22	188	0
	bra.uni 	$Lt_78_196610;
$Lt_78_196866:
	.loc	19	126	0
	// --- f32 destination: same out1/out2 arithmetic as above, stored as 16 bytes ---
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	// out1 (%f28) = k[4]*c1 + k[3]*c2 + k[5]*c0
	ld.global.f32 	%f23, [k709YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f24, %f23, %f2;
	ld.global.f32 	%f25, [k709YPbPr_To_RGB32f+12];
	fma.rn.ftz.f32 	%f26, %f25, %f3, %f24;
	ld.global.f32 	%f27, [k709YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f28, %f27, %f1, %f26;
	// out2 (%f34) = k[1]*c1 + k[0]*c2 + k[2]*c0
	ld.global.f32 	%f29, [k709YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f30, %f29, %f2;
	ld.global.f32 	%f31, [k709YPbPr_To_RGB32f+0];
	fma.rn.ftz.f32 	%f32, %f31, %f3, %f30;
	ld.global.f32 	%f33, [k709YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f34, %f33, %f1, %f32;
	// out3 = constant 1.0
	mov.f32 	%f35, 0f3f800000;    	// 1
	st.global.v4.f32 	[%rd12+0], {%f9,%f28,%f34,%f35};
$Lt_78_196610:
$Lt_78_195586:
	.loc	22	188	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_VUYX_4444_32f_709_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// -------------------------------------------------------------------------
	// Entry kernel (compiler-generated PTX): one thread converts one pixel.
	//
	// Thread coordinates:
	//   x = ctaid.x * ntid.x + tid.x        (%r8)
	//   y = ctaid.y * ntid.y + tid.y        (%r10)
	// The thread exits without touching memory unless inWidth > x and
	// inHeight > y (both compares are signed).
	//
	// Parameters:
	//   inSrc / inDest                 64-bit base addresses used for the
	//                                  ld.global / st.global accesses.
	//   inSrcPitch / inDestPitch       row pitch in pixels (index = pitch*y + x,
	//                                  scaled by the pixel size afterwards).
	//   inSrcDeviceFormat /
	//   inDestDeviceFormat             0     -> 4 x 16-bit half-float components
	//                                  != 0  -> 4 x f32 components
	//   inWidth / inHeight             bounds for the x / y check above.
	//
	// Computation, with c0..c3 the four source components in memory order
	// (%f1..%f4) and
	//   g(v) = -ex2(0.45 * lg2(-v))   when v < 0
	//   g(v) =  ex2(0.45 * lg2( v))   otherwise
	// (an approximate signed v^0.45 built from lg2.approx / ex2.approx):
	//   out0 = g(c0), out1 = g(c1), out2 = g(c2), out3 = c3 (unchanged)
	// Presumably this is a linear-to-gamma transfer on B,G,R with alpha passed
	// through, going by the kernel name -- TODO confirm against the original source.
	// -------------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<36>;
	.reg .pred %p<8>;
	.loc	22	190	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// --- pixel coordinates: %r8 = x, %r10 = y ---
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// --- bounds check: %r17 is non-zero only when inWidth > x AND inHeight > y ---
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// out-of-range threads jump straight to exit
	@%p1 bra 	$Lt_79_194818;
	// --- source pixel index: %rd1 = sign-extended (inSrcPitch * y + x), computed in 32 bits ---
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	// inSrcDeviceFormat != 0 -> take the f32 load path
	@%p2 bra 	$Lt_79_195586;
	.loc	19	115	0
	// --- half-float source: 8 bytes per pixel, each component widened to f32 ---
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	190	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_79_195330;
$Lt_79_195586:
	// --- f32 source: 16 bytes per pixel, loaded directly into %f1..%f4 ---
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_79_195330:
	.loc	20	481	0
	// --- %f11 = g(c2): negative inputs are negated, raised to 0.45, and negated back ---
	mov.f32 	%f5, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f3, %f5;
	@!%p3 bra 	$Lt_79_195842;
	.loc	20	372	0
	neg.ftz.f32 	%f6, %f3;
	lg2.approx.ftz.f32 	%f7, %f6;
	mov.f32 	%f8, 0f3ee66666;     	// 0.45
	mul.ftz.f32 	%f9, %f7, %f8;
	ex2.approx.ftz.f32 	%f10, %f9;
	neg.ftz.f32 	%f11, %f10;
	bra.uni 	$LDWendi___log2f_256_71;
$Lt_79_195842:
	.loc	20	374	0
	// c2 not below zero: ex2(0.45 * lg2(c2)) directly
	lg2.approx.ftz.f32 	%f12, %f3;
	mov.f32 	%f13, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f11, %f14;
$LDWendi___log2f_256_71:
	.loc	20	483	0
	// --- %f21 = g(c1), same two-way sign handling ---
	mov.f32 	%f15, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f2, %f15;
	@!%p4 bra 	$Lt_79_196354;
	.loc	20	372	0
	neg.ftz.f32 	%f16, %f2;
	lg2.approx.ftz.f32 	%f17, %f16;
	mov.f32 	%f18, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f20, %f19;
	neg.ftz.f32 	%f21, %f20;
	bra.uni 	$LDWendi___log2f_256_69;
$Lt_79_196354:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f22, %f2;
	mov.f32 	%f23, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f21, %f24;
$LDWendi___log2f_256_69:
	.loc	20	483	0
	// --- %f31 = g(c0), same two-way sign handling ---
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f1, %f25;
	@!%p5 bra 	$Lt_79_196866;
	.loc	20	372	0
	neg.ftz.f32 	%f26, %f1;
	lg2.approx.ftz.f32 	%f27, %f26;
	mov.f32 	%f28, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f30, %f29;
	neg.ftz.f32 	%f31, %f30;
	bra.uni 	$LDWendi___log2f_256_67;
$Lt_79_196866:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f32, %f1;
	mov.f32 	%f33, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f31, %f34;
$LDWendi___log2f_256_67:
	.loc	22	190	0
	// --- destination pixel index: %rd7 = sign-extended (inDestPitch * y + x) ---
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p6, %r31, %r32;
	// inDestDeviceFormat != 0 -> take the f32 store path
	@%p6 bra 	$Lt_79_197634;
	.loc	19	126	0
	// --- half-float destination: round {g(c0), g(c1), g(c2), c3} to f16 and store 8 bytes ---
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f31;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f21;
	mov.b32		%r34, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r35, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	190	0
	bra.uni 	$Lt_79_197378;
$Lt_79_197634:
	.loc	19	126	0
	// --- f32 destination: store {g(c0), g(c1), g(c2), c3} as 16 bytes ---
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	st.global.v4.f32 	[%rd12+0], {%f31,%f21,%f11,%f4};
$Lt_79_197378:
$Lt_79_194818:
	.loc	22	190	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// -------------------------------------------------------------------------
	// Entry kernel (compiler-generated PTX): one thread converts one pixel.
	//
	// Thread coordinates:
	//   x = ctaid.x * ntid.x + tid.x        (%r8)
	//   y = ctaid.y * ntid.y + tid.y        (%r10)
	// The thread exits without touching memory unless inWidth > x and
	// inHeight > y (both compares are signed).
	//
	// Parameters:
	//   inSrc / inDest                 64-bit base addresses used for the
	//                                  ld.global / st.global accesses.
	//   inSrcPitch / inDestPitch       row pitch in pixels (index = pitch*y + x,
	//                                  scaled by the pixel size afterwards).
	//   inSrcDeviceFormat /
	//   inDestDeviceFormat             0     -> 4 x 16-bit half-float components
	//                                  != 0  -> 4 x f32 components
	//   inWidth / inHeight             bounds for the x / y check above.
	//
	// Computation, with c0..c3 the four source components in memory order
	// (%f1..%f4) and
	//   g(v) = -ex2(0.45 * lg2(-v))   when v < 0
	//   g(v) =  ex2(0.45 * lg2( v))   otherwise
	// (an approximate signed v^0.45 built from lg2.approx / ex2.approx):
	//   if (c3 - 8e-6 <= 0):   out0 = out1 = out2 = out3 = 0
	//   else, with inv = 1 / c3 (approximate divide):
	//     out0 = g(c0) * inv, out1 = g(c1) * inv, out2 = g(c2) * inv, out3 = c3
	//
	// NOTE(review): g() is applied to the components before they are divided by
	// c3. If c3 is a premultiplied alpha (as the "P" in the kernel name suggests),
	// confirm with the original source that this ordering is intended.
	// -------------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<45>;
	.reg .pred %p<9>;
	.loc	22	191	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// --- pixel coordinates: %r8 = x, %r10 = y ---
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// --- bounds check: %r17 is non-zero only when inWidth > x AND inHeight > y ---
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// out-of-range threads jump straight to exit
	@%p1 bra 	$Lt_80_195074;
	// --- source pixel index: %rd1 = sign-extended (inSrcPitch * y + x), computed in 32 bits ---
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	// inSrcDeviceFormat != 0 -> take the f32 load path
	@%p2 bra 	$Lt_80_195842;
	.loc	19	115	0
	// --- half-float source: 8 bytes per pixel, each component widened to f32 ---
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	191	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_80_195586;
$Lt_80_195842:
	// --- f32 source: 16 bytes per pixel, loaded directly into %f1..%f4 ---
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_80_195586:
	.loc	20	481	0
	// --- %f11 = g(c2): negative inputs are negated, raised to 0.45, and negated back ---
	mov.f32 	%f5, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f3, %f5;
	@!%p3 bra 	$Lt_80_196098;
	.loc	20	372	0
	neg.ftz.f32 	%f6, %f3;
	lg2.approx.ftz.f32 	%f7, %f6;
	mov.f32 	%f8, 0f3ee66666;     	// 0.45
	mul.ftz.f32 	%f9, %f7, %f8;
	ex2.approx.ftz.f32 	%f10, %f9;
	neg.ftz.f32 	%f11, %f10;
	bra.uni 	$LDWendi___log2f_257_71;
$Lt_80_196098:
	.loc	20	374	0
	// c2 not below zero: ex2(0.45 * lg2(c2)) directly
	lg2.approx.ftz.f32 	%f12, %f3;
	mov.f32 	%f13, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f11, %f14;
$LDWendi___log2f_257_71:
	.loc	20	483	0
	// --- %f21 = g(c1), same two-way sign handling ---
	mov.f32 	%f15, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f2, %f15;
	@!%p4 bra 	$Lt_80_196610;
	.loc	20	372	0
	neg.ftz.f32 	%f16, %f2;
	lg2.approx.ftz.f32 	%f17, %f16;
	mov.f32 	%f18, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f20, %f19;
	neg.ftz.f32 	%f21, %f20;
	bra.uni 	$LDWendi___log2f_257_69;
$Lt_80_196610:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f22, %f2;
	mov.f32 	%f23, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f21, %f24;
$LDWendi___log2f_257_69:
	.loc	20	483	0
	// --- %f31 = g(c0), same two-way sign handling ---
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f1, %f25;
	@!%p5 bra 	$Lt_80_197122;
	.loc	20	372	0
	neg.ftz.f32 	%f26, %f1;
	lg2.approx.ftz.f32 	%f27, %f26;
	mov.f32 	%f28, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f30, %f29;
	neg.ftz.f32 	%f31, %f30;
	bra.uni 	$LDWendi___log2f_257_67;
$Lt_80_197122:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f32, %f1;
	mov.f32 	%f33, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f31, %f34;
$LDWendi___log2f_257_67:
	.loc	20	433	0
	// %f35 is the fourth output component; it starts as c3 and is zeroed below when c3 is too small
	mov.f32 	%f35, %f4;
	.loc	20	435	0
	// --- threshold test: (c3 - 8e-6) <= 0 selects the all-zero result ---
	mov.f32 	%f36, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f37, %f4, %f36;
	mov.f32 	%f38, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p6, %f37, %f38;
	@!%p6 bra 	$LDWendi__Z19MaxUnsignedBitValuei_257_7;
	// c3 at or below the threshold: every output component becomes 0 and the divide is skipped
	mov.f32 	%f39, 0f00000000;    	// 0
	mov.f32 	%f40, 0f00000000;    	// 0
	mov.f32 	%f41, 0f00000000;    	// 0
	mov.f32 	%f35, 0f00000000;    	// 0
	bra.uni 	$Lt_80_197634;
$LDWendi__Z19MaxUnsignedBitValuei_257_7:
	// Compiler-generated label name; the code that follows is the c3-above-threshold path.
	.loc	20	447	0
	// %f43 = 1 / c3 (approximate); the g() results computed above are scaled by it
	mov.f32 	%f42, 0f3f800000;    	// 1
	div.approx.ftz.f32 	%f43, %f42, %f4;
	mul.ftz.f32 	%f41, %f11, %f43;
	.loc	20	448	0
	mul.ftz.f32 	%f40, %f21, %f43;
	.loc	20	449	0
	mul.ftz.f32 	%f39, %f31, %f43;
$Lt_80_197634:
	.loc	22	191	0
	// --- destination pixel index: %rd7 = sign-extended (inDestPitch * y + x) ---
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p7, %r31, %r32;
	// inDestDeviceFormat != 0 -> take the f32 store path
	@%p7 bra 	$Lt_80_198402;
	.loc	19	126	0
	// --- half-float destination: round {out0,out1,out2,out3} to f16 and store 8 bytes ---
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f39;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f40;
	mov.b32		%r34, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f41;
	mov.b32		%r35, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f35;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	191	0
	bra.uni 	$Lt_80_198146;
$Lt_80_198402:
	.loc	19	126	0
	// --- f32 destination: store {out0,out1,out2,out3} as 16 bytes ---
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	st.global.v4.f32 	[%rd12+0], {%f39,%f40,%f41,%f35};
$Lt_80_198146:
$Lt_80_195074:
	.loc	22	191	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRP_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// ---------------------------------------------------------------------
	// Kernel: BGRX_4444_32f_Linear -> BGRA_4444_32f pixel-format conversion.
	//
	// Each thread converts one pixel at
	//   x = ctaid.x * ntid.x + tid.x,  y = ctaid.y * ntid.y + tid.y
	// and exits without touching memory unless (inWidth > x) and (inHeight > y).
	//
	// Per pixel:
	//   1. Load the first three components (the fourth is discarded with `_`).
	//   2. Map each component c to sign(c) * 2^(0.45 * log2(|c|)) using the
	//      approximate lg2/ex2 instructions (i.e. a sign-preserving |c|^0.45).
	//   3. Store the three results in the same component order, followed by a
	//      constant 1.0 as the fourth component.
	//
	// Parameters:
	//   inSrc               base address of the source pixels (global memory)
	//   inSrcPitch          source row pitch in pixels (it is multiplied by y and
	//                       added to x before being scaled to a byte offset)
	//   inSrcDeviceFormat   0: four 16-bit half-float components, 8 bytes/pixel;
	//                       non-zero: four f32 components, 16 bytes/pixel
	//   inDest              base address of the destination pixels (global memory)
	//   inDestPitch         destination row pitch in pixels
	//   inDestDeviceFormat  same encoding as inSrcDeviceFormat, for the output
	//   inWidth, inHeight   extent used for the per-thread bounds check
	//
	// Component names (B, G, R, X/A) are taken from the kernel name only; the
	// code itself just works on component positions 0..3.
	// ---------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<37>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<37>;
	.reg .pred %p<8>;
	.loc	22	192	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// --- Thread coordinates: %r8 = x, %r10 = y ---
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// --- Bounds check: set.gt yields all-ones/zero, neg turns that into 1/0,
	//     and the AND is non-zero only when inWidth > x and inHeight > y ---
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range threads jump straight to the exit.
	@%p1 bra 	$Lt_81_194562;
	// --- Source pixel index = inSrcPitch * y + x, computed in 32-bit signed
	//     arithmetic and only then sign-extended to 64 bits ---
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	// Choose the source layout: 0 -> 16-bit path below, otherwise the f32 path.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_81_195330;
	.loc	19	115	0
	// 16-bit source: 8 bytes per pixel; the fourth component is not kept.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,_}, [%rd4+0];
	.loc	22	192	0
	// Widen components 0..2 from f16 to f32 -> %f1, %f2, %f3.
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	bra.uni 	$Lt_81_195074;
$Lt_81_195330:
	// f32 source: 16 bytes per pixel; the fourth component is not kept.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,_}, [%rd6+0];
$Lt_81_195074:
	.loc	20	481	0
	// --- Component 2 (%f3) -> %f10: sign-preserving |c|^0.45 ---
	mov.f32 	%f4, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f3, %f4;
	@!%p3 bra 	$Lt_81_195586;
	.loc	20	372	0
	// Negative input: apply the curve to -c and negate the result.
	neg.ftz.f32 	%f5, %f3;
	lg2.approx.ftz.f32 	%f6, %f5;
	mov.f32 	%f7, 0f3ee66666;     	// 0.45
	mul.ftz.f32 	%f8, %f6, %f7;
	ex2.approx.ftz.f32 	%f9, %f8;
	neg.ftz.f32 	%f10, %f9;
	bra.uni 	$LDWendi___log2f_258_71;
$Lt_81_195586:
	.loc	20	374	0
	// Non-negative input: 2^(0.45 * log2(c)).
	lg2.approx.ftz.f32 	%f11, %f3;
	mov.f32 	%f12, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f13, %f11, %f12;
	ex2.approx.ftz.f32 	%f10, %f13;
$LDWendi___log2f_258_71:
	.loc	20	483	0
	// --- Component 1 (%f2) -> %f20: same curve ---
	mov.f32 	%f14, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f2, %f14;
	@!%p4 bra 	$Lt_81_196098;
	.loc	20	372	0
	neg.ftz.f32 	%f15, %f2;
	lg2.approx.ftz.f32 	%f16, %f15;
	mov.f32 	%f17, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f18, %f16, %f17;
	ex2.approx.ftz.f32 	%f19, %f18;
	neg.ftz.f32 	%f20, %f19;
	bra.uni 	$LDWendi___log2f_258_69;
$Lt_81_196098:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f21, %f2;
	mov.f32 	%f22, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f23, %f21, %f22;
	ex2.approx.ftz.f32 	%f20, %f23;
$LDWendi___log2f_258_69:
	.loc	20	483	0
	// --- Component 0 (%f1) -> %f30: same curve ---
	mov.f32 	%f24, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f1, %f24;
	@!%p5 bra 	$Lt_81_196610;
	.loc	20	372	0
	neg.ftz.f32 	%f25, %f1;
	lg2.approx.ftz.f32 	%f26, %f25;
	mov.f32 	%f27, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f28, %f26, %f27;
	ex2.approx.ftz.f32 	%f29, %f28;
	neg.ftz.f32 	%f30, %f29;
	bra.uni 	$LDWendi___log2f_258_67;
$Lt_81_196610:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f31, %f1;
	mov.f32 	%f32, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f33, %f31, %f32;
	ex2.approx.ftz.f32 	%f30, %f33;
$LDWendi___log2f_258_67:
	.loc	22	192	0
	// --- Destination pixel index = inDestPitch * y + x (32-bit, then widened) ---
	ld.param.s32 	%r27, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r28, %r27, %r10;
	add.s32 	%r29, %r8, %r28;
	cvt.s64.s32 	%rd7, %r29;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// Choose the destination layout: 0 -> 16-bit path below, otherwise f32.
	ld.param.s32 	%r30, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r31, 0;
	setp.ne.s32 	%p6, %r30, %r31;
	@%p6 bra 	$Lt_81_197378;
	.loc	19	126	0
	// 16-bit destination: 8 bytes per pixel, each value rounded to nearest f16.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f30;
	mov.b32		%r32, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r34, %b1; }
	// The fourth output component is the constant 1.0.
	mov.f32 	%f34, 0f3f800000;    	// 1
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f34;
	mov.b32		%r35, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r32,%r33,%r34,%r35};
	.loc	22	192	0
	bra.uni 	$Lt_81_197122;
$Lt_81_197378:
	.loc	19	126	0
	// f32 destination: 16 bytes per pixel, fourth component = 1.0.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	mov.f32 	%f35, 0f3f800000;    	// 1
	st.global.v4.f32 	[%rd12+0], {%f30,%f20,%f10,%f35};
$Lt_81_197122:
$Lt_81_194562:
	.loc	22	192	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRX_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// ---------------------------------------------------------------------
	// Kernel: ARGB_4444_32f_Linear -> BGRA_4444_32f pixel-format conversion.
	//
	// Each thread converts one pixel at
	//   x = ctaid.x * ntid.x + tid.x,  y = ctaid.y * ntid.y + tid.y
	// and exits without touching memory unless (inWidth > x) and (inHeight > y).
	//
	// Per pixel:
	//   1. Load all four source components c0..c3.
	//   2. Map c1, c2 and c3 to sign(c) * 2^(0.45 * log2(|c|)) using the
	//      approximate lg2/ex2 instructions (a sign-preserving |c|^0.45).
	//   3. Store { f(c3), f(c2), f(c1), c0 }: the three curved components in
	//      reversed order, followed by component 0 unchanged.
	//
	// Parameters:
	//   inSrc               base address of the source pixels (global memory)
	//   inSrcPitch          source row pitch in pixels (it is multiplied by y and
	//                       added to x before being scaled to a byte offset)
	//   inSrcDeviceFormat   0: four 16-bit half-float components, 8 bytes/pixel;
	//                       non-zero: four f32 components, 16 bytes/pixel
	//   inDest              base address of the destination pixels (global memory)
	//   inDestPitch         destination row pitch in pixels
	//   inDestDeviceFormat  same encoding as inSrcDeviceFormat, for the output
	//   inWidth, inHeight   extent used for the per-thread bounds check
	//
	// Component names (A, R, G, B) are taken from the kernel name only; the
	// code itself just works on component positions 0..3.
	// ---------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<36>;
	.reg .pred %p<8>;
	.loc	22	193	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// --- Thread coordinates: %r8 = x, %r10 = y ---
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// --- Bounds check: set.gt yields all-ones/zero, neg turns that into 1/0,
	//     and the AND is non-zero only when inWidth > x and inHeight > y ---
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range threads jump straight to the exit.
	@%p1 bra 	$Lt_82_195074;
	// --- Source pixel index = inSrcPitch * y + x, computed in 32-bit signed
	//     arithmetic and only then sign-extended to 64 bits ---
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	// Choose the source layout: 0 -> 16-bit path below, otherwise the f32 path.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_82_195842;
	.loc	19	115	0
	// 16-bit source: 8 bytes per pixel, all four components are loaded.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	193	0
	// Widen components 0..3 from f16 to f32 -> %f1..%f4.
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_82_195586;
$Lt_82_195842:
	// f32 source: 16 bytes per pixel.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_82_195586:
	.loc	20	481	0
	// --- Component 1 (%f2) -> %f11: sign-preserving |c|^0.45 ---
	mov.f32 	%f5, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f2, %f5;
	@!%p3 bra 	$Lt_82_196098;
	.loc	20	372	0
	// Negative input: apply the curve to -c and negate the result.
	neg.ftz.f32 	%f6, %f2;
	lg2.approx.ftz.f32 	%f7, %f6;
	mov.f32 	%f8, 0f3ee66666;     	// 0.45
	mul.ftz.f32 	%f9, %f7, %f8;
	ex2.approx.ftz.f32 	%f10, %f9;
	neg.ftz.f32 	%f11, %f10;
	bra.uni 	$LDWendi___log2f_259_71;
$Lt_82_196098:
	.loc	20	374	0
	// Non-negative input: 2^(0.45 * log2(c)).
	lg2.approx.ftz.f32 	%f12, %f2;
	mov.f32 	%f13, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f11, %f14;
$LDWendi___log2f_259_71:
	.loc	20	483	0
	// --- Component 2 (%f3) -> %f21: same curve ---
	mov.f32 	%f15, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f3, %f15;
	@!%p4 bra 	$Lt_82_196610;
	.loc	20	372	0
	neg.ftz.f32 	%f16, %f3;
	lg2.approx.ftz.f32 	%f17, %f16;
	mov.f32 	%f18, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f20, %f19;
	neg.ftz.f32 	%f21, %f20;
	bra.uni 	$LDWendi___log2f_259_69;
$Lt_82_196610:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f22, %f3;
	mov.f32 	%f23, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f21, %f24;
$LDWendi___log2f_259_69:
	.loc	20	483	0
	// --- Component 3 (%f4) -> %f31: same curve ---
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f4, %f25;
	@!%p5 bra 	$Lt_82_197122;
	.loc	20	372	0
	neg.ftz.f32 	%f26, %f4;
	lg2.approx.ftz.f32 	%f27, %f26;
	mov.f32 	%f28, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f30, %f29;
	neg.ftz.f32 	%f31, %f30;
	bra.uni 	$LDWendi___log2f_259_67;
$Lt_82_197122:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f32, %f4;
	mov.f32 	%f33, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f31, %f34;
$LDWendi___log2f_259_67:
	.loc	22	193	0
	// --- Destination pixel index = inDestPitch * y + x (32-bit, then widened) ---
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// Choose the destination layout: 0 -> 16-bit path below, otherwise f32.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p6, %r31, %r32;
	@%p6 bra 	$Lt_82_197890;
	.loc	19	126	0
	// 16-bit destination: 8 bytes per pixel, each value rounded to nearest f16.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f31;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f21;
	mov.b32		%r34, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r35, %b1; }
	// Source component 0 (%f1) is written last, without the power curve.
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f1;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	193	0
	bra.uni 	$Lt_82_197634;
$Lt_82_197890:
	.loc	19	126	0
	// f32 destination: 16 bytes per pixel, same component order as above.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	st.global.v4.f32 	[%rd12+0], {%f31,%f21,%f11,%f1};
$Lt_82_197634:
$Lt_82_195074:
	.loc	22	193	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_ARGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// ---------------------------------------------------------------------
	// Kernel: PRGB_4444_32f_Linear -> BGRA_4444_32f pixel-format conversion.
	//
	// Each thread converts one pixel at
	//   x = ctaid.x * ntid.x + tid.x,  y = ctaid.y * ntid.y + tid.y
	// and exits without touching memory unless (inWidth > x) and (inHeight > y).
	//
	// Per pixel:
	//   1. Load all four source components c0..c3.
	//   2. Map c1, c2 and c3 to sign(c) * 2^(0.45 * log2(|c|)) using the
	//      approximate lg2/ex2 instructions (a sign-preserving |c|^0.45).
	//   3. If (c0 - 8e-6) <= 0, the whole output pixel becomes 0.
	//      Otherwise each curved component is multiplied by 1 / c0.
	//   4. Store { f(c3)/c0, f(c2)/c0, f(c1)/c0, c0 }.
	//
	// NOTE(review): the power curve is applied to the components as loaded,
	// i.e. before the divide by component 0. If component 0 is a premultiplied
	// alpha (as the "P" in the kernel name suggests), confirm this ordering is
	// intended.
	//
	// Parameters:
	//   inSrc               base address of the source pixels (global memory)
	//   inSrcPitch          source row pitch in pixels (it is multiplied by y and
	//                       added to x before being scaled to a byte offset)
	//   inSrcDeviceFormat   0: four 16-bit half-float components, 8 bytes/pixel;
	//                       non-zero: four f32 components, 16 bytes/pixel
	//   inDest              base address of the destination pixels (global memory)
	//   inDestPitch         destination row pitch in pixels
	//   inDestDeviceFormat  same encoding as inSrcDeviceFormat, for the output
	//   inWidth, inHeight   extent used for the per-thread bounds check
	//
	// Component names (P, R, G, B) are taken from the kernel name only; the
	// code itself just works on component positions 0..3.
	// ---------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<45>;
	.reg .pred %p<9>;
	.loc	22	194	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// --- Thread coordinates: %r8 = x, %r10 = y ---
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// --- Bounds check: set.gt yields all-ones/zero, neg turns that into 1/0,
	//     and the AND is non-zero only when inWidth > x and inHeight > y ---
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range threads jump straight to the exit.
	@%p1 bra 	$Lt_83_195330;
	// --- Source pixel index = inSrcPitch * y + x, computed in 32-bit signed
	//     arithmetic and only then sign-extended to 64 bits ---
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	// Choose the source layout: 0 -> 16-bit path below, otherwise the f32 path.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_83_196098;
	.loc	19	115	0
	// 16-bit source: 8 bytes per pixel, all four components are loaded.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	194	0
	// Widen components 0..3 from f16 to f32 -> %f1..%f4.
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_83_195842;
$Lt_83_196098:
	// f32 source: 16 bytes per pixel.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_83_195842:
	.loc	20	481	0
	// --- Component 1 (%f2) -> %f11: sign-preserving |c|^0.45 ---
	mov.f32 	%f5, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f2, %f5;
	@!%p3 bra 	$Lt_83_196354;
	.loc	20	372	0
	// Negative input: apply the curve to -c and negate the result.
	neg.ftz.f32 	%f6, %f2;
	lg2.approx.ftz.f32 	%f7, %f6;
	mov.f32 	%f8, 0f3ee66666;     	// 0.45
	mul.ftz.f32 	%f9, %f7, %f8;
	ex2.approx.ftz.f32 	%f10, %f9;
	neg.ftz.f32 	%f11, %f10;
	bra.uni 	$LDWendi___log2f_260_71;
$Lt_83_196354:
	.loc	20	374	0
	// Non-negative input: 2^(0.45 * log2(c)).
	lg2.approx.ftz.f32 	%f12, %f2;
	mov.f32 	%f13, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f11, %f14;
$LDWendi___log2f_260_71:
	.loc	20	483	0
	// --- Component 2 (%f3) -> %f21: same curve ---
	mov.f32 	%f15, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f3, %f15;
	@!%p4 bra 	$Lt_83_196866;
	.loc	20	372	0
	neg.ftz.f32 	%f16, %f3;
	lg2.approx.ftz.f32 	%f17, %f16;
	mov.f32 	%f18, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f20, %f19;
	neg.ftz.f32 	%f21, %f20;
	bra.uni 	$LDWendi___log2f_260_69;
$Lt_83_196866:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f22, %f3;
	mov.f32 	%f23, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f21, %f24;
$LDWendi___log2f_260_69:
	.loc	20	483	0
	// --- Component 3 (%f4) -> %f31: same curve ---
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f4, %f25;
	@!%p5 bra 	$Lt_83_197378;
	.loc	20	372	0
	neg.ftz.f32 	%f26, %f4;
	lg2.approx.ftz.f32 	%f27, %f26;
	mov.f32 	%f28, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f30, %f29;
	neg.ftz.f32 	%f31, %f30;
	bra.uni 	$LDWendi___log2f_260_67;
$Lt_83_197378:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f32, %f4;
	mov.f32 	%f33, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f31, %f34;
$LDWendi___log2f_260_67:
	.loc	20	433	0
	// --- Divide by component 0 ---
	// %f35 is the value written as the fourth output component; it starts as
	// source component 0 and is overwritten with 0 on the small-value path.
	mov.f32 	%f35, %f1;
	.loc	20	435	0
	// Threshold test: (c0 + (-8e-6)) <= 0 selects the all-zero output.
	mov.f32 	%f36, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f37, %f1, %f36;
	mov.f32 	%f38, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p6, %f37, %f38;
	@!%p6 bra 	$LDWendi__Z19MaxUnsignedBitValuei_260_7;
	// Component 0 at or below the threshold: all four outputs are zero, which
	// also avoids the reciprocal below.
	mov.f32 	%f39, 0f00000000;    	// 0
	mov.f32 	%f40, 0f00000000;    	// 0
	mov.f32 	%f41, 0f00000000;    	// 0
	mov.f32 	%f35, 0f00000000;    	// 0
	bra.uni 	$Lt_83_197890;
$LDWendi__Z19MaxUnsignedBitValuei_260_7:
	.loc	20	447	0
	// %f43 = 1 / c0 (approximate divide), applied to each curved component.
	mov.f32 	%f42, 0f3f800000;    	// 1
	div.approx.ftz.f32 	%f43, %f42, %f1;
	mul.ftz.f32 	%f41, %f11, %f43;
	.loc	20	448	0
	mul.ftz.f32 	%f40, %f21, %f43;
	.loc	20	449	0
	mul.ftz.f32 	%f39, %f31, %f43;
$Lt_83_197890:
	.loc	22	194	0
	// --- Destination pixel index = inDestPitch * y + x (32-bit, then widened) ---
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// Choose the destination layout: 0 -> 16-bit path below, otherwise f32.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p7, %r31, %r32;
	@%p7 bra 	$Lt_83_198658;
	.loc	19	126	0
	// 16-bit destination: 8 bytes per pixel, each value rounded to nearest f16.
	// Output order is derived-from-c3, derived-from-c2, derived-from-c1, c0.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f39;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f40;
	mov.b32		%r34, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f41;
	mov.b32		%r35, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f35;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	194	0
	bra.uni 	$Lt_83_198402;
$Lt_83_198658:
	.loc	19	126	0
	// f32 destination: 16 bytes per pixel, same component order as above.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	st.global.v4.f32 	[%rd12+0], {%f39,%f40,%f41,%f35};
$Lt_83_198402:
$Lt_83_195330:
	.loc	22	194	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_PRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// ---------------------------------------------------------------------
	// Kernel: XRGB_4444_32f_Linear -> BGRA_4444_32f pixel-format conversion.
	//
	// Each thread converts one pixel at
	//   x = ctaid.x * ntid.x + tid.x,  y = ctaid.y * ntid.y + tid.y
	// and exits without touching memory unless (inWidth > x) and (inHeight > y).
	//
	// Per pixel:
	//   1. Load source components c1..c3 (component 0 is discarded with `_`).
	//   2. Map each of them to sign(c) * 2^(0.45 * log2(|c|)) using the
	//      approximate lg2/ex2 instructions (a sign-preserving |c|^0.45).
	//   3. Store { f(c3), f(c2), f(c1), 1.0 }: the curved components in
	//      reversed order, followed by a constant 1.0.
	//
	// Parameters:
	//   inSrc               base address of the source pixels (global memory)
	//   inSrcPitch          source row pitch in pixels (it is multiplied by y and
	//                       added to x before being scaled to a byte offset)
	//   inSrcDeviceFormat   0: four 16-bit half-float components, 8 bytes/pixel;
	//                       non-zero: four f32 components, 16 bytes/pixel
	//   inDest              base address of the destination pixels (global memory)
	//   inDestPitch         destination row pitch in pixels
	//   inDestDeviceFormat  same encoding as inSrcDeviceFormat, for the output
	//   inWidth, inHeight   extent used for the per-thread bounds check
	//
	// Component names (X, R, G, B) are taken from the kernel name only; the
	// code itself just works on component positions 0..3.
	// ---------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<37>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<37>;
	.reg .pred %p<8>;
	.loc	22	195	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	// --- Thread coordinates: %r8 = x, %r10 = y ---
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// --- Bounds check: set.gt yields all-ones/zero, neg turns that into 1/0,
	//     and the AND is non-zero only when inWidth > x and inHeight > y ---
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range threads jump straight to the exit.
	@%p1 bra 	$Lt_84_194818;
	// --- Source pixel index = inSrcPitch * y + x, computed in 32-bit signed
	//     arithmetic and only then sign-extended to 64 bits ---
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrc];
	// Choose the source layout: 0 -> 16-bit path below, otherwise the f32 path.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_84_195586;
	.loc	19	115	0
	// 16-bit source: 8 bytes per pixel; component 0 is not kept.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{_,%r24,%r25,%r26}, [%rd4+0];
	.loc	22	195	0
	// Widen components 1..3 from f16 to f32 -> %f1, %f2, %f3.
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	bra.uni 	$Lt_84_195330;
$Lt_84_195586:
	// f32 source: 16 bytes per pixel; component 0 is not kept.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{_,%f1,%f2,%f3}, [%rd6+0];
$Lt_84_195330:
	.loc	20	481	0
	// --- Component 1 (%f1) -> %f10: sign-preserving |c|^0.45 ---
	mov.f32 	%f4, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f1, %f4;
	@!%p3 bra 	$Lt_84_195842;
	.loc	20	372	0
	// Negative input: apply the curve to -c and negate the result.
	neg.ftz.f32 	%f5, %f1;
	lg2.approx.ftz.f32 	%f6, %f5;
	mov.f32 	%f7, 0f3ee66666;     	// 0.45
	mul.ftz.f32 	%f8, %f6, %f7;
	ex2.approx.ftz.f32 	%f9, %f8;
	neg.ftz.f32 	%f10, %f9;
	bra.uni 	$LDWendi___log2f_261_71;
$Lt_84_195842:
	.loc	20	374	0
	// Non-negative input: 2^(0.45 * log2(c)).
	lg2.approx.ftz.f32 	%f11, %f1;
	mov.f32 	%f12, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f13, %f11, %f12;
	ex2.approx.ftz.f32 	%f10, %f13;
$LDWendi___log2f_261_71:
	.loc	20	483	0
	// --- Component 2 (%f2) -> %f20: same curve ---
	mov.f32 	%f14, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f2, %f14;
	@!%p4 bra 	$Lt_84_196354;
	.loc	20	372	0
	neg.ftz.f32 	%f15, %f2;
	lg2.approx.ftz.f32 	%f16, %f15;
	mov.f32 	%f17, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f18, %f16, %f17;
	ex2.approx.ftz.f32 	%f19, %f18;
	neg.ftz.f32 	%f20, %f19;
	bra.uni 	$LDWendi___log2f_261_69;
$Lt_84_196354:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f21, %f2;
	mov.f32 	%f22, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f23, %f21, %f22;
	ex2.approx.ftz.f32 	%f20, %f23;
$LDWendi___log2f_261_69:
	.loc	20	483	0
	// --- Component 3 (%f3) -> %f30: same curve ---
	mov.f32 	%f24, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f3, %f24;
	@!%p5 bra 	$Lt_84_196866;
	.loc	20	372	0
	neg.ftz.f32 	%f25, %f3;
	lg2.approx.ftz.f32 	%f26, %f25;
	mov.f32 	%f27, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f28, %f26, %f27;
	ex2.approx.ftz.f32 	%f29, %f28;
	neg.ftz.f32 	%f30, %f29;
	bra.uni 	$LDWendi___log2f_261_67;
$Lt_84_196866:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f31, %f3;
	mov.f32 	%f32, 0f3ee66666;    	// 0.45
	mul.ftz.f32 	%f33, %f31, %f32;
	ex2.approx.ftz.f32 	%f30, %f33;
$LDWendi___log2f_261_67:
	.loc	22	195	0
	// --- Destination pixel index = inDestPitch * y + x (32-bit, then widened) ---
	ld.param.s32 	%r27, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r28, %r27, %r10;
	add.s32 	%r29, %r8, %r28;
	cvt.s64.s32 	%rd7, %r29;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDest];
	// Choose the destination layout: 0 -> 16-bit path below, otherwise f32.
	ld.param.s32 	%r30, [__cudaparm_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r31, 0;
	setp.ne.s32 	%p6, %r30, %r31;
	@%p6 bra 	$Lt_84_197634;
	.loc	19	126	0
	// 16-bit destination: 8 bytes per pixel, each value rounded to nearest f16.
	// The curved components are written in reversed order (c3, c2, c1).
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f30;
	mov.b32		%r32, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r34, %b1; }
	// The fourth output component is the constant 1.0.
	mov.f32 	%f34, 0f3f800000;    	// 1
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f34;
	mov.b32		%r35, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r32,%r33,%r34,%r35};
	.loc	22	195	0
	bra.uni 	$Lt_84_197378;
$Lt_84_197634:
	.loc	19	126	0
	// f32 destination: 16 bytes per pixel, fourth component = 1.0.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	mov.f32 	%f35, 0f3f800000;    	// 1
	st.global.v4.f32 	[%rd12+0], {%f30,%f20,%f10,%f35};
$Lt_84_197378:
$Lt_84_194818:
	.loc	22	195	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_XRGB_4444_32f_Linear_To_IR_PixelFormat_BGRA_4444_32f_Kernel

	// ---------------------------------------------------------------------
	// Kernel: BGRA_4444_32f -> BGRA_4444_8u pixel-format conversion.
	//
	// Each thread converts one pixel at
	//   x = ctaid.x * ntid.x + tid.x,  y = ctaid.y * ntid.y + tid.y
	// and exits without touching memory unless (inWidth > x) and (inHeight > y).
	//
	// Per pixel, every one of the four components c is quantised as
	//   t = (255 / 1) * c + 0.5
	//   t = (t > 0) ? t : 0        (a NaN fails the compare and becomes 0)
	//   t = min(t, 255)
	//   q = truncate-to-integer(t)
	// and the four results are stored as four consecutive bytes, in the same
	// component order as the source.
	//
	// Parameters:
	//   inSrc               base address of the source pixels (global memory)
	//   inSrcPitch          source row pitch in pixels (it is multiplied by y and
	//                       added to x before being scaled to a byte offset)
	//   inSrcDeviceFormat   0: four 16-bit half-float components, 8 bytes/pixel;
	//                       non-zero: four f32 components, 16 bytes/pixel
	//   inDest              base address of the destination pixels (global memory)
	//   inDestPitch         destination row pitch in pixels (4 bytes per pixel)
	//   inDestDeviceFormat  declared but never read by this kernel
	//   inWidth, inHeight   extent used for the per-thread bounds check
	// ---------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<41>;
	.reg .pred %p<8>;
	.loc	22	200	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel:
	// --- Thread coordinates: %r8 = x, %r10 = y ---
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// --- Bounds check: set.gt yields all-ones/zero, neg turns that into 1/0,
	//     and the AND is non-zero only when inWidth > x and inHeight > y ---
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range threads jump straight to the exit.
	@%p1 bra 	$Lt_85_194306;
	// --- Source pixel index = inSrcPitch * y + x, computed in 32-bit signed
	//     arithmetic and only then sign-extended to 64 bits ---
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inSrc];
	// Choose the source layout: 0 -> 16-bit path below, otherwise the f32 path.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_85_195074;
	.loc	19	115	0
	// 16-bit source: 8 bytes per pixel, all four components are loaded.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	200	0
	// Widen components 0..3 from f16 to f32 -> %f1..%f4.
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_85_194818;
$Lt_85_195074:
	// f32 source: 16 bytes per pixel.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_85_194818:
	.loc	19	126	0
	// --- Scale factor %f7 = 255 / 1, evaluated at run time with an
	//     approximate divide and shared by all four components ---
	mov.f32 	%f5, 0f437f0000;     	// 255
	mov.f32 	%f6, 0f3f800000;     	// 1
	div.approx.ftz.f32 	%f7, %f5, %f6;
	// Component 0: scale and add the 0.5 rounding bias.
	mul.ftz.f32 	%f8, %f7, %f1;
	mov.f32 	%f9, 0f3f000000;     	// 0.5
	add.ftz.f32 	%f10, %f8, %f9;
	// --- Destination address = inDest + (inDestPitch * y + x) * 4 bytes ---
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inDest];
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	// %rd8 is not read again in this kernel; mul.wide below does the widening.
	cvt.s64.s32 	%rd8, %r30;
	mul.wide.s32 	%rd9, %r30, 4;
	add.u64 	%rd10, %rd7, %rd9;
	// Component 0: clamp to [0, 255] and truncate -> %r31.
	mov.f32 	%f11, 0f00000000;    	// 0
	mov.f32 	%f12, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f10, %f12;
	selp.f32 	%f13, %f10, %f11, %p3;
	mov.f32 	%f14, 0f437f0000;    	// 255
	min.ftz.f32 	%f15, %f13, %f14;
	cvt.rzi.ftz.u32.f32 	%r31, %f15;
	// Component 1: scale, bias, clamp, truncate -> %r32.
	mul.ftz.f32 	%f16, %f7, %f2;
	mov.f32 	%f17, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f18, %f16, %f17;
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f18, %f20;
	selp.f32 	%f21, %f18, %f19, %p4;
	mov.f32 	%f22, 0f437f0000;    	// 255
	min.ftz.f32 	%f23, %f21, %f22;
	cvt.rzi.ftz.u32.f32 	%r32, %f23;
	// Component 2: scale, bias, clamp, truncate -> %r33.
	mul.ftz.f32 	%f24, %f7, %f3;
	mov.f32 	%f25, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f26, %f24, %f25;
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f28, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f26, %f28;
	selp.f32 	%f29, %f26, %f27, %p5;
	mov.f32 	%f30, 0f437f0000;    	// 255
	min.ftz.f32 	%f31, %f29, %f30;
	cvt.rzi.ftz.u32.f32 	%r33, %f31;
	// Component 3: scale, bias, clamp, truncate -> %r34.
	mul.ftz.f32 	%f32, %f7, %f4;
	mov.f32 	%f33, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f34, %f32, %f33;
	mov.f32 	%f35, 0f00000000;    	// 0
	mov.f32 	%f36, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p6, %f34, %f36;
	selp.f32 	%f37, %f34, %f35, %p6;
	mov.f32 	%f38, 0f437f0000;    	// 255
	min.ftz.f32 	%f39, %f37, %f38;
	cvt.rzi.ftz.u32.f32 	%r34, %f39;
	// Write the four quantised components as one 4-byte vector store.
	st.global.v4.u8 	[%rd10+0], {%r31,%r32,%r33,%r34};
$Lt_85_194306:
	.loc	22	200	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_8u_Kernel

	// ---------------------------------------------------------------------------
	// PixelFormatConvert: BGRA_4444_32f -> BGRP_4444_8u
	// One thread converts one pixel at (x, y), where x = ctaid.x*ntid.x + tid.x and
	// y = ctaid.y*ntid.y + tid.y.
	//   inSrc / inDest         : base addresses, accessed with ld.global / st.global.
	//   inSrcPitch/inDestPitch : multiplied by y and added to x to form a pixel index,
	//                            i.e. they are used as row strides counted in pixels.
	//   inSrcDeviceFormat      : 0 selects 8-byte f16x4 source pixels; any other value
	//                            selects 16-byte f32x4 source pixels.
	//   inDestDeviceFormat     : declared but never read by this kernel.
	//   inWidth / inHeight     : threads with x >= inWidth or y >= inHeight exit
	//                            without touching memory (signed comparison).
	// Output bytes in store order: c0*a, c1*a, c2*a, a -- each scaled by 255, offset
	// by 0.5, clamped to [0, 255] and truncated toward zero. c0..c3 are the source
	// channels in load order (B, G, R, A going by the kernel name).
	// ---------------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<46>;
	.reg .pred %p<8>;
	.loc	22	201	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: set.gt yields an all-ones mask when true, neg turns it into 1.
	// %r13 = (inWidth > x), %r16 = (inHeight > y); skip to exit unless both hold.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_86_195330;
	// Source pixel index %rd1 = inSrcPitch * y + x (computed in 32 bits, then sign-extended).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel_inSrc];
	// Choose the source layout: non-zero device format -> f32x4 path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_86_196098;
	.loc	19	115	0
	// f16 path: 8 bytes per pixel; load four 16-bit values and widen each to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	201	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_86_195842;
$Lt_86_196098:
	// f32 path: 16 bytes per pixel; load %f1..%f4 directly.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_86_195842:
	.loc	22	56	0
	// %f7 = 255 / 1 (approximate divide): the scale factor applied to every channel.
	mov.f32 	%f5, 0f437f0000;     	// 255
	mov.f32 	%f6, 0f3f800000;     	// 1
	div.approx.ftz.f32 	%f7, %f5, %f6;
	// %f8 = 255*c0, %f9 = 255*a, %f11 = %f9 * 0.00392157 (about 1/255, so roughly a).
	mul.ftz.f32 	%f8, %f7, %f1;
	mul.ftz.f32 	%f9, %f7, %f4;
	mov.f32 	%f10, 0f3b808081;    	// 0.00392157
	mul.ftz.f32 	%f11, %f9, %f10;
	// Byte 0 (%r28): 255*c0 * %f11 + 0.5, clamped to [0, 255], truncated toward zero.
	// setp.gt is false for NaN, so a NaN value also selects the 0 operand.
	mul.ftz.f32 	%f12, %f8, %f11;
	mov.f32 	%f13, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f14, %f12, %f13;
	mov.f32 	%f15, 0f00000000;    	// 0
	mov.f32 	%f16, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f14, %f16;
	selp.f32 	%f17, %f14, %f15, %p3;
	mov.f32 	%f18, 0f437f0000;    	// 255
	min.ftz.f32 	%f19, %f17, %f18;
	cvt.rzi.ftz.u32.f32 	%r28, %f19;
	// Byte 1 (%r29): same sequence for c1.
	mul.ftz.f32 	%f20, %f7, %f2;
	mul.ftz.f32 	%f21, %f20, %f11;
	mov.f32 	%f22, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f23, %f21, %f22;
	mov.f32 	%f24, 0f00000000;    	// 0
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f23, %f25;
	selp.f32 	%f26, %f23, %f24, %p4;
	mov.f32 	%f27, 0f437f0000;    	// 255
	min.ftz.f32 	%f28, %f26, %f27;
	cvt.rzi.ftz.u32.f32 	%r29, %f28;
	// Byte 2 (%r30): same sequence for c2.
	mul.ftz.f32 	%f29, %f7, %f3;
	mul.ftz.f32 	%f30, %f29, %f11;
	mov.f32 	%f31, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f32, %f30, %f31;
	mov.f32 	%f33, 0f00000000;    	// 0
	mov.f32 	%f34, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f32, %f34;
	selp.f32 	%f35, %f32, %f33, %p5;
	mov.f32 	%f36, 0f437f0000;    	// 255
	min.ftz.f32 	%f37, %f35, %f36;
	cvt.rzi.ftz.u32.f32 	%r30, %f37;
	.loc	19	126	0
	// Destination address %rd10 = inDest + (inDestPitch * y + x) * 4 bytes.
	// NOTE: %rd8 is not read again in this kernel; mul.wide.s32 performs the widening.
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel_inDest];
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel_inDestPitch];
	mul.lo.s32 	%r32, %r31, %r10;
	add.s32 	%r33, %r8, %r32;
	cvt.s64.s32 	%rd8, %r33;
	mul.wide.s32 	%rd9, %r33, 4;
	add.u64 	%rd10, %rd7, %rd9;
	// Byte 3 (%r34): alpha itself, 255*a + 0.5, clamped and truncated (not multiplied by %f11).
	mov.f32 	%f38, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f39, %f9, %f38;
	mov.f32 	%f40, 0f00000000;    	// 0
	mov.f32 	%f41, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p6, %f39, %f41;
	selp.f32 	%f42, %f39, %f40, %p6;
	mov.f32 	%f43, 0f437f0000;    	// 255
	min.ftz.f32 	%f44, %f42, %f43;
	cvt.rzi.ftz.u32.f32 	%r34, %f44;
	// Store the four bytes as one 32-bit pixel: {c0*a, c1*a, c2*a, a}.
	st.global.v4.u8 	[%rd10+0], {%r28,%r29,%r30,%r34};
$Lt_86_195330:
	.loc	22	201	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_8u_Kernel

	// ---------------------------------------------------------------------------
	// PixelFormatConvert: BGRA_4444_32f -> BGRX_4444_8u
	// One thread converts one pixel at (x, y), where x = ctaid.x*ntid.x + tid.x and
	// y = ctaid.y*ntid.y + tid.y.
	//   inSrc / inDest         : base addresses, accessed with ld.global / st.global.
	//   inSrcPitch/inDestPitch : multiplied by y and added to x to form a pixel index,
	//                            i.e. they are used as row strides counted in pixels.
	//   inSrcDeviceFormat      : 0 selects 8-byte f16x4 source pixels; any other value
	//                            selects 16-byte f32x4 source pixels.
	//   inDestDeviceFormat     : declared but never read by this kernel.
	//   inWidth / inHeight     : threads with x >= inWidth or y >= inHeight exit
	//                            without touching memory (signed comparison).
	// Output bytes in store order: c0*a, c1*a, c2*a, 255. The colour bytes are scaled
	// by 255, offset by 0.5, clamped to [0, 255] and truncated toward zero; the last
	// byte is the constant 255. c0..c3 are the source channels in load order
	// (B, G, R, A going by the kernel name).
	// NOTE(review): colour channels are still multiplied by the source alpha even
	// though the stored alpha byte is constant -- confirm this is intended.
	// ---------------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<40>;
	.reg .pred %p<7>;
	.loc	22	202	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: set.gt yields an all-ones mask when true, neg turns it into 1.
	// %r13 = (inWidth > x), %r16 = (inHeight > y); skip to exit unless both hold.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_87_195330;
	// Source pixel index %rd1 = inSrcPitch * y + x (computed in 32 bits, then sign-extended).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel_inSrc];
	// Choose the source layout: non-zero device format -> f32x4 path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_87_196098;
	.loc	19	115	0
	// f16 path: 8 bytes per pixel; load four 16-bit values and widen each to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	202	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_87_195842;
$Lt_87_196098:
	// f32 path: 16 bytes per pixel; load %f1..%f4 directly.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_87_195842:
	.loc	22	56	0
	// %f7 = 255 / 1 (approximate divide): the scale factor applied to every channel.
	mov.f32 	%f5, 0f437f0000;     	// 255
	mov.f32 	%f6, 0f3f800000;     	// 1
	div.approx.ftz.f32 	%f7, %f5, %f6;
	// %f8 = 255*c0, %f9 = 255*a, %f11 = %f9 * 0.00392157 (about 1/255, so roughly a).
	mul.ftz.f32 	%f8, %f7, %f1;
	mul.ftz.f32 	%f9, %f7, %f4;
	mov.f32 	%f10, 0f3b808081;    	// 0.00392157
	mul.ftz.f32 	%f11, %f9, %f10;
	// Byte 0 (%r28): 255*c0 * %f11 + 0.5, clamped to [0, 255], truncated toward zero.
	// setp.gt is false for NaN, so a NaN value also selects the 0 operand.
	mul.ftz.f32 	%f12, %f8, %f11;
	mov.f32 	%f13, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f14, %f12, %f13;
	mov.f32 	%f15, 0f00000000;    	// 0
	mov.f32 	%f16, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f14, %f16;
	selp.f32 	%f17, %f14, %f15, %p3;
	mov.f32 	%f18, 0f437f0000;    	// 255
	min.ftz.f32 	%f19, %f17, %f18;
	cvt.rzi.ftz.u32.f32 	%r28, %f19;
	// Byte 1 (%r29): same sequence for c1.
	mul.ftz.f32 	%f20, %f7, %f2;
	mul.ftz.f32 	%f21, %f20, %f11;
	mov.f32 	%f22, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f23, %f21, %f22;
	mov.f32 	%f24, 0f00000000;    	// 0
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f23, %f25;
	selp.f32 	%f26, %f23, %f24, %p4;
	mov.f32 	%f27, 0f437f0000;    	// 255
	min.ftz.f32 	%f28, %f26, %f27;
	cvt.rzi.ftz.u32.f32 	%r29, %f28;
	// Byte 2 (%r30): same sequence for c2.
	mul.ftz.f32 	%f29, %f7, %f3;
	mul.ftz.f32 	%f30, %f29, %f11;
	mov.f32 	%f31, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f32, %f30, %f31;
	mov.f32 	%f33, 0f00000000;    	// 0
	mov.f32 	%f34, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f32, %f34;
	selp.f32 	%f35, %f32, %f33, %p5;
	mov.f32 	%f36, 0f437f0000;    	// 255
	min.ftz.f32 	%f37, %f35, %f36;
	cvt.rzi.ftz.u32.f32 	%r30, %f37;
	.loc	19	126	0
	// Destination address %rd10 = inDest + (inDestPitch * y + x) * 4 bytes.
	// NOTE: %rd8 is not read again in this kernel; mul.wide.s32 performs the widening.
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel_inDest];
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel_inDestPitch];
	mul.lo.s32 	%r32, %r31, %r10;
	add.s32 	%r33, %r8, %r32;
	cvt.s64.s32 	%rd8, %r33;
	mul.wide.s32 	%rd9, %r33, 4;
	add.u64 	%rd10, %rd7, %rd9;
	// Byte 3 (%r34): the constant 255.0 converted to an integer; source alpha is not stored.
	mov.f32 	%f38, 0f437f0000;    	// 255
	cvt.rzi.ftz.u32.f32 	%r34, %f38;
	// Store the four bytes as one 32-bit pixel: {c0*a, c1*a, c2*a, 255}.
	st.global.v4.u8 	[%rd10+0], {%r28,%r29,%r30,%r34};
$Lt_87_195330:
	.loc	22	202	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_8u_Kernel

	// ---------------------------------------------------------------------------
	// PixelFormatConvert: BGRA_4444_32f -> ARGB_4444_8u
	// One thread converts one pixel at (x, y), where x = ctaid.x*ntid.x + tid.x and
	// y = ctaid.y*ntid.y + tid.y.
	//   inSrc / inDest         : base addresses, accessed with ld.global / st.global.
	//   inSrcPitch/inDestPitch : multiplied by y and added to x to form a pixel index,
	//                            i.e. they are used as row strides counted in pixels.
	//   inSrcDeviceFormat      : 0 selects 8-byte f16x4 source pixels; any other value
	//                            selects 16-byte f32x4 source pixels.
	//   inDestDeviceFormat     : declared but never read by this kernel.
	//   inWidth / inHeight     : threads with x >= inWidth or y >= inHeight exit
	//                            without touching memory (signed comparison).
	// Output bytes in store order: c3, c2, c1, c0 (the source channel order reversed),
	// each scaled by 255, offset by 0.5, clamped to [0, 255] and truncated toward
	// zero. No multiplication by alpha is performed. c0..c3 are the source channels
	// in load order (B, G, R, A going by the kernel name).
	// ---------------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<41>;
	.reg .pred %p<8>;
	.loc	22	203	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: set.gt yields an all-ones mask when true, neg turns it into 1.
	// %r13 = (inWidth > x), %r16 = (inHeight > y); skip to exit unless both hold.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_88_194562;
	// Source pixel index %rd1 = inSrcPitch * y + x (computed in 32 bits, then sign-extended).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel_inSrc];
	// Choose the source layout: non-zero device format -> f32x4 path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_88_195330;
	.loc	19	115	0
	// f16 path: 8 bytes per pixel; load four 16-bit values and widen each to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	203	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_88_195074;
$Lt_88_195330:
	// f32 path: 16 bytes per pixel; load %f1..%f4 directly.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_88_195074:
	.loc	19	126	0
	// %f7 = 255 / 1 (approximate divide): the scale factor applied to every channel.
	mov.f32 	%f5, 0f437f0000;     	// 255
	mov.f32 	%f6, 0f3f800000;     	// 1
	div.approx.ftz.f32 	%f7, %f5, %f6;
	// Start of byte 0: %f10 = 255*c3 + 0.5 (clamped further below).
	mul.ftz.f32 	%f8, %f7, %f4;
	mov.f32 	%f9, 0f3f000000;     	// 0.5
	add.ftz.f32 	%f10, %f8, %f9;
	// Destination address %rd10 = inDest + (inDestPitch * y + x) * 4 bytes.
	// NOTE: %rd8 is not read again in this kernel; mul.wide.s32 performs the widening.
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel_inDest];
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd8, %r30;
	mul.wide.s32 	%rd9, %r30, 4;
	add.u64 	%rd10, %rd7, %rd9;
	// Byte 0 (%r31): clamp %f10 to [0, 255] and truncate toward zero.
	// setp.gt is false for NaN, so a NaN value also selects the 0 operand.
	mov.f32 	%f11, 0f00000000;    	// 0
	mov.f32 	%f12, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f10, %f12;
	selp.f32 	%f13, %f10, %f11, %p3;
	mov.f32 	%f14, 0f437f0000;    	// 255
	min.ftz.f32 	%f15, %f13, %f14;
	cvt.rzi.ftz.u32.f32 	%r31, %f15;
	// Byte 1 (%r32): 255*c2 + 0.5, clamped and truncated.
	mul.ftz.f32 	%f16, %f7, %f3;
	mov.f32 	%f17, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f18, %f16, %f17;
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f18, %f20;
	selp.f32 	%f21, %f18, %f19, %p4;
	mov.f32 	%f22, 0f437f0000;    	// 255
	min.ftz.f32 	%f23, %f21, %f22;
	cvt.rzi.ftz.u32.f32 	%r32, %f23;
	// Byte 2 (%r33): 255*c1 + 0.5, clamped and truncated.
	mul.ftz.f32 	%f24, %f7, %f2;
	mov.f32 	%f25, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f26, %f24, %f25;
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f28, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f26, %f28;
	selp.f32 	%f29, %f26, %f27, %p5;
	mov.f32 	%f30, 0f437f0000;    	// 255
	min.ftz.f32 	%f31, %f29, %f30;
	cvt.rzi.ftz.u32.f32 	%r33, %f31;
	// Byte 3 (%r34): 255*c0 + 0.5, clamped and truncated.
	mul.ftz.f32 	%f32, %f7, %f1;
	mov.f32 	%f33, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f34, %f32, %f33;
	mov.f32 	%f35, 0f00000000;    	// 0
	mov.f32 	%f36, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p6, %f34, %f36;
	selp.f32 	%f37, %f34, %f35, %p6;
	mov.f32 	%f38, 0f437f0000;    	// 255
	min.ftz.f32 	%f39, %f37, %f38;
	cvt.rzi.ftz.u32.f32 	%r34, %f39;
	// Store the four bytes as one 32-bit pixel: {c3, c2, c1, c0}.
	st.global.v4.u8 	[%rd10+0], {%r31,%r32,%r33,%r34};
$Lt_88_194562:
	.loc	22	203	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_8u_Kernel

	// ---------------------------------------------------------------------------
	// PixelFormatConvert: BGRA_4444_32f -> PRGB_4444_8u
	// One thread converts one pixel at (x, y), where x = ctaid.x*ntid.x + tid.x and
	// y = ctaid.y*ntid.y + tid.y.
	//   inSrc / inDest         : base addresses, accessed with ld.global / st.global.
	//   inSrcPitch/inDestPitch : multiplied by y and added to x to form a pixel index,
	//                            i.e. they are used as row strides counted in pixels.
	//   inSrcDeviceFormat      : 0 selects 8-byte f16x4 source pixels; any other value
	//                            selects 16-byte f32x4 source pixels.
	//   inDestDeviceFormat     : declared but never read by this kernel.
	//   inWidth / inHeight     : threads with x >= inWidth or y >= inHeight exit
	//                            without touching memory (signed comparison).
	// Output bytes in store order: a, c2*a, c1*a, c0*a -- each scaled by 255, offset
	// by 0.5, clamped to [0, 255] and truncated toward zero. c0..c3 are the source
	// channels in load order (B, G, R, A going by the kernel name).
	// ---------------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<46>;
	.reg .pred %p<8>;
	.loc	22	204	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: set.gt yields an all-ones mask when true, neg turns it into 1.
	// %r13 = (inWidth > x), %r16 = (inHeight > y); skip to exit unless both hold.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_89_195586;
	// Source pixel index %rd1 = inSrcPitch * y + x (computed in 32 bits, then sign-extended).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel_inSrc];
	// Choose the source layout: non-zero device format -> f32x4 path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_89_196354;
	.loc	19	115	0
	// f16 path: 8 bytes per pixel; load four 16-bit values and widen each to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	204	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_89_196098;
$Lt_89_196354:
	// f32 path: 16 bytes per pixel; load %f1..%f4 directly.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_89_196098:
	.loc	22	56	0
	// %f7 = 255 / 1 (approximate divide): the scale factor applied to every channel.
	mov.f32 	%f5, 0f437f0000;     	// 255
	mov.f32 	%f6, 0f3f800000;     	// 1
	div.approx.ftz.f32 	%f7, %f5, %f6;
	// %f8 = 255*c2, %f9 = 255*a, %f11 = %f9 * 0.00392157 (about 1/255, so roughly a).
	mul.ftz.f32 	%f8, %f7, %f3;
	mul.ftz.f32 	%f9, %f7, %f4;
	mov.f32 	%f10, 0f3b808081;    	// 0.00392157
	mul.ftz.f32 	%f11, %f9, %f10;
	// %r28: 255*c2 * %f11 + 0.5, clamped to [0, 255], truncated toward zero.
	// setp.gt is false for NaN, so a NaN value also selects the 0 operand.
	mul.ftz.f32 	%f12, %f8, %f11;
	mov.f32 	%f13, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f14, %f12, %f13;
	mov.f32 	%f15, 0f00000000;    	// 0
	mov.f32 	%f16, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f14, %f16;
	selp.f32 	%f17, %f14, %f15, %p3;
	mov.f32 	%f18, 0f437f0000;    	// 255
	min.ftz.f32 	%f19, %f17, %f18;
	cvt.rzi.ftz.u32.f32 	%r28, %f19;
	// %r29: same sequence for c1.
	mul.ftz.f32 	%f20, %f7, %f2;
	mul.ftz.f32 	%f21, %f20, %f11;
	mov.f32 	%f22, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f23, %f21, %f22;
	mov.f32 	%f24, 0f00000000;    	// 0
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f23, %f25;
	selp.f32 	%f26, %f23, %f24, %p4;
	mov.f32 	%f27, 0f437f0000;    	// 255
	min.ftz.f32 	%f28, %f26, %f27;
	cvt.rzi.ftz.u32.f32 	%r29, %f28;
	// %r30: same sequence for c0.
	mul.ftz.f32 	%f29, %f7, %f1;
	mul.ftz.f32 	%f30, %f29, %f11;
	mov.f32 	%f31, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f32, %f30, %f31;
	mov.f32 	%f33, 0f00000000;    	// 0
	mov.f32 	%f34, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f32, %f34;
	selp.f32 	%f35, %f32, %f33, %p5;
	mov.f32 	%f36, 0f437f0000;    	// 255
	min.ftz.f32 	%f37, %f35, %f36;
	cvt.rzi.ftz.u32.f32 	%r30, %f37;
	.loc	19	126	0
	// Alpha byte, first half: %f39 = 255*a + 0.5 (clamped further below).
	mov.f32 	%f38, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f39, %f9, %f38;
	// Destination address %rd10 = inDest + (inDestPitch * y + x) * 4 bytes.
	// NOTE: %rd8 is not read again in this kernel; mul.wide.s32 performs the widening.
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel_inDest];
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel_inDestPitch];
	mul.lo.s32 	%r32, %r31, %r10;
	add.s32 	%r33, %r8, %r32;
	cvt.s64.s32 	%rd8, %r33;
	mul.wide.s32 	%rd9, %r33, 4;
	add.u64 	%rd10, %rd7, %rd9;
	// %r34: clamp %f39 to [0, 255] and truncate toward zero.
	mov.f32 	%f40, 0f00000000;    	// 0
	mov.f32 	%f41, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p6, %f39, %f41;
	selp.f32 	%f42, %f39, %f40, %p6;
	mov.f32 	%f43, 0f437f0000;    	// 255
	min.ftz.f32 	%f44, %f42, %f43;
	cvt.rzi.ftz.u32.f32 	%r34, %f44;
	// Store the four bytes as one 32-bit pixel, alpha first: {a, c2*a, c1*a, c0*a}.
	st.global.v4.u8 	[%rd10+0], {%r34,%r28,%r29,%r30};
$Lt_89_195586:
	.loc	22	204	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_8u_Kernel

	// ---------------------------------------------------------------------------
	// PixelFormatConvert: BGRA_4444_32f -> XRGB_4444_8u
	// One thread converts one pixel at (x, y), where x = ctaid.x*ntid.x + tid.x and
	// y = ctaid.y*ntid.y + tid.y.
	//   inSrc / inDest         : base addresses, accessed with ld.global / st.global.
	//   inSrcPitch/inDestPitch : multiplied by y and added to x to form a pixel index,
	//                            i.e. they are used as row strides counted in pixels.
	//   inSrcDeviceFormat      : 0 selects 8-byte f16x4 source pixels; any other value
	//                            selects 16-byte f32x4 source pixels.
	//   inDestDeviceFormat     : declared but never read by this kernel.
	//   inWidth / inHeight     : threads with x >= inWidth or y >= inHeight exit
	//                            without touching memory (signed comparison).
	// Output bytes in store order: 255, c2*a, c1*a, c0*a. The colour bytes are scaled
	// by 255, offset by 0.5, clamped to [0, 255] and truncated toward zero; the first
	// byte is the constant 255. c0..c3 are the source channels in load order
	// (B, G, R, A going by the kernel name).
	// NOTE(review): colour channels are still multiplied by the source alpha even
	// though the stored alpha byte is constant -- confirm this is intended.
	// ---------------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<40>;
	.reg .pred %p<7>;
	.loc	22	205	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: set.gt yields an all-ones mask when true, neg turns it into 1.
	// %r13 = (inWidth > x), %r16 = (inHeight > y); skip to exit unless both hold.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_90_195586;
	// Source pixel index %rd1 = inSrcPitch * y + x (computed in 32 bits, then sign-extended).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel_inSrc];
	// Choose the source layout: non-zero device format -> f32x4 path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_90_196354;
	.loc	19	115	0
	// f16 path: 8 bytes per pixel; load four 16-bit values and widen each to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	205	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_90_196098;
$Lt_90_196354:
	// f32 path: 16 bytes per pixel; load %f1..%f4 directly.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_90_196098:
	.loc	22	56	0
	// %f7 = 255 / 1 (approximate divide): the scale factor applied to every channel.
	mov.f32 	%f5, 0f437f0000;     	// 255
	mov.f32 	%f6, 0f3f800000;     	// 1
	div.approx.ftz.f32 	%f7, %f5, %f6;
	// %f8 = 255*c2, %f9 = 255*a, %f11 = %f9 * 0.00392157 (about 1/255, so roughly a).
	mul.ftz.f32 	%f8, %f7, %f3;
	mul.ftz.f32 	%f9, %f7, %f4;
	mov.f32 	%f10, 0f3b808081;    	// 0.00392157
	mul.ftz.f32 	%f11, %f9, %f10;
	// %r28: 255*c2 * %f11 + 0.5, clamped to [0, 255], truncated toward zero.
	// setp.gt is false for NaN, so a NaN value also selects the 0 operand.
	mul.ftz.f32 	%f12, %f8, %f11;
	mov.f32 	%f13, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f14, %f12, %f13;
	mov.f32 	%f15, 0f00000000;    	// 0
	mov.f32 	%f16, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f14, %f16;
	selp.f32 	%f17, %f14, %f15, %p3;
	mov.f32 	%f18, 0f437f0000;    	// 255
	min.ftz.f32 	%f19, %f17, %f18;
	cvt.rzi.ftz.u32.f32 	%r28, %f19;
	// %r29: same sequence for c1.
	mul.ftz.f32 	%f20, %f7, %f2;
	mul.ftz.f32 	%f21, %f20, %f11;
	mov.f32 	%f22, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f23, %f21, %f22;
	mov.f32 	%f24, 0f00000000;    	// 0
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f23, %f25;
	selp.f32 	%f26, %f23, %f24, %p4;
	mov.f32 	%f27, 0f437f0000;    	// 255
	min.ftz.f32 	%f28, %f26, %f27;
	cvt.rzi.ftz.u32.f32 	%r29, %f28;
	// %r30: same sequence for c0.
	mul.ftz.f32 	%f29, %f7, %f1;
	mul.ftz.f32 	%f30, %f29, %f11;
	mov.f32 	%f31, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f32, %f30, %f31;
	mov.f32 	%f33, 0f00000000;    	// 0
	mov.f32 	%f34, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f32, %f34;
	selp.f32 	%f35, %f32, %f33, %p5;
	mov.f32 	%f36, 0f437f0000;    	// 255
	min.ftz.f32 	%f37, %f35, %f36;
	cvt.rzi.ftz.u32.f32 	%r30, %f37;
	.loc	19	126	0
	// Destination address %rd10 = inDest + (inDestPitch * y + x) * 4 bytes.
	// NOTE: %rd8 is not read again in this kernel; mul.wide.s32 performs the widening.
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel_inDest];
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel_inDestPitch];
	mul.lo.s32 	%r32, %r31, %r10;
	add.s32 	%r33, %r8, %r32;
	cvt.s64.s32 	%rd8, %r33;
	mul.wide.s32 	%rd9, %r33, 4;
	add.u64 	%rd10, %rd7, %rd9;
	// %r34: the constant 255.0 converted to an integer; source alpha is not stored.
	mov.f32 	%f38, 0f437f0000;    	// 255
	cvt.rzi.ftz.u32.f32 	%r34, %f38;
	// Store the four bytes as one 32-bit pixel, constant first: {255, c2*a, c1*a, c0*a}.
	st.global.v4.u8 	[%rd10+0], {%r34,%r28,%r29,%r30};
$Lt_90_195586:
	.loc	22	205	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_8u_Kernel

	// ---------------------------------------------------------------------------
	// PixelFormatConvert: BGRA_4444_32f -> VUYA_4444_8u
	// One thread converts one pixel at (x, y), where x = ctaid.x*ntid.x + tid.x and
	// y = ctaid.y*ntid.y + tid.y.
	//   inSrc / inDest         : base addresses, accessed with ld.global / st.global.
	//   inSrcPitch/inDestPitch : multiplied by y and added to x to form a pixel index,
	//                            i.e. they are used as row strides counted in pixels.
	//   inSrcDeviceFormat      : 0 selects 8-byte f16x4 source pixels; any other value
	//                            selects 16-byte f32x4 source pixels.
	//   inDestDeviceFormat     : declared but never read by this kernel.
	//   inWidth / inHeight     : threads with x >= inWidth or y >= inHeight exit
	//                            without touching memory (signed comparison).
	// Colour transform: nine f32 coefficients are read from kRGB32f_To_601YCbCr
	// (ld.global, byte offsets 0..32) and used as three rows of three. Row k is
	//   M[3k]*c2 + M[3k+1]*c1 + M[3k+2]*c0 + %f9 * kYCbCrOffset[k]
	// where %f9 = 255/255 (approximate divide) and kYCbCrOffset is read with ld.const.
	// Output bytes in store order: row 2, row 1, row 0, 255*a -- each offset by 0.5,
	// clamped to [0, 255] and truncated toward zero. c0..c3 are the source channels
	// in load order (B, G, R, A going by the kernel name).
	// NOTE(review): the colour rows are not multiplied by 255 here; presumably the
	// table coefficients already carry the 8-bit output scale -- confirm against the
	// table definition.
	// ---------------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<65>;
	.reg .pred %p<8>;
	.loc	22	206	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: set.gt yields an all-ones mask when true, neg turns it into 1.
	// %r13 = (inWidth > x), %r16 = (inHeight > y); skip to exit unless both hold.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_91_195074;
	// Source pixel index %rd1 = inSrcPitch * y + x (computed in 32 bits, then sign-extended).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel_inSrc];
	// Choose the source layout: non-zero device format -> f32x4 path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_91_195842;
	.loc	19	115	0
	// f16 path: 8 bytes per pixel; load four 16-bit values and widen each to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	206	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_91_195586;
$Lt_91_195842:
	// f32 path: 16 bytes per pixel; load %f1..%f4 directly.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_91_195586:
	.loc	22	56	0
	// Row 1 (%r28): M[3]*c2 + M[4]*c1 + M[5]*c0 + %f9 * kYCbCrOffset[1].
	// %f9 = 255 / 255 via approximate divide; it multiplies the offset term of every row.
	ld.global.f32 	%f5, [kRGB32f_To_601YCbCr+16];
	mul.ftz.f32 	%f6, %f5, %f2;
	mov.f32 	%f7, 0f437f0000;     	// 255
	mov.f32 	%f8, 0f437f0000;     	// 255
	div.approx.ftz.f32 	%f9, %f7, %f8;
	ld.global.f32 	%f10, [kRGB32f_To_601YCbCr+12];
	fma.rn.ftz.f32 	%f11, %f10, %f3, %f6;
	ld.global.f32 	%f12, [kRGB32f_To_601YCbCr+20];
	fma.rn.ftz.f32 	%f13, %f12, %f1, %f11;
	ld.const.f32 	%f14, [kYCbCrOffset+4];
	fma.rn.ftz.f32 	%f15, %f9, %f14, %f13;
	// Add 0.5, clamp to [0, 255], truncate toward zero.
	// setp.gt is false for NaN, so a NaN value also selects the 0 operand.
	mov.f32 	%f16, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f17, %f15, %f16;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f17, %f19;
	selp.f32 	%f20, %f17, %f18, %p3;
	mov.f32 	%f21, 0f437f0000;    	// 255
	min.ftz.f32 	%f22, %f20, %f21;
	cvt.rzi.ftz.u32.f32 	%r28, %f22;
	// Row 0 (%r29): M[0]*c2 + M[1]*c1 + M[2]*c0 + %f9 * kYCbCrOffset[0], then the same rounding/clamp.
	ld.global.f32 	%f23, [kRGB32f_To_601YCbCr+4];
	mul.ftz.f32 	%f24, %f23, %f2;
	ld.global.f32 	%f25, [kRGB32f_To_601YCbCr+0];
	fma.rn.ftz.f32 	%f26, %f25, %f3, %f24;
	ld.global.f32 	%f27, [kRGB32f_To_601YCbCr+8];
	fma.rn.ftz.f32 	%f28, %f27, %f1, %f26;
	ld.const.f32 	%f29, [kYCbCrOffset+0];
	fma.rn.ftz.f32 	%f30, %f9, %f29, %f28;
	mov.f32 	%f31, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f32, %f30, %f31;
	mov.f32 	%f33, 0f00000000;    	// 0
	mov.f32 	%f34, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f32, %f34;
	selp.f32 	%f35, %f32, %f33, %p4;
	mov.f32 	%f36, 0f437f0000;    	// 255
	min.ftz.f32 	%f37, %f35, %f36;
	cvt.rzi.ftz.u32.f32 	%r29, %f37;
	.loc	19	126	0
	// Row 2 (%r33): M[6]*c2 + M[7]*c1 + M[8]*c0 + %f9 * kYCbCrOffset[2]; %f47 adds the 0.5.
	ld.global.f32 	%f38, [kRGB32f_To_601YCbCr+28];
	mul.ftz.f32 	%f39, %f38, %f2;
	ld.global.f32 	%f40, [kRGB32f_To_601YCbCr+24];
	fma.rn.ftz.f32 	%f41, %f40, %f3, %f39;
	ld.global.f32 	%f42, [kRGB32f_To_601YCbCr+32];
	fma.rn.ftz.f32 	%f43, %f42, %f1, %f41;
	ld.const.f32 	%f44, [kYCbCrOffset+8];
	fma.rn.ftz.f32 	%f45, %f9, %f44, %f43;
	mov.f32 	%f46, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f47, %f45, %f46;
	// Destination address %rd10 = inDest + (inDestPitch * y + x) * 4 bytes.
	// NOTE: %rd8 is not read again in this kernel; mul.wide.s32 performs the widening.
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel_inDest];
	ld.param.s32 	%r30, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel_inDestPitch];
	mul.lo.s32 	%r31, %r30, %r10;
	add.s32 	%r32, %r8, %r31;
	cvt.s64.s32 	%rd8, %r32;
	mul.wide.s32 	%rd9, %r32, 4;
	add.u64 	%rd10, %rd7, %rd9;
	// Finish row 2: clamp %f47 to [0, 255] and truncate toward zero.
	mov.f32 	%f48, 0f00000000;    	// 0
	mov.f32 	%f49, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f47, %f49;
	selp.f32 	%f50, %f47, %f48, %p5;
	mov.f32 	%f51, 0f437f0000;    	// 255
	min.ftz.f32 	%f52, %f50, %f51;
	cvt.rzi.ftz.u32.f32 	%r33, %f52;
	// Alpha (%r34): (255 / 1) * a + 0.5, clamped to [0, 255] and truncated.
	mov.f32 	%f53, 0f437f0000;    	// 255
	mov.f32 	%f54, 0f3f800000;    	// 1
	div.approx.ftz.f32 	%f55, %f53, %f54;
	mul.ftz.f32 	%f56, %f55, %f4;
	mov.f32 	%f57, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f58, %f56, %f57;
	mov.f32 	%f59, 0f00000000;    	// 0
	mov.f32 	%f60, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p6, %f58, %f60;
	selp.f32 	%f61, %f58, %f59, %p6;
	mov.f32 	%f62, 0f437f0000;    	// 255
	min.ftz.f32 	%f63, %f61, %f62;
	cvt.rzi.ftz.u32.f32 	%r34, %f63;
	// Store the four bytes as one 32-bit pixel: {row 2, row 1, row 0, alpha}.
	st.global.v4.u8 	[%rd10+0], {%r33,%r28,%r29,%r34};
$Lt_91_195074:
	.loc	22	206	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_Kernel

	// Kernel: converts one source pixel per thread into four 8-bit values, using the
	// kRGB32f_To_601YCbCr coefficients (ld.global) and kYCbCrOffset (ld.const), with the
	// three colour results scaled by the source alpha before the store.
	//
	// Parameters as used by the code below:
	//   inSrc              base address of the source pixels
	//   inSrcPitch         multiplied by the row index to form a pixel (not byte) index
	//   inSrcDeviceFormat  0 -> source pixel is 4 x 16-bit half floats (8 bytes),
	//                      otherwise 4 x f32 (16 bytes)
	//   inDest             base address of the destination pixels (4 bytes each)
	//   inDestPitch        multiplied by the row index to form a pixel index
	//   inDestDeviceFormat not read by this kernel
	//   inWidth, inHeight  threads with x >= inWidth or y >= inHeight store nothing
	//
	// NOTE(review): the kernel name suggests %f1..%f4 = B, G, R, A and that the stored
	// bytes are V, U, Y, A (premultiplied) -- confirm against the CUDA source.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<79>;
	.reg .pred %p<8>;
	.loc	22	207	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test (signed compares): continue only when inWidth > x AND inHeight > y.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_92_196610;
	// %rd1 = source pixel index = inSrcPitch * y + x (sign-extended to 64 bits).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel_inSrc];
	// Select the load path: a non-zero inSrcDeviceFormat takes the 4 x f32 load.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_92_197378;
	.loc	19	115	0
	// Half-float path: 8 bytes per pixel, four 16-bit values widened to f32 in %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	207	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_92_197122;
$Lt_92_197378:
	// Float path: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_92_197122:
	.loc	21	345	0
	// %f7  = 255 / 1   (scale applied to the fourth channel %f4)
	// %f12 = 255 / 255 (scale applied to the kYCbCrOffset entries)
	// %f13 = %f7 * %f4 ; %f17 = %f13 * 0.00392157 (factor applied to the colour results)
	mov.f32 	%f5, 0f437f0000;     	// 255
	mov.f32 	%f6, 0f3f800000;     	// 1
	div.approx.ftz.f32 	%f7, %f5, %f6;
	ld.global.f32 	%f8, [kRGB32f_To_601YCbCr+16];
	mul.ftz.f32 	%f9, %f8, %f2;
	mov.f32 	%f10, 0f437f0000;    	// 255
	mov.f32 	%f11, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f12, %f10, %f11;
	mul.ftz.f32 	%f13, %f7, %f4;
	ld.global.f32 	%f14, [kRGB32f_To_601YCbCr+12];
	fma.rn.ftz.f32 	%f15, %f14, %f3, %f9;
	mov.f32 	%f16, 0f3b808081;    	// 0.00392157
	mul.ftz.f32 	%f17, %f13, %f16;
	// %f19 = M[+12]*%f3 + M[+16]*%f2 + M[+20]*%f1 (M = kRGB32f_To_601YCbCr)
	ld.global.f32 	%f18, [kRGB32f_To_601YCbCr+20];
	fma.rn.ftz.f32 	%f19, %f18, %f1, %f15;
	// Add the scaled offset, subtract it again, scale the remainder by %f17 and re-add the
	// offset: %f25 = offset + %f17 * ((%f19 + offset) - offset).
	ld.const.f32 	%f20, [kYCbCrOffset+4];
	fma.rn.ftz.f32 	%f21, %f12, %f20, %f19;
	mul.ftz.f32 	%f22, %f12, %f20;
	sub.ftz.f32 	%f23, %f21, %f22;
	mul.ftz.f32 	%f24, %f17, %f23;
	fma.rn.ftz.f32 	%f25, %f12, %f20, %f24;
	// Add 0.5, then clamp to [0, 255]; the truncating conversion happens further down (%r32).
	mov.f32 	%f26, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f27, %f25, %f26;
	mov.f32 	%f28, 0f00000000;    	// 0
	mov.f32 	%f29, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f27, %f29;
	selp.f32 	%f30, %f27, %f28, %p3;
	mov.f32 	%f31, 0f437f0000;    	// 255
	min.ftz.f32 	%f32, %f30, %f31;
	.loc	21	346	0
	// Same sequence for matrix entries +0/+4/+8 and kYCbCrOffset+0; clamped result in %f51.
	ld.global.f32 	%f33, [kRGB32f_To_601YCbCr+4];
	mul.ftz.f32 	%f34, %f33, %f2;
	ld.global.f32 	%f35, [kRGB32f_To_601YCbCr+0];
	fma.rn.ftz.f32 	%f36, %f35, %f3, %f34;
	ld.global.f32 	%f37, [kRGB32f_To_601YCbCr+8];
	fma.rn.ftz.f32 	%f38, %f37, %f1, %f36;
	ld.const.f32 	%f39, [kYCbCrOffset+0];
	fma.rn.ftz.f32 	%f40, %f12, %f39, %f38;
	mul.ftz.f32 	%f41, %f12, %f39;
	sub.ftz.f32 	%f42, %f40, %f41;
	mul.ftz.f32 	%f43, %f17, %f42;
	fma.rn.ftz.f32 	%f44, %f12, %f39, %f43;
	mov.f32 	%f45, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f46, %f44, %f45;
	mov.f32 	%f47, 0f00000000;    	// 0
	mov.f32 	%f48, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f46, %f48;
	selp.f32 	%f49, %f46, %f47, %p4;
	mov.f32 	%f50, 0f437f0000;    	// 255
	min.ftz.f32 	%f51, %f49, %f50;
	.loc	19	126	0
	// Same sequence for matrix entries +24/+28/+32 and kYCbCrOffset+8; the destination
	// address computation is interleaved before the final clamp.
	ld.global.f32 	%f52, [kRGB32f_To_601YCbCr+28];
	mul.ftz.f32 	%f53, %f52, %f2;
	ld.global.f32 	%f54, [kRGB32f_To_601YCbCr+24];
	fma.rn.ftz.f32 	%f55, %f54, %f3, %f53;
	ld.global.f32 	%f56, [kRGB32f_To_601YCbCr+32];
	fma.rn.ftz.f32 	%f57, %f56, %f1, %f55;
	ld.const.f32 	%f58, [kYCbCrOffset+8];
	fma.rn.ftz.f32 	%f59, %f12, %f58, %f57;
	mul.ftz.f32 	%f60, %f12, %f58;
	sub.ftz.f32 	%f61, %f59, %f60;
	mul.ftz.f32 	%f62, %f17, %f61;
	// %rd10 = inDest + 4 * (inDestPitch * y + x). %rd8 is written but not read again below.
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel_inDest];
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd8, %r30;
	mul.wide.s32 	%rd9, %r30, 4;
	add.u64 	%rd10, %rd7, %rd9;
	fma.rn.ftz.f32 	%f63, %f12, %f58, %f62;
	mov.f32 	%f64, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f65, %f63, %f64;
	mov.f32 	%f66, 0f00000000;    	// 0
	mov.f32 	%f67, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f65, %f67;
	selp.f32 	%f68, %f65, %f66, %p5;
	mov.f32 	%f69, 0f437f0000;    	// 255
	min.ftz.f32 	%f70, %f68, %f69;
	// Truncate (round toward zero) the three clamped colour results to integers.
	cvt.rzi.ftz.u32.f32 	%r31, %f70;
	cvt.rzi.ftz.u32.f32 	%r32, %f32;
	cvt.rzi.ftz.u32.f32 	%r33, %f51;
	// Fourth byte: %f13 (= 255 * %f4) + 0.5, clamped to [0, 255], truncated.
	mov.f32 	%f71, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f72, %f13, %f71;
	mov.f32 	%f73, 0f00000000;    	// 0
	mov.f32 	%f74, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p6, %f72, %f74;
	selp.f32 	%f75, %f72, %f73, %p6;
	mov.f32 	%f76, 0f437f0000;    	// 255
	min.ftz.f32 	%f77, %f75, %f76;
	cvt.rzi.ftz.u32.f32 	%r34, %f77;
	// Byte order stored: {rows +24..+32, rows +12..+20, rows +0..+8, fourth channel}.
	st.global.v4.u8 	[%rd10+0], {%r31,%r32,%r33,%r34};
$Lt_92_196610:
	.loc	22	207	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_Kernel

	// Kernel: converts one source pixel per thread into four 8-bit values, using the
	// kRGB32f_To_601YCbCr coefficients (ld.global) and kYCbCrOffset (ld.const). The three
	// colour results are scaled by a factor derived from the fourth source channel, and the
	// fourth stored byte is the constant 255.
	//
	// Parameters as used by the code below:
	//   inSrc              base address of the source pixels
	//   inSrcPitch         multiplied by the row index to form a pixel (not byte) index
	//   inSrcDeviceFormat  0 -> source pixel is 4 x 16-bit half floats (8 bytes),
	//                      otherwise 4 x f32 (16 bytes)
	//   inDest             base address of the destination pixels (4 bytes each)
	//   inDestPitch        multiplied by the row index to form a pixel index
	//   inDestDeviceFormat not read by this kernel
	//   inWidth, inHeight  threads with x >= inWidth or y >= inHeight store nothing
	//
	// NOTE(review): the kernel name suggests %f1..%f4 = B, G, R, A and that the stored
	// bytes are V, U, Y, X -- confirm against the CUDA source.
	// NOTE(review): colour is still scaled by the source fourth channel (%f17) even though
	// the stored fourth byte is a fixed 255 -- confirm this is the intended behaviour.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<73>;
	.reg .pred %p<7>;
	.loc	22	208	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test (signed compares): continue only when inWidth > x AND inHeight > y.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_93_196610;
	// %rd1 = source pixel index = inSrcPitch * y + x (sign-extended to 64 bits).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel_inSrc];
	// Select the load path: a non-zero inSrcDeviceFormat takes the 4 x f32 load.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_93_197378;
	.loc	19	115	0
	// Half-float path: 8 bytes per pixel, four 16-bit values widened to f32 in %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	208	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_93_197122;
$Lt_93_197378:
	// Float path: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_93_197122:
	.loc	21	345	0
	// %f7  = 255 / 1   (scale applied to the fourth channel %f4)
	// %f12 = 255 / 255 (scale applied to the kYCbCrOffset entries)
	// %f13 = %f7 * %f4 ; %f17 = %f13 * 0.00392157 (factor applied to the colour results)
	mov.f32 	%f5, 0f437f0000;     	// 255
	mov.f32 	%f6, 0f3f800000;     	// 1
	div.approx.ftz.f32 	%f7, %f5, %f6;
	ld.global.f32 	%f8, [kRGB32f_To_601YCbCr+16];
	mul.ftz.f32 	%f9, %f8, %f2;
	mov.f32 	%f10, 0f437f0000;    	// 255
	mov.f32 	%f11, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f12, %f10, %f11;
	mul.ftz.f32 	%f13, %f7, %f4;
	ld.global.f32 	%f14, [kRGB32f_To_601YCbCr+12];
	fma.rn.ftz.f32 	%f15, %f14, %f3, %f9;
	mov.f32 	%f16, 0f3b808081;    	// 0.00392157
	mul.ftz.f32 	%f17, %f13, %f16;
	// %f19 = M[+12]*%f3 + M[+16]*%f2 + M[+20]*%f1 (M = kRGB32f_To_601YCbCr)
	ld.global.f32 	%f18, [kRGB32f_To_601YCbCr+20];
	fma.rn.ftz.f32 	%f19, %f18, %f1, %f15;
	// Add the scaled offset, subtract it again, scale the remainder by %f17 and re-add the
	// offset: %f25 = offset + %f17 * ((%f19 + offset) - offset).
	ld.const.f32 	%f20, [kYCbCrOffset+4];
	fma.rn.ftz.f32 	%f21, %f12, %f20, %f19;
	mul.ftz.f32 	%f22, %f12, %f20;
	sub.ftz.f32 	%f23, %f21, %f22;
	mul.ftz.f32 	%f24, %f17, %f23;
	fma.rn.ftz.f32 	%f25, %f12, %f20, %f24;
	// Add 0.5, then clamp to [0, 255]; the truncating conversion happens further down (%r32).
	mov.f32 	%f26, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f27, %f25, %f26;
	mov.f32 	%f28, 0f00000000;    	// 0
	mov.f32 	%f29, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f27, %f29;
	selp.f32 	%f30, %f27, %f28, %p3;
	mov.f32 	%f31, 0f437f0000;    	// 255
	min.ftz.f32 	%f32, %f30, %f31;
	.loc	21	346	0
	// Same sequence for matrix entries +0/+4/+8 and kYCbCrOffset+0; clamped result in %f51.
	ld.global.f32 	%f33, [kRGB32f_To_601YCbCr+4];
	mul.ftz.f32 	%f34, %f33, %f2;
	ld.global.f32 	%f35, [kRGB32f_To_601YCbCr+0];
	fma.rn.ftz.f32 	%f36, %f35, %f3, %f34;
	ld.global.f32 	%f37, [kRGB32f_To_601YCbCr+8];
	fma.rn.ftz.f32 	%f38, %f37, %f1, %f36;
	ld.const.f32 	%f39, [kYCbCrOffset+0];
	fma.rn.ftz.f32 	%f40, %f12, %f39, %f38;
	mul.ftz.f32 	%f41, %f12, %f39;
	sub.ftz.f32 	%f42, %f40, %f41;
	mul.ftz.f32 	%f43, %f17, %f42;
	fma.rn.ftz.f32 	%f44, %f12, %f39, %f43;
	mov.f32 	%f45, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f46, %f44, %f45;
	mov.f32 	%f47, 0f00000000;    	// 0
	mov.f32 	%f48, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f46, %f48;
	selp.f32 	%f49, %f46, %f47, %p4;
	mov.f32 	%f50, 0f437f0000;    	// 255
	min.ftz.f32 	%f51, %f49, %f50;
	.loc	19	126	0
	// Same sequence for matrix entries +24/+28/+32 and kYCbCrOffset+8; the destination
	// address computation is interleaved before the final clamp.
	ld.global.f32 	%f52, [kRGB32f_To_601YCbCr+28];
	mul.ftz.f32 	%f53, %f52, %f2;
	ld.global.f32 	%f54, [kRGB32f_To_601YCbCr+24];
	fma.rn.ftz.f32 	%f55, %f54, %f3, %f53;
	ld.global.f32 	%f56, [kRGB32f_To_601YCbCr+32];
	fma.rn.ftz.f32 	%f57, %f56, %f1, %f55;
	ld.const.f32 	%f58, [kYCbCrOffset+8];
	fma.rn.ftz.f32 	%f59, %f12, %f58, %f57;
	mul.ftz.f32 	%f60, %f12, %f58;
	sub.ftz.f32 	%f61, %f59, %f60;
	mul.ftz.f32 	%f62, %f17, %f61;
	// %rd10 = inDest + 4 * (inDestPitch * y + x). %rd8 is written but not read again below.
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel_inDest];
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd8, %r30;
	mul.wide.s32 	%rd9, %r30, 4;
	add.u64 	%rd10, %rd7, %rd9;
	fma.rn.ftz.f32 	%f63, %f12, %f58, %f62;
	mov.f32 	%f64, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f65, %f63, %f64;
	mov.f32 	%f66, 0f00000000;    	// 0
	mov.f32 	%f67, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f65, %f67;
	selp.f32 	%f68, %f65, %f66, %p5;
	mov.f32 	%f69, 0f437f0000;    	// 255
	min.ftz.f32 	%f70, %f68, %f69;
	// Truncate (round toward zero) the three clamped colour results to integers.
	cvt.rzi.ftz.u32.f32 	%r31, %f70;
	cvt.rzi.ftz.u32.f32 	%r32, %f32;
	cvt.rzi.ftz.u32.f32 	%r33, %f51;
	// Fourth byte is the constant 255; the source fourth channel is not stored.
	mov.f32 	%f71, 0f437f0000;    	// 255
	cvt.rzi.ftz.u32.f32 	%r34, %f71;
	// Byte order stored: {rows +24..+32, rows +12..+20, rows +0..+8, constant 255}.
	st.global.v4.u8 	[%rd10+0], {%r31,%r32,%r33,%r34};
$Lt_93_196610:
	.loc	22	208	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_Kernel

	// Kernel: converts one source pixel per thread into four 8-bit values, using the
	// kRGB32f_To_709YCbCr coefficients (ld.global) and kYCbCrOffset (ld.const). The colour
	// results are not scaled by the fourth channel; the fourth channel is stored as
	// 255 * %f4 (rounded and clamped).
	//
	// Parameters as used by the code below:
	//   inSrc              base address of the source pixels
	//   inSrcPitch         multiplied by the row index to form a pixel (not byte) index
	//   inSrcDeviceFormat  0 -> source pixel is 4 x 16-bit half floats (8 bytes),
	//                      otherwise 4 x f32 (16 bytes)
	//   inDest             base address of the destination pixels (4 bytes each)
	//   inDestPitch        multiplied by the row index to form a pixel index
	//   inDestDeviceFormat not read by this kernel
	//   inWidth, inHeight  threads with x >= inWidth or y >= inHeight store nothing
	//
	// NOTE(review): the kernel name suggests %f1..%f4 = B, G, R, A and that the stored
	// bytes are V, U, Y, A -- confirm against the CUDA source.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<65>;
	.reg .pred %p<8>;
	.loc	22	209	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test (signed compares): continue only when inWidth > x AND inHeight > y.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_94_195330;
	// %rd1 = source pixel index = inSrcPitch * y + x (sign-extended to 64 bits).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel_inSrc];
	// Select the load path: a non-zero inSrcDeviceFormat takes the 4 x f32 load.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_94_196098;
	.loc	19	115	0
	// Half-float path: 8 bytes per pixel, four 16-bit values widened to f32 in %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	209	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_94_195842;
$Lt_94_196098:
	// Float path: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_94_195842:
	.loc	22	56	0
	// %f9 = 255 / 255 (scale applied to the kYCbCrOffset entries).
	// %f13 = M[+12]*%f3 + M[+16]*%f2 + M[+20]*%f1 (M = kRGB32f_To_709YCbCr);
	// %r28 = trunc(clamp(%f13 + %f9 * kYCbCrOffset[+4] + 0.5, 0, 255)).
	ld.global.f32 	%f5, [kRGB32f_To_709YCbCr+16];
	mul.ftz.f32 	%f6, %f5, %f2;
	mov.f32 	%f7, 0f437f0000;     	// 255
	mov.f32 	%f8, 0f437f0000;     	// 255
	div.approx.ftz.f32 	%f9, %f7, %f8;
	ld.global.f32 	%f10, [kRGB32f_To_709YCbCr+12];
	fma.rn.ftz.f32 	%f11, %f10, %f3, %f6;
	ld.global.f32 	%f12, [kRGB32f_To_709YCbCr+20];
	fma.rn.ftz.f32 	%f13, %f12, %f1, %f11;
	ld.const.f32 	%f14, [kYCbCrOffset+4];
	fma.rn.ftz.f32 	%f15, %f9, %f14, %f13;
	mov.f32 	%f16, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f17, %f15, %f16;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f17, %f19;
	selp.f32 	%f20, %f17, %f18, %p3;
	mov.f32 	%f21, 0f437f0000;    	// 255
	min.ftz.f32 	%f22, %f20, %f21;
	cvt.rzi.ftz.u32.f32 	%r28, %f22;
	// Same sequence for matrix entries +0/+4/+8 and kYCbCrOffset+0; result in %r29.
	ld.global.f32 	%f23, [kRGB32f_To_709YCbCr+4];
	mul.ftz.f32 	%f24, %f23, %f2;
	ld.global.f32 	%f25, [kRGB32f_To_709YCbCr+0];
	fma.rn.ftz.f32 	%f26, %f25, %f3, %f24;
	ld.global.f32 	%f27, [kRGB32f_To_709YCbCr+8];
	fma.rn.ftz.f32 	%f28, %f27, %f1, %f26;
	ld.const.f32 	%f29, [kYCbCrOffset+0];
	fma.rn.ftz.f32 	%f30, %f9, %f29, %f28;
	mov.f32 	%f31, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f32, %f30, %f31;
	mov.f32 	%f33, 0f00000000;    	// 0
	mov.f32 	%f34, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f32, %f34;
	selp.f32 	%f35, %f32, %f33, %p4;
	mov.f32 	%f36, 0f437f0000;    	// 255
	min.ftz.f32 	%f37, %f35, %f36;
	cvt.rzi.ftz.u32.f32 	%r29, %f37;
	.loc	19	126	0
	// Same sequence for matrix entries +24/+28/+32 and kYCbCrOffset+8; result in %r33.
	// The destination address computation is interleaved before the clamp.
	ld.global.f32 	%f38, [kRGB32f_To_709YCbCr+28];
	mul.ftz.f32 	%f39, %f38, %f2;
	ld.global.f32 	%f40, [kRGB32f_To_709YCbCr+24];
	fma.rn.ftz.f32 	%f41, %f40, %f3, %f39;
	ld.global.f32 	%f42, [kRGB32f_To_709YCbCr+32];
	fma.rn.ftz.f32 	%f43, %f42, %f1, %f41;
	ld.const.f32 	%f44, [kYCbCrOffset+8];
	fma.rn.ftz.f32 	%f45, %f9, %f44, %f43;
	mov.f32 	%f46, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f47, %f45, %f46;
	// %rd10 = inDest + 4 * (inDestPitch * y + x). %rd8 is written but not read again below.
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel_inDest];
	ld.param.s32 	%r30, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel_inDestPitch];
	mul.lo.s32 	%r31, %r30, %r10;
	add.s32 	%r32, %r8, %r31;
	cvt.s64.s32 	%rd8, %r32;
	mul.wide.s32 	%rd9, %r32, 4;
	add.u64 	%rd10, %rd7, %rd9;
	mov.f32 	%f48, 0f00000000;    	// 0
	mov.f32 	%f49, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f47, %f49;
	selp.f32 	%f50, %f47, %f48, %p5;
	mov.f32 	%f51, 0f437f0000;    	// 255
	min.ftz.f32 	%f52, %f50, %f51;
	cvt.rzi.ftz.u32.f32 	%r33, %f52;
	// Fourth byte: (255 / 1) * %f4 + 0.5, clamped to [0, 255], truncated.
	mov.f32 	%f53, 0f437f0000;    	// 255
	mov.f32 	%f54, 0f3f800000;    	// 1
	div.approx.ftz.f32 	%f55, %f53, %f54;
	mul.ftz.f32 	%f56, %f55, %f4;
	mov.f32 	%f57, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f58, %f56, %f57;
	mov.f32 	%f59, 0f00000000;    	// 0
	mov.f32 	%f60, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p6, %f58, %f60;
	selp.f32 	%f61, %f58, %f59, %p6;
	mov.f32 	%f62, 0f437f0000;    	// 255
	min.ftz.f32 	%f63, %f61, %f62;
	cvt.rzi.ftz.u32.f32 	%r34, %f63;
	// Byte order stored: {rows +24..+32, rows +12..+20, rows +0..+8, fourth channel}.
	st.global.v4.u8 	[%rd10+0], {%r33,%r28,%r29,%r34};
$Lt_94_195330:
	.loc	22	209	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_8u_709_Kernel

	// Kernel: converts one source pixel per thread into four 8-bit values, using the
	// kRGB32f_To_709YCbCr coefficients (ld.global) and kYCbCrOffset (ld.const), with the
	// three colour results scaled by the source alpha before the store.
	//
	// Parameters as used by the code below:
	//   inSrc              base address of the source pixels
	//   inSrcPitch         multiplied by the row index to form a pixel (not byte) index
	//   inSrcDeviceFormat  0 -> source pixel is 4 x 16-bit half floats (8 bytes),
	//                      otherwise 4 x f32 (16 bytes)
	//   inDest             base address of the destination pixels (4 bytes each)
	//   inDestPitch        multiplied by the row index to form a pixel index
	//   inDestDeviceFormat not read by this kernel
	//   inWidth, inHeight  threads with x >= inWidth or y >= inHeight store nothing
	//
	// NOTE(review): the kernel name suggests %f1..%f4 = B, G, R, A and that the stored
	// bytes are V, U, Y, A (premultiplied) -- confirm against the CUDA source.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<79>;
	.reg .pred %p<8>;
	.loc	22	210	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test (signed compares): continue only when inWidth > x AND inHeight > y.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_95_196866;
	// %rd1 = source pixel index = inSrcPitch * y + x (sign-extended to 64 bits).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel_inSrc];
	// Select the load path: a non-zero inSrcDeviceFormat takes the 4 x f32 load.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_95_197634;
	.loc	19	115	0
	// Half-float path: 8 bytes per pixel, four 16-bit values widened to f32 in %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	210	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_95_197378;
$Lt_95_197634:
	// Float path: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_95_197378:
	.loc	21	345	0
	// %f7  = 255 / 1   (scale applied to the fourth channel %f4)
	// %f12 = 255 / 255 (scale applied to the kYCbCrOffset entries)
	// %f13 = %f7 * %f4 ; %f17 = %f13 * 0.00392157 (factor applied to the colour results)
	mov.f32 	%f5, 0f437f0000;     	// 255
	mov.f32 	%f6, 0f3f800000;     	// 1
	div.approx.ftz.f32 	%f7, %f5, %f6;
	ld.global.f32 	%f8, [kRGB32f_To_709YCbCr+16];
	mul.ftz.f32 	%f9, %f8, %f2;
	mov.f32 	%f10, 0f437f0000;    	// 255
	mov.f32 	%f11, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f12, %f10, %f11;
	mul.ftz.f32 	%f13, %f7, %f4;
	ld.global.f32 	%f14, [kRGB32f_To_709YCbCr+12];
	fma.rn.ftz.f32 	%f15, %f14, %f3, %f9;
	mov.f32 	%f16, 0f3b808081;    	// 0.00392157
	mul.ftz.f32 	%f17, %f13, %f16;
	// %f19 = M[+12]*%f3 + M[+16]*%f2 + M[+20]*%f1 (M = kRGB32f_To_709YCbCr)
	ld.global.f32 	%f18, [kRGB32f_To_709YCbCr+20];
	fma.rn.ftz.f32 	%f19, %f18, %f1, %f15;
	// Add the scaled offset, subtract it again, scale the remainder by %f17 and re-add the
	// offset: %f25 = offset + %f17 * ((%f19 + offset) - offset).
	ld.const.f32 	%f20, [kYCbCrOffset+4];
	fma.rn.ftz.f32 	%f21, %f12, %f20, %f19;
	mul.ftz.f32 	%f22, %f12, %f20;
	sub.ftz.f32 	%f23, %f21, %f22;
	mul.ftz.f32 	%f24, %f17, %f23;
	fma.rn.ftz.f32 	%f25, %f12, %f20, %f24;
	// Add 0.5, then clamp to [0, 255]; the truncating conversion happens further down (%r32).
	mov.f32 	%f26, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f27, %f25, %f26;
	mov.f32 	%f28, 0f00000000;    	// 0
	mov.f32 	%f29, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f27, %f29;
	selp.f32 	%f30, %f27, %f28, %p3;
	mov.f32 	%f31, 0f437f0000;    	// 255
	min.ftz.f32 	%f32, %f30, %f31;
	.loc	21	346	0
	// Same sequence for matrix entries +0/+4/+8 and kYCbCrOffset+0; clamped result in %f51.
	ld.global.f32 	%f33, [kRGB32f_To_709YCbCr+4];
	mul.ftz.f32 	%f34, %f33, %f2;
	ld.global.f32 	%f35, [kRGB32f_To_709YCbCr+0];
	fma.rn.ftz.f32 	%f36, %f35, %f3, %f34;
	ld.global.f32 	%f37, [kRGB32f_To_709YCbCr+8];
	fma.rn.ftz.f32 	%f38, %f37, %f1, %f36;
	ld.const.f32 	%f39, [kYCbCrOffset+0];
	fma.rn.ftz.f32 	%f40, %f12, %f39, %f38;
	mul.ftz.f32 	%f41, %f12, %f39;
	sub.ftz.f32 	%f42, %f40, %f41;
	mul.ftz.f32 	%f43, %f17, %f42;
	fma.rn.ftz.f32 	%f44, %f12, %f39, %f43;
	mov.f32 	%f45, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f46, %f44, %f45;
	mov.f32 	%f47, 0f00000000;    	// 0
	mov.f32 	%f48, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f46, %f48;
	selp.f32 	%f49, %f46, %f47, %p4;
	mov.f32 	%f50, 0f437f0000;    	// 255
	min.ftz.f32 	%f51, %f49, %f50;
	.loc	19	126	0
	// Same sequence for matrix entries +24/+28/+32 and kYCbCrOffset+8; the destination
	// address computation is interleaved before the final clamp.
	ld.global.f32 	%f52, [kRGB32f_To_709YCbCr+28];
	mul.ftz.f32 	%f53, %f52, %f2;
	ld.global.f32 	%f54, [kRGB32f_To_709YCbCr+24];
	fma.rn.ftz.f32 	%f55, %f54, %f3, %f53;
	ld.global.f32 	%f56, [kRGB32f_To_709YCbCr+32];
	fma.rn.ftz.f32 	%f57, %f56, %f1, %f55;
	ld.const.f32 	%f58, [kYCbCrOffset+8];
	fma.rn.ftz.f32 	%f59, %f12, %f58, %f57;
	mul.ftz.f32 	%f60, %f12, %f58;
	sub.ftz.f32 	%f61, %f59, %f60;
	mul.ftz.f32 	%f62, %f17, %f61;
	// %rd10 = inDest + 4 * (inDestPitch * y + x). %rd8 is written but not read again below.
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel_inDest];
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd8, %r30;
	mul.wide.s32 	%rd9, %r30, 4;
	add.u64 	%rd10, %rd7, %rd9;
	fma.rn.ftz.f32 	%f63, %f12, %f58, %f62;
	mov.f32 	%f64, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f65, %f63, %f64;
	mov.f32 	%f66, 0f00000000;    	// 0
	mov.f32 	%f67, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f65, %f67;
	selp.f32 	%f68, %f65, %f66, %p5;
	mov.f32 	%f69, 0f437f0000;    	// 255
	min.ftz.f32 	%f70, %f68, %f69;
	// Truncate (round toward zero) the three clamped colour results to integers.
	cvt.rzi.ftz.u32.f32 	%r31, %f70;
	cvt.rzi.ftz.u32.f32 	%r32, %f32;
	cvt.rzi.ftz.u32.f32 	%r33, %f51;
	// Fourth byte: %f13 (= 255 * %f4) + 0.5, clamped to [0, 255], truncated.
	mov.f32 	%f71, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f72, %f13, %f71;
	mov.f32 	%f73, 0f00000000;    	// 0
	mov.f32 	%f74, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p6, %f72, %f74;
	selp.f32 	%f75, %f72, %f73, %p6;
	mov.f32 	%f76, 0f437f0000;    	// 255
	min.ftz.f32 	%f77, %f75, %f76;
	cvt.rzi.ftz.u32.f32 	%r34, %f77;
	// Byte order stored: {rows +24..+32, rows +12..+20, rows +0..+8, fourth channel}.
	st.global.v4.u8 	[%rd10+0], {%r31,%r32,%r33,%r34};
$Lt_95_196866:
	.loc	22	210	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_8u_709_Kernel

	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<73>;
	.reg .pred %p<7>;
	.loc	22	211	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel:
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_96_196866;
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel_inSrc];
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_96_197634;
	.loc	19	115	0
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	211	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_96_197378;
$Lt_96_197634:
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_96_197378:
	.loc	21	345	0
	mov.f32 	%f5, 0f437f0000;     	// 255
	mov.f32 	%f6, 0f3f800000;     	// 1
	div.approx.ftz.f32 	%f7, %f5, %f6;
	ld.global.f32 	%f8, [kRGB32f_To_709YCbCr+16];
	mul.ftz.f32 	%f9, %f8, %f2;
	mov.f32 	%f10, 0f437f0000;    	// 255
	mov.f32 	%f11, 0f437f0000;    	// 255
	div.approx.ftz.f32 	%f12, %f10, %f11;
	mul.ftz.f32 	%f13, %f7, %f4;
	ld.global.f32 	%f14, [kRGB32f_To_709YCbCr+12];
	fma.rn.ftz.f32 	%f15, %f14, %f3, %f9;
	mov.f32 	%f16, 0f3b808081;    	// 0.00392157
	mul.ftz.f32 	%f17, %f13, %f16;
	ld.global.f32 	%f18, [kRGB32f_To_709YCbCr+20];
	fma.rn.ftz.f32 	%f19, %f18, %f1, %f15;
	ld.const.f32 	%f20, [kYCbCrOffset+4];
	fma.rn.ftz.f32 	%f21, %f12, %f20, %f19;
	mul.ftz.f32 	%f22, %f12, %f20;
	sub.ftz.f32 	%f23, %f21, %f22;
	mul.ftz.f32 	%f24, %f17, %f23;
	fma.rn.ftz.f32 	%f25, %f12, %f20, %f24;
	mov.f32 	%f26, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f27, %f25, %f26;
	mov.f32 	%f28, 0f00000000;    	// 0
	mov.f32 	%f29, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f27, %f29;
	selp.f32 	%f30, %f27, %f28, %p3;
	mov.f32 	%f31, 0f437f0000;    	// 255
	min.ftz.f32 	%f32, %f30, %f31;
	.loc	21	346	0
	ld.global.f32 	%f33, [kRGB32f_To_709YCbCr+4];
	mul.ftz.f32 	%f34, %f33, %f2;
	ld.global.f32 	%f35, [kRGB32f_To_709YCbCr+0];
	fma.rn.ftz.f32 	%f36, %f35, %f3, %f34;
	ld.global.f32 	%f37, [kRGB32f_To_709YCbCr+8];
	fma.rn.ftz.f32 	%f38, %f37, %f1, %f36;
	ld.const.f32 	%f39, [kYCbCrOffset+0];
	fma.rn.ftz.f32 	%f40, %f12, %f39, %f38;
	mul.ftz.f32 	%f41, %f12, %f39;
	sub.ftz.f32 	%f42, %f40, %f41;
	mul.ftz.f32 	%f43, %f17, %f42;
	fma.rn.ftz.f32 	%f44, %f12, %f39, %f43;
	mov.f32 	%f45, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f46, %f44, %f45;
	mov.f32 	%f47, 0f00000000;    	// 0
	mov.f32 	%f48, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f46, %f48;
	selp.f32 	%f49, %f46, %f47, %p4;
	mov.f32 	%f50, 0f437f0000;    	// 255
	min.ftz.f32 	%f51, %f49, %f50;
	.loc	19	126	0
	ld.global.f32 	%f52, [kRGB32f_To_709YCbCr+28];
	mul.ftz.f32 	%f53, %f52, %f2;
	ld.global.f32 	%f54, [kRGB32f_To_709YCbCr+24];
	fma.rn.ftz.f32 	%f55, %f54, %f3, %f53;
	ld.global.f32 	%f56, [kRGB32f_To_709YCbCr+32];
	fma.rn.ftz.f32 	%f57, %f56, %f1, %f55;
	ld.const.f32 	%f58, [kYCbCrOffset+8];
	fma.rn.ftz.f32 	%f59, %f12, %f58, %f57;
	mul.ftz.f32 	%f60, %f12, %f58;
	sub.ftz.f32 	%f61, %f59, %f60;
	mul.ftz.f32 	%f62, %f17, %f61;
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel_inDest];
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd8, %r30;
	mul.wide.s32 	%rd9, %r30, 4;
	add.u64 	%rd10, %rd7, %rd9;
	fma.rn.ftz.f32 	%f63, %f12, %f58, %f62;
	mov.f32 	%f64, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f65, %f63, %f64;
	mov.f32 	%f66, 0f00000000;    	// 0
	mov.f32 	%f67, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f65, %f67;
	selp.f32 	%f68, %f65, %f66, %p5;
	mov.f32 	%f69, 0f437f0000;    	// 255
	min.ftz.f32 	%f70, %f68, %f69;
	cvt.rzi.ftz.u32.f32 	%r31, %f70;
	cvt.rzi.ftz.u32.f32 	%r32, %f32;
	cvt.rzi.ftz.u32.f32 	%r33, %f51;
	mov.f32 	%f71, 0f437f0000;    	// 255
	cvt.rzi.ftz.u32.f32 	%r34, %f71;
	st.global.v4.u8 	[%rd10+0], {%r31,%r32,%r33,%r34};
$Lt_96_196866:
	.loc	22	211	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_8u_709_Kernel

	// Kernel: one thread per pixel. Loads four float components, computes
	// min(max(32768 * c + 0.5, 0), 32768) for each, truncates to an integer and
	// stores the four results as 16-bit values in the same component order.
	//   inSrc / inDest           : base addresses used for the global load / store
	//   inSrcPitch / inDestPitch : row stride in pixels (pixel index = x + pitch * y)
	//   inSrcDeviceFormat        : 0 -> source pixel is 4 x f16 (8 bytes),
	//                              otherwise 4 x f32 (16 bytes)
	//   inDestDeviceFormat       : declared but never read in this kernel
	//   inWidth / inHeight       : threads with x >= inWidth or y >= inHeight exit
	// NOTE(review): component names (B, G, R, A) are presumed from the kernel name.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<41>;
	.reg .pred %p<8>;
	.loc	22	213	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard (signed compares): continue only if inWidth > x AND inHeight > y.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_97_194306;
	// %rd1 = source pixel index = x + inSrcPitch * y (32-bit math, then sign-extended).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel_inSrc];
	// inSrcDeviceFormat != 0 -> take the 4 x f32 load path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_97_195074;
	.loc	19	115	0
	// f16 source: 8 bytes per pixel; load four 16-bit values and widen each to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	213	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_97_194818;
$Lt_97_195074:
	// f32 source: 16 bytes per pixel; load the four components directly.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_97_194818:
	.loc	19	126	0
	// %f7 = scale factor = 32768 / 1 (approximate divide).
	mov.f32 	%f5, 0f47000000;     	// 32768
	mov.f32 	%f6, 0f3f800000;     	// 1
	div.approx.ftz.f32 	%f7, %f5, %f6;
	// Component 0 (%f1): scale and add the 0.5 rounding bias.
	mul.ftz.f32 	%f8, %f7, %f1;
	mov.f32 	%f9, 0f3f000000;     	// 0.5
	add.ftz.f32 	%f10, %f8, %f9;
	// %rd10 = inDest + (x + inDestPitch * y) * 8 bytes. %rd8 is not read afterwards.
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel_inDest];
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd8, %r30;
	mul.wide.s32 	%rd9, %r30, 8;
	add.u64 	%rd10, %rd7, %rd9;
	// Clamp component 0 to [0, 32768] (values not > 0 become 0), then truncate.
	mov.f32 	%f11, 0f00000000;    	// 0
	mov.f32 	%f12, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f10, %f12;
	selp.f32 	%f13, %f10, %f11, %p3;
	mov.f32 	%f14, 0f47000000;    	// 32768
	min.ftz.f32 	%f15, %f13, %f14;
	cvt.rzi.ftz.u32.f32 	%r31, %f15;
	// Component 1 (%f2): same scale / bias / clamp / truncate sequence.
	mul.ftz.f32 	%f16, %f7, %f2;
	mov.f32 	%f17, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f18, %f16, %f17;
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f18, %f20;
	selp.f32 	%f21, %f18, %f19, %p4;
	mov.f32 	%f22, 0f47000000;    	// 32768
	min.ftz.f32 	%f23, %f21, %f22;
	cvt.rzi.ftz.u32.f32 	%r32, %f23;
	// Component 2 (%f3).
	mul.ftz.f32 	%f24, %f7, %f3;
	mov.f32 	%f25, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f26, %f24, %f25;
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f28, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f26, %f28;
	selp.f32 	%f29, %f26, %f27, %p5;
	mov.f32 	%f30, 0f47000000;    	// 32768
	min.ftz.f32 	%f31, %f29, %f30;
	cvt.rzi.ftz.u32.f32 	%r33, %f31;
	// Component 3 (%f4).
	mul.ftz.f32 	%f32, %f7, %f4;
	mov.f32 	%f33, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f34, %f32, %f33;
	mov.f32 	%f35, 0f00000000;    	// 0
	mov.f32 	%f36, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p6, %f34, %f36;
	selp.f32 	%f37, %f34, %f35, %p6;
	mov.f32 	%f38, 0f47000000;    	// 32768
	min.ftz.f32 	%f39, %f37, %f38;
	cvt.rzi.ftz.u32.f32 	%r34, %f39;
	// Store the four results as 16-bit values, in source component order.
	st.global.v4.u16 	[%rd10+0], {%r31,%r32,%r33,%r34};
$Lt_97_194306:
	.loc	22	213	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_15u_Kernel

	// Kernel: one thread per pixel. Loads four float components c0..c3, then
	// writes four 16-bit values:
	//   out0..out2 = trunc(clamp((32768 * cN) * (32768 * c3 / 32768) + 0.5, 0, 32768))
	//   out3       = trunc(clamp(32768 * c3 + 0.5, 0, 32768))
	// i.e. the first three components are multiplied by the fourth before scaling.
	//   inSrc / inDest           : base addresses used for the global load / store
	//   inSrcPitch / inDestPitch : row stride in pixels (pixel index = x + pitch * y)
	//   inSrcDeviceFormat        : 0 -> source pixel is 4 x f16 (8 bytes),
	//                              otherwise 4 x f32 (16 bytes)
	//   inDestDeviceFormat       : declared but never read in this kernel
	//   inWidth / inHeight       : threads with x >= inWidth or y >= inHeight exit
	// NOTE(review): c3 is presumably alpha and the output premultiplied BGR + alpha,
	// judging by the kernel name only.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<46>;
	.reg .pred %p<8>;
	.loc	22	214	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard (signed compares): continue only if inWidth > x AND inHeight > y.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_98_195330;
	// %rd1 = source pixel index = x + inSrcPitch * y (32-bit math, then sign-extended).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel_inSrc];
	// inSrcDeviceFormat != 0 -> take the 4 x f32 load path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_98_196098;
	.loc	19	115	0
	// f16 source: 8 bytes per pixel; load four 16-bit values and widen each to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	214	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_98_195842;
$Lt_98_196098:
	// f32 source: 16 bytes per pixel; load the four components directly.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_98_195842:
	.loc	22	64	0
	// %f7 = scale factor = 32768 / 1 (approximate divide).
	mov.f32 	%f5, 0f47000000;     	// 32768
	mov.f32 	%f6, 0f3f800000;     	// 1
	div.approx.ftz.f32 	%f7, %f5, %f6;
	// %f8 = 32768 * c0 ; %f9 = 32768 * c3 ; %f11 = %f9 * 2^-15 (c3 brought back to
	// its unscaled range) is the multiplier shared by components 0..2.
	mul.ftz.f32 	%f8, %f7, %f1;
	mul.ftz.f32 	%f9, %f7, %f4;
	mov.f32 	%f10, 0f38000000;    	// 3.05176e-005
	mul.ftz.f32 	%f11, %f9, %f10;
	// Component 0: multiply by %f11, add 0.5, clamp to [0, 32768], truncate.
	mul.ftz.f32 	%f12, %f8, %f11;
	mov.f32 	%f13, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f14, %f12, %f13;
	mov.f32 	%f15, 0f00000000;    	// 0
	mov.f32 	%f16, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f14, %f16;
	selp.f32 	%f17, %f14, %f15, %p3;
	mov.f32 	%f18, 0f47000000;    	// 32768
	min.ftz.f32 	%f19, %f17, %f18;
	cvt.rzi.ftz.u32.f32 	%r28, %f19;
	// Component 1 (%f2): same sequence.
	mul.ftz.f32 	%f20, %f7, %f2;
	mul.ftz.f32 	%f21, %f20, %f11;
	mov.f32 	%f22, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f23, %f21, %f22;
	mov.f32 	%f24, 0f00000000;    	// 0
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f23, %f25;
	selp.f32 	%f26, %f23, %f24, %p4;
	mov.f32 	%f27, 0f47000000;    	// 32768
	min.ftz.f32 	%f28, %f26, %f27;
	cvt.rzi.ftz.u32.f32 	%r29, %f28;
	// Component 2 (%f3): same sequence.
	mul.ftz.f32 	%f29, %f7, %f3;
	mul.ftz.f32 	%f30, %f29, %f11;
	mov.f32 	%f31, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f32, %f30, %f31;
	mov.f32 	%f33, 0f00000000;    	// 0
	mov.f32 	%f34, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f32, %f34;
	selp.f32 	%f35, %f32, %f33, %p5;
	mov.f32 	%f36, 0f47000000;    	// 32768
	min.ftz.f32 	%f37, %f35, %f36;
	cvt.rzi.ftz.u32.f32 	%r30, %f37;
	.loc	19	126	0
	// %rd10 = inDest + (x + inDestPitch * y) * 8 bytes. %rd8 is not read afterwards.
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel_inDest];
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel_inDestPitch];
	mul.lo.s32 	%r32, %r31, %r10;
	add.s32 	%r33, %r8, %r32;
	cvt.s64.s32 	%rd8, %r33;
	mul.wide.s32 	%rd9, %r33, 8;
	add.u64 	%rd10, %rd7, %rd9;
	// Component 3: scaled value %f9 plus 0.5, clamped to [0, 32768], truncated
	// (it is not multiplied by %f11).
	mov.f32 	%f38, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f39, %f9, %f38;
	mov.f32 	%f40, 0f00000000;    	// 0
	mov.f32 	%f41, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p6, %f39, %f41;
	selp.f32 	%f42, %f39, %f40, %p6;
	mov.f32 	%f43, 0f47000000;    	// 32768
	min.ftz.f32 	%f44, %f42, %f43;
	cvt.rzi.ftz.u32.f32 	%r34, %f44;
	// Store the four results as 16-bit values, in source component order.
	st.global.v4.u16 	[%rd10+0], {%r28,%r29,%r30,%r34};
$Lt_98_195330:
	.loc	22	214	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_15u_Kernel

	// Kernel: one thread per pixel. Loads four float components c0..c3, then
	// writes four 16-bit values:
	//   out0..out2 = trunc(clamp((32768 * cN) * (32768 * c3 / 32768) + 0.5, 0, 32768))
	//   out3       = the constant 32768
	// i.e. the first three components are multiplied by the fourth, and the
	// fourth output slot does not depend on the source pixel.
	//   inSrc / inDest           : base addresses used for the global load / store
	//   inSrcPitch / inDestPitch : row stride in pixels (pixel index = x + pitch * y)
	//   inSrcDeviceFormat        : 0 -> source pixel is 4 x f16 (8 bytes),
	//                              otherwise 4 x f32 (16 bytes)
	//   inDestDeviceFormat       : declared but never read in this kernel
	//   inWidth / inHeight       : threads with x >= inWidth or y >= inHeight exit
	// NOTE(review): c3 is presumably alpha and out3 the opaque "X" slot, judging
	// by the kernel name only.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<40>;
	.reg .pred %p<7>;
	.loc	22	215	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard (signed compares): continue only if inWidth > x AND inHeight > y.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_99_195330;
	// %rd1 = source pixel index = x + inSrcPitch * y (32-bit math, then sign-extended).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel_inSrc];
	// inSrcDeviceFormat != 0 -> take the 4 x f32 load path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_99_196098;
	.loc	19	115	0
	// f16 source: 8 bytes per pixel; load four 16-bit values and widen each to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	215	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_99_195842;
$Lt_99_196098:
	// f32 source: 16 bytes per pixel; load the four components directly.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_99_195842:
	.loc	22	64	0
	// %f7 = scale factor = 32768 / 1 (approximate divide).
	mov.f32 	%f5, 0f47000000;     	// 32768
	mov.f32 	%f6, 0f3f800000;     	// 1
	div.approx.ftz.f32 	%f7, %f5, %f6;
	// %f8 = 32768 * c0 ; %f9 = 32768 * c3 ; %f11 = %f9 * 2^-15 (c3 brought back to
	// its unscaled range) is the multiplier shared by components 0..2.
	mul.ftz.f32 	%f8, %f7, %f1;
	mul.ftz.f32 	%f9, %f7, %f4;
	mov.f32 	%f10, 0f38000000;    	// 3.05176e-005
	mul.ftz.f32 	%f11, %f9, %f10;
	// Component 0: multiply by %f11, add 0.5, clamp to [0, 32768], truncate.
	mul.ftz.f32 	%f12, %f8, %f11;
	mov.f32 	%f13, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f14, %f12, %f13;
	mov.f32 	%f15, 0f00000000;    	// 0
	mov.f32 	%f16, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f14, %f16;
	selp.f32 	%f17, %f14, %f15, %p3;
	mov.f32 	%f18, 0f47000000;    	// 32768
	min.ftz.f32 	%f19, %f17, %f18;
	cvt.rzi.ftz.u32.f32 	%r28, %f19;
	// Component 1 (%f2): same sequence.
	mul.ftz.f32 	%f20, %f7, %f2;
	mul.ftz.f32 	%f21, %f20, %f11;
	mov.f32 	%f22, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f23, %f21, %f22;
	mov.f32 	%f24, 0f00000000;    	// 0
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f23, %f25;
	selp.f32 	%f26, %f23, %f24, %p4;
	mov.f32 	%f27, 0f47000000;    	// 32768
	min.ftz.f32 	%f28, %f26, %f27;
	cvt.rzi.ftz.u32.f32 	%r29, %f28;
	// Component 2 (%f3): same sequence.
	mul.ftz.f32 	%f29, %f7, %f3;
	mul.ftz.f32 	%f30, %f29, %f11;
	mov.f32 	%f31, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f32, %f30, %f31;
	mov.f32 	%f33, 0f00000000;    	// 0
	mov.f32 	%f34, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f32, %f34;
	selp.f32 	%f35, %f32, %f33, %p5;
	mov.f32 	%f36, 0f47000000;    	// 32768
	min.ftz.f32 	%f37, %f35, %f36;
	cvt.rzi.ftz.u32.f32 	%r30, %f37;
	.loc	19	126	0
	// %rd10 = inDest + (x + inDestPitch * y) * 8 bytes. %rd8 is not read afterwards.
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel_inDest];
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel_inDestPitch];
	mul.lo.s32 	%r32, %r31, %r10;
	add.s32 	%r33, %r8, %r32;
	cvt.s64.s32 	%rd8, %r33;
	mul.wide.s32 	%rd9, %r33, 8;
	add.u64 	%rd10, %rd7, %rd9;
	// Fourth output slot is the constant 32768 regardless of the source pixel.
	mov.f32 	%f38, 0f47000000;    	// 32768
	cvt.rzi.ftz.u32.f32 	%r34, %f38;
	// Store the four results as 16-bit values.
	st.global.v4.u16 	[%rd10+0], {%r28,%r29,%r30,%r34};
$Lt_99_195330:
	.loc	22	215	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_15u_Kernel

	// Kernel: one thread per pixel. Loads four float components c0..c3, computes
	// trunc(clamp(32768 * c + 0.5, 0, 32768)) for each, and stores the four
	// 16-bit results in REVERSED component order: {c3, c2, c1, c0}.
	//   inSrc / inDest           : base addresses used for the global load / store
	//   inSrcPitch / inDestPitch : row stride in pixels (pixel index = x + pitch * y)
	//   inSrcDeviceFormat        : 0 -> source pixel is 4 x f16 (8 bytes),
	//                              otherwise 4 x f32 (16 bytes)
	//   inDestDeviceFormat       : declared but never read in this kernel
	//   inWidth / inHeight       : threads with x >= inWidth or y >= inHeight exit
	// NOTE(review): the B,G,R,A -> A,R,G,B naming is presumed from the kernel name.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<41>;
	.reg .pred %p<8>;
	.loc	22	216	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard (signed compares): continue only if inWidth > x AND inHeight > y.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_100_194562;
	// %rd1 = source pixel index = x + inSrcPitch * y (32-bit math, then sign-extended).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel_inSrc];
	// inSrcDeviceFormat != 0 -> take the 4 x f32 load path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_100_195330;
	.loc	19	115	0
	// f16 source: 8 bytes per pixel; load four 16-bit values and widen each to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	216	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_100_195074;
$Lt_100_195330:
	// f32 source: 16 bytes per pixel; load the four components directly.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_100_195074:
	.loc	19	126	0
	// %f7 = scale factor = 32768 / 1 (approximate divide).
	mov.f32 	%f5, 0f47000000;     	// 32768
	mov.f32 	%f6, 0f3f800000;     	// 1
	div.approx.ftz.f32 	%f7, %f5, %f6;
	// Output slot 0 comes from source component 3 (%f4): scale and add 0.5.
	mul.ftz.f32 	%f8, %f7, %f4;
	mov.f32 	%f9, 0f3f000000;     	// 0.5
	add.ftz.f32 	%f10, %f8, %f9;
	// %rd10 = inDest + (x + inDestPitch * y) * 8 bytes. %rd8 is not read afterwards.
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel_inDest];
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd8, %r30;
	mul.wide.s32 	%rd9, %r30, 8;
	add.u64 	%rd10, %rd7, %rd9;
	// Clamp to [0, 32768] (values not > 0 become 0), then truncate.
	mov.f32 	%f11, 0f00000000;    	// 0
	mov.f32 	%f12, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f10, %f12;
	selp.f32 	%f13, %f10, %f11, %p3;
	mov.f32 	%f14, 0f47000000;    	// 32768
	min.ftz.f32 	%f15, %f13, %f14;
	cvt.rzi.ftz.u32.f32 	%r31, %f15;
	// Output slot 1 comes from source component 2 (%f3).
	mul.ftz.f32 	%f16, %f7, %f3;
	mov.f32 	%f17, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f18, %f16, %f17;
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f18, %f20;
	selp.f32 	%f21, %f18, %f19, %p4;
	mov.f32 	%f22, 0f47000000;    	// 32768
	min.ftz.f32 	%f23, %f21, %f22;
	cvt.rzi.ftz.u32.f32 	%r32, %f23;
	// Output slot 2 comes from source component 1 (%f2).
	mul.ftz.f32 	%f24, %f7, %f2;
	mov.f32 	%f25, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f26, %f24, %f25;
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f28, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f26, %f28;
	selp.f32 	%f29, %f26, %f27, %p5;
	mov.f32 	%f30, 0f47000000;    	// 32768
	min.ftz.f32 	%f31, %f29, %f30;
	cvt.rzi.ftz.u32.f32 	%r33, %f31;
	// Output slot 3 comes from source component 0 (%f1).
	mul.ftz.f32 	%f32, %f7, %f1;
	mov.f32 	%f33, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f34, %f32, %f33;
	mov.f32 	%f35, 0f00000000;    	// 0
	mov.f32 	%f36, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p6, %f34, %f36;
	selp.f32 	%f37, %f34, %f35, %p6;
	mov.f32 	%f38, 0f47000000;    	// 32768
	min.ftz.f32 	%f39, %f37, %f38;
	cvt.rzi.ftz.u32.f32 	%r34, %f39;
	// Store the four results as 16-bit values: {c3, c2, c1, c0}.
	st.global.v4.u16 	[%rd10+0], {%r31,%r32,%r33,%r34};
$Lt_100_194562:
	.loc	22	216	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_15u_Kernel

	// Kernel: one thread per pixel. Loads four float components c0..c3, then
	// writes four 16-bit values in the order {A', P2, P1, P0} where
	//   A' = trunc(clamp(32768 * c3 + 0.5, 0, 32768))
	//   PN = trunc(clamp((32768 * cN) * (32768 * c3 / 32768) + 0.5, 0, 32768))
	// i.e. the component order is reversed and the first three source components
	// are multiplied by the fourth before scaling.
	//   inSrc / inDest           : base addresses used for the global load / store
	//   inSrcPitch / inDestPitch : row stride in pixels (pixel index = x + pitch * y)
	//   inSrcDeviceFormat        : 0 -> source pixel is 4 x f16 (8 bytes),
	//                              otherwise 4 x f32 (16 bytes)
	//   inDestDeviceFormat       : declared but never read in this kernel
	//   inWidth / inHeight       : threads with x >= inWidth or y >= inHeight exit
	// NOTE(review): c3 is presumably alpha and the output alpha + premultiplied RGB,
	// judging by the kernel name only.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<46>;
	.reg .pred %p<8>;
	.loc	22	217	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard (signed compares): continue only if inWidth > x AND inHeight > y.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_101_195586;
	// %rd1 = source pixel index = x + inSrcPitch * y (32-bit math, then sign-extended).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel_inSrc];
	// inSrcDeviceFormat != 0 -> take the 4 x f32 load path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_101_196354;
	.loc	19	115	0
	// f16 source: 8 bytes per pixel; load four 16-bit values and widen each to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	217	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_101_196098;
$Lt_101_196354:
	// f32 source: 16 bytes per pixel; load the four components directly.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_101_196098:
	.loc	22	64	0
	// %f7 = scale factor = 32768 / 1 (approximate divide).
	mov.f32 	%f5, 0f47000000;     	// 32768
	mov.f32 	%f6, 0f3f800000;     	// 1
	div.approx.ftz.f32 	%f7, %f5, %f6;
	// %f8 = 32768 * c2 ; %f9 = 32768 * c3 ; %f11 = %f9 * 2^-15 (c3 brought back to
	// its unscaled range) is the multiplier shared by components 0..2.
	mul.ftz.f32 	%f8, %f7, %f3;
	mul.ftz.f32 	%f9, %f7, %f4;
	mov.f32 	%f10, 0f38000000;    	// 3.05176e-005
	mul.ftz.f32 	%f11, %f9, %f10;
	// Source component 2 (%f3) -> %r28: multiply by %f11, add 0.5, clamp, truncate.
	mul.ftz.f32 	%f12, %f8, %f11;
	mov.f32 	%f13, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f14, %f12, %f13;
	mov.f32 	%f15, 0f00000000;    	// 0
	mov.f32 	%f16, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f14, %f16;
	selp.f32 	%f17, %f14, %f15, %p3;
	mov.f32 	%f18, 0f47000000;    	// 32768
	min.ftz.f32 	%f19, %f17, %f18;
	cvt.rzi.ftz.u32.f32 	%r28, %f19;
	// Source component 1 (%f2) -> %r29: same sequence.
	mul.ftz.f32 	%f20, %f7, %f2;
	mul.ftz.f32 	%f21, %f20, %f11;
	mov.f32 	%f22, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f23, %f21, %f22;
	mov.f32 	%f24, 0f00000000;    	// 0
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f23, %f25;
	selp.f32 	%f26, %f23, %f24, %p4;
	mov.f32 	%f27, 0f47000000;    	// 32768
	min.ftz.f32 	%f28, %f26, %f27;
	cvt.rzi.ftz.u32.f32 	%r29, %f28;
	// Source component 0 (%f1) -> %r30: same sequence.
	mul.ftz.f32 	%f29, %f7, %f1;
	mul.ftz.f32 	%f30, %f29, %f11;
	mov.f32 	%f31, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f32, %f30, %f31;
	mov.f32 	%f33, 0f00000000;    	// 0
	mov.f32 	%f34, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f32, %f34;
	selp.f32 	%f35, %f32, %f33, %p5;
	mov.f32 	%f36, 0f47000000;    	// 32768
	min.ftz.f32 	%f37, %f35, %f36;
	cvt.rzi.ftz.u32.f32 	%r30, %f37;
	.loc	19	126	0
	// Source component 3: scaled value %f9 plus 0.5 (not multiplied by %f11).
	mov.f32 	%f38, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f39, %f9, %f38;
	// %rd10 = inDest + (x + inDestPitch * y) * 8 bytes. %rd8 is not read afterwards.
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel_inDest];
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel_inDestPitch];
	mul.lo.s32 	%r32, %r31, %r10;
	add.s32 	%r33, %r8, %r32;
	cvt.s64.s32 	%rd8, %r33;
	mul.wide.s32 	%rd9, %r33, 8;
	add.u64 	%rd10, %rd7, %rd9;
	// Clamp component 3 to [0, 32768] (values not > 0 become 0), then truncate.
	mov.f32 	%f40, 0f00000000;    	// 0
	mov.f32 	%f41, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p6, %f39, %f41;
	selp.f32 	%f42, %f39, %f40, %p6;
	mov.f32 	%f43, 0f47000000;    	// 32768
	min.ftz.f32 	%f44, %f42, %f43;
	cvt.rzi.ftz.u32.f32 	%r34, %f44;
	// Store as 16-bit values: component 3 first, then multiplied components 2, 1, 0.
	st.global.v4.u16 	[%rd10+0], {%r34,%r28,%r29,%r30};
$Lt_101_195586:
	.loc	22	217	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_15u_Kernel

	// Pixel-format conversion kernel, one thread per pixel. Per the kernel
	// name it converts BGRA_4444_32f to XRGB_4444_15u.
	//   inSrc, inSrcPitch    : source base address and row pitch. The pitch is
	//                          multiplied by y and added to x before the byte
	//                          scaling, so it is used as a pixel count.
	//   inSrcDeviceFormat    : 0 selects four 16-bit values per pixel that are
	//                          converted from f16; any other value selects four
	//                          f32 values per pixel.
	//   inDest, inDestPitch  : destination base address and row pitch (pixels);
	//                          every pixel is written as four u16 values.
	//   inDestDeviceFormat   : never loaded by this kernel.
	//   inWidth, inHeight    : a thread proceeds only when inWidth > x and
	//                          inHeight > y (signed compares); otherwise it
	//                          exits without touching memory.
	// With c0..c3 the source components in memory order, the stored pixel is
	// {32768, q(c2), q(c1), q(c0)} where q(c) = trunc(clamp(c * 32768 * c3 + 0.5,
	// 0, 32768)).
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel_inHeight)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<12>;
	.reg .f32 %f<40>;
	.reg .pred %p<7>;
	.loc	22	218	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel:
	// %r8  = x = ctaid.x * ntid.x + tid.x
	// %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test. Each set.gt result is negated and the two are ANDed; %r17 is
	// non-zero only when inWidth > x and inHeight > y. A zero result skips
	// straight to the exit label.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_102_195586;
	// Source pixel index = inSrcPitch * y + x, formed in 32 bits and then
	// sign-extended to 64 bits (%rd1).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel_inSrc];
	// inSrcDeviceFormat != 0 takes the f32 load path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_102_196354;
	.loc	19	115	0
	// 16-bit source: 8 bytes per pixel, one vector load of four u16 values.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	218	0
	// Convert each 16-bit value from f16 to f32 (%f1..%f4 = c0..c3).
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_102_196098;
$Lt_102_196354:
	// 32-bit float source: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_102_196098:
	.loc	22	64	0
	// %f7  = 32768 / 1 (approximate divide), the common scale factor.
	// %f11 = (%f7 * c3) * 2^-15, i.e. c3 scaled up and back down again.
	// NOTE(review): c3 is presumably the alpha channel given the BGRA source
	// name - confirm against the CUDA source.
	mov.f32 	%f5, 0f47000000;     	// 32768
	mov.f32 	%f6, 0f3f800000;     	// 1
	div.approx.ftz.f32 	%f7, %f5, %f6;
	mul.ftz.f32 	%f8, %f7, %f3;
	mul.ftz.f32 	%f9, %f7, %f4;
	mov.f32 	%f10, 0f38000000;    	// 3.05176e-005
	mul.ftz.f32 	%f11, %f9, %f10;
	// %r28: (%f7 * c2) * %f11 + 0.5; values not greater than 0 become 0, the
	// result is capped at 32768 and converted to an integer with truncation.
	mul.ftz.f32 	%f12, %f8, %f11;
	mov.f32 	%f13, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f14, %f12, %f13;
	mov.f32 	%f15, 0f00000000;    	// 0
	mov.f32 	%f16, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f14, %f16;
	selp.f32 	%f17, %f14, %f15, %p3;
	mov.f32 	%f18, 0f47000000;    	// 32768
	min.ftz.f32 	%f19, %f17, %f18;
	cvt.rzi.ftz.u32.f32 	%r28, %f19;
	// %r29: same sequence applied to c1.
	mul.ftz.f32 	%f20, %f7, %f2;
	mul.ftz.f32 	%f21, %f20, %f11;
	mov.f32 	%f22, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f23, %f21, %f22;
	mov.f32 	%f24, 0f00000000;    	// 0
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f23, %f25;
	selp.f32 	%f26, %f23, %f24, %p4;
	mov.f32 	%f27, 0f47000000;    	// 32768
	min.ftz.f32 	%f28, %f26, %f27;
	cvt.rzi.ftz.u32.f32 	%r29, %f28;
	// %r30: same sequence applied to c0.
	mul.ftz.f32 	%f29, %f7, %f1;
	mul.ftz.f32 	%f30, %f29, %f11;
	mov.f32 	%f31, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f32, %f30, %f31;
	mov.f32 	%f33, 0f00000000;    	// 0
	mov.f32 	%f34, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f32, %f34;
	selp.f32 	%f35, %f32, %f33, %p5;
	mov.f32 	%f36, 0f47000000;    	// 32768
	min.ftz.f32 	%f37, %f35, %f36;
	cvt.rzi.ftz.u32.f32 	%r30, %f37;
	.loc	19	126	0
	// Destination pixel index = inDestPitch * y + x, 8 bytes per pixel.
	// %rd8 is written here but not read afterwards; the byte offset comes from
	// the mul.wide that follows.
	ld.param.u64 	%rd7, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel_inDest];
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel_inDestPitch];
	mul.lo.s32 	%r32, %r31, %r10;
	add.s32 	%r33, %r8, %r32;
	cvt.s64.s32 	%rd8, %r33;
	mul.wide.s32 	%rd9, %r33, 8;
	add.u64 	%rd10, %rd7, %rd9;
	// The first stored element is the constant 32768; the remaining three are
	// the values derived from c2, c1 and c0, in that order.
	mov.f32 	%f38, 0f47000000;    	// 32768
	cvt.rzi.ftz.u32.f32 	%r34, %f38;
	st.global.v4.u16 	[%rd10+0], {%r34,%r28,%r29,%r30};
$Lt_102_195586:
	.loc	22	218	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_15u_Kernel

	// Pixel-format conversion kernel, one thread per pixel (BGRA_4444_32f to
	// BGRP_4444_32f per the kernel name).
	// Loads four components c0..c3 from inSrc (four f16 values when
	// inSrcDeviceFormat == 0, otherwise four f32 values), multiplies c0..c2 by
	// c3 and stores {c0*c3, c1*c3, c2*c3, c3} to inDest (four f16 values when
	// inDestDeviceFormat == 0, otherwise four f32 values). Both pitches are
	// applied as pixel counts per row. Threads outside inWidth x inHeight exit
	// without touching memory.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel_inHeight)
	{
	.reg .u32 	%cta, %dim, %lane;
	.reg .u32 	%x, %y, %width, %height;
	.reg .u32 	%src_pitch, %src_fmt, %src_idx;
	.reg .u32 	%dst_pitch, %dst_fmt, %dst_idx;
	.reg .b32 	%in_h<4>;
	.reg .b32 	%out_h<4>;
	.reg .u64 	%src_base, %src_off, %src_addr;
	.reg .u64 	%dst_base, %dst_off, %dst_addr;
	.reg .f32 	%c<4>;
	.reg .f32 	%prem<3>;
	.reg .pred 	%in_x, %in_y, %inside, %src_f32, %dst_f32;
	.loc	22	220	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel:
	// x = ctaid.x * ntid.x + tid.x
	mov.u32 	%cta, %ctaid.x;
	mov.u32 	%dim, %ntid.x;
	mov.u32 	%lane, %tid.x;
	mad.lo.s32 	%x, %cta, %dim, %lane;
	// y = ctaid.y * ntid.y + tid.y
	mov.u32 	%cta, %ctaid.y;
	mov.u32 	%dim, %ntid.y;
	mov.u32 	%lane, %tid.y;
	mad.lo.s32 	%y, %cta, %dim, %lane;
	// Leave unless inWidth > x and inHeight > y (signed compares).
	ld.param.s32 	%width, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel_inWidth];
	ld.param.s32 	%height, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel_inHeight];
	setp.gt.s32 	%in_x, %width, %x;
	setp.gt.s32 	%in_y, %height, %y;
	and.pred 	%inside, %in_x, %in_y;
	@!%inside bra 	$L_bgra32f_to_bgrp32f_exit;
	// Source pixel index = inSrcPitch * y + x (32-bit arithmetic).
	ld.param.s32 	%src_pitch, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel_inSrcPitch];
	mad.lo.s32 	%src_idx, %src_pitch, %y, %x;
	ld.param.u64 	%src_base, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel_inSrc];
	ld.param.s32 	%src_fmt, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel_inSrcDeviceFormat];
	setp.ne.s32 	%src_f32, %src_fmt, 0;
	@%src_f32 bra 	$L_bgra32f_to_bgrp32f_load_f32;
	.loc	19	115	0
	// 16-bit source: 8 bytes per pixel; the index is sign-extended by mul.wide.
	mul.wide.s32 	%src_off, %src_idx, 8;
	add.u64 	%src_addr, %src_base, %src_off;
	ld.global.v4.u16 	{%in_h0,%in_h1,%in_h2,%in_h3}, [%src_addr];
	.loc	22	220	0
	cvt.ftz.f32.f16 	%c0, %in_h0;
	cvt.ftz.f32.f16 	%c1, %in_h1;
	cvt.ftz.f32.f16 	%c2, %in_h2;
	cvt.ftz.f32.f16 	%c3, %in_h3;
	bra.uni 	$L_bgra32f_to_bgrp32f_loaded;
$L_bgra32f_to_bgrp32f_load_f32:
	// 32-bit float source: 16 bytes per pixel.
	mul.wide.s32 	%src_off, %src_idx, 16;
	add.u64 	%src_addr, %src_base, %src_off;
	ld.global.v4.f32 	{%c0,%c1,%c2,%c3}, [%src_addr];
$L_bgra32f_to_bgrp32f_loaded:
	// Scale the first three components by the fourth.
	mul.ftz.f32 	%prem0, %c0, %c3;
	mul.ftz.f32 	%prem1, %c1, %c3;
	mul.ftz.f32 	%prem2, %c2, %c3;
	// Destination pixel index = inDestPitch * y + x (32-bit arithmetic).
	ld.param.s32 	%dst_pitch, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel_inDestPitch];
	mad.lo.s32 	%dst_idx, %dst_pitch, %y, %x;
	ld.param.u64 	%dst_base, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel_inDest];
	ld.param.s32 	%dst_fmt, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel_inDestDeviceFormat];
	setp.ne.s32 	%dst_f32, %dst_fmt, 0;
	@%dst_f32 bra 	$L_bgra32f_to_bgrp32f_store_f32;
	.loc	19	126	0
	// 16-bit destination: round each value to f16 and store 8 bytes.
	mul.wide.s32 	%dst_off, %dst_idx, 8;
	add.u64 	%dst_addr, %dst_base, %dst_off;
	cvt.rn.ftz.f16.f32 	%out_h0, %prem0;
	cvt.rn.ftz.f16.f32 	%out_h1, %prem1;
	cvt.rn.ftz.f16.f32 	%out_h2, %prem2;
	cvt.rn.ftz.f16.f32 	%out_h3, %c3;
	st.global.v4.u16 	[%dst_addr], {%out_h0,%out_h1,%out_h2,%out_h3};
	.loc	22	220	0
	bra.uni 	$L_bgra32f_to_bgrp32f_exit;
$L_bgra32f_to_bgrp32f_store_f32:
	.loc	19	126	0
	// 32-bit float destination: store 16 bytes.
	mul.wide.s32 	%dst_off, %dst_idx, 16;
	add.u64 	%dst_addr, %dst_base, %dst_off;
	st.global.v4.f32 	[%dst_addr], {%prem0,%prem1,%prem2,%c3};
$L_bgra32f_to_bgrp32f_exit:
	.loc	22	220	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Kernel

	// Pixel-format conversion kernel, one thread per pixel (BGRA_4444_32f to
	// BGRX_4444_32f per the kernel name).
	// Loads four components c0..c3 from inSrc (four f16 values when
	// inSrcDeviceFormat == 0, otherwise four f32 values), multiplies c0..c2 by
	// c3 and stores {c0*c3, c1*c3, c2*c3, 1.0} to inDest (four f16 values when
	// inDestDeviceFormat == 0, otherwise four f32 values). Both pitches are
	// applied as pixel counts per row. Threads outside inWidth x inHeight exit
	// without touching memory.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel_inHeight)
	{
	.reg .u32 	%cta, %dim, %lane;
	.reg .u32 	%x, %y, %width, %height;
	.reg .u32 	%src_pitch, %src_fmt, %src_idx;
	.reg .u32 	%dst_pitch, %dst_fmt, %dst_idx;
	.reg .b32 	%in_h<4>;
	.reg .b32 	%out_h<4>;
	.reg .u64 	%src_base, %src_off, %src_addr;
	.reg .u64 	%dst_base, %dst_off, %dst_addr;
	.reg .f32 	%c<4>;
	.reg .f32 	%prem<3>;
	.reg .f32 	%one;
	.reg .pred 	%in_x, %in_y, %inside, %src_f32, %dst_f32;
	.loc	22	221	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel:
	// x = ctaid.x * ntid.x + tid.x
	mov.u32 	%cta, %ctaid.x;
	mov.u32 	%dim, %ntid.x;
	mov.u32 	%lane, %tid.x;
	mad.lo.s32 	%x, %cta, %dim, %lane;
	// y = ctaid.y * ntid.y + tid.y
	mov.u32 	%cta, %ctaid.y;
	mov.u32 	%dim, %ntid.y;
	mov.u32 	%lane, %tid.y;
	mad.lo.s32 	%y, %cta, %dim, %lane;
	// Leave unless inWidth > x and inHeight > y (signed compares).
	ld.param.s32 	%width, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel_inWidth];
	ld.param.s32 	%height, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel_inHeight];
	setp.gt.s32 	%in_x, %width, %x;
	setp.gt.s32 	%in_y, %height, %y;
	and.pred 	%inside, %in_x, %in_y;
	@!%inside bra 	$L_bgra32f_to_bgrx32f_exit;
	// Source pixel index = inSrcPitch * y + x (32-bit arithmetic).
	ld.param.s32 	%src_pitch, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel_inSrcPitch];
	mad.lo.s32 	%src_idx, %src_pitch, %y, %x;
	ld.param.u64 	%src_base, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel_inSrc];
	ld.param.s32 	%src_fmt, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel_inSrcDeviceFormat];
	setp.ne.s32 	%src_f32, %src_fmt, 0;
	@%src_f32 bra 	$L_bgra32f_to_bgrx32f_load_f32;
	.loc	19	115	0
	// 16-bit source: 8 bytes per pixel; the index is sign-extended by mul.wide.
	mul.wide.s32 	%src_off, %src_idx, 8;
	add.u64 	%src_addr, %src_base, %src_off;
	ld.global.v4.u16 	{%in_h0,%in_h1,%in_h2,%in_h3}, [%src_addr];
	.loc	22	221	0
	cvt.ftz.f32.f16 	%c0, %in_h0;
	cvt.ftz.f32.f16 	%c1, %in_h1;
	cvt.ftz.f32.f16 	%c2, %in_h2;
	cvt.ftz.f32.f16 	%c3, %in_h3;
	bra.uni 	$L_bgra32f_to_bgrx32f_loaded;
$L_bgra32f_to_bgrx32f_load_f32:
	// 32-bit float source: 16 bytes per pixel.
	mul.wide.s32 	%src_off, %src_idx, 16;
	add.u64 	%src_addr, %src_base, %src_off;
	ld.global.v4.f32 	{%c0,%c1,%c2,%c3}, [%src_addr];
$L_bgra32f_to_bgrx32f_loaded:
	// Scale the first three components by the fourth; the fourth output
	// component is the constant 1.0.
	mul.ftz.f32 	%prem0, %c0, %c3;
	mul.ftz.f32 	%prem1, %c1, %c3;
	mul.ftz.f32 	%prem2, %c2, %c3;
	mov.f32 	%one, 0f3f800000;     	// 1
	// Destination pixel index = inDestPitch * y + x (32-bit arithmetic).
	ld.param.s32 	%dst_pitch, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel_inDestPitch];
	mad.lo.s32 	%dst_idx, %dst_pitch, %y, %x;
	ld.param.u64 	%dst_base, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel_inDest];
	ld.param.s32 	%dst_fmt, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel_inDestDeviceFormat];
	setp.ne.s32 	%dst_f32, %dst_fmt, 0;
	@%dst_f32 bra 	$L_bgra32f_to_bgrx32f_store_f32;
	.loc	19	126	0
	// 16-bit destination: round each value to f16 and store 8 bytes.
	mul.wide.s32 	%dst_off, %dst_idx, 8;
	add.u64 	%dst_addr, %dst_base, %dst_off;
	cvt.rn.ftz.f16.f32 	%out_h0, %prem0;
	cvt.rn.ftz.f16.f32 	%out_h1, %prem1;
	cvt.rn.ftz.f16.f32 	%out_h2, %prem2;
	cvt.rn.ftz.f16.f32 	%out_h3, %one;
	st.global.v4.u16 	[%dst_addr], {%out_h0,%out_h1,%out_h2,%out_h3};
	.loc	22	221	0
	bra.uni 	$L_bgra32f_to_bgrx32f_exit;
$L_bgra32f_to_bgrx32f_store_f32:
	.loc	19	126	0
	// 32-bit float destination: store 16 bytes.
	mul.wide.s32 	%dst_off, %dst_idx, 16;
	add.u64 	%dst_addr, %dst_base, %dst_off;
	st.global.v4.f32 	[%dst_addr], {%prem0,%prem1,%prem2,%one};
$L_bgra32f_to_bgrx32f_exit:
	.loc	22	221	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Kernel

	// Pixel-format conversion kernel, one thread per pixel (BGRA_4444_32f to
	// ARGB_4444_32f per the kernel name).
	// Loads four components c0..c3 from inSrc (four f16 values when
	// inSrcDeviceFormat == 0, otherwise four f32 values) and stores them in
	// reversed order {c3, c2, c1, c0} to inDest (four f16 values when
	// inDestDeviceFormat == 0, otherwise four f32 values). No arithmetic is
	// applied to the component values. Both pitches are applied as pixel counts
	// per row. Threads outside inWidth x inHeight exit without touching memory.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel_inHeight)
	{
	.reg .u32 	%cta, %dim, %lane;
	.reg .u32 	%x, %y, %width, %height;
	.reg .u32 	%src_pitch, %src_fmt, %src_idx;
	.reg .u32 	%dst_pitch, %dst_fmt, %dst_idx;
	.reg .b32 	%in_h<4>;
	.reg .b32 	%out_h<4>;
	.reg .u64 	%src_base, %src_off, %src_addr;
	.reg .u64 	%dst_base, %dst_off, %dst_addr;
	.reg .f32 	%c<4>;
	.reg .pred 	%in_x, %in_y, %inside, %src_f32, %dst_f32;
	.loc	22	222	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel:
	// x = ctaid.x * ntid.x + tid.x
	mov.u32 	%cta, %ctaid.x;
	mov.u32 	%dim, %ntid.x;
	mov.u32 	%lane, %tid.x;
	mad.lo.s32 	%x, %cta, %dim, %lane;
	// y = ctaid.y * ntid.y + tid.y
	mov.u32 	%cta, %ctaid.y;
	mov.u32 	%dim, %ntid.y;
	mov.u32 	%lane, %tid.y;
	mad.lo.s32 	%y, %cta, %dim, %lane;
	// Leave unless inWidth > x and inHeight > y (signed compares).
	ld.param.s32 	%width, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel_inWidth];
	ld.param.s32 	%height, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel_inHeight];
	setp.gt.s32 	%in_x, %width, %x;
	setp.gt.s32 	%in_y, %height, %y;
	and.pred 	%inside, %in_x, %in_y;
	@!%inside bra 	$L_bgra32f_to_argb32f_exit;
	// Source pixel index = inSrcPitch * y + x (32-bit arithmetic).
	ld.param.s32 	%src_pitch, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel_inSrcPitch];
	mad.lo.s32 	%src_idx, %src_pitch, %y, %x;
	ld.param.u64 	%src_base, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel_inSrc];
	ld.param.s32 	%src_fmt, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel_inSrcDeviceFormat];
	setp.ne.s32 	%src_f32, %src_fmt, 0;
	@%src_f32 bra 	$L_bgra32f_to_argb32f_load_f32;
	.loc	19	115	0
	// 16-bit source: 8 bytes per pixel; the index is sign-extended by mul.wide.
	mul.wide.s32 	%src_off, %src_idx, 8;
	add.u64 	%src_addr, %src_base, %src_off;
	ld.global.v4.u16 	{%in_h0,%in_h1,%in_h2,%in_h3}, [%src_addr];
	.loc	22	222	0
	cvt.ftz.f32.f16 	%c0, %in_h0;
	cvt.ftz.f32.f16 	%c1, %in_h1;
	cvt.ftz.f32.f16 	%c2, %in_h2;
	cvt.ftz.f32.f16 	%c3, %in_h3;
	bra.uni 	$L_bgra32f_to_argb32f_loaded;
$L_bgra32f_to_argb32f_load_f32:
	// 32-bit float source: 16 bytes per pixel.
	mul.wide.s32 	%src_off, %src_idx, 16;
	add.u64 	%src_addr, %src_base, %src_off;
	ld.global.v4.f32 	{%c0,%c1,%c2,%c3}, [%src_addr];
$L_bgra32f_to_argb32f_loaded:
	// Destination pixel index = inDestPitch * y + x (32-bit arithmetic).
	ld.param.s32 	%dst_pitch, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel_inDestPitch];
	mad.lo.s32 	%dst_idx, %dst_pitch, %y, %x;
	ld.param.u64 	%dst_base, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel_inDest];
	ld.param.s32 	%dst_fmt, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel_inDestDeviceFormat];
	setp.ne.s32 	%dst_f32, %dst_fmt, 0;
	@%dst_f32 bra 	$L_bgra32f_to_argb32f_store_f32;
	.loc	19	126	0
	// 16-bit destination: round to f16 in reversed component order, 8 bytes.
	mul.wide.s32 	%dst_off, %dst_idx, 8;
	add.u64 	%dst_addr, %dst_base, %dst_off;
	cvt.rn.ftz.f16.f32 	%out_h0, %c3;
	cvt.rn.ftz.f16.f32 	%out_h1, %c2;
	cvt.rn.ftz.f16.f32 	%out_h2, %c1;
	cvt.rn.ftz.f16.f32 	%out_h3, %c0;
	st.global.v4.u16 	[%dst_addr], {%out_h0,%out_h1,%out_h2,%out_h3};
	.loc	22	222	0
	bra.uni 	$L_bgra32f_to_argb32f_exit;
$L_bgra32f_to_argb32f_store_f32:
	.loc	19	126	0
	// 32-bit float destination: reversed component order, 16 bytes.
	mul.wide.s32 	%dst_off, %dst_idx, 16;
	add.u64 	%dst_addr, %dst_base, %dst_off;
	st.global.v4.f32 	[%dst_addr], {%c3,%c2,%c1,%c0};
$L_bgra32f_to_argb32f_exit:
	.loc	22	222	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Kernel

	// Pixel-format conversion kernel, one thread per pixel (BGRA_4444_32f to
	// PRGB_4444_32f per the kernel name).
	// Loads four components c0..c3 from inSrc (four f16 values when
	// inSrcDeviceFormat == 0, otherwise four f32 values), multiplies c0..c2 by
	// c3 and stores {c3, c2*c3, c1*c3, c0*c3} to inDest (four f16 values when
	// inDestDeviceFormat == 0, otherwise four f32 values). Both pitches are
	// applied as pixel counts per row. Threads outside inWidth x inHeight exit
	// without touching memory.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel_inHeight)
	{
	.reg .u32 	%cta, %dim, %lane;
	.reg .u32 	%x, %y, %width, %height;
	.reg .u32 	%src_pitch, %src_fmt, %src_idx;
	.reg .u32 	%dst_pitch, %dst_fmt, %dst_idx;
	.reg .b32 	%in_h<4>;
	.reg .b32 	%out_h<4>;
	.reg .u64 	%src_base, %src_off, %src_addr;
	.reg .u64 	%dst_base, %dst_off, %dst_addr;
	.reg .f32 	%c<4>;
	.reg .f32 	%prem<3>;
	.reg .pred 	%in_x, %in_y, %inside, %src_f32, %dst_f32;
	.loc	22	223	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel:
	// x = ctaid.x * ntid.x + tid.x
	mov.u32 	%cta, %ctaid.x;
	mov.u32 	%dim, %ntid.x;
	mov.u32 	%lane, %tid.x;
	mad.lo.s32 	%x, %cta, %dim, %lane;
	// y = ctaid.y * ntid.y + tid.y
	mov.u32 	%cta, %ctaid.y;
	mov.u32 	%dim, %ntid.y;
	mov.u32 	%lane, %tid.y;
	mad.lo.s32 	%y, %cta, %dim, %lane;
	// Leave unless inWidth > x and inHeight > y (signed compares).
	ld.param.s32 	%width, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel_inWidth];
	ld.param.s32 	%height, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel_inHeight];
	setp.gt.s32 	%in_x, %width, %x;
	setp.gt.s32 	%in_y, %height, %y;
	and.pred 	%inside, %in_x, %in_y;
	@!%inside bra 	$L_bgra32f_to_prgb32f_exit;
	// Source pixel index = inSrcPitch * y + x (32-bit arithmetic).
	ld.param.s32 	%src_pitch, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel_inSrcPitch];
	mad.lo.s32 	%src_idx, %src_pitch, %y, %x;
	ld.param.u64 	%src_base, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel_inSrc];
	ld.param.s32 	%src_fmt, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel_inSrcDeviceFormat];
	setp.ne.s32 	%src_f32, %src_fmt, 0;
	@%src_f32 bra 	$L_bgra32f_to_prgb32f_load_f32;
	.loc	19	115	0
	// 16-bit source: 8 bytes per pixel; the index is sign-extended by mul.wide.
	mul.wide.s32 	%src_off, %src_idx, 8;
	add.u64 	%src_addr, %src_base, %src_off;
	ld.global.v4.u16 	{%in_h0,%in_h1,%in_h2,%in_h3}, [%src_addr];
	.loc	22	223	0
	cvt.ftz.f32.f16 	%c0, %in_h0;
	cvt.ftz.f32.f16 	%c1, %in_h1;
	cvt.ftz.f32.f16 	%c2, %in_h2;
	cvt.ftz.f32.f16 	%c3, %in_h3;
	bra.uni 	$L_bgra32f_to_prgb32f_loaded;
$L_bgra32f_to_prgb32f_load_f32:
	// 32-bit float source: 16 bytes per pixel.
	mul.wide.s32 	%src_off, %src_idx, 16;
	add.u64 	%src_addr, %src_base, %src_off;
	ld.global.v4.f32 	{%c0,%c1,%c2,%c3}, [%src_addr];
$L_bgra32f_to_prgb32f_loaded:
	// Scale the first three components by the fourth (%premN = cN * c3).
	mul.ftz.f32 	%prem2, %c2, %c3;
	mul.ftz.f32 	%prem1, %c1, %c3;
	mul.ftz.f32 	%prem0, %c0, %c3;
	// Destination pixel index = inDestPitch * y + x (32-bit arithmetic).
	ld.param.s32 	%dst_pitch, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel_inDestPitch];
	mad.lo.s32 	%dst_idx, %dst_pitch, %y, %x;
	ld.param.u64 	%dst_base, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel_inDest];
	ld.param.s32 	%dst_fmt, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel_inDestDeviceFormat];
	setp.ne.s32 	%dst_f32, %dst_fmt, 0;
	@%dst_f32 bra 	$L_bgra32f_to_prgb32f_store_f32;
	.loc	19	126	0
	// 16-bit destination: round each value to f16 and store 8 bytes.
	mul.wide.s32 	%dst_off, %dst_idx, 8;
	add.u64 	%dst_addr, %dst_base, %dst_off;
	cvt.rn.ftz.f16.f32 	%out_h0, %c3;
	cvt.rn.ftz.f16.f32 	%out_h1, %prem2;
	cvt.rn.ftz.f16.f32 	%out_h2, %prem1;
	cvt.rn.ftz.f16.f32 	%out_h3, %prem0;
	st.global.v4.u16 	[%dst_addr], {%out_h0,%out_h1,%out_h2,%out_h3};
	.loc	22	223	0
	bra.uni 	$L_bgra32f_to_prgb32f_exit;
$L_bgra32f_to_prgb32f_store_f32:
	.loc	19	126	0
	// 32-bit float destination: store 16 bytes.
	mul.wide.s32 	%dst_off, %dst_idx, 16;
	add.u64 	%dst_addr, %dst_base, %dst_off;
	st.global.v4.f32 	[%dst_addr], {%c3,%prem2,%prem1,%prem0};
$L_bgra32f_to_prgb32f_exit:
	.loc	22	223	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Kernel

	// Pixel-format conversion kernel, one thread per pixel (BGRA_4444_32f to
	// XRGB_4444_32f per the kernel name).
	// Loads four components c0..c3 from inSrc (four f16 values when
	// inSrcDeviceFormat == 0, otherwise four f32 values), multiplies c0..c2 by
	// c3 and stores {1.0, c2*c3, c1*c3, c0*c3} to inDest (four f16 values when
	// inDestDeviceFormat == 0, otherwise four f32 values). Both pitches are
	// applied as pixel counts per row. Threads outside inWidth x inHeight exit
	// without touching memory.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel_inHeight)
	{
	.reg .u32 	%cta, %dim, %lane;
	.reg .u32 	%x, %y, %width, %height;
	.reg .u32 	%src_pitch, %src_fmt, %src_idx;
	.reg .u32 	%dst_pitch, %dst_fmt, %dst_idx;
	.reg .b32 	%in_h<4>;
	.reg .b32 	%out_h<4>;
	.reg .u64 	%src_base, %src_off, %src_addr;
	.reg .u64 	%dst_base, %dst_off, %dst_addr;
	.reg .f32 	%c<4>;
	.reg .f32 	%prem<3>;
	.reg .f32 	%one;
	.reg .pred 	%in_x, %in_y, %inside, %src_f32, %dst_f32;
	.loc	22	224	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel:
	// x = ctaid.x * ntid.x + tid.x
	mov.u32 	%cta, %ctaid.x;
	mov.u32 	%dim, %ntid.x;
	mov.u32 	%lane, %tid.x;
	mad.lo.s32 	%x, %cta, %dim, %lane;
	// y = ctaid.y * ntid.y + tid.y
	mov.u32 	%cta, %ctaid.y;
	mov.u32 	%dim, %ntid.y;
	mov.u32 	%lane, %tid.y;
	mad.lo.s32 	%y, %cta, %dim, %lane;
	// Leave unless inWidth > x and inHeight > y (signed compares).
	ld.param.s32 	%width, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel_inWidth];
	ld.param.s32 	%height, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel_inHeight];
	setp.gt.s32 	%in_x, %width, %x;
	setp.gt.s32 	%in_y, %height, %y;
	and.pred 	%inside, %in_x, %in_y;
	@!%inside bra 	$L_bgra32f_to_xrgb32f_exit;
	// Source pixel index = inSrcPitch * y + x (32-bit arithmetic).
	ld.param.s32 	%src_pitch, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel_inSrcPitch];
	mad.lo.s32 	%src_idx, %src_pitch, %y, %x;
	ld.param.u64 	%src_base, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel_inSrc];
	ld.param.s32 	%src_fmt, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel_inSrcDeviceFormat];
	setp.ne.s32 	%src_f32, %src_fmt, 0;
	@%src_f32 bra 	$L_bgra32f_to_xrgb32f_load_f32;
	.loc	19	115	0
	// 16-bit source: 8 bytes per pixel; the index is sign-extended by mul.wide.
	mul.wide.s32 	%src_off, %src_idx, 8;
	add.u64 	%src_addr, %src_base, %src_off;
	ld.global.v4.u16 	{%in_h0,%in_h1,%in_h2,%in_h3}, [%src_addr];
	.loc	22	224	0
	cvt.ftz.f32.f16 	%c0, %in_h0;
	cvt.ftz.f32.f16 	%c1, %in_h1;
	cvt.ftz.f32.f16 	%c2, %in_h2;
	cvt.ftz.f32.f16 	%c3, %in_h3;
	bra.uni 	$L_bgra32f_to_xrgb32f_loaded;
$L_bgra32f_to_xrgb32f_load_f32:
	// 32-bit float source: 16 bytes per pixel.
	mul.wide.s32 	%src_off, %src_idx, 16;
	add.u64 	%src_addr, %src_base, %src_off;
	ld.global.v4.f32 	{%c0,%c1,%c2,%c3}, [%src_addr];
$L_bgra32f_to_xrgb32f_loaded:
	// Scale the first three components by the fourth (%premN = cN * c3); the
	// first output component is the constant 1.0.
	mul.ftz.f32 	%prem2, %c2, %c3;
	mul.ftz.f32 	%prem1, %c1, %c3;
	mul.ftz.f32 	%prem0, %c0, %c3;
	mov.f32 	%one, 0f3f800000;     	// 1
	// Destination pixel index = inDestPitch * y + x (32-bit arithmetic).
	ld.param.s32 	%dst_pitch, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel_inDestPitch];
	mad.lo.s32 	%dst_idx, %dst_pitch, %y, %x;
	ld.param.u64 	%dst_base, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel_inDest];
	ld.param.s32 	%dst_fmt, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel_inDestDeviceFormat];
	setp.ne.s32 	%dst_f32, %dst_fmt, 0;
	@%dst_f32 bra 	$L_bgra32f_to_xrgb32f_store_f32;
	.loc	19	126	0
	// 16-bit destination: round each value to f16 and store 8 bytes.
	mul.wide.s32 	%dst_off, %dst_idx, 8;
	add.u64 	%dst_addr, %dst_base, %dst_off;
	cvt.rn.ftz.f16.f32 	%out_h0, %one;
	cvt.rn.ftz.f16.f32 	%out_h1, %prem2;
	cvt.rn.ftz.f16.f32 	%out_h2, %prem1;
	cvt.rn.ftz.f16.f32 	%out_h3, %prem0;
	st.global.v4.u16 	[%dst_addr], {%out_h0,%out_h1,%out_h2,%out_h3};
	.loc	22	224	0
	bra.uni 	$L_bgra32f_to_xrgb32f_exit;
$L_bgra32f_to_xrgb32f_store_f32:
	.loc	19	126	0
	// 32-bit float destination: store 16 bytes.
	mul.wide.s32 	%dst_off, %dst_idx, 16;
	add.u64 	%dst_addr, %dst_base, %dst_off;
	st.global.v4.f32 	[%dst_addr], {%one,%prem2,%prem1,%prem0};
$L_bgra32f_to_xrgb32f_exit:
	.loc	22	224	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Kernel

	// Pixel-format conversion kernel, one thread per pixel. Per the kernel
	// name it converts BGRA_4444_32f to VUYA_4444_32f.
	//   inSrc, inSrcPitch    : source base address and row pitch (used as a
	//                          pixel count: index = pitch * y + x).
	//   inSrcDeviceFormat    : 0 selects four 16-bit values per pixel converted
	//                          from f16; any other value selects four f32 values.
	//   inDest, inDestPitch  : destination base address and row pitch (pixels).
	//   inDestDeviceFormat   : 0 stores four f16 values per pixel; any other
	//                          value stores four f32 values.
	//   inWidth, inHeight    : a thread proceeds only when inWidth > x and
	//                          inHeight > y (signed compares).
	// With c0..c3 the source components in memory order and M[i] the i-th f32
	// of the global kRGB32f_To_601YPbPr, the stored pixel is
	//   { M[7]*c1 + M[6]*c2 + M[8]*c0,
	//     M[4]*c1 + M[3]*c2 + M[5]*c0,
	//     M[1]*c1 + M[0]*c2 + M[2]*c0,
	//     c3 }
	// each sum evaluated as one mul followed by two fused multiply-adds.
	// NOTE(review): kRGB32f_To_601YPbPr is defined outside this chunk; it is
	// presumably a row-major 3x3 matrix with rows Y, Pb, Pr - confirm.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<36>;
	.reg .pred %p<5>;
	.loc	22	225	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel:
	// %r8  = x = ctaid.x * ntid.x + tid.x
	// %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test. Each set.gt result is negated and the two are ANDed; %r17 is
	// non-zero only when inWidth > x and inHeight > y. A zero result skips
	// straight to the exit label.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_108_195586;
	// Source pixel index = inSrcPitch * y + x, formed in 32 bits and then
	// sign-extended to 64 bits (%rd1).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel_inSrc];
	// inSrcDeviceFormat != 0 takes the f32 load path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_108_196354;
	.loc	19	115	0
	// 16-bit source: 8 bytes per pixel, one vector load of four u16 values.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	225	0
	// Convert each 16-bit value from f16 to f32 (%f1..%f4 = c0..c3).
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_108_196098;
$Lt_108_196354:
	// 32-bit float source: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_108_196098:
	// First output component, shared by both store paths:
	// %f10 = M[7]*c1 + M[6]*c2 + M[8]*c0 (byte offsets 28, 24, 32).
	ld.global.f32 	%f5, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f6, %f5, %f2;
	ld.global.f32 	%f7, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f8, %f7, %f3, %f6;
	ld.global.f32 	%f9, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f10, %f9, %f1, %f8;
	// Destination pixel index = inDestPitch * y + x, sign-extended to %rd7.
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 takes the f32 store path below.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p3, %r31, %r32;
	@%p3 bra 	$Lt_108_196866;
	.loc	19	126	0
	// 16-bit destination: 8 bytes per pixel; every value is rounded to f16.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r33, %b1; }
	// Second component: M[4]*c1 + M[3]*c2 + M[5]*c0 (byte offsets 16, 12, 20).
	ld.global.f32 	%f11, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f12, %f11, %f2;
	ld.global.f32 	%f13, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f14, %f13, %f3, %f12;
	ld.global.f32 	%f15, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f16, %f15, %f1, %f14;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f16;
	mov.b32		%r34, %b1; }
	// Third component: M[1]*c1 + M[0]*c2 + M[2]*c0 (byte offsets 4, 0, 8).
	ld.global.f32 	%f17, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f18, %f17, %f2;
	ld.global.f32 	%f19, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f20, %f19, %f3, %f18;
	ld.global.f32 	%f21, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f22, %f21, %f1, %f20;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f22;
	mov.b32		%r35, %b1; }
	// Fourth component: c3 passed through unchanged apart from the f16 rounding.
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	225	0
	bra.uni 	$Lt_108_196610;
$Lt_108_196866:
	.loc	19	126	0
	// 32-bit float destination: 16 bytes per pixel. The second and third
	// components are recomputed here with the same coefficients and operation
	// order as in the 16-bit path.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.f32 	%f23, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f24, %f23, %f2;
	ld.global.f32 	%f25, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f26, %f25, %f3, %f24;
	ld.global.f32 	%f27, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f28, %f27, %f1, %f26;
	ld.global.f32 	%f29, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f30, %f29, %f2;
	ld.global.f32 	%f31, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f32, %f31, %f3, %f30;
	ld.global.f32 	%f33, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f34, %f33, %f1, %f32;
	st.global.v4.f32 	[%rd12+0], {%f10,%f28,%f34,%f4};
$Lt_108_196610:
$Lt_108_195586:
	.loc	22	225	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_Kernel

	// Per-pixel conversion kernel using the kRGB32f_To_601YPbPr coefficient table.
	// Each thread handles the pixel at x = ctaid.x*ntid.x + tid.x, y = ctaid.y*ntid.y + tid.y.
	//   inSrc, inDest           : 64-bit base addresses, accessed with ld.global / st.global
	//   inSrcPitch, inDestPitch : row stride in pixels (index = x + pitch*y is scaled by 8 or 16 bytes)
	//   in*DeviceFormat         : 0 -> pixel is 4 x 16-bit half floats; non-zero -> pixel is 4 x f32
	//   inWidth, inHeight       : threads outside inWidth x inHeight fall straight through to exit
	// The three matrix outputs are each multiplied by the fourth input channel, which is
	// itself stored unchanged as the fourth output channel.
	// NOTE(review): channel meaning (B,G,R,A in / V,U,Y,premultiplied-alpha out) is inferred
	// from the kernel name only - confirm against the CUDA source.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<41>;
	.reg .pred %p<5>;
	.loc	22	226	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check: %r17 is non-zero only when inWidth > x AND inHeight > y (signed compares);
	// otherwise branch to the exit label.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_109_196354;
	// Source pixel index %rd1 = x + inSrcPitch * y, sign-extended to 64 bits.
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel_inSrc];
	// inSrcDeviceFormat != 0 selects the 32-bit float load path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_109_197122;
	.loc	19	115	0
	// Half-float source: 8 bytes per pixel. Load four 16-bit values, then widen each to f32 (%f1..%f4).
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	226	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_109_196866;
$Lt_109_197122:
	// Float source: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_109_196866:
	// First output channel: %f10 = t[+28]*%f2 + t[+24]*%f3 + t[+32]*%f1 (byte offsets into the table),
	// then %f11 = %f10 * %f4.
	ld.global.f32 	%f5, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f6, %f5, %f2;
	ld.global.f32 	%f7, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f8, %f7, %f3, %f6;
	ld.global.f32 	%f9, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f10, %f9, %f1, %f8;
	mul.ftz.f32 	%f11, %f10, %f4;
	// Destination pixel index %rd7 = x + inDestPitch * y, sign-extended to 64 bits.
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 selects the 32-bit float store path below.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p3, %r31, %r32;
	@%p3 bra 	$Lt_109_197634;
	.loc	19	126	0
	// Half-float destination: 8 bytes per pixel; every channel is rounded to f16 before the store.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r33, %b1; }
	// Second channel: table offsets +16/+12/+20 applied to %f2/%f3/%f1, scaled by %f4.
	ld.global.f32 	%f12, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f13, %f12, %f2;
	ld.global.f32 	%f14, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f15, %f14, %f3, %f13;
	ld.global.f32 	%f16, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f17, %f16, %f1, %f15;
	mul.ftz.f32 	%f18, %f4, %f17;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r34, %b1; }
	// Third channel: table offsets +4/+0/+8 applied to %f2/%f3/%f1, scaled by %f4.
	ld.global.f32 	%f19, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f20, %f19, %f2;
	ld.global.f32 	%f21, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f22, %f21, %f3, %f20;
	ld.global.f32 	%f23, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f24, %f23, %f1, %f22;
	mul.ftz.f32 	%f25, %f4, %f24;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f25;
	mov.b32		%r35, %b1; }
	// Fourth channel: the input %f4 passed through unchanged.
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	226	0
	bra.uni 	$Lt_109_197378;
$Lt_109_197634:
	.loc	19	126	0
	// Float destination: 16 bytes per pixel; same second/third channel math as above, stored as f32.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.f32 	%f26, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f27, %f26, %f2;
	ld.global.f32 	%f28, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f29, %f28, %f3, %f27;
	ld.global.f32 	%f30, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f31, %f30, %f1, %f29;
	mul.ftz.f32 	%f32, %f4, %f31;
	ld.global.f32 	%f33, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f34, %f33, %f2;
	ld.global.f32 	%f35, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f36, %f35, %f3, %f34;
	ld.global.f32 	%f37, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f38, %f37, %f1, %f36;
	mul.ftz.f32 	%f39, %f4, %f38;
	st.global.v4.f32 	[%rd12+0], {%f11,%f32,%f39,%f4};
$Lt_109_197378:
$Lt_109_196354:
	// Common exit for both store paths and for out-of-bounds threads.
	.loc	22	226	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_Kernel

	// Per-pixel conversion kernel using the kRGB32f_To_601YPbPr coefficient table.
	// Each thread handles the pixel at x = ctaid.x*ntid.x + tid.x, y = ctaid.y*ntid.y + tid.y.
	//   inSrc, inDest           : 64-bit base addresses, accessed with ld.global / st.global
	//   inSrcPitch, inDestPitch : row stride in pixels (index = x + pitch*y is scaled by 8 or 16 bytes)
	//   in*DeviceFormat         : 0 -> pixel is 4 x 16-bit half floats; non-zero -> pixel is 4 x f32
	//   inWidth, inHeight       : threads outside inWidth x inHeight fall straight through to exit
	// The three matrix outputs are each multiplied by the fourth input channel; the fourth
	// output channel is the constant 1.0 rather than the input value.
	// NOTE(review): channel meaning (B,G,R,A in / V,U,Y,X out) is inferred from the kernel
	// name only - confirm against the CUDA source.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<43>;
	.reg .pred %p<5>;
	.loc	22	227	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check: %r17 is non-zero only when inWidth > x AND inHeight > y (signed compares);
	// otherwise branch to the exit label.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_110_196098;
	// Source pixel index %rd1 = x + inSrcPitch * y, sign-extended to 64 bits.
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel_inSrc];
	// inSrcDeviceFormat != 0 selects the 32-bit float load path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_110_196866;
	.loc	19	115	0
	// Half-float source: 8 bytes per pixel. Load four 16-bit values, then widen each to f32 (%f1..%f4).
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	227	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_110_196610;
$Lt_110_196866:
	// Float source: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_110_196610:
	// First output channel: %f10 = t[+28]*%f2 + t[+24]*%f3 + t[+32]*%f1 (byte offsets into the table),
	// then %f11 = %f10 * %f4.
	ld.global.f32 	%f5, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f6, %f5, %f2;
	ld.global.f32 	%f7, [kRGB32f_To_601YPbPr+24];
	fma.rn.ftz.f32 	%f8, %f7, %f3, %f6;
	ld.global.f32 	%f9, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f10, %f9, %f1, %f8;
	mul.ftz.f32 	%f11, %f10, %f4;
	// Destination pixel index %rd7 = x + inDestPitch * y, sign-extended to 64 bits.
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel_inDest];
	// inDestDeviceFormat != 0 selects the 32-bit float store path below.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p3, %r31, %r32;
	@%p3 bra 	$Lt_110_197378;
	.loc	19	126	0
	// Half-float destination: 8 bytes per pixel; every channel is rounded to f16 before the store.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r33, %b1; }
	// Second channel: table offsets +16/+12/+20 applied to %f2/%f3/%f1, scaled by %f4.
	ld.global.f32 	%f12, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f13, %f12, %f2;
	ld.global.f32 	%f14, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f15, %f14, %f3, %f13;
	ld.global.f32 	%f16, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f17, %f16, %f1, %f15;
	mul.ftz.f32 	%f18, %f4, %f17;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r34, %b1; }
	// Third channel: table offsets +4/+0/+8 applied to %f2/%f3/%f1, scaled by %f4.
	ld.global.f32 	%f19, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f20, %f19, %f2;
	ld.global.f32 	%f21, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f22, %f21, %f3, %f20;
	ld.global.f32 	%f23, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f24, %f23, %f1, %f22;
	mul.ftz.f32 	%f25, %f4, %f24;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f25;
	mov.b32		%r35, %b1; }
	// Fourth channel: constant 1.0 (the input %f4 is not written out).
	mov.f32 	%f26, 0f3f800000;    	// 1
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f26;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	227	0
	bra.uni 	$Lt_110_197122;
$Lt_110_197378:
	.loc	19	126	0
	// Float destination: 16 bytes per pixel; same second/third channel math as above, stored as f32.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.f32 	%f27, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f28, %f27, %f2;
	ld.global.f32 	%f29, [kRGB32f_To_601YPbPr+12];
	fma.rn.ftz.f32 	%f30, %f29, %f3, %f28;
	ld.global.f32 	%f31, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f32, %f31, %f1, %f30;
	mul.ftz.f32 	%f33, %f4, %f32;
	ld.global.f32 	%f34, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f35, %f34, %f2;
	ld.global.f32 	%f36, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f37, %f36, %f3, %f35;
	ld.global.f32 	%f38, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f39, %f38, %f1, %f37;
	mul.ftz.f32 	%f40, %f4, %f39;
	// Fourth channel: constant 1.0.
	mov.f32 	%f41, 0f3f800000;    	// 1
	st.global.v4.f32 	[%rd12+0], {%f11,%f33,%f40,%f41};
$Lt_110_197122:
$Lt_110_196098:
	// Common exit for both store paths and for out-of-bounds threads.
	.loc	22	227	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_Kernel

	// Per-pixel conversion kernel using the kRGB32f_To_709YPbPr coefficient table.
	// Each thread handles the pixel at x = ctaid.x*ntid.x + tid.x, y = ctaid.y*ntid.y + tid.y.
	//   inSrc, inDest           : 64-bit base addresses, accessed with ld.global / st.global
	//   inSrcPitch, inDestPitch : row stride in pixels (index = x + pitch*y is scaled by 8 or 16 bytes)
	//   in*DeviceFormat         : 0 -> pixel is 4 x 16-bit half floats; non-zero -> pixel is 4 x f32
	//   inWidth, inHeight       : threads outside inWidth x inHeight fall straight through to exit
	// The three matrix outputs are stored as computed (no scaling by the fourth channel);
	// the fourth input channel is stored unchanged as the fourth output channel.
	// NOTE(review): channel meaning (B,G,R,A in / V,U,Y,A out) is inferred from the kernel
	// name only - confirm against the CUDA source.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<36>;
	.reg .pred %p<5>;
	.loc	22	228	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check: %r17 is non-zero only when inWidth > x AND inHeight > y (signed compares);
	// otherwise branch to the exit label.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_111_195842;
	// Source pixel index %rd1 = x + inSrcPitch * y, sign-extended to 64 bits.
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel_inSrc];
	// inSrcDeviceFormat != 0 selects the 32-bit float load path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_111_196610;
	.loc	19	115	0
	// Half-float source: 8 bytes per pixel. Load four 16-bit values, then widen each to f32 (%f1..%f4).
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	228	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_111_196354;
$Lt_111_196610:
	// Float source: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_111_196354:
	// First output channel: %f10 = t[+28]*%f2 + t[+24]*%f3 + t[+32]*%f1 (byte offsets into the table).
	ld.global.f32 	%f5, [kRGB32f_To_709YPbPr+28];
	mul.ftz.f32 	%f6, %f5, %f2;
	ld.global.f32 	%f7, [kRGB32f_To_709YPbPr+24];
	fma.rn.ftz.f32 	%f8, %f7, %f3, %f6;
	ld.global.f32 	%f9, [kRGB32f_To_709YPbPr+32];
	fma.rn.ftz.f32 	%f10, %f9, %f1, %f8;
	// Destination pixel index %rd7 = x + inDestPitch * y, sign-extended to 64 bits.
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel_inDest];
	// inDestDeviceFormat != 0 selects the 32-bit float store path below.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p3, %r31, %r32;
	@%p3 bra 	$Lt_111_197122;
	.loc	19	126	0
	// Half-float destination: 8 bytes per pixel; every channel is rounded to f16 before the store.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f10;
	mov.b32		%r33, %b1; }
	// Second channel: table offsets +16/+12/+20 applied to %f2/%f3/%f1.
	ld.global.f32 	%f11, [kRGB32f_To_709YPbPr+16];
	mul.ftz.f32 	%f12, %f11, %f2;
	ld.global.f32 	%f13, [kRGB32f_To_709YPbPr+12];
	fma.rn.ftz.f32 	%f14, %f13, %f3, %f12;
	ld.global.f32 	%f15, [kRGB32f_To_709YPbPr+20];
	fma.rn.ftz.f32 	%f16, %f15, %f1, %f14;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f16;
	mov.b32		%r34, %b1; }
	// Third channel: table offsets +4/+0/+8 applied to %f2/%f3/%f1.
	ld.global.f32 	%f17, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f18, %f17, %f2;
	ld.global.f32 	%f19, [kRGB32f_To_709YPbPr+0];
	fma.rn.ftz.f32 	%f20, %f19, %f3, %f18;
	ld.global.f32 	%f21, [kRGB32f_To_709YPbPr+8];
	fma.rn.ftz.f32 	%f22, %f21, %f1, %f20;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f22;
	mov.b32		%r35, %b1; }
	// Fourth channel: the input %f4 passed through unchanged.
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	228	0
	bra.uni 	$Lt_111_196866;
$Lt_111_197122:
	.loc	19	126	0
	// Float destination: 16 bytes per pixel; same second/third channel math as above, stored as f32.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.f32 	%f23, [kRGB32f_To_709YPbPr+16];
	mul.ftz.f32 	%f24, %f23, %f2;
	ld.global.f32 	%f25, [kRGB32f_To_709YPbPr+12];
	fma.rn.ftz.f32 	%f26, %f25, %f3, %f24;
	ld.global.f32 	%f27, [kRGB32f_To_709YPbPr+20];
	fma.rn.ftz.f32 	%f28, %f27, %f1, %f26;
	ld.global.f32 	%f29, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f30, %f29, %f2;
	ld.global.f32 	%f31, [kRGB32f_To_709YPbPr+0];
	fma.rn.ftz.f32 	%f32, %f31, %f3, %f30;
	ld.global.f32 	%f33, [kRGB32f_To_709YPbPr+8];
	fma.rn.ftz.f32 	%f34, %f33, %f1, %f32;
	st.global.v4.f32 	[%rd12+0], {%f10,%f28,%f34,%f4};
$Lt_111_196866:
$Lt_111_195842:
	// Common exit for both store paths and for out-of-bounds threads.
	.loc	22	228	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYA_4444_32f_709_Kernel

	// Per-pixel conversion kernel using the kRGB32f_To_709YPbPr coefficient table.
	// Each thread handles the pixel at x = ctaid.x*ntid.x + tid.x, y = ctaid.y*ntid.y + tid.y.
	//   inSrc, inDest           : 64-bit base addresses, accessed with ld.global / st.global
	//   inSrcPitch, inDestPitch : row stride in pixels (index = x + pitch*y is scaled by 8 or 16 bytes)
	//   in*DeviceFormat         : 0 -> pixel is 4 x 16-bit half floats; non-zero -> pixel is 4 x f32
	//   inWidth, inHeight       : threads outside inWidth x inHeight fall straight through to exit
	// The three matrix outputs are each multiplied by the fourth input channel, which is
	// itself stored unchanged as the fourth output channel.
	// NOTE(review): channel meaning (B,G,R,A in / V,U,Y,premultiplied-alpha out) is inferred
	// from the kernel name only - confirm against the CUDA source.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<41>;
	.reg .pred %p<5>;
	.loc	22	229	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check: %r17 is non-zero only when inWidth > x AND inHeight > y (signed compares);
	// otherwise branch to the exit label.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_112_196610;
	// Source pixel index %rd1 = x + inSrcPitch * y, sign-extended to 64 bits.
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel_inSrc];
	// inSrcDeviceFormat != 0 selects the 32-bit float load path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_112_197378;
	.loc	19	115	0
	// Half-float source: 8 bytes per pixel. Load four 16-bit values, then widen each to f32 (%f1..%f4).
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	229	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_112_197122;
$Lt_112_197378:
	// Float source: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_112_197122:
	// First output channel: %f10 = t[+28]*%f2 + t[+24]*%f3 + t[+32]*%f1 (byte offsets into the table),
	// then %f11 = %f10 * %f4.
	ld.global.f32 	%f5, [kRGB32f_To_709YPbPr+28];
	mul.ftz.f32 	%f6, %f5, %f2;
	ld.global.f32 	%f7, [kRGB32f_To_709YPbPr+24];
	fma.rn.ftz.f32 	%f8, %f7, %f3, %f6;
	ld.global.f32 	%f9, [kRGB32f_To_709YPbPr+32];
	fma.rn.ftz.f32 	%f10, %f9, %f1, %f8;
	mul.ftz.f32 	%f11, %f10, %f4;
	// Destination pixel index %rd7 = x + inDestPitch * y, sign-extended to 64 bits.
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel_inDest];
	// inDestDeviceFormat != 0 selects the 32-bit float store path below.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p3, %r31, %r32;
	@%p3 bra 	$Lt_112_197890;
	.loc	19	126	0
	// Half-float destination: 8 bytes per pixel; every channel is rounded to f16 before the store.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r33, %b1; }
	// Second channel: table offsets +16/+12/+20 applied to %f2/%f3/%f1, scaled by %f4.
	ld.global.f32 	%f12, [kRGB32f_To_709YPbPr+16];
	mul.ftz.f32 	%f13, %f12, %f2;
	ld.global.f32 	%f14, [kRGB32f_To_709YPbPr+12];
	fma.rn.ftz.f32 	%f15, %f14, %f3, %f13;
	ld.global.f32 	%f16, [kRGB32f_To_709YPbPr+20];
	fma.rn.ftz.f32 	%f17, %f16, %f1, %f15;
	mul.ftz.f32 	%f18, %f4, %f17;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r34, %b1; }
	// Third channel: table offsets +4/+0/+8 applied to %f2/%f3/%f1, scaled by %f4.
	ld.global.f32 	%f19, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f20, %f19, %f2;
	ld.global.f32 	%f21, [kRGB32f_To_709YPbPr+0];
	fma.rn.ftz.f32 	%f22, %f21, %f3, %f20;
	ld.global.f32 	%f23, [kRGB32f_To_709YPbPr+8];
	fma.rn.ftz.f32 	%f24, %f23, %f1, %f22;
	mul.ftz.f32 	%f25, %f4, %f24;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f25;
	mov.b32		%r35, %b1; }
	// Fourth channel: the input %f4 passed through unchanged.
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	229	0
	bra.uni 	$Lt_112_197634;
$Lt_112_197890:
	.loc	19	126	0
	// Float destination: 16 bytes per pixel; same second/third channel math as above, stored as f32.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.f32 	%f26, [kRGB32f_To_709YPbPr+16];
	mul.ftz.f32 	%f27, %f26, %f2;
	ld.global.f32 	%f28, [kRGB32f_To_709YPbPr+12];
	fma.rn.ftz.f32 	%f29, %f28, %f3, %f27;
	ld.global.f32 	%f30, [kRGB32f_To_709YPbPr+20];
	fma.rn.ftz.f32 	%f31, %f30, %f1, %f29;
	mul.ftz.f32 	%f32, %f4, %f31;
	ld.global.f32 	%f33, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f34, %f33, %f2;
	ld.global.f32 	%f35, [kRGB32f_To_709YPbPr+0];
	fma.rn.ftz.f32 	%f36, %f35, %f3, %f34;
	ld.global.f32 	%f37, [kRGB32f_To_709YPbPr+8];
	fma.rn.ftz.f32 	%f38, %f37, %f1, %f36;
	mul.ftz.f32 	%f39, %f4, %f38;
	st.global.v4.f32 	[%rd12+0], {%f11,%f32,%f39,%f4};
$Lt_112_197634:
$Lt_112_196610:
	// Common exit for both store paths and for out-of-bounds threads.
	.loc	22	229	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYP_4444_32f_709_Kernel

	// Per-pixel conversion kernel using the kRGB32f_To_709YPbPr coefficient table.
	// Each thread handles the pixel at x = ctaid.x*ntid.x + tid.x, y = ctaid.y*ntid.y + tid.y.
	//   inSrc, inDest           : 64-bit base addresses, accessed with ld.global / st.global
	//   inSrcPitch, inDestPitch : row stride in pixels (index = x + pitch*y is scaled by 8 or 16 bytes)
	//   in*DeviceFormat         : 0 -> pixel is 4 x 16-bit half floats; non-zero -> pixel is 4 x f32
	//   inWidth, inHeight       : threads outside inWidth x inHeight fall straight through to exit
	// The three matrix outputs are each multiplied by the fourth input channel; the fourth
	// output channel is the constant 1.0 rather than the input value.
	// NOTE(review): channel meaning (B,G,R,A in / V,U,Y,X out) is inferred from the kernel
	// name only - confirm against the CUDA source.
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<43>;
	.reg .pred %p<5>;
	.loc	22	230	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check: %r17 is non-zero only when inWidth > x AND inHeight > y (signed compares);
	// otherwise branch to the exit label.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_113_196354;
	// Source pixel index %rd1 = x + inSrcPitch * y, sign-extended to 64 bits.
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel_inSrc];
	// inSrcDeviceFormat != 0 selects the 32-bit float load path below.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_113_197122;
	.loc	19	115	0
	// Half-float source: 8 bytes per pixel. Load four 16-bit values, then widen each to f32 (%f1..%f4).
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	230	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_113_196866;
$Lt_113_197122:
	// Float source: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_113_196866:
	// First output channel: %f10 = t[+28]*%f2 + t[+24]*%f3 + t[+32]*%f1 (byte offsets into the table),
	// then %f11 = %f10 * %f4.
	ld.global.f32 	%f5, [kRGB32f_To_709YPbPr+28];
	mul.ftz.f32 	%f6, %f5, %f2;
	ld.global.f32 	%f7, [kRGB32f_To_709YPbPr+24];
	fma.rn.ftz.f32 	%f8, %f7, %f3, %f6;
	ld.global.f32 	%f9, [kRGB32f_To_709YPbPr+32];
	fma.rn.ftz.f32 	%f10, %f9, %f1, %f8;
	mul.ftz.f32 	%f11, %f10, %f4;
	// Destination pixel index %rd7 = x + inDestPitch * y, sign-extended to 64 bits.
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel_inDest];
	// inDestDeviceFormat != 0 selects the 32-bit float store path below.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p3, %r31, %r32;
	@%p3 bra 	$Lt_113_197634;
	.loc	19	126	0
	// Half-float destination: 8 bytes per pixel; every channel is rounded to f16 before the store.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r33, %b1; }
	// Second channel: table offsets +16/+12/+20 applied to %f2/%f3/%f1, scaled by %f4.
	ld.global.f32 	%f12, [kRGB32f_To_709YPbPr+16];
	mul.ftz.f32 	%f13, %f12, %f2;
	ld.global.f32 	%f14, [kRGB32f_To_709YPbPr+12];
	fma.rn.ftz.f32 	%f15, %f14, %f3, %f13;
	ld.global.f32 	%f16, [kRGB32f_To_709YPbPr+20];
	fma.rn.ftz.f32 	%f17, %f16, %f1, %f15;
	mul.ftz.f32 	%f18, %f4, %f17;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r34, %b1; }
	// Third channel: table offsets +4/+0/+8 applied to %f2/%f3/%f1, scaled by %f4.
	ld.global.f32 	%f19, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f20, %f19, %f2;
	ld.global.f32 	%f21, [kRGB32f_To_709YPbPr+0];
	fma.rn.ftz.f32 	%f22, %f21, %f3, %f20;
	ld.global.f32 	%f23, [kRGB32f_To_709YPbPr+8];
	fma.rn.ftz.f32 	%f24, %f23, %f1, %f22;
	mul.ftz.f32 	%f25, %f4, %f24;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f25;
	mov.b32		%r35, %b1; }
	// Fourth channel: constant 1.0 (the input %f4 is not written out).
	mov.f32 	%f26, 0f3f800000;    	// 1
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f26;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	230	0
	bra.uni 	$Lt_113_197378;
$Lt_113_197634:
	.loc	19	126	0
	// Float destination: 16 bytes per pixel; same second/third channel math as above, stored as f32.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.f32 	%f27, [kRGB32f_To_709YPbPr+16];
	mul.ftz.f32 	%f28, %f27, %f2;
	ld.global.f32 	%f29, [kRGB32f_To_709YPbPr+12];
	fma.rn.ftz.f32 	%f30, %f29, %f3, %f28;
	ld.global.f32 	%f31, [kRGB32f_To_709YPbPr+20];
	fma.rn.ftz.f32 	%f32, %f31, %f1, %f30;
	mul.ftz.f32 	%f33, %f4, %f32;
	ld.global.f32 	%f34, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f35, %f34, %f2;
	ld.global.f32 	%f36, [kRGB32f_To_709YPbPr+0];
	fma.rn.ftz.f32 	%f37, %f36, %f3, %f35;
	ld.global.f32 	%f38, [kRGB32f_To_709YPbPr+8];
	fma.rn.ftz.f32 	%f39, %f38, %f1, %f37;
	mul.ftz.f32 	%f40, %f4, %f39;
	// Fourth channel: constant 1.0.
	mov.f32 	%f41, 0f3f800000;    	// 1
	st.global.v4.f32 	[%rd12+0], {%f11,%f33,%f40,%f41};
$Lt_113_197378:
$Lt_113_196354:
	// Common exit for both store paths and for out-of-bounds threads.
	.loc	22	230	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_VUYX_4444_32f_709_Kernel

	// ---------------------------------------------------------------------
	// Kernel: BGRA_4444_32f -> BGRA_4444_32f_Linear
	//
	// One thread converts one pixel at x = ctaid.x*ntid.x + tid.x,
	// y = ctaid.y*ntid.y + tid.y. Threads failing (inWidth > x) && (inHeight > y)
	// go straight to exit without any global memory access.
	//
	// Parameters:
	//   inSrc, inDest             base addresses used with ld.global / st.global
	//   inSrcPitch, inDestPitch   row stride in pixels: the linear index is
	//                             x + pitch * y and is scaled by the pixel size later
	//   inSrcDeviceFormat,
	//   inDestDeviceFormat        0     -> 4 x f16 per pixel (8 bytes)
	//                             non-0 -> 4 x f32 per pixel (16 bytes)
	//   inWidth, inHeight         limits for the x / y guard (signed compare)
	//
	// Per pixel: components 0..2 are mapped through the sign-preserving curve
	//   c' = sign(c) * 2^(2.22222 * log2(|c|))
	// built from lg2.approx / ex2.approx; component 3 is copied unchanged.
	// The output keeps the input component order.
	// NOTE(review): presumably the components are B,G,R,A and the curve is a
	// gamma-to-linear decode, judging by the kernel name -- confirm in the CUDA source.
	// NOTE(review): the pixel index is formed with 32-bit mul.lo/add before being
	// sign-extended, so it could wrap for very large surfaces -- confirm size limits.
	// ---------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<36>;
	.reg .pred %p<8>;
	.loc	22	232	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel:
	// Thread coordinates: %r8 = x, %r10 = y.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: set.gt produces a mask (all-ones or 0), neg.s32 turns it into
	// 1 or 0, and the two results are ANDed; a zero result skips the whole body.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_114_194818;
	// Source pixel index %rd1 = x + inSrcPitch * y (32-bit math, then sign-extended).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel_inSrc];
	// inSrcDeviceFormat != 0 takes the f32 load path; 0 falls through to f16.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_114_195586;
	.loc	19	115	0
	// f16 source: 8 bytes per pixel, each half widened to f32 into %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	232	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_114_195330;
$Lt_114_195586:
	// f32 source: 16 bytes per pixel loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_114_195330:
	.loc	20	520	0
	// Component 2 (%f3) -> %f11: negative inputs are negated, curved, negated back.
	mov.f32 	%f5, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f3, %f5;
	@!%p3 bra 	$Lt_114_195842;
	.loc	20	372	0
	neg.ftz.f32 	%f6, %f3;
	lg2.approx.ftz.f32 	%f7, %f6;
	mov.f32 	%f8, 0f400e38e4;     	// 2.22222
	mul.ftz.f32 	%f9, %f7, %f8;
	ex2.approx.ftz.f32 	%f10, %f9;
	neg.ftz.f32 	%f11, %f10;
	bra.uni 	$LDWendi___log2f_291_35;
$Lt_114_195842:
	.loc	20	374	0
	// Non-negative input: %f11 = 2^(2.22222 * log2(%f3)).
	lg2.approx.ftz.f32 	%f12, %f3;
	mov.f32 	%f13, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f11, %f14;
$LDWendi___log2f_291_35:
	.loc	20	522	0
	// Component 1 (%f2) -> %f21, same sign-preserving curve.
	mov.f32 	%f15, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f2, %f15;
	@!%p4 bra 	$Lt_114_196354;
	.loc	20	372	0
	neg.ftz.f32 	%f16, %f2;
	lg2.approx.ftz.f32 	%f17, %f16;
	mov.f32 	%f18, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f20, %f19;
	neg.ftz.f32 	%f21, %f20;
	bra.uni 	$LDWendi___log2f_291_33;
$Lt_114_196354:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f22, %f2;
	mov.f32 	%f23, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f21, %f24;
$LDWendi___log2f_291_33:
	.loc	20	522	0
	// Component 0 (%f1) -> %f31, same sign-preserving curve.
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f1, %f25;
	@!%p5 bra 	$Lt_114_196866;
	.loc	20	372	0
	neg.ftz.f32 	%f26, %f1;
	lg2.approx.ftz.f32 	%f27, %f26;
	mov.f32 	%f28, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f30, %f29;
	neg.ftz.f32 	%f31, %f30;
	bra.uni 	$LDWendi___log2f_291_31;
$Lt_114_196866:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f32, %f1;
	mov.f32 	%f33, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f31, %f34;
$LDWendi___log2f_291_31:
	.loc	22	232	0
	// Destination pixel index %rd7 = x + inDestPitch * y (32-bit math, then sign-extended).
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel_inDest];
	// inDestDeviceFormat != 0 takes the f32 store path; 0 falls through to f16.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p6, %r31, %r32;
	@%p6 bra 	$Lt_114_197634;
	.loc	19	126	0
	// f16 destination: round each value to half (round-to-nearest) and store
	// 8 bytes in the order {curved c0, curved c1, curved c2, original c3}.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f31;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f21;
	mov.b32		%r34, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r35, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	232	0
	bra.uni 	$Lt_114_197378;
$Lt_114_197634:
	.loc	19	126	0
	// f32 destination: 16 bytes per pixel, same component order as the f16 path.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	st.global.v4.f32 	[%rd12+0], {%f31,%f21,%f11,%f4};
$Lt_114_197378:
$Lt_114_194818:
	.loc	22	232	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRA_4444_32f_Linear_Kernel

	// ---------------------------------------------------------------------
	// Kernel: BGRA_4444_32f -> BGRP_4444_32f_Linear
	//
	// One thread converts one pixel at x = ctaid.x*ntid.x + tid.x,
	// y = ctaid.y*ntid.y + tid.y. Threads failing (inWidth > x) && (inHeight > y)
	// go straight to exit without any global memory access.
	//
	// Parameters:
	//   inSrc, inDest             base addresses used with ld.global / st.global
	//   inSrcPitch, inDestPitch   row stride in pixels: the linear index is
	//                             x + pitch * y and is scaled by the pixel size later
	//   inSrcDeviceFormat,
	//   inDestDeviceFormat        0     -> 4 x f16 per pixel (8 bytes)
	//                             non-0 -> 4 x f32 per pixel (16 bytes)
	//   inWidth, inHeight         limits for the x / y guard (signed compare)
	//
	// Per pixel: components 0..2 are mapped through the sign-preserving curve
	//   c' = sign(c) * 2^(2.22222 * log2(|c|))
	// (lg2.approx / ex2.approx) and then multiplied by component 3; component 3
	// itself is stored unchanged. The output keeps the input component order.
	// NOTE(review): presumably the components are B,G,R,A, the curve is a
	// gamma-to-linear decode and the multiply is alpha premultiplication, judging
	// by the kernel name -- confirm in the CUDA source.
	// NOTE(review): the pixel index is formed with 32-bit mul.lo/add before being
	// sign-extended, so it could wrap for very large surfaces -- confirm size limits.
	// ---------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<39>;
	.reg .pred %p<8>;
	.loc	22	233	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel:
	// Thread coordinates: %r8 = x, %r10 = y.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: set.gt produces a mask (all-ones or 0), neg.s32 turns it into
	// 1 or 0, and the two results are ANDed; a zero result skips the whole body.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_115_195586;
	// Source pixel index %rd1 = x + inSrcPitch * y (32-bit math, then sign-extended).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel_inSrc];
	// inSrcDeviceFormat != 0 takes the f32 load path; 0 falls through to f16.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_115_196354;
	.loc	19	115	0
	// f16 source: 8 bytes per pixel, each half widened to f32 into %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	233	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_115_196098;
$Lt_115_196354:
	// f32 source: 16 bytes per pixel loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_115_196098:
	.loc	20	520	0
	// Component 2 (%f3) -> %f11: negative inputs are negated, curved, negated back.
	mov.f32 	%f5, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f3, %f5;
	@!%p3 bra 	$Lt_115_196610;
	.loc	20	372	0
	neg.ftz.f32 	%f6, %f3;
	lg2.approx.ftz.f32 	%f7, %f6;
	mov.f32 	%f8, 0f400e38e4;     	// 2.22222
	mul.ftz.f32 	%f9, %f7, %f8;
	ex2.approx.ftz.f32 	%f10, %f9;
	neg.ftz.f32 	%f11, %f10;
	bra.uni 	$LDWendi___log2f_292_35;
$Lt_115_196610:
	.loc	20	374	0
	// Non-negative input: %f11 = 2^(2.22222 * log2(%f3)).
	lg2.approx.ftz.f32 	%f12, %f3;
	mov.f32 	%f13, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f11, %f14;
$LDWendi___log2f_292_35:
	.loc	20	522	0
	// Component 1 (%f2) -> %f21, same sign-preserving curve.
	mov.f32 	%f15, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f2, %f15;
	@!%p4 bra 	$Lt_115_197122;
	.loc	20	372	0
	neg.ftz.f32 	%f16, %f2;
	lg2.approx.ftz.f32 	%f17, %f16;
	mov.f32 	%f18, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f20, %f19;
	neg.ftz.f32 	%f21, %f20;
	bra.uni 	$LDWendi___log2f_292_33;
$Lt_115_197122:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f22, %f2;
	mov.f32 	%f23, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f21, %f24;
$LDWendi___log2f_292_33:
	.loc	20	522	0
	// Component 0 (%f1) -> %f31, same sign-preserving curve.
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f1, %f25;
	@!%p5 bra 	$Lt_115_197634;
	.loc	20	372	0
	neg.ftz.f32 	%f26, %f1;
	lg2.approx.ftz.f32 	%f27, %f26;
	mov.f32 	%f28, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f30, %f29;
	neg.ftz.f32 	%f31, %f30;
	bra.uni 	$LDWendi___log2f_292_31;
$Lt_115_197634:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f32, %f1;
	mov.f32 	%f33, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f31, %f34;
$LDWendi___log2f_292_31:
	.loc	22	233	0
	// Scale the three curved components by the unmodified component 3 (%f4).
	mul.ftz.f32 	%f35, %f31, %f4;
	mul.ftz.f32 	%f36, %f21, %f4;
	mul.ftz.f32 	%f37, %f11, %f4;
	// Destination pixel index %rd7 = x + inDestPitch * y (32-bit math, then sign-extended).
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel_inDest];
	// inDestDeviceFormat != 0 takes the f32 store path; 0 falls through to f16.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p6, %r31, %r32;
	@%p6 bra 	$Lt_115_198402;
	.loc	19	126	0
	// f16 destination: round each value to half (round-to-nearest) and store
	// 8 bytes in the order {scaled c0, scaled c1, scaled c2, original c3}.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f35;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f36;
	mov.b32		%r34, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f37;
	mov.b32		%r35, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	233	0
	bra.uni 	$Lt_115_198146;
$Lt_115_198402:
	.loc	19	126	0
	// f32 destination: 16 bytes per pixel, same component order as the f16 path.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	st.global.v4.f32 	[%rd12+0], {%f35,%f36,%f37,%f4};
$Lt_115_198146:
$Lt_115_195586:
	.loc	22	233	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRP_4444_32f_Linear_Kernel

	// ---------------------------------------------------------------------
	// Kernel: BGRA_4444_32f -> BGRX_4444_32f_Linear
	//
	// One thread converts one pixel at x = ctaid.x*ntid.x + tid.x,
	// y = ctaid.y*ntid.y + tid.y. Threads failing (inWidth > x) && (inHeight > y)
	// go straight to exit without any global memory access.
	//
	// Parameters:
	//   inSrc, inDest             base addresses used with ld.global / st.global
	//   inSrcPitch, inDestPitch   row stride in pixels: the linear index is
	//                             x + pitch * y and is scaled by the pixel size later
	//   inSrcDeviceFormat,
	//   inDestDeviceFormat        0     -> 4 x f16 per pixel (8 bytes)
	//                             non-0 -> 4 x f32 per pixel (16 bytes)
	//   inWidth, inHeight         limits for the x / y guard (signed compare)
	//
	// Per pixel: components 0..2 are mapped through the sign-preserving curve
	//   c' = sign(c) * 2^(2.22222 * log2(|c|))
	// (lg2.approx / ex2.approx) and then multiplied by source component 3; the
	// stored component 3 is the constant 1.0. The output keeps the input
	// component order.
	// NOTE(review): presumably the components are B,G,R,A and the curve is a
	// gamma-to-linear decode, judging by the kernel name -- confirm in the CUDA source.
	// NOTE(review): the colour is scaled by the source's 4th component even though
	// the stored 4th component is forced to 1.0; presumably intentional (flattening
	// against black) -- confirm in the CUDA source.
	// NOTE(review): the pixel index is formed with 32-bit mul.lo/add before being
	// sign-extended, so it could wrap for very large surfaces -- confirm size limits.
	// ---------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<41>;
	.reg .pred %p<8>;
	.loc	22	234	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel:
	// Thread coordinates: %r8 = x, %r10 = y.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: set.gt produces a mask (all-ones or 0), neg.s32 turns it into
	// 1 or 0, and the two results are ANDed; a zero result skips the whole body.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_116_195330;
	// Source pixel index %rd1 = x + inSrcPitch * y (32-bit math, then sign-extended).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel_inSrc];
	// inSrcDeviceFormat != 0 takes the f32 load path; 0 falls through to f16.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_116_196098;
	.loc	19	115	0
	// f16 source: 8 bytes per pixel, each half widened to f32 into %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	234	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_116_195842;
$Lt_116_196098:
	// f32 source: 16 bytes per pixel loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_116_195842:
	.loc	20	520	0
	// Component 2 (%f3) -> %f11: negative inputs are negated, curved, negated back.
	mov.f32 	%f5, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f3, %f5;
	@!%p3 bra 	$Lt_116_196354;
	.loc	20	372	0
	neg.ftz.f32 	%f6, %f3;
	lg2.approx.ftz.f32 	%f7, %f6;
	mov.f32 	%f8, 0f400e38e4;     	// 2.22222
	mul.ftz.f32 	%f9, %f7, %f8;
	ex2.approx.ftz.f32 	%f10, %f9;
	neg.ftz.f32 	%f11, %f10;
	bra.uni 	$LDWendi___log2f_293_35;
$Lt_116_196354:
	.loc	20	374	0
	// Non-negative input: %f11 = 2^(2.22222 * log2(%f3)).
	lg2.approx.ftz.f32 	%f12, %f3;
	mov.f32 	%f13, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f11, %f14;
$LDWendi___log2f_293_35:
	.loc	20	522	0
	// Component 1 (%f2) -> %f21, same sign-preserving curve.
	mov.f32 	%f15, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f2, %f15;
	@!%p4 bra 	$Lt_116_196866;
	.loc	20	372	0
	neg.ftz.f32 	%f16, %f2;
	lg2.approx.ftz.f32 	%f17, %f16;
	mov.f32 	%f18, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f20, %f19;
	neg.ftz.f32 	%f21, %f20;
	bra.uni 	$LDWendi___log2f_293_33;
$Lt_116_196866:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f22, %f2;
	mov.f32 	%f23, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f21, %f24;
$LDWendi___log2f_293_33:
	.loc	20	522	0
	// Component 0 (%f1) -> %f31, same sign-preserving curve.
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f1, %f25;
	@!%p5 bra 	$Lt_116_197378;
	.loc	20	372	0
	neg.ftz.f32 	%f26, %f1;
	lg2.approx.ftz.f32 	%f27, %f26;
	mov.f32 	%f28, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f30, %f29;
	neg.ftz.f32 	%f31, %f30;
	bra.uni 	$LDWendi___log2f_293_31;
$Lt_116_197378:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f32, %f1;
	mov.f32 	%f33, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f31, %f34;
$LDWendi___log2f_293_31:
	.loc	22	234	0
	// Scale the three curved components by the source component 3 (%f4).
	mul.ftz.f32 	%f35, %f31, %f4;
	mul.ftz.f32 	%f36, %f21, %f4;
	mul.ftz.f32 	%f37, %f11, %f4;
	// Destination pixel index %rd7 = x + inDestPitch * y (32-bit math, then sign-extended).
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel_inDest];
	// inDestDeviceFormat != 0 takes the f32 store path; 0 falls through to f16.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p6, %r31, %r32;
	@%p6 bra 	$Lt_116_198146;
	.loc	19	126	0
	// f16 destination: round each value to half (round-to-nearest) and store
	// 8 bytes in the order {scaled c0, scaled c1, scaled c2, constant 1.0}.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f35;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f36;
	mov.b32		%r34, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f37;
	mov.b32		%r35, %b1; }
	// The 4th output component ignores %f4 and is always 1.0.
	mov.f32 	%f38, 0f3f800000;    	// 1
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f38;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	234	0
	bra.uni 	$Lt_116_197890;
$Lt_116_198146:
	.loc	19	126	0
	// f32 destination: 16 bytes per pixel, same component order, 4th component 1.0.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	mov.f32 	%f39, 0f3f800000;    	// 1
	st.global.v4.f32 	[%rd12+0], {%f35,%f36,%f37,%f39};
$Lt_116_197890:
$Lt_116_195330:
	.loc	22	234	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_BGRX_4444_32f_Linear_Kernel

	// ---------------------------------------------------------------------
	// Kernel: BGRA_4444_32f -> ARGB_4444_32f_Linear
	//
	// One thread converts one pixel at x = ctaid.x*ntid.x + tid.x,
	// y = ctaid.y*ntid.y + tid.y. Threads failing (inWidth > x) && (inHeight > y)
	// go straight to exit without any global memory access.
	//
	// Parameters:
	//   inSrc, inDest             base addresses used with ld.global / st.global
	//   inSrcPitch, inDestPitch   row stride in pixels: the linear index is
	//                             x + pitch * y and is scaled by the pixel size later
	//   inSrcDeviceFormat,
	//   inDestDeviceFormat        0     -> 4 x f16 per pixel (8 bytes)
	//                             non-0 -> 4 x f32 per pixel (16 bytes)
	//   inWidth, inHeight         limits for the x / y guard (signed compare)
	//
	// Per pixel: components 0..2 are mapped through the sign-preserving curve
	//   c' = sign(c) * 2^(2.22222 * log2(|c|))
	// built from lg2.approx / ex2.approx; component 3 is copied unchanged.
	// The output reverses the component order: {c3, c2', c1', c0'}.
	// NOTE(review): presumably the components are B,G,R,A and the curve is a
	// gamma-to-linear decode, judging by the kernel name -- confirm in the CUDA source.
	// NOTE(review): the pixel index is formed with 32-bit mul.lo/add before being
	// sign-extended, so it could wrap for very large surfaces -- confirm size limits.
	// ---------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<36>;
	.reg .pred %p<8>;
	.loc	22	235	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel:
	// Thread coordinates: %r8 = x, %r10 = y.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: set.gt produces a mask (all-ones or 0), neg.s32 turns it into
	// 1 or 0, and the two results are ANDed; a zero result skips the whole body.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_117_195074;
	// Source pixel index %rd1 = x + inSrcPitch * y (32-bit math, then sign-extended).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel_inSrc];
	// inSrcDeviceFormat != 0 takes the f32 load path; 0 falls through to f16.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_117_195842;
	.loc	19	115	0
	// f16 source: 8 bytes per pixel, each half widened to f32 into %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	235	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_117_195586;
$Lt_117_195842:
	// f32 source: 16 bytes per pixel loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_117_195586:
	.loc	20	520	0
	// Component 2 (%f3) -> %f11: negative inputs are negated, curved, negated back.
	mov.f32 	%f5, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f3, %f5;
	@!%p3 bra 	$Lt_117_196098;
	.loc	20	372	0
	neg.ftz.f32 	%f6, %f3;
	lg2.approx.ftz.f32 	%f7, %f6;
	mov.f32 	%f8, 0f400e38e4;     	// 2.22222
	mul.ftz.f32 	%f9, %f7, %f8;
	ex2.approx.ftz.f32 	%f10, %f9;
	neg.ftz.f32 	%f11, %f10;
	bra.uni 	$LDWendi___log2f_294_35;
$Lt_117_196098:
	.loc	20	374	0
	// Non-negative input: %f11 = 2^(2.22222 * log2(%f3)).
	lg2.approx.ftz.f32 	%f12, %f3;
	mov.f32 	%f13, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f11, %f14;
$LDWendi___log2f_294_35:
	.loc	20	522	0
	// Component 1 (%f2) -> %f21, same sign-preserving curve.
	mov.f32 	%f15, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f2, %f15;
	@!%p4 bra 	$Lt_117_196610;
	.loc	20	372	0
	neg.ftz.f32 	%f16, %f2;
	lg2.approx.ftz.f32 	%f17, %f16;
	mov.f32 	%f18, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f20, %f19;
	neg.ftz.f32 	%f21, %f20;
	bra.uni 	$LDWendi___log2f_294_33;
$Lt_117_196610:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f22, %f2;
	mov.f32 	%f23, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f21, %f24;
$LDWendi___log2f_294_33:
	.loc	20	522	0
	// Component 0 (%f1) -> %f31, same sign-preserving curve.
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f1, %f25;
	@!%p5 bra 	$Lt_117_197122;
	.loc	20	372	0
	neg.ftz.f32 	%f26, %f1;
	lg2.approx.ftz.f32 	%f27, %f26;
	mov.f32 	%f28, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f30, %f29;
	neg.ftz.f32 	%f31, %f30;
	bra.uni 	$LDWendi___log2f_294_31;
$Lt_117_197122:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f32, %f1;
	mov.f32 	%f33, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f31, %f34;
$LDWendi___log2f_294_31:
	.loc	22	235	0
	// Destination pixel index %rd7 = x + inDestPitch * y (32-bit math, then sign-extended).
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel_inDest];
	// inDestDeviceFormat != 0 takes the f32 store path; 0 falls through to f16.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p6, %r31, %r32;
	@%p6 bra 	$Lt_117_197890;
	.loc	19	126	0
	// f16 destination: round each value to half (round-to-nearest) and store
	// 8 bytes in reversed order {original c3, curved c2, curved c1, curved c0}.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f11;
	mov.b32		%r34, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f21;
	mov.b32		%r35, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f31;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	235	0
	bra.uni 	$Lt_117_197634;
$Lt_117_197890:
	.loc	19	126	0
	// f32 destination: 16 bytes per pixel, same reversed order as the f16 path.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	st.global.v4.f32 	[%rd12+0], {%f4,%f11,%f21,%f31};
$Lt_117_197634:
$Lt_117_195074:
	.loc	22	235	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_ARGB_4444_32f_Linear_Kernel

	// ---------------------------------------------------------------------
	// Kernel: BGRA_4444_32f -> PRGB_4444_32f_Linear
	//
	// One thread converts one pixel at x = ctaid.x*ntid.x + tid.x,
	// y = ctaid.y*ntid.y + tid.y. Threads failing (inWidth > x) && (inHeight > y)
	// go straight to exit without any global memory access.
	//
	// Parameters:
	//   inSrc, inDest             base addresses used with ld.global / st.global
	//   inSrcPitch, inDestPitch   row stride in pixels: the linear index is
	//                             x + pitch * y and is scaled by the pixel size later
	//   inSrcDeviceFormat,
	//   inDestDeviceFormat        0     -> 4 x f16 per pixel (8 bytes)
	//                             non-0 -> 4 x f32 per pixel (16 bytes)
	//   inWidth, inHeight         limits for the x / y guard (signed compare)
	//
	// Per pixel: components 0..2 are mapped through the sign-preserving curve
	//   c' = sign(c) * 2^(2.22222 * log2(|c|))
	// (lg2.approx / ex2.approx) and then multiplied by component 3; component 3
	// itself is stored unchanged. The output reverses the component order:
	// {c3, c2'*c3, c1'*c3, c0'*c3}.
	// NOTE(review): presumably the components are B,G,R,A, the curve is a
	// gamma-to-linear decode and the multiply is alpha premultiplication, judging
	// by the kernel name -- confirm in the CUDA source.
	// NOTE(review): the pixel index is formed with 32-bit mul.lo/add before being
	// sign-extended, so it could wrap for very large surfaces -- confirm size limits.
	// ---------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<39>;
	.reg .pred %p<8>;
	.loc	22	236	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel:
	// Thread coordinates: %r8 = x, %r10 = y.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard: set.gt produces a mask (all-ones or 0), neg.s32 turns it into
	// 1 or 0, and the two results are ANDed; a zero result skips the whole body.
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_118_195842;
	// Source pixel index %rd1 = x + inSrcPitch * y (32-bit math, then sign-extended).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel_inSrc];
	// inSrcDeviceFormat != 0 takes the f32 load path; 0 falls through to f16.
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	@%p2 bra 	$Lt_118_196610;
	.loc	19	115	0
	// f16 source: 8 bytes per pixel, each half widened to f32 into %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	236	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_118_196354;
$Lt_118_196610:
	// f32 source: 16 bytes per pixel loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_118_196354:
	.loc	20	520	0
	// Component 2 (%f3) -> %f11: negative inputs are negated, curved, negated back.
	mov.f32 	%f5, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f3, %f5;
	@!%p3 bra 	$Lt_118_196866;
	.loc	20	372	0
	neg.ftz.f32 	%f6, %f3;
	lg2.approx.ftz.f32 	%f7, %f6;
	mov.f32 	%f8, 0f400e38e4;     	// 2.22222
	mul.ftz.f32 	%f9, %f7, %f8;
	ex2.approx.ftz.f32 	%f10, %f9;
	neg.ftz.f32 	%f11, %f10;
	bra.uni 	$LDWendi___log2f_295_35;
$Lt_118_196866:
	.loc	20	374	0
	// Non-negative input: %f11 = 2^(2.22222 * log2(%f3)).
	lg2.approx.ftz.f32 	%f12, %f3;
	mov.f32 	%f13, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f11, %f14;
$LDWendi___log2f_295_35:
	.loc	20	522	0
	// Component 1 (%f2) -> %f21, same sign-preserving curve.
	mov.f32 	%f15, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f2, %f15;
	@!%p4 bra 	$Lt_118_197378;
	.loc	20	372	0
	neg.ftz.f32 	%f16, %f2;
	lg2.approx.ftz.f32 	%f17, %f16;
	mov.f32 	%f18, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f20, %f19;
	neg.ftz.f32 	%f21, %f20;
	bra.uni 	$LDWendi___log2f_295_33;
$Lt_118_197378:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f22, %f2;
	mov.f32 	%f23, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f21, %f24;
$LDWendi___log2f_295_33:
	.loc	20	522	0
	// Component 0 (%f1) -> %f31, same sign-preserving curve.
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f1, %f25;
	@!%p5 bra 	$Lt_118_197890;
	.loc	20	372	0
	neg.ftz.f32 	%f26, %f1;
	lg2.approx.ftz.f32 	%f27, %f26;
	mov.f32 	%f28, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f30, %f29;
	neg.ftz.f32 	%f31, %f30;
	bra.uni 	$LDWendi___log2f_295_31;
$Lt_118_197890:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f32, %f1;
	mov.f32 	%f33, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f31, %f34;
$LDWendi___log2f_295_31:
	.loc	22	236	0
	// Scale the curved components by the unmodified component 3 (%f4), already
	// in output order: %f35 from c2, %f36 from c1, %f37 from c0.
	mul.ftz.f32 	%f35, %f11, %f4;
	mul.ftz.f32 	%f36, %f21, %f4;
	mul.ftz.f32 	%f37, %f31, %f4;
	// Destination pixel index %rd7 = x + inDestPitch * y (32-bit math, then sign-extended).
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel_inDest];
	// inDestDeviceFormat != 0 takes the f32 store path; 0 falls through to f16.
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p6, %r31, %r32;
	@%p6 bra 	$Lt_118_198658;
	.loc	19	126	0
	// f16 destination: round each value to half (round-to-nearest) and store
	// 8 bytes in reversed order {original c3, scaled c2, scaled c1, scaled c0}.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f35;
	mov.b32		%r34, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f36;
	mov.b32		%r35, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f37;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	236	0
	bra.uni 	$Lt_118_198402;
$Lt_118_198658:
	.loc	19	126	0
	// f32 destination: 16 bytes per pixel, same reversed order as the f16 path.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	st.global.v4.f32 	[%rd12+0], {%f4,%f35,%f36,%f37};
$Lt_118_198402:
$Lt_118_195842:
	.loc	22	236	0
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_PRGB_4444_32f_Linear_Kernel

	// -------------------------------------------------------------------------
	// Kernel entry: one thread converts one 4-component pixel.
	//
	// Parameters:
	//   inSrc              base address of the source image (global memory)
	//   inSrcPitch         source row stride, in pixels (the element index is
	//                      scaled by the pixel size afterwards)
	//   inSrcDeviceFormat  0     -> source pixels are 4 x 16-bit half floats (8 bytes)
	//                      non-0 -> source pixels are 4 x f32 (16 bytes)
	//   inDest             base address of the destination image (global memory)
	//   inDestPitch        destination row stride, in pixels
	//   inDestDeviceFormat 0     -> destination pixels are 4 x half (8 bytes)
	//                      non-0 -> destination pixels are 4 x f32 (16 bytes)
	//   inWidth, inHeight  bounds; threads with x >= inWidth or y >= inHeight
	//                      (signed compare) exit without touching memory
	//
	// Per pixel, with source components c0..c3 in memory order
	// (presumably B, G, R, A per the kernel name -- TODO confirm):
	//   g(c)  = sign(c) * 2^(2.22222 * log2(|c|))    // approximate, flush-to-zero
	//   dest  = { 1.0, g(c2)*c3, g(c1)*c3, g(c0)*c3 }
	// -------------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel_inHeight)
	{
	.reg .u32 %r<38>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<41>;
	.reg .pred %p<8>;
	.loc	22	237	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel:
	// Thread coordinates: %r8 = x = ctaid.x*ntid.x + tid.x,
	//                     %r10 = y = ctaid.y*ntid.y + tid.y.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check: set.gt yields an all-ones mask when true, neg turns that
	// into 1; the AND is non-zero only if (inWidth > x) && (inHeight > y).
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range threads jump straight to exit.
	@%p1 bra 	$Lt_119_195586;
	// %rd1 = source pixel index = x + inSrcPitch * y (sign-extended to 64 bit).
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd1, %r21;
	ld.param.u64 	%rd2, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel_inSrc];
	ld.param.s32 	%r22, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel_inSrcDeviceFormat];
	mov.u32 	%r23, 0;
	setp.ne.s32 	%p2, %r22, %r23;
	// inSrcDeviceFormat != 0 -> take the f32 load path below.
	@%p2 bra 	$Lt_119_196354;
	.loc	19	115	0
	// Half-float source: 8 bytes per pixel, loaded as four 16-bit values.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	237	0
	// Widen each f16 component to f32 (%f1..%f4 = c0..c3).
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_119_196098;
$Lt_119_196354:
	// f32 source: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_119_196098:
	.loc	20	520	0
	// --- Component c2 (%f3) -> %f11 = sign(c2) * 2^(2.22222 * log2(|c2|)) ---
	// The exponent 2.22222 is presumably 1/0.45 (gamma decode to linear, per
	// the "_Linear_" kernel name) -- TODO confirm against the CUDA source.
	mov.f32 	%f5, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f3, %f5;
	// Not negative (or unordered): use the direct path at $Lt_119_196610.
	@!%p3 bra 	$Lt_119_196610;
	.loc	20	372	0
	// Negative input: apply the power to -c2 and negate the result.
	neg.ftz.f32 	%f6, %f3;
	lg2.approx.ftz.f32 	%f7, %f6;
	mov.f32 	%f8, 0f400e38e4;     	// 2.22222
	mul.ftz.f32 	%f9, %f7, %f8;
	ex2.approx.ftz.f32 	%f10, %f9;
	neg.ftz.f32 	%f11, %f10;
	bra.uni 	$LDWendi___log2f_296_35;
$Lt_119_196610:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f12, %f3;
	mov.f32 	%f13, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f11, %f14;
$LDWendi___log2f_296_35:
	.loc	20	522	0
	// --- Component c1 (%f2) -> %f21, same sign-preserving power ---
	mov.f32 	%f15, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f2, %f15;
	@!%p4 bra 	$Lt_119_197122;
	.loc	20	372	0
	neg.ftz.f32 	%f16, %f2;
	lg2.approx.ftz.f32 	%f17, %f16;
	mov.f32 	%f18, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f20, %f19;
	neg.ftz.f32 	%f21, %f20;
	bra.uni 	$LDWendi___log2f_296_33;
$Lt_119_197122:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f22, %f2;
	mov.f32 	%f23, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f21, %f24;
$LDWendi___log2f_296_33:
	.loc	20	522	0
	// --- Component c0 (%f1) -> %f31, same sign-preserving power ---
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f1, %f25;
	@!%p5 bra 	$Lt_119_197634;
	.loc	20	372	0
	neg.ftz.f32 	%f26, %f1;
	lg2.approx.ftz.f32 	%f27, %f26;
	mov.f32 	%f28, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f30, %f29;
	neg.ftz.f32 	%f31, %f30;
	bra.uni 	$LDWendi___log2f_296_31;
$Lt_119_197634:
	.loc	20	374	0
	lg2.approx.ftz.f32 	%f32, %f1;
	mov.f32 	%f33, 0f400e38e4;    	// 2.22222
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f31, %f34;
$LDWendi___log2f_296_31:
	.loc	22	237	0
	// Scale each converted component by the unmodified fourth source
	// component c3 (%f4); c3 itself is not written to the destination.
	mul.ftz.f32 	%f35, %f11, %f4;
	mul.ftz.f32 	%f36, %f21, %f4;
	mul.ftz.f32 	%f37, %f31, %f4;
	// %rd7 = destination pixel index = x + inDestPitch * y.
	ld.param.s32 	%r28, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel_inDestPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel_inDest];
	ld.param.s32 	%r31, [__cudaparm_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel_inDestDeviceFormat];
	mov.u32 	%r32, 0;
	setp.ne.s32 	%p6, %r31, %r32;
	// inDestDeviceFormat != 0 -> take the f32 store path below.
	@%p6 bra 	$Lt_119_198402;
	.loc	19	126	0
	// Half-float destination: 8 bytes per pixel. The first stored component
	// is the constant 1.0, followed by the three scaled components, each
	// rounded to nearest f16.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	mov.f32 	%f38, 0f3f800000;    	// 1
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f38;
	mov.b32		%r33, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f35;
	mov.b32		%r34, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f36;
	mov.b32		%r35, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f37;
	mov.b32		%r36, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r33,%r34,%r35,%r36};
	.loc	22	237	0
	bra.uni 	$Lt_119_198146;
$Lt_119_198402:
	.loc	19	126	0
	// f32 destination: 16 bytes per pixel, stored as {1.0, %f35, %f36, %f37}.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	mov.f32 	%f39, 0f3f800000;    	// 1
	st.global.v4.f32 	[%rd12+0], {%f39,%f35,%f36,%f37};
$Lt_119_198146:
$Lt_119_195586:
	.loc	22	237	0
	// Common exit for both store paths and for out-of-range threads.
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_BGRA_4444_32f_To_IR_PixelFormat_XRGB_4444_32f_Linear_Kernel

	// -------------------------------------------------------------------------
	// Kernel entry: one thread converts one 4 x 8-bit pixel.
	//
	// Parameters:
	//   inSrc / inDest            base addresses of source / destination images
	//   inSrcPitch / inDestPitch  row strides in pixels (index is scaled by 4 bytes)
	//   inSrcDeviceFormat,
	//   inDestDeviceFormat        declared but never read by this kernel
	//   inWidth, inHeight         bounds; threads with x >= inWidth or
	//                             y >= inHeight (signed compare) exit immediately
	//
	// Per pixel, with source bytes b0..b3 in memory order (presumably V, U, Y, A
	// per the kernel name -- TODO confirm), O = kYCbCrOffset (f32 entries in
	// .const space) and M = k601YCbCr_To_RGB8u (f32 entries read with ld.global),
	// both defined outside this section:
	//   s  = 255/255 (approximate division)
	//   p  = b2 - s*O[0];  q = b1 - s*O[1];  r = b0 - s*O[2]
	//   out0 = trunc(clamp(M[6]*p + M[7]*q + M[8]*r + 0.5, 0, 255))
	//   out1 = trunc(clamp(M[3]*p + M[4]*q + M[5]*r + 0.5, 0, 255))
	//   out2 = trunc(clamp(M[0]*p + M[1]*q + M[2]*r + 0.5, 0, 255))
	//   out3 = trunc(clamp(b3 + 0.5, 0, 255))
	// -------------------------------------------------------------------------
	.entry PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel (
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inSrc,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inSrcPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inSrcDeviceFormat,
		.param .u64 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inDest,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inDestPitch,
		.param .u32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inDestDeviceFormat,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inWidth,
		.param .s32 __cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inHeight)
	{
	.reg .u32 %r<34>;
	.reg .u64 %rd<10>;
	.reg .f32 %f<64>;
	.reg .pred %p<7>;
	.loc	22	242	0
$LDWbegin_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel:
	// Thread coordinates: %r8 = x = ctaid.x*ntid.x + tid.x,
	//                     %r10 = y = ctaid.y*ntid.y + tid.y.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check: set.gt yields an all-ones mask when true, neg turns that
	// into 1; the AND is non-zero only if (inWidth > x) && (inHeight > y).
	ld.param.s32 	%r11, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range threads jump straight to exit.
	@%p1 bra 	$Lt_120_194050;
	.loc	19	115	0
	// Source address = inSrc + 4 * (x + inSrcPitch * y); load bytes b0..b3
	// into %r22..%r25. %rd2 is computed but not used afterwards.
	ld.param.u64 	%rd1, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inSrc];
	ld.param.s32 	%r19, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inSrcPitch];
	mul.lo.s32 	%r20, %r19, %r10;
	add.s32 	%r21, %r8, %r20;
	cvt.s64.s32 	%rd2, %r21;
	mul.wide.s32 	%rd3, %r21, 4;
	add.u64 	%rd4, %rd1, %rd3;
	ld.global.v4.u8 	{%r22,%r23,%r24,%r25}, [%rd4+0];
	.loc	22	73	0
	// Convert b1, b2, b0 to float and subtract the scaled offsets:
	//   %f9  = q = b1 - s*O[1]
	//   %f12 = p = b2 - s*O[0]
	//   %f15 = r = b0 - s*O[2]
	// where %f4 = s = 255/255 computed with an approximate divide.
	cvt.rn.f32.u32 	%f1, %r23;
	mov.f32 	%f2, 0f437f0000;     	// 255
	mov.f32 	%f3, 0f437f0000;     	// 255
	div.approx.ftz.f32 	%f4, %f2, %f3;
	cvt.rn.f32.u32 	%f5, %r24;
	cvt.rn.f32.u32 	%f6, %r22;
	ld.const.f32 	%f7, [kYCbCrOffset+4];
	mul.ftz.f32 	%f8, %f4, %f7;
	sub.ftz.f32 	%f9, %f1, %f8;
	ld.const.f32 	%f10, [kYCbCrOffset+0];
	mul.ftz.f32 	%f11, %f4, %f10;
	sub.ftz.f32 	%f12, %f5, %f11;
	ld.const.f32 	%f13, [kYCbCrOffset+8];
	mul.ftz.f32 	%f14, %f4, %f13;
	sub.ftz.f32 	%f15, %f6, %f14;
	// out0 (%r26): M[6]*p + M[7]*q + M[8]*r (byte offsets 24/28/32), then
	// +0.5, clamp to [0, 255], truncate toward zero.
	ld.global.f32 	%f16, [k601YCbCr_To_RGB8u+28];
	mul.ftz.f32 	%f17, %f16, %f9;
	ld.global.f32 	%f18, [k601YCbCr_To_RGB8u+24];
	fma.rn.ftz.f32 	%f19, %f18, %f12, %f17;
	ld.global.f32 	%f20, [k601YCbCr_To_RGB8u+32];
	fma.rn.ftz.f32 	%f21, %f20, %f15, %f19;
	mov.f32 	%f22, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f23, %f21, %f22;
	mov.f32 	%f24, 0f00000000;    	// 0
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p2, %f23, %f25;
	selp.f32 	%f26, %f23, %f24, %p2;
	mov.f32 	%f27, 0f437f0000;    	// 255
	min.ftz.f32 	%f28, %f26, %f27;
	cvt.rzi.ftz.u32.f32 	%r26, %f28;
	// out1 (%r27): M[3]*p + M[4]*q + M[5]*r (byte offsets 12/16/20), same
	// round/clamp/truncate sequence.
	ld.global.f32 	%f29, [k601YCbCr_To_RGB8u+16];
	mul.ftz.f32 	%f30, %f29, %f9;
	ld.global.f32 	%f31, [k601YCbCr_To_RGB8u+12];
	fma.rn.ftz.f32 	%f32, %f31, %f12, %f30;
	ld.global.f32 	%f33, [k601YCbCr_To_RGB8u+20];
	fma.rn.ftz.f32 	%f34, %f33, %f15, %f32;
	mov.f32 	%f35, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f36, %f34, %f35;
	mov.f32 	%f37, 0f00000000;    	// 0
	mov.f32 	%f38, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p3, %f36, %f38;
	selp.f32 	%f39, %f36, %f37, %p3;
	mov.f32 	%f40, 0f437f0000;    	// 255
	min.ftz.f32 	%f41, %f39, %f40;
	cvt.rzi.ftz.u32.f32 	%r27, %f41;
	// out2 (%r28): M[0]*p + M[1]*q + M[2]*r (byte offsets 0/4/8), same
	// round/clamp/truncate sequence.
	ld.global.f32 	%f42, [k601YCbCr_To_RGB8u+4];
	mul.ftz.f32 	%f43, %f42, %f9;
	ld.global.f32 	%f44, [k601YCbCr_To_RGB8u+0];
	fma.rn.ftz.f32 	%f45, %f44, %f12, %f43;
	ld.global.f32 	%f46, [k601YCbCr_To_RGB8u+8];
	fma.rn.ftz.f32 	%f47, %f46, %f15, %f45;
	mov.f32 	%f48, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f49, %f47, %f48;
	mov.f32 	%f50, 0f00000000;    	// 0
	mov.f32 	%f51, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p4, %f49, %f51;
	selp.f32 	%f52, %f49, %f50, %p4;
	mov.f32 	%f53, 0f437f0000;    	// 255
	min.ftz.f32 	%f54, %f52, %f53;
	cvt.rzi.ftz.u32.f32 	%r28, %f54;
	.loc	19	126	0
	// Destination address = inDest + 4 * (x + inDestPitch * y).
	// %rd6 is computed but not used afterwards.
	ld.param.u64 	%rd5, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inDest];
	ld.param.s32 	%r29, [__cudaparm_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel_inDestPitch];
	mul.lo.s32 	%r30, %r29, %r10;
	add.s32 	%r31, %r8, %r30;
	cvt.s64.s32 	%rd6, %r31;
	mul.wide.s32 	%rd7, %r31, 4;
	add.u64 	%rd8, %rd5, %rd7;
	// out3 (%r32): b3 goes through the same +0.5 / clamp / truncate sequence.
	// Because b3 is an integer in 0..255, the result equals b3.
	cvt.rn.f32.u32 	%f55, %r25;
	mov.f32 	%f56, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f57, %f55, %f56;
	mov.f32 	%f58, 0f00000000;    	// 0
	mov.f32 	%f59, 0f00000000;    	// 0
	setp.gt.ftz.f32 	%p5, %f57, %f59;
	selp.f32 	%f60, %f57, %f58, %p5;
	mov.f32 	%f61, 0f437f0000;    	// 255
	min.ftz.f32 	%f62, %f60, %f61;
	cvt.rzi.ftz.u32.f32 	%r32, %f62;
	// Store the four result bytes in the order out0, out1, out2, out3
	// (presumably B, G, R, A per the kernel name -- TODO confirm).
	st.global.v4.u8 	[%rd8+0], {%r26,%r27,%r28,%r32};
$Lt_120_194050:
	.loc	22	242	0
	// Common exit for converted and out-of-range threads.
	exit;
$LDWend_PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel:
	} // PixelFormatConvert_IR_PixelFormat_VUYA_4444_8u_To_IR_PixelFormat_BGRA_4444_8u_Kernel
	// kRGB32f_To_YIQ: 36 bytes in constant memory. Read as nine little-endian
	// IEEE-754 binary32 values they are approximately:
	//    0.2990   0.5870   0.1140
	//    0.5957  -0.2745  -0.3213
	//    0.2115  -0.5226   0.3111
	// NOTE(review): the 3x3 row-major layout is assumed from the name and the
	// values; the code that reads this table is not visible in this section.
	.const .align 4 .b8 kRGB32f_To_YIQ[36] = {135,22,153,62,162,69,22,63,213,120,233,61,216,128,24,63,27,133,140,190,149,124,164,190,236,135,88,62,134,200,5,191,22,77,159,62};
	// kYIQ_To_RGB32f: 36 bytes in constant memory. Read as nine little-endian
	// IEEE-754 binary32 values they are approximately:
	//    1.0000   0.9563   0.6210
	//    1.0000  -0.2721  -0.6474
	//    1.0000  -1.1070   1.7046
	// NOTE(review): the 3x3 row-major layout is assumed from the name and the
	// values; the code that reads this table is not visible in this section.
	.const .align 4 .b8 kYIQ_To_RGB32f[36] = {0,0,128,63,20,208,116,63,219,249,30,63,0,0,128,63,177,80,139,190,2,188,37,191,0,0,128,63,45,178,141,191,85,48,218,63};

