Win32基于ReFS实现 Copy On Write

ReFS相对NTFS来说,有一项很重要的技术:Block Clone(块克隆)。

块克隆(Block Clone)指示文件系统代表应用程序复制一段文件字节,目标文件可能与源文件相同,也可能不同。不幸的是,传统的复制操作成本高昂,因为它们会触发对底层物理数据的昂贵读写操作。
然而,在ReFS中,块克隆执行的是低成本元数据操作,而不是读取和写入文件数据。因为ReFS允许多个文件共享相同的逻辑簇(卷上的物理位置),复制操作只需将文件的一个区域重新映射到单独的物理位置,将昂贵的物理操作转换为快速的逻辑操作。这使得复制操作能够更快完成,并且对底层存储的I/O操作更少。这项改进也惠及了虚拟化工作负载,因为在使用块克隆操作时,.vhdx检查点合并操作的速度显著提升。此外,由于多个文件可以共享相同的逻辑簇,相同的数据不会多次物理存储,从而提高了存储容量。

根据 Microsoft 文档所述,Block Clone允许多个文件共享同一个物理区块,从而减少复制文件时占用的额外资源。因此我们可以借助这项技术来实现类似Btrfs的COW(Copy On Write)技术。

FILE_SUPPORTS_BLOCK_REFCOUNTING 标志(由 GetVolumeInformationByHandleW 返回的文件系统标志给出)可以判断文件所在的卷是否支持Block Clone。

参考代码:https://github.com/0xbadfca11/reflink/blob/master/reflink.cpp

实现代码

#include <atlbase.h>
#include <windows.h>
#include <winioctl.h>
#include <crtdbg.h>
#include <CLocale>


// Rounds a non-negative file_size up to the next multiple of cluster_size
// (a value that is already a multiple is returned unchanged).
// cluster_size is used as a divisor and therefore must be non-zero.
inline constexpr LONG64 ROUNDUP(LONG64 file_size, ULONG cluster_size) noexcept
{
	return ((file_size + cluster_size - 1) / cluster_size) * cluster_size;
}


// Turns hDst into a block-cloned copy of hSrc (same size, integrity settings,
// sparse state and basic attributes) using FSCTL_DUPLICATE_EXTENTS_TO_FILE.
//
// Parameters:
//   hSrc - handle to the source file; it is queried for volume flags, size,
//          basic info and integrity info, and is the clone source.
//   hDst - handle to the destination file; it must allow the disposition,
//          size, attribute and FSCTL changes issued below.
//
// Returns TRUE on success. On failure returns FALSE; the last-error code is
// ERROR_NOT_CAPABLE when the volume does not report
// FILE_SUPPORTS_BLOCK_REFCOUNTING, otherwise whatever the failing API set.
//
// While the clone is in progress hDst carries the FileDispositionInfo delete
// flag; the flag is cleared only as the very last step, so a FALSE return
// leaves the destination still marked for deletion.
BOOL CreateForkW(HANDLE hSrc, HANDLE hDst)
{
	// The volume must advertise block reference counting (block clone).
	DWORD fs_flags;
	if (!GetVolumeInformationByHandleW(hSrc, NULL, 0, NULL, NULL, &fs_flags, NULL, 0))
	{
		return FALSE;
	}
	if (!(fs_flags & FILE_SUPPORTS_BLOCK_REFCOUNTING))
	{
		SetLastError(ERROR_NOT_CAPABLE);
		return FALSE;
	}

	// Collect everything that has to be replicated from the source.
	FILE_END_OF_FILE_INFO file_size;
	if (!GetFileSizeEx(hSrc, &file_size.EndOfFile))
	{
		return FALSE;
	}

	FILE_BASIC_INFO file_basic;
	if (!GetFileInformationByHandleEx(hSrc, FileBasicInfo, &file_basic, sizeof file_basic))
	{
		return FALSE;
	}

	// DeviceIoControl requires a valid lpBytesReturned whenever lpOverlapped
	// is NULL, so every synchronous call below receives this variable.
	DWORD bytes_returned;
	FSCTL_GET_INTEGRITY_INFORMATION_BUFFER get_integrity;
	if (!DeviceIoControl(hSrc, FSCTL_GET_INTEGRITY_INFORMATION, nullptr, 0, &get_integrity, sizeof get_integrity, &bytes_returned, nullptr))
	{
		return FALSE;
	}

	// Mark the destination for deletion until the clone has fully succeeded.
	FILE_DISPOSITION_INFO dispos = { TRUE };
	if (!SetFileInformationByHandle(hDst, FileDispositionInfo, &dispos, sizeof dispos))
	{
		return FALSE;
	}

	// Make the destination sparse before its size is set
	// (presumably so that extending EOF does not allocate clusters).
	if (!DeviceIoControl(hDst, FSCTL_SET_SPARSE, NULL, 0, NULL, 0, &bytes_returned, NULL))
	{
		return FALSE;
	}

	// Give the destination the same integrity settings as the source.
	FSCTL_SET_INTEGRITY_INFORMATION_BUFFER set_integrity = { get_integrity.ChecksumAlgorithm, get_integrity.Reserved, get_integrity.Flags };
	if (!DeviceIoControl(hDst, FSCTL_SET_INTEGRITY_INFORMATION, &set_integrity, sizeof set_integrity, nullptr, 0, &bytes_returned, nullptr))
	{
		return FALSE;
	}
	if (!SetFileInformationByHandle(hDst, FileEndOfFileInfo, &file_size, sizeof file_size))
	{
		return FALSE;
	}

	// Clone in chunks of (4 GiB - one cluster): each request stays
	// cluster-aligned and its byte count fits in 32 bits (asserted below).
	const LONG64 split_threshold = (1LL << 32) - get_integrity.ClusterSizeInBytes;

	DUPLICATE_EXTENTS_DATA dup_extent;
	dup_extent.FileHandle = hSrc;
	for (LONG64 offset = 0, remain = ROUNDUP(file_size.EndOfFile.QuadPart, get_integrity.ClusterSizeInBytes); remain > 0; offset += split_threshold, remain -= split_threshold)
	{
		// Source and target ranges are identical: the file is cloned 1:1.
		dup_extent.SourceFileOffset.QuadPart = dup_extent.TargetFileOffset.QuadPart = offset;
		dup_extent.ByteCount.QuadPart = min(split_threshold, remain);
		_ASSERTE(dup_extent.SourceFileOffset.QuadPart % get_integrity.ClusterSizeInBytes == 0);
		_ASSERTE(dup_extent.ByteCount.QuadPart % get_integrity.ClusterSizeInBytes == 0);
		_ASSERTE(dup_extent.ByteCount.QuadPart <= UINT32_MAX);
		_RPT3(_CRT_WARN, "Remain=%llx\nOffset=%llx\nLength=%llx\n\n", remain, dup_extent.SourceFileOffset.QuadPart, dup_extent.ByteCount.QuadPart);
		if (!DeviceIoControl(hDst, FSCTL_DUPLICATE_EXTENTS_TO_FILE, &dup_extent, sizeof dup_extent, nullptr, 0, &bytes_returned, nullptr))
		{
			_CrtDbgBreak();
			return FALSE;
		}
	}

	// The destination was made sparse above; undo that if the source is not.
	if (!(file_basic.FileAttributes & FILE_ATTRIBUTE_SPARSE_FILE))
	{
		FILE_SET_SPARSE_BUFFER set_sparse = { FALSE };
		if (!DeviceIoControl(hDst, FSCTL_SET_SPARSE, &set_sparse, sizeof set_sparse, nullptr, 0, &bytes_returned, nullptr))
		{
			return FALSE;
		}
	}

	// Copy the source's basic info; a zero CreationTime asks the file system
	// to leave the destination's own creation time untouched.
	file_basic.CreationTime.QuadPart = 0;
	if (!SetFileInformationByHandle(hDst, FileBasicInfo, &file_basic, sizeof file_basic))
	{
		return FALSE;
	}
	if (!FlushFileBuffers(hDst))
	{
		return FALSE;
	}

	// Everything succeeded: clear the delete mark so the destination persists.
	dispos = { FALSE };
	return !!SetFileInformationByHandle(hDst, FileDispositionInfo, &dispos, sizeof dispos);
}

// Creates DstFile as a block-cloned copy of SrcFile.
//
// Parameters:
//   SrcFile - path of an existing file to clone.
//   DstFile - path of the file to create; it must not exist yet (CREATE_NEW).
//
// Returns TRUE on success. On failure returns FALSE and leaves the last-error
// code of the step that failed: ERROR_NOT_SAME_DEVICE when the two paths
// resolve to different volume paths, otherwise the error reported by the
// failing API (for example ERROR_FILE_EXISTS from CreateFileW when DstFile
// already exists).
BOOL CreateForkForFileW(LPCWSTR SrcFile, LPCWSTR DstFile)
{
#ifdef DEBUG
	_putws(SrcFile);
#endif // DEBUG

	// Both paths must live on the same volume.
	WCHAR src_volume[MAX_PATH], dst_volume[MAX_PATH];
	if (GetVolumePathNameW(SrcFile, src_volume, MAX_PATH) == 0 || GetVolumePathNameW(DstFile, dst_volume, MAX_PATH) == 0)
	{
		return FALSE;
	}
	if (lstrcmpiW(src_volume, dst_volume) != 0)
	{
		SetLastError(ERROR_NOT_SAME_DEVICE);
		return FALSE;
	}

	HANDLE hSrc = CreateFileW(SrcFile, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL);
	if (hSrc == INVALID_HANDLE_VALUE)
	{
		// Nothing to close; CreateFileW's error code is left for the caller.
		return FALSE;
	}

	// The source handle is passed as the template file for the new file.
	HANDLE hDst = CreateFileW(DstFile, GENERIC_READ | GENERIC_WRITE | DELETE, 0, nullptr, CREATE_NEW, 0, hSrc);
	if (hDst == INVALID_HANDLE_VALUE)
	{
		// Keep the real reason for the failure across the cleanup call.
		const DWORD create_error = GetLastError();
		CloseHandle(hSrc);
		SetLastError(create_error);
		return FALSE;
	}

	const BOOL ret = CreateForkW(hSrc, hDst);
	const DWORD fork_error = ret ? ERROR_SUCCESS : GetLastError();
	CloseHandle(hDst);
	CloseHandle(hSrc);
	if (!ret)
	{
		// Report CreateForkW's error rather than anything set during cleanup.
		SetLastError(fork_error);
	}
	return ret;
}

// Builds "dir\name" into out, which must have room for MAX_PATH characters.
// Returns FALSE with ERROR_FILENAME_EXCED_RANGE when the result does not fit.
static BOOL CreateForkJoinPathW(LPWSTR out, LPCWSTR dir, LPCWSTR name)
{
	if (lstrlenW(dir) >= MAX_PATH)
	{
		SetLastError(ERROR_FILENAME_EXCED_RANGE);
		return FALSE;
	}
	lstrcpyW(out, dir);
	if (!PathAppendW(out, name))
	{
		SetLastError(ERROR_FILENAME_EXCED_RANGE);
		return FALSE;
	}
	return TRUE;
}

// Recursively clones the directory tree SrcDir into a newly created DstDir,
// cloning every file with CreateForkForFileW.
//
// Parameters:
//   SrcDir - path of an existing directory.
//   DstDir - path of the directory to create; CreateDirectoryW fails if it
//            already exists.
//
// Returns TRUE when every entry was cloned. On failure returns FALSE with the
// last-error code of the step that failed (ERROR_DIRECTORY when SrcDir is not
// a directory, ERROR_NOT_SAME_DEVICE for different volumes, ERROR_NOT_CAPABLE
// when the volume lacks FILE_SUPPORTS_BLOCK_REFCOUNTING,
// ERROR_FILENAME_EXCED_RANGE when a child path exceeds MAX_PATH, ...).
// Entries already created in DstDir before a failure are not removed.
BOOL CreateForkForDirW(LPCWSTR SrcDir, LPCWSTR DstDir)
{
	// SrcDir must exist and be a directory.
	DWORD attr = GetFileAttributesW(SrcDir);
	if (attr == INVALID_FILE_ATTRIBUTES)
	{
		return FALSE;
	}
	if (!(attr & FILE_ATTRIBUTE_DIRECTORY))
	{
		SetLastError(ERROR_DIRECTORY);
		return FALSE;
	}

	// Both directories must live on the same volume.
	WCHAR src_volume[MAX_PATH], dst_volume[MAX_PATH];
	if (GetVolumePathNameW(SrcDir, src_volume, MAX_PATH) == 0 || GetVolumePathNameW(DstDir, dst_volume, MAX_PATH) == 0)
	{
		return FALSE;
	}
	if (lstrcmpiW(src_volume, dst_volume) != 0)
	{
		SetLastError(ERROR_NOT_SAME_DEVICE);
		return FALSE;
	}

	// Open the directory only long enough to read the volume's feature flags.
	HANDLE hSrc = CreateFileW(SrcDir, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL);
	if (hSrc == INVALID_HANDLE_VALUE)
	{
		return FALSE;
	}
	DWORD fs_flags = 0;
	const BOOL have_flags = GetVolumeInformationByHandleW(hSrc, NULL, 0, NULL, NULL, &fs_flags, NULL, 0);
	const DWORD flags_error = have_flags ? ERROR_SUCCESS : GetLastError();
	CloseHandle(hSrc);
	if (!have_flags)
	{
		SetLastError(flags_error);
		return FALSE;
	}
	if (!(fs_flags & FILE_SUPPORTS_BLOCK_REFCOUNTING))
	{
		SetLastError(ERROR_NOT_CAPABLE);
		return FALSE;
	}

	if (!CreateDirectoryW(DstDir, NULL))
	{
		return FALSE;
	}

	// Enumerate every entry of the source directory.
	WCHAR pattern[MAX_PATH];
	if (!CreateForkJoinPathW(pattern, SrcDir, L"*"))
	{
		return FALSE;
	}
	WIN32_FIND_DATAW find_data;
	HANDLE hFind = FindFirstFileW(pattern, &find_data);
	if (hFind == INVALID_HANDLE_VALUE)
	{
		// "No matching files" means there is nothing to clone, which is not
		// an error; any other failure is reported to the caller.
		return GetLastError() == ERROR_FILE_NOT_FOUND;
	}

	BOOL ok = TRUE;
	do {
		// Skip the current-directory and parent-directory entries.
		if (StrCmpW(find_data.cFileName, L".") == 0 || StrCmpW(find_data.cFileName, L"..") == 0)
			continue;

		WCHAR src_child[MAX_PATH], dst_child[MAX_PATH];
		if (!CreateForkJoinPathW(src_child, SrcDir, find_data.cFileName) ||
			!CreateForkJoinPathW(dst_child, DstDir, find_data.cFileName))
		{
			ok = FALSE;
			break;
		}

		// Sub-directories are cloned recursively, everything else as a file.
		if (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
		{
			ok = CreateForkForDirW(src_child, dst_child);
		}
		else
		{
			ok = CreateForkForFileW(src_child, dst_child);
		}
		if (!ok)
		{
			break;
		}
	} while (FindNextFileW(hFind, &find_data) != 0);

	// Capture the error before FindClose can disturb it. When the loop ended
	// because FindNextFileW returned 0, only ERROR_NO_MORE_FILES is a normal
	// end of the listing.
	const DWORD walk_error = GetLastError();
	if (ok && walk_error != ERROR_NO_MORE_FILES)
	{
		ok = FALSE;
	}
	FindClose(hFind);
	if (!ok)
	{
		SetLastError(walk_error);
	}
	return ok;
}

// Demo entry point: clones one hard-coded directory tree into another.
// Returns 0 when the clone succeeded and 1 when it failed, so the result is
// visible to the calling shell or script.
int main()
{
	setlocale(LC_ALL, "chs");
	return CreateForkForDirW(L"E:\\开发项目", L"E:\\test") ? 0 : 1;
}