diff --git a/MAINTAINERS b/MAINTAINERS index 101f4e185d..332112f6fd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -599,6 +599,8 @@ F: tools/binman/ BTRFS M: Marek Behun +R: Qu Wenruo +L: linux-btrfs@vger.kernel.org S: Maintained F: cmd/btrfs.c F: fs/btrfs/ diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 9b3159296f..fc074c84d2 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -2,5 +2,6 @@ # # 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz -obj-y := btrfs.o chunk-map.o compression.o ctree.o dev.o dir-item.o \ - extent-io.o hash.o inode.o root.o subvolume.o super.o +obj-y := btrfs.o compression.o ctree.o dev.o dir-item.o \ + extent-io.o inode.o subvolume.o crypto/hash.o disk-io.o \ + common/rbtree-utils.o extent-cache.o volumes.o root-tree.o diff --git a/fs/btrfs/btrfs.c b/fs/btrfs/btrfs.c index de16217d0d..cbf9dcffeb 100644 --- a/fs/btrfs/btrfs.c +++ b/fs/btrfs/btrfs.c @@ -5,219 +5,280 @@ * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz */ -#include "btrfs.h" #include #include #include #include +#include "btrfs.h" +#include "crypto/hash.h" +#include "disk-io.h" -struct btrfs_info btrfs_info; +struct btrfs_fs_info *current_fs_info; -static int readdir_callback(const struct btrfs_root *root, - struct btrfs_dir_item *item) +static int show_dir(struct btrfs_root *root, struct extent_buffer *eb, + struct btrfs_dir_item *di) { - static const char typestr[BTRFS_FT_MAX][4] = { - [BTRFS_FT_UNKNOWN] = " ? ", - [BTRFS_FT_REG_FILE] = " ", - [BTRFS_FT_DIR] = "DIR", - [BTRFS_FT_CHRDEV] = "CHR", - [BTRFS_FT_BLKDEV] = "BLK", - [BTRFS_FT_FIFO] = "FIF", - [BTRFS_FT_SOCK] = "SCK", - [BTRFS_FT_SYMLINK] = "SYM", - [BTRFS_FT_XATTR] = " ? ", + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_inode_item ii; + struct btrfs_key key; + static const char* dir_item_str[] = { + [BTRFS_FT_REG_FILE] = "FILE", + [BTRFS_FT_DIR] = "DIR", + [BTRFS_FT_CHRDEV] = "CHRDEV", + [BTRFS_FT_BLKDEV] = "BLKDEV", + [BTRFS_FT_FIFO] = "FIFO", + [BTRFS_FT_SOCK] = "SOCK", + [BTRFS_FT_SYMLINK] = "SYMLINK", + [BTRFS_FT_XATTR] = "XATTR" }; - struct btrfs_inode_item inode; - const char *name = (const char *) (item + 1); - char filetime[32], *target = NULL; + u8 type = btrfs_dir_type(eb, di); + char namebuf[BTRFS_NAME_LEN]; + char *target = NULL; + char filetime[32]; time_t mtime; + int ret; - if (btrfs_lookup_inode(root, &item->location, &inode, NULL)) { - printf("%s: Cannot find inode item for directory entry %.*s!\n", - __func__, item->name_len, name); - return 0; + btrfs_dir_item_key_to_cpu(eb, di, &key); + + if (key.type == BTRFS_ROOT_ITEM_KEY) { + struct btrfs_root *subvol; + + /* It's a subvolume, get its mtime from root item */ + subvol = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(subvol)) { + ret = PTR_ERR(subvol); + error("Can't find root %llu", key.objectid); + return ret; + } + mtime = btrfs_stack_timespec_sec(&subvol->root_item.otime); + } else { + struct btrfs_path path; + + /* It's regular inode, get its mtime from inode item */ + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret > 0) + ret = -ENOENT; + if (ret < 0) { + error("Can't find inode %llu", key.objectid); + btrfs_release_path(&path); + return ret; + } + read_extent_buffer(path.nodes[0], &ii, + btrfs_item_ptr_offset(path.nodes[0], path.slots[0]), + sizeof(ii)); + btrfs_release_path(&path); + mtime = btrfs_stack_timespec_sec(&ii.mtime); } - - mtime = inode.mtime.sec; ctime_r(&mtime, filetime); - if (item->type == BTRFS_FT_SYMLINK) { - target = malloc(min(inode.size + 1, - (u64) 
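show_dir() above is the callback that btrfs_iter_dir() drives once per directory item, and any other consumer follows the same shape. A minimal sketch of an alternative callback, assuming the iterator keeps walking while the callback returns 0 (count_entry() is hypothetical; read_extent_buffer() and btrfs_dir_name_len() are used exactly as in show_dir()):

        static int count_entry(struct btrfs_root *root, struct extent_buffer *eb,
                               struct btrfs_dir_item *di)
        {
                char name[BTRFS_NAME_LEN];
                u32 len = btrfs_dir_name_len(eb, di);

                /* The name is stored right after the dir item header. */
                read_extent_buffer(eb, name, (unsigned long)(di + 1), len);
                printf("%.*s\n", (int)len, name);
                return 0;
        }

        /* ret = btrfs_iter_dir(root, ino, count_entry); */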
btrfs_info.sb.sectorsize)); - - if (target && btrfs_readlink(root, item->location.objectid, - target)) { - free(target); - target = NULL; + if (type == BTRFS_FT_SYMLINK) { + target = malloc(fs_info->sectorsize); + if (!target) { + error("Can't alloc memory for symlink %llu", + key.objectid); + return -ENOMEM; } - - if (!target) - printf("%s: Cannot read symlink target!\n", __func__); + ret = btrfs_readlink(root, key.objectid, target); + if (ret < 0) { + error("Failed to read symlink %llu", key.objectid); + goto out; + } + target[ret] = '\0'; } - printf("<%s> ", typestr[item->type]); - if (item->type == BTRFS_FT_CHRDEV || item->type == BTRFS_FT_BLKDEV) - printf("%4u,%5u ", (unsigned int) (inode.rdev >> 20), - (unsigned int) (inode.rdev & 0xfffff)); + if (type < ARRAY_SIZE(dir_item_str) && dir_item_str[type]) + printf("<%s> ", dir_item_str[type]); else - printf("%10llu ", inode.size); - - printf("%24.24s %.*s", filetime, item->name_len, name); - - if (item->type == BTRFS_FT_SYMLINK) { - printf(" -> %s", target ? target : "?"); - if (target) - free(target); + printf("DIR_ITEM.%u", type); + if (type == BTRFS_FT_CHRDEV || type == BTRFS_FT_BLKDEV) { + ASSERT(key.type == BTRFS_INODE_ITEM_KEY); + printf("%4llu,%5llu ", btrfs_stack_inode_rdev(&ii) >> 20, + btrfs_stack_inode_rdev(&ii) & 0xfffff); + } else { + if (key.type == BTRFS_INODE_ITEM_KEY) + printf("%10llu ", btrfs_stack_inode_size(&ii)); + else + printf("%10llu ", 0ULL); } + read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), + btrfs_dir_name_len(eb, di)); + printf("%24.24s %.*s", filetime, btrfs_dir_name_len(eb, di), namebuf); + if (type == BTRFS_FT_SYMLINK) + printf(" -> %s", target ? target : "?"); printf("\n"); - - return 0; +out: + free(target); + return ret; } int btrfs_probe(struct blk_desc *fs_dev_desc, struct disk_partition *fs_partition) { - btrfs_blk_desc = fs_dev_desc; - btrfs_part_info = fs_partition; - - memset(&btrfs_info, 0, sizeof(btrfs_info)); + struct btrfs_fs_info *fs_info; + int ret = -1; btrfs_hash_init(); - if (btrfs_read_superblock()) - return -1; - - if (btrfs_chunk_map_init()) { - printf("%s: failed to init chunk map\n", __func__); - return -1; + fs_info = open_ctree_fs_info(fs_dev_desc, fs_partition); + if (fs_info) { + current_fs_info = fs_info; + ret = 0; } - - btrfs_info.tree_root.objectid = 0; - btrfs_info.tree_root.bytenr = btrfs_info.sb.root; - btrfs_info.chunk_root.objectid = 0; - btrfs_info.chunk_root.bytenr = btrfs_info.sb.chunk_root; - - if (btrfs_read_chunk_tree()) { - printf("%s: failed to read chunk tree\n", __func__); - return -1; - } - - if (btrfs_find_root(btrfs_get_default_subvol_objectid(), - &btrfs_info.fs_root, NULL)) { - printf("%s: failed to find default subvolume\n", __func__); - return -1; - } - - return 0; + return ret; } int btrfs_ls(const char *path) { - struct btrfs_root root = btrfs_info.fs_root; - u64 inr; + struct btrfs_fs_info *fs_info = current_fs_info; + struct btrfs_root *root = fs_info->fs_root; + u64 ino = BTRFS_FIRST_FREE_OBJECTID; u8 type; + int ret; - inr = btrfs_lookup_path(&root, root.root_dirid, path, &type, NULL, 40); - - if (inr == -1ULL) { + ASSERT(fs_info); + ret = btrfs_lookup_path(fs_info->fs_root, BTRFS_FIRST_FREE_OBJECTID, + path, &root, &ino, &type, 40); + if (ret < 0) { printf("Cannot lookup path %s\n", path); - return -1; + return ret; } if (type != BTRFS_FT_DIR) { - printf("Not a directory: %s\n", path); - return -1; + error("Not a directory: %s", path); + return -ENOENT; } - - if (btrfs_readdir(&root, inr, readdir_callback)) { - printf("An error occured 
while listing directory %s\n", path);
-		return -1;
+	ret = btrfs_iter_dir(root, ino, show_dir);
+	if (ret < 0) {
+		error("An error occurred while listing directory %s", path);
+		return ret;
 	}
-
 	return 0;
 }
 
 int btrfs_exists(const char *file)
 {
-	struct btrfs_root root = btrfs_info.fs_root;
-	u64 inr;
+	struct btrfs_fs_info *fs_info = current_fs_info;
+	struct btrfs_root *root;
+	u64 ino;
 	u8 type;
+	int ret;
 
-	inr = btrfs_lookup_path(&root, root.root_dirid, file, &type, NULL, 40);
+	ASSERT(fs_info);
 
-	return (inr != -1ULL && type == BTRFS_FT_REG_FILE);
+	ret = btrfs_lookup_path(fs_info->fs_root, BTRFS_FIRST_FREE_OBJECTID,
+				file, &root, &ino, &type, 40);
+	if (ret < 0)
+		return 0;
+
+	if (type == BTRFS_FT_REG_FILE)
+		return 1;
+	return 0;
 }
 
 int btrfs_size(const char *file, loff_t *size)
 {
-	struct btrfs_root root = btrfs_info.fs_root;
-	struct btrfs_inode_item inode;
-	u64 inr;
+	struct btrfs_fs_info *fs_info = current_fs_info;
+	struct btrfs_inode_item *ii;
+	struct btrfs_root *root;
+	struct btrfs_path path;
+	struct btrfs_key key;
+	u64 ino;
 	u8 type;
+	int ret;
 
-	inr = btrfs_lookup_path(&root, root.root_dirid, file, &type, &inode,
-			40);
-
-	if (inr == -1ULL) {
+	ret = btrfs_lookup_path(fs_info->fs_root, BTRFS_FIRST_FREE_OBJECTID,
+				file, &root, &ino, &type, 40);
+	if (ret < 0) {
 		printf("Cannot lookup file %s\n", file);
-		return -1;
+		return ret;
 	}
-
 	if (type != BTRFS_FT_REG_FILE) {
 		printf("Not a regular file: %s\n", file);
-		return -1;
+		return -ENOENT;
 	}
+	btrfs_init_path(&path);
+	key.objectid = ino;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
 
-	*size = inode.size;
-	return 0;
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret < 0) {
+		printf("Cannot lookup ino %llu\n", ino);
+		return ret;
+	}
+	if (ret > 0) {
+		printf("Ino %llu does not exist\n", ino);
+		ret = -ENOENT;
+		goto out;
+	}
+	ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
+			    struct btrfs_inode_item);
+	*size = btrfs_inode_size(path.nodes[0], ii);
+out:
+	btrfs_release_path(&path);
+	return ret;
 }
 
 int btrfs_read(const char *file, void *buf, loff_t offset, loff_t len,
 	       loff_t *actread)
 {
-	struct btrfs_root root = btrfs_info.fs_root;
-	struct btrfs_inode_item inode;
-	u64 inr, rd;
+	struct btrfs_fs_info *fs_info = current_fs_info;
+	struct btrfs_root *root;
+	loff_t real_size = 0;
+	u64 ino;
 	u8 type;
+	int ret;
 
-	inr = btrfs_lookup_path(&root, root.root_dirid, file, &type, &inode,
-			40);
-
-	if (inr == -1ULL) {
-		printf("Cannot lookup file %s\n", file);
-		return -1;
+	ASSERT(fs_info);
+	ret = btrfs_lookup_path(fs_info->fs_root, BTRFS_FIRST_FREE_OBJECTID,
+				file, &root, &ino, &type, 40);
+	if (ret < 0) {
+		error("Cannot lookup file %s", file);
+		return ret;
 	}
 
 	if (type != BTRFS_FT_REG_FILE) {
-		printf("Not a regular file: %s\n", file);
-		return -1;
+		error("Not a regular file: %s", file);
+		return -EINVAL;
 	}
 
-	if (!len)
-		len = inode.size;
-
-	if (len > inode.size - offset)
-		len = inode.size - offset;
-
-	rd = btrfs_file_read(&root, inr, offset, len, buf);
-	if (rd == -1ULL) {
-		printf("An error occured while reading file %s\n", file);
-		return -1;
+	ret = btrfs_size(file, &real_size);
+	if (ret < 0) {
+		error("Failed to get inode size: %s", file);
+		return ret;
+	}
+	if (!len) {
+		len = real_size;
 	}
-	*actread = rd;
+	if (len > real_size - offset)
+		len = real_size - offset;
+
+	ret = btrfs_file_read(root, ino, offset, len, buf);
+	if (ret < 0) {
+		error("An error occurred while reading file %s", file);
+		return ret;
+	}
+
+	*actread = len;
 	return 0;
 }
 
 void btrfs_close(void)
 {
-
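The probe/close pair above replaces the old superblock and chunk-map bootstrap; end to end, the lifecycle looks like this sketch (desc and part stand for the blk_desc and disk_partition handed in by the generic fs layer):

        struct btrfs_fs_info *fs_info;

        btrfs_hash_init();
        fs_info = open_ctree_fs_info(desc, part);       /* as in btrfs_probe() */
        if (!fs_info)
                return -1;
        current_fs_info = fs_info;
        /* btrfs_ls()/btrfs_size()/btrfs_read() all start from current_fs_info */
        close_ctree_fs_info(fs_info);                   /* as in btrfs_close() */
        current_fs_info = NULL;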
btrfs_chunk_map_exit(); + if (current_fs_info) { + close_ctree_fs_info(current_fs_info); + current_fs_info = NULL; + } } int btrfs_uuid(char *uuid_str) { #ifdef CONFIG_LIB_UUID - uuid_bin_to_str(btrfs_info.sb.fsid, uuid_str, UUID_STR_FORMAT_STD); + if (current_fs_info) + uuid_bin_to_str(current_fs_info->super_copy->fsid, uuid_str, + UUID_STR_FORMAT_STD); return 0; #endif return -ENOSYS; diff --git a/fs/btrfs/btrfs.h b/fs/btrfs/btrfs.h index 25a8cf6a87..7d8b395b26 100644 --- a/fs/btrfs/btrfs.h +++ b/fs/btrfs/btrfs.h @@ -11,77 +11,18 @@ #include #include "conv-funcs.h" -struct btrfs_info { - struct btrfs_super_block sb; - - struct btrfs_root tree_root; - struct btrfs_root fs_root; - struct btrfs_root chunk_root; - - struct rb_root chunks_root; -}; - extern struct btrfs_info btrfs_info; - -/* hash.c */ -void btrfs_hash_init(void); -u32 btrfs_crc32c(u32, const void *, size_t); -u32 btrfs_csum_data(char *, u32, size_t); -void btrfs_csum_final(u32, void *); - -static inline u64 btrfs_name_hash(const char *name, int len) -{ - return btrfs_crc32c((u32) ~1, name, len); -} - -/* dev.c */ -extern struct blk_desc *btrfs_blk_desc; -extern struct disk_partition *btrfs_part_info; - -int btrfs_devread(u64, int, void *); - -/* chunk-map.c */ -u64 btrfs_map_logical_to_physical(u64); -int btrfs_chunk_map_init(void); -void btrfs_chunk_map_exit(void); -int btrfs_read_chunk_tree(void); +extern struct btrfs_fs_info *current_fs_info; /* compression.c */ u32 btrfs_decompress(u8 type, const char *, u32, char *, u32); -/* super.c */ -int btrfs_read_superblock(void); - -/* dir-item.c */ -typedef int (*btrfs_readdir_callback_t)(const struct btrfs_root *, - struct btrfs_dir_item *); - -int btrfs_lookup_dir_item(const struct btrfs_root *, u64, const char *, int, - struct btrfs_dir_item *); -int btrfs_readdir(const struct btrfs_root *, u64, btrfs_readdir_callback_t); - -/* root.c */ -int btrfs_find_root(u64, struct btrfs_root *, struct btrfs_root_item *); -u64 btrfs_lookup_root_ref(u64, struct btrfs_root_ref *, char *); - /* inode.c */ -u64 btrfs_lookup_inode_ref(struct btrfs_root *, u64, struct btrfs_inode_ref *, - char *); -int btrfs_lookup_inode(const struct btrfs_root *, struct btrfs_key *, - struct btrfs_inode_item *, struct btrfs_root *); -int btrfs_readlink(const struct btrfs_root *, u64, char *); -u64 btrfs_lookup_path(struct btrfs_root *, u64, const char *, u8 *, - struct btrfs_inode_item *, int); -u64 btrfs_file_read(const struct btrfs_root *, u64, u64, u64, char *); +int btrfs_readlink(struct btrfs_root *root, u64 ino, char *target); +int btrfs_file_read(struct btrfs_root *root, u64 ino, u64 file_offset, u64 len, + char *dest); /* subvolume.c */ u64 btrfs_get_default_subvol_objectid(void); -/* extent-io.c */ -u64 btrfs_read_extent_inline(struct btrfs_path *, - struct btrfs_file_extent_item *, u64, u64, - char *); -u64 btrfs_read_extent_reg(struct btrfs_path *, struct btrfs_file_extent_item *, - u64, u64, char *); - #endif /* !__BTRFS_BTRFS_H__ */ diff --git a/fs/btrfs/btrfs_tree.h b/fs/btrfs/btrfs_tree.h deleted file mode 100644 index aa0f3d6c86..0000000000 --- a/fs/btrfs/btrfs_tree.h +++ /dev/null @@ -1,766 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * From linux/include/uapi/linux/btrfs_tree.h - */ - -#ifndef __BTRFS_BTRFS_TREE_H__ -#define __BTRFS_BTRFS_TREE_H__ - -#include - -#define BTRFS_VOL_NAME_MAX 255 -#define BTRFS_NAME_MAX 255 -#define BTRFS_LABEL_SIZE 256 -#define BTRFS_FSID_SIZE 16 -#define BTRFS_UUID_SIZE 16 - -/* - * This header contains the structure definitions and constants used - 
* by file system objects that can be retrieved using - * the BTRFS_IOC_SEARCH_TREE ioctl. That means basically anything that - * is needed to describe a leaf node's key or item contents. - */ - -/* holds pointers to all of the tree roots */ -#define BTRFS_ROOT_TREE_OBJECTID 1ULL - -/* stores information about which extents are in use, and reference counts */ -#define BTRFS_EXTENT_TREE_OBJECTID 2ULL - -/* - * chunk tree stores translations from logical -> physical block numbering - * the super block points to the chunk tree - */ -#define BTRFS_CHUNK_TREE_OBJECTID 3ULL - -/* - * stores information about which areas of a given device are in use. - * one per device. The tree of tree roots points to the device tree - */ -#define BTRFS_DEV_TREE_OBJECTID 4ULL - -/* one per subvolume, storing files and directories */ -#define BTRFS_FS_TREE_OBJECTID 5ULL - -/* directory objectid inside the root tree */ -#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL - -/* holds checksums of all the data extents */ -#define BTRFS_CSUM_TREE_OBJECTID 7ULL - -/* holds quota configuration and tracking */ -#define BTRFS_QUOTA_TREE_OBJECTID 8ULL - -/* for storing items that use the BTRFS_UUID_KEY* types */ -#define BTRFS_UUID_TREE_OBJECTID 9ULL - -/* tracks free space in block groups. */ -#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL - -/* device stats in the device tree */ -#define BTRFS_DEV_STATS_OBJECTID 0ULL - -/* for storing balance parameters in the root tree */ -#define BTRFS_BALANCE_OBJECTID -4ULL - -/* orhpan objectid for tracking unlinked/truncated files */ -#define BTRFS_ORPHAN_OBJECTID -5ULL - -/* does write ahead logging to speed up fsyncs */ -#define BTRFS_TREE_LOG_OBJECTID -6ULL -#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL - -/* for space balancing */ -#define BTRFS_TREE_RELOC_OBJECTID -8ULL -#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL - -/* - * extent checksums all have this objectid - * this allows them to share the logging tree - * for fsyncs - */ -#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL - -/* For storing free space cache */ -#define BTRFS_FREE_SPACE_OBJECTID -11ULL - -/* - * The inode number assigned to the special inode for storing - * free ino cache - */ -#define BTRFS_FREE_INO_OBJECTID -12ULL - -/* dummy objectid represents multiple objectids */ -#define BTRFS_MULTIPLE_OBJECTIDS -255ULL - -/* - * All files have objectids in this range. - */ -#define BTRFS_FIRST_FREE_OBJECTID 256ULL -#define BTRFS_LAST_FREE_OBJECTID -256ULL -#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL - - -/* - * the device items go into the chunk tree. The key is in the form - * [ 1 BTRFS_DEV_ITEM_KEY device_id ] - */ -#define BTRFS_DEV_ITEMS_OBJECTID 1ULL - -#define BTRFS_BTREE_INODE_OBJECTID 1 - -#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 - -#define BTRFS_DEV_REPLACE_DEVID 0ULL - -/* - * inode items have the data typically returned from stat and store other - * info about object characteristics. There is one for every file and dir in - * the FS - */ -#define BTRFS_INODE_ITEM_KEY 1 -#define BTRFS_INODE_REF_KEY 12 -#define BTRFS_INODE_EXTREF_KEY 13 -#define BTRFS_XATTR_ITEM_KEY 24 -#define BTRFS_ORPHAN_ITEM_KEY 48 -/* reserve 2-15 close to the inode for later flexibility */ - -/* - * dir items are the name -> inode pointers in a directory. There is one - * for every name in a directory. 
- */ -#define BTRFS_DIR_LOG_ITEM_KEY 60 -#define BTRFS_DIR_LOG_INDEX_KEY 72 -#define BTRFS_DIR_ITEM_KEY 84 -#define BTRFS_DIR_INDEX_KEY 96 -/* - * extent data is for file data - */ -#define BTRFS_EXTENT_DATA_KEY 108 - -/* - * extent csums are stored in a separate tree and hold csums for - * an entire extent on disk. - */ -#define BTRFS_EXTENT_CSUM_KEY 128 - -/* - * root items point to tree roots. They are typically in the root - * tree used by the super block to find all the other trees - */ -#define BTRFS_ROOT_ITEM_KEY 132 - -/* - * root backrefs tie subvols and snapshots to the directory entries that - * reference them - */ -#define BTRFS_ROOT_BACKREF_KEY 144 - -/* - * root refs make a fast index for listing all of the snapshots and - * subvolumes referenced by a given root. They point directly to the - * directory item in the root that references the subvol - */ -#define BTRFS_ROOT_REF_KEY 156 - -/* - * extent items are in the extent map tree. These record which blocks - * are used, and how many references there are to each block - */ -#define BTRFS_EXTENT_ITEM_KEY 168 - -/* - * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know - * the length, so we save the level in key->offset instead of the length. - */ -#define BTRFS_METADATA_ITEM_KEY 169 - -#define BTRFS_TREE_BLOCK_REF_KEY 176 - -#define BTRFS_EXTENT_DATA_REF_KEY 178 - -#define BTRFS_EXTENT_REF_V0_KEY 180 - -#define BTRFS_SHARED_BLOCK_REF_KEY 182 - -#define BTRFS_SHARED_DATA_REF_KEY 184 - -/* - * block groups give us hints into the extent allocation trees. Which - * blocks are free etc etc - */ -#define BTRFS_BLOCK_GROUP_ITEM_KEY 192 - -/* - * Every block group is represented in the free space tree by a free space info - * item, which stores some accounting information. It is keyed on - * (block_group_start, FREE_SPACE_INFO, block_group_length). - */ -#define BTRFS_FREE_SPACE_INFO_KEY 198 - -/* - * A free space extent tracks an extent of space that is free in a block group. - * It is keyed on (start, FREE_SPACE_EXTENT, length). - */ -#define BTRFS_FREE_SPACE_EXTENT_KEY 199 - -/* - * When a block group becomes very fragmented, we convert it to use bitmaps - * instead of extents. A free space bitmap is keyed on - * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with - * (length / sectorsize) bits. - */ -#define BTRFS_FREE_SPACE_BITMAP_KEY 200 - -#define BTRFS_DEV_EXTENT_KEY 204 -#define BTRFS_DEV_ITEM_KEY 216 -#define BTRFS_CHUNK_ITEM_KEY 228 - -/* - * Records the overall state of the qgroups. - * There's only one instance of this key present, - * (0, BTRFS_QGROUP_STATUS_KEY, 0) - */ -#define BTRFS_QGROUP_STATUS_KEY 240 -/* - * Records the currently used space of the qgroup. - * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid). - */ -#define BTRFS_QGROUP_INFO_KEY 242 -/* - * Contains the user configured limits for the qgroup. - * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid). - */ -#define BTRFS_QGROUP_LIMIT_KEY 244 -/* - * Records the child-parent relationship of qgroups. For - * each relation, 2 keys are present: - * (childid, BTRFS_QGROUP_RELATION_KEY, parentid) - * (parentid, BTRFS_QGROUP_RELATION_KEY, childid) - */ -#define BTRFS_QGROUP_RELATION_KEY 246 - -/* - * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY. - */ -#define BTRFS_BALANCE_ITEM_KEY 248 - -/* - * The key type for tree items that are stored persistently, but do not need to - * exist for extended period of time. The items can exist in any tree. 
- * - * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data] - * - * Existing items: - * - * - balance status item - * (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0) - */ -#define BTRFS_TEMPORARY_ITEM_KEY 248 - -/* - * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY - */ -#define BTRFS_DEV_STATS_KEY 249 - -/* - * The key type for tree items that are stored persistently and usually exist - * for a long period, eg. filesystem lifetime. The item kinds can be status - * information, stats or preference values. The item can exist in any tree. - * - * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data] - * - * Existing items: - * - * - device statistics, store IO stats in the device tree, one key for all - * stats - * (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0) - */ -#define BTRFS_PERSISTENT_ITEM_KEY 249 - -/* - * Persistantly stores the device replace state in the device tree. - * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0). - */ -#define BTRFS_DEV_REPLACE_KEY 250 - -/* - * Stores items that allow to quickly map UUIDs to something else. - * These items are part of the filesystem UUID tree. - * The key is built like this: - * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits). - */ -#if BTRFS_UUID_SIZE != 16 -#error "UUID items require BTRFS_UUID_SIZE == 16!" -#endif -#define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */ -#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to - * received subvols */ - -/* - * string items are for debugging. They just store a short string of - * data in the FS - */ -#define BTRFS_STRING_ITEM_KEY 253 - - - -/* 32 bytes in various csum fields */ -#define BTRFS_CSUM_SIZE 32 - -/* csum types */ -#define BTRFS_CSUM_TYPE_CRC32 0 - -/* - * flags definitions for directory entry item type - * - * Used by: - * struct btrfs_dir_item.type - */ -#define BTRFS_FT_UNKNOWN 0 -#define BTRFS_FT_REG_FILE 1 -#define BTRFS_FT_DIR 2 -#define BTRFS_FT_CHRDEV 3 -#define BTRFS_FT_BLKDEV 4 -#define BTRFS_FT_FIFO 5 -#define BTRFS_FT_SOCK 6 -#define BTRFS_FT_SYMLINK 7 -#define BTRFS_FT_XATTR 8 -#define BTRFS_FT_MAX 9 - -/* - * The key defines the order in the tree, and so it also defines (optimal) - * block layout. - * - * objectid corresponds to the inode number. - * - * type tells us things about the object, and is a kind of stream selector. - * so for a given inode, keys with type of 1 might refer to the inode data, - * type of 2 may point to file data in the btree and type == 3 may point to - * extents. - * - * offset is the starting byte offset for this key in the stream. 
- */ - -struct btrfs_key { - __u64 objectid; - __u8 type; - __u64 offset; -} __attribute__ ((__packed__)); - -struct btrfs_dev_item { - /* the internal btrfs device id */ - __u64 devid; - - /* size of the device */ - __u64 total_bytes; - - /* bytes used */ - __u64 bytes_used; - - /* optimal io alignment for this device */ - __u32 io_align; - - /* optimal io width for this device */ - __u32 io_width; - - /* minimal io size for this device */ - __u32 sector_size; - - /* type and info about this device */ - __u64 type; - - /* expected generation for this device */ - __u64 generation; - - /* - * starting byte of this partition on the device, - * to allow for stripe alignment in the future - */ - __u64 start_offset; - - /* grouping information for allocation decisions */ - __u32 dev_group; - - /* seek speed 0-100 where 100 is fastest */ - __u8 seek_speed; - - /* bandwidth 0-100 where 100 is fastest */ - __u8 bandwidth; - - /* btrfs generated uuid for this device */ - __u8 uuid[BTRFS_UUID_SIZE]; - - /* uuid of FS who owns this device */ - __u8 fsid[BTRFS_UUID_SIZE]; -} __attribute__ ((__packed__)); - -struct btrfs_stripe { - __u64 devid; - __u64 offset; - __u8 dev_uuid[BTRFS_UUID_SIZE]; -} __attribute__ ((__packed__)); - -struct btrfs_chunk { - /* size of this chunk in bytes */ - __u64 length; - - /* objectid of the root referencing this chunk */ - __u64 owner; - - __u64 stripe_len; - __u64 type; - - /* optimal io alignment for this chunk */ - __u32 io_align; - - /* optimal io width for this chunk */ - __u32 io_width; - - /* minimal io size for this chunk */ - __u32 sector_size; - - /* 2^16 stripes is quite a lot, a second limit is the size of a single - * item in the btree - */ - __u16 num_stripes; - - /* sub stripes only matter for raid10 */ - __u16 sub_stripes; - struct btrfs_stripe stripe; - /* additional stripes go here */ -} __attribute__ ((__packed__)); - -#define BTRFS_FREE_SPACE_EXTENT 1 -#define BTRFS_FREE_SPACE_BITMAP 2 - -struct btrfs_free_space_entry { - __u64 offset; - __u64 bytes; - __u8 type; -} __attribute__ ((__packed__)); - -struct btrfs_free_space_header { - struct btrfs_key location; - __u64 generation; - __u64 num_entries; - __u64 num_bitmaps; -} __attribute__ ((__packed__)); - -#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) -#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) - -/* Super block flags */ -/* Errors detected */ -#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) - -#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) -#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) - - -/* - * items in the extent btree are used to record the objectid of the - * owner of the block and the number of references - */ - -struct btrfs_extent_item { - __u64 refs; - __u64 generation; - __u64 flags; -} __attribute__ ((__packed__)); - - -#define BTRFS_EXTENT_FLAG_DATA (1ULL << 0) -#define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1) - -/* following flags only apply to tree blocks */ - -/* use full backrefs for extent pointers in the block */ -#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8) - -/* - * this flag is only used internally by scrub and may be changed at any time - * it is only declared here to avoid collisions - */ -#define BTRFS_EXTENT_FLAG_SUPER (1ULL << 48) - -struct btrfs_tree_block_info { - struct btrfs_key key; - __u8 level; -} __attribute__ ((__packed__)); - -struct btrfs_extent_data_ref { - __u64 root; - __u64 objectid; - __u64 offset; - __u32 count; -} __attribute__ ((__packed__)); - -struct btrfs_shared_data_ref { - __u32 count; -} __attribute__ ((__packed__)); - -struct 
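Because struct btrfs_chunk embeds exactly one stripe and any further stripes follow it inside the item, the on-disk size of a chunk item is derived rather than fixed; the deleted chunk-map.c below steps through sys_chunk_array with exactly this arithmetic. A sketch:

        /* Sketch: byte size of a chunk item holding num_stripes stripes. */
        static unsigned long chunk_item_size(int num_stripes)
        {
                return sizeof(struct btrfs_chunk) +
                       sizeof(struct btrfs_stripe) * (num_stripes - 1);
        }

For example, a DUP chunk with two stripes occupies sizeof(struct btrfs_chunk) plus one extra struct btrfs_stripe.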
btrfs_extent_inline_ref { - __u8 type; - __u64 offset; -} __attribute__ ((__packed__)); - -/* dev extents record free space on individual devices. The owner - * field points back to the chunk allocation mapping tree that allocated - * the extent. The chunk tree uuid field is a way to double check the owner - */ -struct btrfs_dev_extent { - __u64 chunk_tree; - __u64 chunk_objectid; - __u64 chunk_offset; - __u64 length; - __u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; -} __attribute__ ((__packed__)); - -struct btrfs_inode_ref { - __u64 index; - __u16 name_len; - /* name goes here */ -} __attribute__ ((__packed__)); - -struct btrfs_inode_extref { - __u64 parent_objectid; - __u64 index; - __u16 name_len; - __u8 name[0]; - /* name goes here */ -} __attribute__ ((__packed__)); - -struct btrfs_timespec { - __u64 sec; - __u32 nsec; -} __attribute__ ((__packed__)); - -struct btrfs_inode_item { - /* nfs style generation number */ - __u64 generation; - /* transid that last touched this inode */ - __u64 transid; - __u64 size; - __u64 nbytes; - __u64 block_group; - __u32 nlink; - __u32 uid; - __u32 gid; - __u32 mode; - __u64 rdev; - __u64 flags; - - /* modification sequence number for NFS */ - __u64 sequence; - - /* - * a little future expansion, for more than this we can - * just grow the inode item and version it - */ - __u64 reserved[4]; - struct btrfs_timespec atime; - struct btrfs_timespec ctime; - struct btrfs_timespec mtime; - struct btrfs_timespec otime; -} __attribute__ ((__packed__)); - -struct btrfs_dir_log_item { - __u64 end; -} __attribute__ ((__packed__)); - -struct btrfs_dir_item { - struct btrfs_key location; - __u64 transid; - __u16 data_len; - __u16 name_len; - __u8 type; -} __attribute__ ((__packed__)); - -#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) - -/* - * Internal in-memory flag that a subvolume has been marked for deletion but - * still visible as a directory - */ -#define BTRFS_ROOT_SUBVOL_DEAD (1ULL << 48) - -struct btrfs_root_item { - struct btrfs_inode_item inode; - __u64 generation; - __u64 root_dirid; - __u64 bytenr; - __u64 byte_limit; - __u64 bytes_used; - __u64 last_snapshot; - __u64 flags; - __u32 refs; - struct btrfs_key drop_progress; - __u8 drop_level; - __u8 level; - - /* - * The following fields appear after subvol_uuids+subvol_times - * were introduced. - */ - - /* - * This generation number is used to test if the new fields are valid - * and up to date while reading the root item. Every time the root item - * is written out, the "generation" field is copied into this field. If - * anyone ever mounted the fs with an older kernel, we will have - * mismatching generation values here and thus must invalidate the - * new fields. See btrfs_update_root and btrfs_find_last_root for - * details. - * the offset of generation_v2 is also used as the start for the memset - * when invalidating the fields. - */ - __u64 generation_v2; - __u8 uuid[BTRFS_UUID_SIZE]; - __u8 parent_uuid[BTRFS_UUID_SIZE]; - __u8 received_uuid[BTRFS_UUID_SIZE]; - __u64 ctransid; /* updated when an inode changes */ - __u64 otransid; /* trans when created */ - __u64 stransid; /* trans when sent. non-zero for received subvol */ - __u64 rtransid; /* trans when received. 
non-zero for received subvol */ - struct btrfs_timespec ctime; - struct btrfs_timespec otime; - struct btrfs_timespec stime; - struct btrfs_timespec rtime; - __u64 reserved[8]; /* for future */ -} __attribute__ ((__packed__)); - -/* - * this is used for both forward and backward root refs - */ -struct btrfs_root_ref { - __u64 dirid; - __u64 sequence; - __u16 name_len; -} __attribute__ ((__packed__)); - -#define BTRFS_FILE_EXTENT_INLINE 0 -#define BTRFS_FILE_EXTENT_REG 1 -#define BTRFS_FILE_EXTENT_PREALLOC 2 - -enum btrfs_compression_type { - BTRFS_COMPRESS_NONE = 0, - BTRFS_COMPRESS_ZLIB = 1, - BTRFS_COMPRESS_LZO = 2, - BTRFS_COMPRESS_ZSTD = 3, - BTRFS_COMPRESS_TYPES = 3, - BTRFS_COMPRESS_LAST = 4, -}; - -struct btrfs_file_extent_item { - /* - * transaction id that created this extent - */ - __u64 generation; - /* - * max number of bytes to hold this extent in ram - * when we split a compressed extent we can't know how big - * each of the resulting pieces will be. So, this is - * an upper limit on the size of the extent in ram instead of - * an exact limit. - */ - __u64 ram_bytes; - - /* - * 32 bits for the various ways we might encode the data, - * including compression and encryption. If any of these - * are set to something a given disk format doesn't understand - * it is treated like an incompat flag for reading and writing, - * but not for stat. - */ - __u8 compression; - __u8 encryption; - __u16 other_encoding; /* spare for later use */ - - /* are we inline data or a real extent? */ - __u8 type; - - /* - * disk space consumed by the extent, checksum blocks are included - * in these numbers - * - * At this offset in the structure, the inline extent data start. - */ - __u64 disk_bytenr; - __u64 disk_num_bytes; - /* - * the logical offset in file blocks (no csums) - * this extent record is for. This allows a file extent to point - * into the middle of an existing extent on disk, sharing it - * between two snapshots (useful if some bytes in the middle of the - * extent have changed - */ - __u64 offset; - /* - * the logical number of file blocks (no csums included). This - * always reflects the size uncompressed and without encoding. 
- */ - __u64 num_bytes; - -} __attribute__ ((__packed__)); - -struct btrfs_csum_item { - __u8 csum; -} __attribute__ ((__packed__)); - -/* different types of block groups (and chunks) */ -#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) -#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) -#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2) -#define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3) -#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4) -#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) -#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) -#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7) -#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8) -#define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \ - BTRFS_SPACE_INFO_GLOBAL_RSV) - -enum btrfs_raid_types { - BTRFS_RAID_RAID10, - BTRFS_RAID_RAID1, - BTRFS_RAID_DUP, - BTRFS_RAID_RAID0, - BTRFS_RAID_SINGLE, - BTRFS_RAID_RAID5, - BTRFS_RAID_RAID6, - BTRFS_NR_RAID_TYPES -}; - -#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \ - BTRFS_BLOCK_GROUP_SYSTEM | \ - BTRFS_BLOCK_GROUP_METADATA) - -#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \ - BTRFS_BLOCK_GROUP_RAID1 | \ - BTRFS_BLOCK_GROUP_RAID5 | \ - BTRFS_BLOCK_GROUP_RAID6 | \ - BTRFS_BLOCK_GROUP_DUP | \ - BTRFS_BLOCK_GROUP_RAID10) -#define BTRFS_BLOCK_GROUP_RAID56_MASK (BTRFS_BLOCK_GROUP_RAID5 | \ - BTRFS_BLOCK_GROUP_RAID6) - -/* - * We need a bit for restriper to be able to tell when chunks of type - * SINGLE are available. This "extended" profile format is used in - * fs_info->avail_*_alloc_bits (in-memory) and balance item fields - * (on-disk). The corresponding on-disk bit in chunk.type is reserved - * to avoid remappings between two formats in future. - */ -#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48) - -/* - * A fake block group type that is used to communicate global block reserve - * size to userspace via the SPACE_INFO ioctl. 
- */ -#define BTRFS_SPACE_INFO_GLOBAL_RSV (1ULL << 49) - -#define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \ - BTRFS_AVAIL_ALLOC_BIT_SINGLE) - -#endif /* __BTRFS_BTRFS_TREE_H__ */ diff --git a/fs/btrfs/chunk-map.c b/fs/btrfs/chunk-map.c deleted file mode 100644 index 2e5be65067..0000000000 --- a/fs/btrfs/chunk-map.c +++ /dev/null @@ -1,178 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * BTRFS filesystem implementation for U-Boot - * - * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz - */ - -#include "btrfs.h" -#include -#include - -struct chunk_map_item { - struct rb_node node; - u64 logical; - u64 length; - u64 physical; -}; - -static int add_chunk_mapping(struct btrfs_key *key, struct btrfs_chunk *chunk) -{ - struct btrfs_stripe *stripe; - u64 block_profile = chunk->type & BTRFS_BLOCK_GROUP_PROFILE_MASK; - struct rb_node **new = &(btrfs_info.chunks_root.rb_node), *prnt = NULL; - struct chunk_map_item *map_item; - - if (block_profile && block_profile != BTRFS_BLOCK_GROUP_DUP) { - printf("%s: unsupported chunk profile %llu\n", __func__, - block_profile); - return -1; - } else if (!chunk->length) { - printf("%s: zero length chunk\n", __func__); - return -1; - } - - stripe = &chunk->stripe; - btrfs_stripe_to_cpu(stripe); - - while (*new) { - struct chunk_map_item *this; - - this = rb_entry(*new, struct chunk_map_item, node); - - prnt = *new; - if (key->offset < this->logical) { - new = &((*new)->rb_left); - } else if (key->offset > this->logical) { - new = &((*new)->rb_right); - } else { - debug("%s: Logical address %llu already in map!\n", - __func__, key->offset); - return 0; - } - } - - map_item = malloc(sizeof(struct chunk_map_item)); - if (!map_item) - return -1; - - map_item->logical = key->offset; - map_item->length = chunk->length; - map_item->physical = le64_to_cpu(chunk->stripe.offset); - rb_link_node(&map_item->node, prnt, new); - rb_insert_color(&map_item->node, &btrfs_info.chunks_root); - - debug("%s: Mapping %llu to %llu\n", __func__, map_item->logical, - map_item->physical); - - return 0; -} - -u64 btrfs_map_logical_to_physical(u64 logical) -{ - struct rb_node *node = btrfs_info.chunks_root.rb_node; - - while (node) { - struct chunk_map_item *item; - - item = rb_entry(node, struct chunk_map_item, node); - - if (item->logical > logical) - node = node->rb_left; - else if (logical >= item->logical + item->length) - node = node->rb_right; - else - return item->physical + logical - item->logical; - } - - printf("%s: Cannot map logical address %llu to physical\n", __func__, - logical); - - return -1ULL; -} - -void btrfs_chunk_map_exit(void) -{ - struct rb_node *now, *next; - struct chunk_map_item *item; - - for (now = rb_first_postorder(&btrfs_info.chunks_root); now; now = next) - { - item = rb_entry(now, struct chunk_map_item, node); - next = rb_next_postorder(now); - free(item); - } -} - -int btrfs_chunk_map_init(void) -{ - u8 sys_chunk_array_copy[sizeof(btrfs_info.sb.sys_chunk_array)]; - u8 * const start = sys_chunk_array_copy; - u8 * const end = start + btrfs_info.sb.sys_chunk_array_size; - u8 *cur; - struct btrfs_key *key; - struct btrfs_chunk *chunk; - - btrfs_info.chunks_root = RB_ROOT; - - memcpy(sys_chunk_array_copy, btrfs_info.sb.sys_chunk_array, - sizeof(sys_chunk_array_copy)); - - for (cur = start; cur < end;) { - key = (struct btrfs_key *) cur; - cur += sizeof(struct btrfs_key); - chunk = (struct btrfs_chunk *) cur; - - btrfs_key_to_cpu(key); - btrfs_chunk_to_cpu(chunk); - - if (key->type != BTRFS_CHUNK_ITEM_KEY) { - printf("%s: invalid key type 
%u\n", __func__, - key->type); - return -1; - } - - if (add_chunk_mapping(key, chunk)) - return -1; - - cur += sizeof(struct btrfs_chunk); - cur += sizeof(struct btrfs_stripe) * (chunk->num_stripes - 1); - } - - return 0; -} - -int btrfs_read_chunk_tree(void) -{ - struct btrfs_path path; - struct btrfs_key key, *found_key; - struct btrfs_chunk *chunk; - int res = 0; - - key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; - key.type = BTRFS_CHUNK_ITEM_KEY; - key.offset = 0; - - if (btrfs_search_tree(&btrfs_info.chunk_root, &key, &path)) - return -1; - - do { - found_key = btrfs_path_leaf_key(&path); - if (btrfs_comp_keys_type(&key, found_key)) - continue; - - chunk = btrfs_path_item_ptr(&path, struct btrfs_chunk); - btrfs_chunk_to_cpu(chunk); - if (add_chunk_mapping(found_key, chunk)) { - res = -1; - break; - } - } while (!(res = btrfs_next_slot(&path))); - - btrfs_free_path(&path); - - if (res < 0) - return -1; - - return 0; -} diff --git a/fs/btrfs/common/rbtree-utils.c b/fs/btrfs/common/rbtree-utils.c new file mode 100644 index 0000000000..7a7d7e84e6 --- /dev/null +++ b/fs/btrfs/common/rbtree-utils.c @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014 Facebook. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include "rbtree-utils.h" + +int rb_insert(struct rb_root *root, struct rb_node *node, + rb_compare_nodes comp) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + int ret; + + while(*p) { + parent = *p; + + ret = comp(parent, node); + if (ret < 0) + p = &(*p)->rb_left; + else if (ret > 0) + p = &(*p)->rb_right; + else + return -EEXIST; + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return 0; +} + +struct rb_node *rb_search(struct rb_root *root, void *key, rb_compare_keys comp, + struct rb_node **next_ret) +{ + struct rb_node *n = root->rb_node; + struct rb_node *parent = NULL; + int ret = 0; + + while(n) { + parent = n; + + ret = comp(n, key); + if (ret < 0) + n = n->rb_left; + else if (ret > 0) + n = n->rb_right; + else + return n; + } + + if (!next_ret) + return NULL; + + if (parent && ret > 0) + parent = rb_next(parent); + + *next_ret = parent; + return NULL; +} + +void rb_free_nodes(struct rb_root *root, rb_free_node free_node) +{ + struct rb_node *node; + + while ((node = rb_first(root))) { + rb_erase(node, root); + free_node(node); + } +} diff --git a/fs/btrfs/common/rbtree-utils.h b/fs/btrfs/common/rbtree-utils.h new file mode 100644 index 0000000000..d977cfd955 --- /dev/null +++ b/fs/btrfs/common/rbtree-utils.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2014 Facebook. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. 
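rb_insert() above walks left on a negative comparator return, so a comparator must return negative when the node already in the tree sorts after the new one; rb_search() uses the mirrored convention for keys. A self-contained sketch consistent with that code (map_entry, map_cmp_nodes and map_cmp_key are hypothetical):

        struct map_entry {
                struct rb_node node;
                u64 logical;
        };

        /* Negative when the existing node @a sorts after the new node @b. */
        static int map_cmp_nodes(struct rb_node *a, struct rb_node *b)
        {
                struct map_entry *ea = rb_entry(a, struct map_entry, node);
                struct map_entry *eb = rb_entry(b, struct map_entry, node);

                if (ea->logical > eb->logical)
                        return -1;
                if (ea->logical < eb->logical)
                        return 1;
                return 0;
        }

        static int map_cmp_key(struct rb_node *n, void *key)
        {
                struct map_entry *e = rb_entry(n, struct map_entry, node);
                u64 logical = *(u64 *)key;

                if (e->logical > logical)
                        return -1;
                if (e->logical < logical)
                        return 1;
                return 0;
        }

        /*
         * struct rb_root root = RB_ROOT;
         * rb_insert(&root, &entry->node, map_cmp_nodes);   - -EEXIST on duplicate
         * found = rb_search(&root, &logical, map_cmp_key, NULL);
         */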
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __RBTREE_UTILS__
+#define __RBTREE_UTILS__
+
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The common insert/search/free functions */
+typedef int (*rb_compare_nodes)(struct rb_node *node1, struct rb_node *node2);
+typedef int (*rb_compare_keys)(struct rb_node *node, void *key);
+typedef void (*rb_free_node)(struct rb_node *node);
+
+int rb_insert(struct rb_root *root, struct rb_node *node,
+	      rb_compare_nodes comp);
+/*
+ * In some cases, we need to return the next node if we don't find the node we
+ * specify. At this time, we can use next_ret.
+ */
+struct rb_node *rb_search(struct rb_root *root, void *key, rb_compare_keys comp,
+			  struct rb_node **next_ret);
+void rb_free_nodes(struct rb_root *root, rb_free_node free_node);
+
+#define FREE_RB_BASED_TREE(name, free_func) \
+static void free_##name##_tree(struct rb_root *root) \
+{ \
+	rb_free_nodes(root, free_func); \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h
new file mode 100644
index 0000000000..9cf8a10c76
--- /dev/null
+++ b/fs/btrfs/compat.h
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#ifndef __BTRFS_COMPAT_H__
+#define __BTRFS_COMPAT_H__
+
+#include
+#include
+#include
+
+/* Provide a compatibility layer to make code syncing easier */
+
+/* A simple wrapper for error() as used in btrfs-progs */
+#define error(fmt, ...) pr_err("BTRFS: " fmt "\n", ##__VA_ARGS__)
+
+#define ASSERT(c) assert(c)
+
+#define BTRFS_UUID_UNPARSED_SIZE 37
+
+/* Not defined in U-Boot, so define them here */
+#define XATTR_NAME_MAX 255
+#define PATH_MAX 4096
+
+/*
+ * Macros to generate set/get funcs for the struct fields
+ * assume there is a lefoo_to_cpu for every type, so lets make a simple
+ * one for u8:
+ */
+#define le8_to_cpu(v) (v)
+#define cpu_to_le8(v) (v)
+#define __le8 u8
+
+#define get_unaligned_le8(p) (*((u8 *)(p)))
+#define get_unaligned_8(p) (*((u8 *)(p)))
+#define put_unaligned_le8(val,p) ((*((u8 *)(p))) = (val))
+#define put_unaligned_8(val,p) ((*((u8 *)(p))) = (val))
+
+/*
+ * Read data from device specified by @desc and @part
+ *
+ * U-Boot equivalent of pread().
+ *
+ * Return the bytes of data read.
+ * Return <0 for error.
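+ *
+ * For example, on a 512-byte block device (log2blksz == 9) a read at
+ * offset 70000 maps to sector = 70000 >> 9 = 136 and byte_offset =
+ * 70000 % 512 = 368; fs_devread() then handles the unaligned head and tail.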
+ */ +static inline int __btrfs_devread(struct blk_desc *desc, + struct disk_partition *part, + void *buf, size_t size, u64 offset) +{ + lbaint_t sector; + int byte_offset; + int ret; + + sector = offset >> desc->log2blksz; + byte_offset = offset % desc->blksz; + + /* fs_devread() return 0 for error, >0 for success */ + ret = fs_devread(desc, part, sector, byte_offset, size, buf); + if (!ret) + return -EIO; + return size; +} + +static inline void uuid_unparse(const u8 *uuid, char *out) +{ + return uuid_bin_to_str((unsigned char *)uuid, out, 0); +} + +static inline int is_power_of_2(unsigned long n) +{ + return (n != 0 && ((n & (n - 1)) == 0)); +} + +#endif diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 59e4a94cb2..23efefa199 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -115,7 +115,7 @@ static u32 decompress_zlib(const u8 *_cbuf, u32 clen, u8 *dbuf, u32 dlen) while (stream.total_in < clen) { stream.next_in = cbuf + stream.total_in; stream.avail_in = min((u32) (clen - stream.total_in), - (u32) btrfs_info.sb.sectorsize); + current_fs_info->sectorsize); ret = inflate(&stream, Z_NO_FLUSH); if (ret != Z_OK) diff --git a/fs/btrfs/crypto/hash.c b/fs/btrfs/crypto/hash.c new file mode 100644 index 0000000000..fb51f6386c --- /dev/null +++ b/fs/btrfs/crypto/hash.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include +#include +#include +#include +#include + +static u32 btrfs_crc32c_table[256]; + +void btrfs_hash_init(void) +{ + static int inited = 0; + + if (!inited) { + crc32c_init(btrfs_crc32c_table, 0x82F63B78); + inited = 1; + } +} + +int hash_sha256(const u8 *buf, size_t length, u8 *out) +{ + sha256_context ctx; + + sha256_starts(&ctx); + sha256_update(&ctx, buf, length); + sha256_finish(&ctx, out); + + return 0; +} + +int hash_xxhash(const u8 *buf, size_t length, u8 *out) +{ + u64 hash; + + hash = xxh64(buf, length, 0); + put_unaligned_le64(hash, out); + + return 0; +} + +int hash_crc32c(const u8 *buf, size_t length, u8 *out) +{ + u32 crc; + + crc = crc32c_cal((u32)~0, (char *)buf, length, btrfs_crc32c_table); + put_unaligned_le32(~crc, out); + + return 0; +} + +u32 crc32c(u32 seed, const void * data, size_t len) +{ + return crc32c_cal(seed, data, len, btrfs_crc32c_table); +} diff --git a/fs/btrfs/crypto/hash.h b/fs/btrfs/crypto/hash.h new file mode 100644 index 0000000000..d1ba1fa374 --- /dev/null +++ b/fs/btrfs/crypto/hash.h @@ -0,0 +1,17 @@ +#ifndef CRYPTO_HASH_H +#define CRYPTO_HASH_H + +#include + +#define CRYPTO_HASH_SIZE_MAX 32 + +void btrfs_hash_init(void); +int hash_crc32c(const u8 *buf, size_t length, u8 *out); +int hash_xxhash(const u8 *buf, size_t length, u8 *out); +int hash_sha256(const u8 *buf, size_t length, u8 *out); + +u32 crc32c(u32 seed, const void * data, size_t len); + +/* Blake2B is not yet supported due to lack of library */ + +#endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 28f98d43ad..5ffced9160 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5,287 +5,738 @@ * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz */ -#include "btrfs.h" +#include #include #include #include +#include "btrfs.h" +#include "disk-io.h" -int btrfs_comp_keys(struct btrfs_key *a, struct btrfs_key *b) +static const struct btrfs_csum { + u16 size; + const char name[14]; +} btrfs_csums[] = { + [BTRFS_CSUM_TYPE_CRC32] = { 4, "crc32c" }, + [BTRFS_CSUM_TYPE_XXHASH] = { 8, "xxhash64" }, + [BTRFS_CSUM_TYPE_SHA256] = { 32, "sha256" }, + [BTRFS_CSUM_TYPE_BLAKE2] = { 32, "blake2" }, +}; + +u16 btrfs_super_csum_size(const struct 
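The crc32c() helper above is seeded the same way the deleted btrfs.h computed directory name hashes. A sketch of the equivalent name hash on top of the new crypto/hash.c (btrfs_hash_init() must have run first so the crc table is filled):

        #include "crypto/hash.h"

        /* Sketch: btrfs-style directory name hash, as the old btrfs_name_hash(). */
        static u64 name_hash(const char *name, int len)
        {
                return crc32c((u32)~1, name, len);
        }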
btrfs_super_block *sb) { - if (a->objectid > b->objectid) - return 1; - if (a->objectid < b->objectid) - return -1; - if (a->type > b->type) - return 1; - if (a->type < b->type) - return -1; - if (a->offset > b->offset) - return 1; - if (a->offset < b->offset) - return -1; - return 0; + const u16 csum_type = btrfs_super_csum_type(sb); + + return btrfs_csums[csum_type].size; } -int btrfs_comp_keys_type(struct btrfs_key *a, struct btrfs_key *b) +const char *btrfs_super_csum_name(u16 csum_type) { - if (a->objectid > b->objectid) - return 1; - if (a->objectid < b->objectid) - return -1; - if (a->type > b->type) - return 1; - if (a->type < b->type) - return -1; - return 0; + return btrfs_csums[csum_type].name; } -static int generic_bin_search(void *addr, int item_size, struct btrfs_key *key, - int max, int *slot) +size_t btrfs_super_num_csums(void) { - int low = 0, high = max, mid, ret; - struct btrfs_key *tmp; - - while (low < high) { - mid = (low + high) / 2; - - tmp = (struct btrfs_key *) ((u8 *) addr + mid*item_size); - ret = btrfs_comp_keys(tmp, key); - - if (ret < 0) { - low = mid + 1; - } else if (ret > 0) { - high = mid; - } else { - *slot = mid; - return 0; - } - } - - *slot = low; - return 1; + return ARRAY_SIZE(btrfs_csums); } -int btrfs_bin_search(union btrfs_tree_node *p, struct btrfs_key *key, - int *slot) +u16 btrfs_csum_type_size(u16 csum_type) { - void *addr; - unsigned long size; - - if (p->header.level) { - addr = p->node.ptrs; - size = sizeof(struct btrfs_key_ptr); - } else { - addr = p->leaf.items; - size = sizeof(struct btrfs_item); - } - - return generic_bin_search(addr, size, key, p->header.nritems, slot); + return btrfs_csums[csum_type].size; } -static void clear_path(struct btrfs_path *p) +struct btrfs_path *btrfs_alloc_path(void) { - int i; - - for (i = 0; i < BTRFS_MAX_LEVEL; ++i) { - p->nodes[i] = NULL; - p->slots[i] = 0; - } + struct btrfs_path *path; + path = kzalloc(sizeof(struct btrfs_path), GFP_NOFS); + return path; } void btrfs_free_path(struct btrfs_path *p) +{ + if (!p) + return; + btrfs_release_path(p); + kfree(p); +} + +void btrfs_release_path(struct btrfs_path *p) { int i; - - for (i = 0; i < BTRFS_MAX_LEVEL; ++i) { - if (p->nodes[i]) - free(p->nodes[i]); + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { + if (!p->nodes[i]) + continue; + free_extent_buffer(p->nodes[i]); } - - clear_path(p); + memset(p, 0, sizeof(*p)); } -static int read_tree_node(u64 physical, union btrfs_tree_node **buf) +int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2) { - ALLOC_CACHE_ALIGN_BUFFER(struct btrfs_header, hdr, - sizeof(struct btrfs_header)); - unsigned long size, offset = sizeof(*hdr); - union btrfs_tree_node *res; - u32 i; - - if (!btrfs_devread(physical, sizeof(*hdr), hdr)) + if (k1->objectid > k2->objectid) + return 1; + if (k1->objectid < k2->objectid) return -1; - - btrfs_header_to_cpu(hdr); - - if (hdr->level) - size = sizeof(struct btrfs_node) - + hdr->nritems * sizeof(struct btrfs_key_ptr); - else - size = btrfs_info.sb.nodesize; - - res = malloc_cache_aligned(size); - if (!res) { - debug("%s: malloc failed\n", __func__); + if (k1->type > k2->type) + return 1; + if (k1->type < k2->type) return -1; - } - - if (!btrfs_devread(physical + offset, size - offset, - ((u8 *) res) + offset)) { - free(res); + if (k1->offset > k2->offset) + return 1; + if (k1->offset < k2->offset) return -1; - } - - memcpy(&res->header, hdr, sizeof(*hdr)); - if (hdr->level) - for (i = 0; i < hdr->nritems; ++i) - btrfs_key_ptr_to_cpu(&res->node.ptrs[i]); - else - for (i = 
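The btrfs_csums[] table above drives both the size and name helpers, so reporting the active checksum algorithm of a filesystem is a two-call affair (sketch; sb is assumed to be a validated struct btrfs_super_block pointer):

        u16 type = btrfs_super_csum_type(sb);

        printf("csum type %u (%s), %u bytes\n", type,
               btrfs_super_csum_name(type), btrfs_super_csum_size(sb));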
0; i < hdr->nritems; ++i) - btrfs_item_to_cpu(&res->leaf.items[i]); - - *buf = res; - return 0; } -int btrfs_search_tree(const struct btrfs_root *root, struct btrfs_key *key, - struct btrfs_path *p) +static int btrfs_comp_keys(struct btrfs_disk_key *disk, + const struct btrfs_key *k2) { - u8 lvl, prev_lvl; - int i, slot, ret; - u64 logical, physical; - union btrfs_tree_node *buf; + struct btrfs_key k1; - clear_path(p); + btrfs_disk_key_to_cpu(&k1, disk); + return btrfs_comp_cpu_keys(&k1, k2); +} - logical = root->bytenr; +enum btrfs_tree_block_status +btrfs_check_node(struct btrfs_fs_info *fs_info, + struct btrfs_disk_key *parent_key, struct extent_buffer *buf) +{ + int i; + struct btrfs_key cpukey; + struct btrfs_disk_key key; + u32 nritems = btrfs_header_nritems(buf); + enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS; - for (i = 0; i < BTRFS_MAX_LEVEL; ++i) { - physical = btrfs_map_logical_to_physical(logical); - if (physical == -1ULL) - goto err; + if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(fs_info)) + goto fail; - if (read_tree_node(physical, &buf)) - goto err; + ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY; + if (parent_key && parent_key->type) { + btrfs_node_key(buf, &key, 0); + if (memcmp(parent_key, &key, sizeof(key))) + goto fail; + } + ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER; + for (i = 0; nritems > 1 && i < nritems - 2; i++) { + btrfs_node_key(buf, &key, i); + btrfs_node_key_to_cpu(buf, &cpukey, i + 1); + if (btrfs_comp_keys(&key, &cpukey) >= 0) + goto fail; + } + return BTRFS_TREE_BLOCK_CLEAN; +fail: + return ret; +} - lvl = buf->header.level; - if (i && prev_lvl != lvl + 1) { - printf("%s: invalid level in header at %llu\n", - __func__, logical); - goto err; +enum btrfs_tree_block_status +btrfs_check_leaf(struct btrfs_fs_info *fs_info, + struct btrfs_disk_key *parent_key, struct extent_buffer *buf) +{ + int i; + struct btrfs_key cpukey; + struct btrfs_disk_key key; + u32 nritems = btrfs_header_nritems(buf); + enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS; + + if (nritems * sizeof(struct btrfs_item) > buf->len) { + fprintf(stderr, "invalid number of items %llu\n", + (unsigned long long)buf->start); + goto fail; + } + + if (btrfs_header_level(buf) != 0) { + ret = BTRFS_TREE_BLOCK_INVALID_LEVEL; + fprintf(stderr, "leaf is not a leaf %llu\n", + (unsigned long long)btrfs_header_bytenr(buf)); + goto fail; + } + if (btrfs_leaf_free_space(buf) < 0) { + ret = BTRFS_TREE_BLOCK_INVALID_FREE_SPACE; + fprintf(stderr, "leaf free space incorrect %llu %d\n", + (unsigned long long)btrfs_header_bytenr(buf), + btrfs_leaf_free_space(buf)); + goto fail; + } + + if (nritems == 0) + return BTRFS_TREE_BLOCK_CLEAN; + + btrfs_item_key(buf, &key, 0); + if (parent_key && parent_key->type && + memcmp(parent_key, &key, sizeof(key))) { + ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY; + fprintf(stderr, "leaf parent key incorrect %llu\n", + (unsigned long long)btrfs_header_bytenr(buf)); + goto fail; + } + for (i = 0; nritems > 1 && i < nritems - 1; i++) { + btrfs_item_key(buf, &key, i); + btrfs_item_key_to_cpu(buf, &cpukey, i + 1); + if (btrfs_comp_keys(&key, &cpukey) >= 0) { + ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER; + fprintf(stderr, "bad key ordering %d %d\n", i, i+1); + goto fail; } - prev_lvl = lvl; + if (btrfs_item_offset_nr(buf, i) != + btrfs_item_end_nr(buf, i + 1)) { + ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS; + fprintf(stderr, "incorrect offsets %u %u\n", + btrfs_item_offset_nr(buf, i), + btrfs_item_end_nr(buf, i + 1)); + goto fail; + } + if (i == 0 && 
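The offset checks around this point encode the leaf layout: item headers grow forward from the leaf header while item data grows backward from the end of the leaf, so item 0's data must end exactly at BTRFS_LEAF_DATA_SIZE() and each item's data must start where the next item's data ends. For example, with BTRFS_LEAF_DATA_SIZE() of 16 KiB and item 0 carrying 160 bytes, its data occupies the last 160 bytes of the leaf: btrfs_item_end_nr(leaf, 0) == 16 KiB and btrfs_item_offset_nr(leaf, 0) == 16 KiB - 160, which must equal btrfs_item_end_nr(leaf, 1).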
btrfs_item_end_nr(buf, i) != + BTRFS_LEAF_DATA_SIZE(fs_info)) { + ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS; + fprintf(stderr, "bad item end %u wanted %u\n", + btrfs_item_end_nr(buf, i), + (unsigned)BTRFS_LEAF_DATA_SIZE(fs_info)); + goto fail; + } + } + + for (i = 0; i < nritems; i++) { + if (btrfs_item_end_nr(buf, i) > + BTRFS_LEAF_DATA_SIZE(fs_info)) { + btrfs_item_key(buf, &key, 0); + ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS; + fprintf(stderr, "slot end outside of leaf %llu > %llu\n", + (unsigned long long)btrfs_item_end_nr(buf, i), + (unsigned long long)BTRFS_LEAF_DATA_SIZE( + fs_info)); + goto fail; + } + } + + return BTRFS_TREE_BLOCK_CLEAN; +fail: + return ret; +} + +static int noinline check_block(struct btrfs_fs_info *fs_info, + struct btrfs_path *path, int level) +{ + struct btrfs_disk_key key; + struct btrfs_disk_key *key_ptr = NULL; + struct extent_buffer *parent; + enum btrfs_tree_block_status ret; + + if (path->nodes[level + 1]) { + parent = path->nodes[level + 1]; + btrfs_node_key(parent, &key, path->slots[level + 1]); + key_ptr = &key; + } + if (level == 0) + ret = btrfs_check_leaf(fs_info, key_ptr, path->nodes[0]); + else + ret = btrfs_check_node(fs_info, key_ptr, path->nodes[level]); + if (ret == BTRFS_TREE_BLOCK_CLEAN) + return 0; + return -EIO; +} + +/* + * search for key in the extent_buffer. The items start at offset p, + * and they are item_size apart. There are 'max' items in p. + * + * the slot in the array is returned via slot, and it points to + * the place where you would insert key if it is not found in + * the array. + * + * slot may point to max if the key is bigger than all of the keys + */ +static int generic_bin_search(struct extent_buffer *eb, unsigned long p, + int item_size, const struct btrfs_key *key, + int max, int *slot) +{ + int low = 0; + int high = max; + int mid; + int ret; + unsigned long offset; + struct btrfs_disk_key *tmp; + + while(low < high) { + mid = (low + high) / 2; + offset = p + mid * item_size; + + tmp = (struct btrfs_disk_key *)(eb->data + offset); + ret = btrfs_comp_keys(tmp, key); - ret = btrfs_bin_search(buf, key, &slot); if (ret < 0) - goto err; - if (ret && slot > 0 && lvl) - slot -= 1; - - p->slots[lvl] = slot; - p->nodes[lvl] = buf; - - if (lvl) { - logical = buf->node.ptrs[slot].blockptr; - } else { - /* - * The path might be invalid if: - * cur leaf max < searched value < next leaf min - * - * Jump to the next valid element if it exists. - */ - if (slot >= buf->header.nritems) - if (btrfs_next_slot(p) < 0) - goto err; - break; + low = mid + 1; + else if (ret > 0) + high = mid; + else { + *slot = mid; + return 0; } } - - return 0; -err: - btrfs_free_path(p); - return -1; + *slot = low; + return 1; } -static int jump_leaf(struct btrfs_path *path, int dir) +/* + * simple bin_search frontend that does the right thing for + * leaves vs nodes + */ +int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key, + int *slot) { - struct btrfs_path p; - u32 slot; - int level = 1, from_level, i; + if (btrfs_header_level(eb) == 0) + return generic_bin_search(eb, + offsetof(struct btrfs_leaf, items), + sizeof(struct btrfs_item), + key, btrfs_header_nritems(eb), + slot); + else + return generic_bin_search(eb, + offsetof(struct btrfs_node, ptrs), + sizeof(struct btrfs_key_ptr), + key, btrfs_header_nritems(eb), + slot); +} - dir = dir >= 0 ? 
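generic_bin_search() below returns 0 with *slot at the match, or 1 with *slot at the insertion point (possibly equal to nritems). The same contract on a plain array, as a self-contained sketch:

        static int bin_search_u64(const u64 *keys, int nr, u64 key, int *slot)
        {
                int low = 0, high = nr;

                while (low < high) {
                        int mid = (low + high) / 2;

                        if (keys[mid] < key)
                                low = mid + 1;
                        else if (keys[mid] > key)
                                high = mid;
                        else {
                                *slot = mid;
                                return 0;       /* exact match */
                        }
                }
                *slot = low;    /* insertion point; may equal nr */
                return 1;
        }

Searching {2, 5, 9} for 6 returns 1 with *slot == 2; searching for 9 returns 0 with *slot == 2. The "if (ret && slot > 0) slot -= 1" step in btrfs_search_slot() then backs up one slot on interior nodes so the walk descends into the child whose key range covers the search key.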
1 : -1; +struct extent_buffer *read_node_slot(struct btrfs_fs_info *fs_info, + struct extent_buffer *parent, int slot) +{ + struct extent_buffer *ret; + int level = btrfs_header_level(parent); - p = *path; + if (slot < 0) + return NULL; + if (slot >= btrfs_header_nritems(parent)) + return NULL; - while (level < BTRFS_MAX_LEVEL) { - if (!p.nodes[level]) + if (level == 0) + return NULL; + + ret = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot), + btrfs_node_ptr_generation(parent, slot)); + if (!extent_buffer_uptodate(ret)) + return ERR_PTR(-EIO); + + if (btrfs_header_level(ret) != level - 1) { + error("child eb corrupted: parent bytenr=%llu item=%d parent level=%d child level=%d", + btrfs_header_bytenr(parent), slot, + btrfs_header_level(parent), btrfs_header_level(ret)); + free_extent_buffer(ret); + return ERR_PTR(-EIO); + } + return ret; +} + +int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path, + u64 iobjectid, u64 ioff, u8 key_type, + struct btrfs_key *found_key) +{ + int ret; + struct btrfs_key key; + struct extent_buffer *eb; + struct btrfs_path *path; + + key.type = key_type; + key.objectid = iobjectid; + key.offset = ioff; + + if (found_path == NULL) { + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + } else + path = found_path; + + ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); + if ((ret < 0) || (found_key == NULL)) + goto out; + + eb = path->nodes[0]; + if (ret && path->slots[0] >= btrfs_header_nritems(eb)) { + ret = btrfs_next_leaf(fs_root, path); + if (ret) + goto out; + eb = path->nodes[0]; + } + + btrfs_item_key_to_cpu(eb, found_key, path->slots[0]); + if (found_key->type != key.type || + found_key->objectid != key.objectid) { + ret = 1; + goto out; + } + +out: + if (path != found_path) + btrfs_free_path(path); + return ret; +} + +/* + * look for key in the tree. path is filled in with nodes along the way + * if key is found, we return zero and you can find the item in the leaf + * level of the path (level 0) + * + * If the key isn't found, the path points to the slot where it should + * be inserted, and 1 is returned. If there are other errors during the + * search a negative error number is returned. + * + * if ins_len > 0, nodes and leaves will be split as we walk down the + * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if + * possible) + * + * NOTE: This version has no COW ability, thus we expect trans == NULL, + * ins_len == 0 and cow == 0. 
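+ *
+ * A minimal read-only usage sketch (illustrative only; "root" and "ino"
+ * here are assumptions, not part of this patch):
+ *
+ *	struct btrfs_path path;
+ *	struct btrfs_key key = { .objectid = ino,
+ *				 .type = BTRFS_INODE_ITEM_KEY,
+ *				 .offset = 0 };
+ *	int ret;
+ *
+ *	btrfs_init_path(&path);
+ *	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+ *	if (ret == 0)
+ *		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+ *	btrfs_release_path(&path);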
+ */ +int btrfs_search_slot(struct btrfs_trans_handle *trans, + struct btrfs_root *root, const struct btrfs_key *key, + struct btrfs_path *p, int ins_len, int cow) +{ + struct extent_buffer *b; + int slot; + int ret; + int level; + struct btrfs_fs_info *fs_info = root->fs_info; + u8 lowest_level = 0; + + assert(trans == NULL && ins_len == 0 && cow == 0); + lowest_level = p->lowest_level; + WARN_ON(lowest_level && ins_len > 0); + WARN_ON(p->nodes[0] != NULL); + + b = root->node; + extent_buffer_get(b); + while (b) { + level = btrfs_header_level(b); + /* + if (cow) { + int wret; + wret = btrfs_cow_block(trans, root, b, + p->nodes[level + 1], + p->slots[level + 1], + &b); + if (wret) { + free_extent_buffer(b); + return wret; + } + } + */ + BUG_ON(!cow && ins_len); + if (level != btrfs_header_level(b)) + WARN_ON(1); + level = btrfs_header_level(b); + p->nodes[level] = b; + ret = check_block(fs_info, p, level); + if (ret) + return -1; + ret = btrfs_bin_search(b, key, &slot); + if (level != 0) { + if (ret && slot > 0) + slot -= 1; + p->slots[level] = slot; + /* + if ((p->search_for_split || ins_len > 0) && + btrfs_header_nritems(b) >= + BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) { + int sret = split_node(trans, root, p, level); + BUG_ON(sret > 0); + if (sret) + return sret; + b = p->nodes[level]; + slot = p->slots[level]; + } else if (ins_len < 0) { + int sret = balance_level(trans, root, p, + level); + if (sret) + return sret; + b = p->nodes[level]; + if (!b) { + btrfs_release_path(p); + goto again; + } + slot = p->slots[level]; + BUG_ON(btrfs_header_nritems(b) == 1); + } + */ + /* this is only true while dropping a snapshot */ + if (level == lowest_level) + break; + + b = read_node_slot(fs_info, b, slot); + if (!extent_buffer_uptodate(b)) + return -EIO; + } else { + p->slots[level] = slot; + /* + if (ins_len > 0 && + ins_len > btrfs_leaf_free_space(b)) { + int sret = split_leaf(trans, root, key, + p, ins_len, ret == 0); + BUG_ON(sret > 0); + if (sret) + return sret; + } + */ + return ret; + } + } + return 1; +} + +/* + * Helper to use instead of search slot if no exact match is needed but + * instead the next or previous item should be returned. + * When find_higher is true, the next higher item is returned, the next lower + * otherwise. + * When return_any and find_higher are both true, and no higher item is found, + * return the next lower instead. + * When return_any is true and find_higher is false, and no lower item is found, + * return the next higher instead. + * It returns 0 if any item is found, 1 if none is found (tree empty), and + * < 0 on error + */ +int btrfs_search_slot_for_read(struct btrfs_root *root, + const struct btrfs_key *key, + struct btrfs_path *p, int find_higher, + int return_any) +{ + int ret; + struct extent_buffer *leaf; + +again: + ret = btrfs_search_slot(NULL, root, key, p, 0, 0); + if (ret <= 0) + return ret; + /* + * A return value of 1 means the path is at the position where the item + * should be inserted. Normally this is the next bigger item, but in + * case the previous item is the last in a leaf, path points to the + * first free slot in the previous leaf, i.e. at an invalid item. 
+ */ + leaf = p->nodes[0]; + + if (find_higher) { + if (p->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, p); + if (ret <= 0) + return ret; + if (!return_any) + return 1; + /* + * No higher item found, return the next lower instead + */ + return_any = 0; + find_higher = 0; + btrfs_release_path(p); + goto again; + } + } else { + if (p->slots[0] == 0) { + ret = btrfs_prev_leaf(root, p); + if (ret < 0) + return ret; + if (!ret) { + leaf = p->nodes[0]; + if (p->slots[0] == btrfs_header_nritems(leaf)) + p->slots[0]--; + return 0; + } + if (!return_any) + return 1; + /* + * No lower item found, return the next higher instead + */ + return_any = 0; + find_higher = 1; + btrfs_release_path(p); + goto again; + } else { + --p->slots[0]; + } + } + return 0; +} + +/* + * how many bytes are required to store the items in a leaf. start + * and nr indicate which items in the leaf to check. This totals up the + * space used both by the item structs and the item data + */ +static int leaf_space_used(struct extent_buffer *l, int start, int nr) +{ + int data_len; + int nritems = btrfs_header_nritems(l); + int end = min(nritems, start + nr) - 1; + + if (!nr) + return 0; + data_len = btrfs_item_end_nr(l, start); + data_len = data_len - btrfs_item_offset_nr(l, end); + data_len += sizeof(struct btrfs_item) * nr; + WARN_ON(data_len < 0); + return data_len; +} + +/* + * The space between the end of the leaf items and + * the start of the leaf data. IOW, how much room + * the leaf has left for both items and data + */ +int btrfs_leaf_free_space(struct extent_buffer *leaf) +{ + int nritems = btrfs_header_nritems(leaf); + u32 leaf_data_size; + int ret; + + BUG_ON(leaf->fs_info && leaf->fs_info->nodesize != leaf->len); + leaf_data_size = __BTRFS_LEAF_DATA_SIZE(leaf->len); + ret = leaf_data_size - leaf_space_used(leaf, 0 ,nritems); + if (ret < 0) { + printk("leaf free space ret %d, leaf data size %u, used %d nritems %d\n", + ret, leaf_data_size, leaf_space_used(leaf, 0, nritems), + nritems); + } + return ret; +} + +/* + * walk up the tree as far as required to find the previous leaf. + * returns 0 if it found something or 1 if there are no lesser leaves. + * returns < 0 on io errors. + */ +int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) +{ + int slot; + int level = 1; + struct extent_buffer *c; + struct extent_buffer *next = NULL; + struct btrfs_fs_info *fs_info = root->fs_info; + + while(level < BTRFS_MAX_LEVEL) { + if (!path->nodes[level]) return 1; - slot = p.slots[level]; - if ((dir > 0 && slot + dir >= p.nodes[level]->header.nritems) - || (dir < 0 && !slot)) + slot = path->slots[level]; + c = path->nodes[level]; + if (slot == 0) { level++; - else + if (level == BTRFS_MAX_LEVEL) + return 1; + continue; + } + slot--; + + next = read_node_slot(fs_info, c, slot); + if (!extent_buffer_uptodate(next)) { + if (IS_ERR(next)) + return PTR_ERR(next); + return -EIO; + } + break; + } + path->slots[level] = slot; + while(1) { + level--; + c = path->nodes[level]; + free_extent_buffer(c); + slot = btrfs_header_nritems(next); + if (slot != 0) + slot--; + path->nodes[level] = next; + path->slots[level] = slot; + if (!level) + break; + next = read_node_slot(fs_info, next, slot); + if (!extent_buffer_uptodate(next)) { + if (IS_ERR(next)) + return PTR_ERR(next); + return -EIO; + } + } + return 0; +} + +/* + * Walk up the tree as far as necessary to find the next sibling tree block. 
+ * More generic version of btrfs_next_leaf(), as it could find sibling nodes + * if @path->lowest_level is not 0. + * + * returns 0 if it found something or 1 if there are no greater leaves. + * returns < 0 on io errors. + */ +int btrfs_next_sibling_tree_block(struct btrfs_fs_info *fs_info, + struct btrfs_path *path) +{ + int slot; + int level = path->lowest_level + 1; + struct extent_buffer *c; + struct extent_buffer *next = NULL; + + BUG_ON(path->lowest_level + 1 >= BTRFS_MAX_LEVEL); + do { + if (!path->nodes[level]) + return 1; + + slot = path->slots[level] + 1; + c = path->nodes[level]; + if (slot >= btrfs_header_nritems(c)) { + level++; + if (level == BTRFS_MAX_LEVEL) + return 1; + continue; + } + + next = read_node_slot(fs_info, c, slot); + if (!extent_buffer_uptodate(next)) + return -EIO; + break; + } while (level < BTRFS_MAX_LEVEL); + path->slots[level] = slot; + while(1) { + level--; + c = path->nodes[level]; + free_extent_buffer(c); + path->nodes[level] = next; + path->slots[level] = 0; + if (level == path->lowest_level) + break; + next = read_node_slot(fs_info, next, 0); + if (!extent_buffer_uptodate(next)) + return -EIO; + } + return 0; +} + +int btrfs_previous_item(struct btrfs_root *root, + struct btrfs_path *path, u64 min_objectid, + int type) +{ + struct btrfs_key found_key; + struct extent_buffer *leaf; + u32 nritems; + int ret; + + while(1) { + if (path->slots[0] == 0) { + ret = btrfs_prev_leaf(root, path); + if (ret != 0) + return ret; + } else { + path->slots[0]--; + } + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (nritems == 0) + return 1; + if (path->slots[0] == nritems) + path->slots[0]--; + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid < min_objectid) + break; + if (found_key.type == type) + return 0; + if (found_key.objectid == min_objectid && + found_key.type < type) break; } - - if (level == BTRFS_MAX_LEVEL) - return 1; - - p.slots[level] = slot + dir; - level--; - from_level = level; - - while (level >= 0) { - u64 logical, physical; - - slot = p.slots[level + 1]; - logical = p.nodes[level + 1]->node.ptrs[slot].blockptr; - physical = btrfs_map_logical_to_physical(logical); - if (physical == -1ULL) - goto err; - - if (read_tree_node(physical, &p.nodes[level])) - goto err; - - if (dir > 0) - p.slots[level] = 0; - else - p.slots[level] = p.nodes[level]->header.nritems - 1; - level--; - } - - /* Free rewritten nodes in path */ - for (i = 0; i <= from_level; ++i) - free(path->nodes[i]); - - *path = p; - return 0; - -err: - /* Free rewritten nodes in p */ - for (i = level + 1; i <= from_level; ++i) - free(p.nodes[i]); - return -1; -} - -int btrfs_prev_slot(struct btrfs_path *p) -{ - if (!p->slots[0]) - return jump_leaf(p, -1); - - p->slots[0]--; - return 0; -} - -int btrfs_next_slot(struct btrfs_path *p) -{ - struct btrfs_leaf *leaf = &p->nodes[0]->leaf; - - if (p->slots[0] + 1 >= leaf->header.nritems) - return jump_leaf(p, 1); - - p->slots[0]++; - return 0; + return 1; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 65c152a52f..219c410b18 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -11,28 +11,23 @@ #include #include -#include "btrfs_tree.h" - -#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */ +#include +#include +#include +#include +#include "kernel-shared/btrfs_tree.h" +#include "crypto/hash.h" +#include "compat.h" +#include "extent-io.h" #define BTRFS_MAX_MIRRORS 3 -#define BTRFS_MAX_LEVEL 8 - -#define BTRFS_COMPAT_EXTENT_TREE_V0 - /* * the max metadata block 
size. This limit is somewhat artificial, * but the memmove costs go through the roof for larger blocks. */ #define BTRFS_MAX_METADATA_BLOCKSIZE 65536 -/* - * we can actually store much bigger names, but lets not confuse the rest - * of linux - */ -#define BTRFS_NAME_LEN 255 - /* * Theoretical limit is larger, but we keep this down to a sane * value. That should limit greatly the possibility of collisions on @@ -40,11 +35,32 @@ */ #define BTRFS_LINK_MAX 65535U -static const int btrfs_csum_sizes[] = { 4 }; - /* four bytes for CRC32 */ #define BTRFS_EMPTY_DIR_SIZE 0 +struct btrfs_mapping_tree { + struct cache_tree cache_tree; +}; + +static inline unsigned long btrfs_chunk_item_size(int num_stripes) +{ + BUG_ON(num_stripes == 0); + return sizeof(struct btrfs_chunk) + + sizeof(struct btrfs_stripe) * (num_stripes - 1); +} + +#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) +#define BTRFS_LEAF_DATA_SIZE(fs_info) \ + (__BTRFS_LEAF_DATA_SIZE(fs_info->nodesize)) + +struct btrfs_path { + struct extent_buffer *nodes[BTRFS_MAX_LEVEL]; + int slots[BTRFS_MAX_LEVEL]; + + /* keep some upper locks as we walk down */ + u8 lowest_level; +}; + /* ioprio of readahead is set to idle */ #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) @@ -52,6 +68,92 @@ static const int btrfs_csum_sizes[] = { 4 }; #define BTRFS_MAX_EXTENT_SIZE SZ_128M +enum btrfs_tree_block_status { + BTRFS_TREE_BLOCK_CLEAN, + BTRFS_TREE_BLOCK_INVALID_NRITEMS, + BTRFS_TREE_BLOCK_INVALID_PARENT_KEY, + BTRFS_TREE_BLOCK_BAD_KEY_ORDER, + BTRFS_TREE_BLOCK_INVALID_LEVEL, + BTRFS_TREE_BLOCK_INVALID_FREE_SPACE, + BTRFS_TREE_BLOCK_INVALID_OFFSETS, +}; + +struct btrfs_root { + struct extent_buffer *node; + struct btrfs_root_item root_item; + struct btrfs_key root_key; + struct btrfs_fs_info *fs_info; + u64 objectid; + u64 last_trans; + + int ref_cows; + int track_dirty; + + u32 type; + u64 last_inode_alloc; + + struct rb_node rb_node; +}; + +struct btrfs_trans_handle; +struct btrfs_device; +struct btrfs_fs_devices; +struct btrfs_fs_info { + u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; + u8 *new_chunk_tree_uuid; + struct btrfs_root *fs_root; + struct btrfs_root *tree_root; + struct btrfs_root *chunk_root; + struct btrfs_root *csum_root; + + struct rb_root fs_root_tree; + + struct extent_io_tree extent_cache; + + /* logical->physical extent mapping */ + struct btrfs_mapping_tree mapping_tree; + + u64 last_trans_committed; + + struct btrfs_super_block *super_copy; + + struct btrfs_fs_devices *fs_devices; + + /* Cached block sizes */ + u32 nodesize; + u32 sectorsize; + u32 stripesize; +}; + +static inline u32 BTRFS_MAX_ITEM_SIZE(const struct btrfs_fs_info *info) +{ + return BTRFS_LEAF_DATA_SIZE(info) - sizeof(struct btrfs_item); +} + +static inline u32 BTRFS_NODEPTRS_PER_BLOCK(const struct btrfs_fs_info *info) +{ + return BTRFS_LEAF_DATA_SIZE(info) / sizeof(struct btrfs_key_ptr); +} + +static inline u32 BTRFS_NODEPTRS_PER_EXTENT_BUFFER(const struct extent_buffer *eb) +{ + BUG_ON(eb->fs_info && eb->fs_info->nodesize != eb->len); + return __BTRFS_LEAF_DATA_SIZE(eb->len) / sizeof(struct btrfs_key_ptr); +} + +#define BTRFS_FILE_EXTENT_INLINE_DATA_START \ + (offsetof(struct btrfs_file_extent_item, disk_bytenr)) +static inline u32 BTRFS_MAX_INLINE_DATA_SIZE(const struct btrfs_fs_info *info) +{ + return BTRFS_MAX_ITEM_SIZE(info) - + BTRFS_FILE_EXTENT_INLINE_DATA_START; +} + +static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info) +{ + return BTRFS_MAX_ITEM_SIZE(info) - sizeof(struct btrfs_dir_item); +} + /* * File 
system states */ @@ -61,195 +163,1040 @@ static const int btrfs_csum_sizes[] = { 4 }; #define BTRFS_FS_STATE_DEV_REPLACING 3 #define BTRFS_FS_STATE_DUMMY_FS_INFO 4 -#define BTRFS_BACKREF_REV_MAX 256 -#define BTRFS_BACKREF_REV_SHIFT 56 -#define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \ - BTRFS_BACKREF_REV_SHIFT) +#define read_eb_member(eb, ptr, type, member, result) ( \ + read_extent_buffer(eb, (char *)(result), \ + ((unsigned long)(ptr)) + \ + offsetof(type, member), \ + sizeof(((type *)0)->member))) -#define BTRFS_OLD_BACKREF_REV 0 -#define BTRFS_MIXED_BACKREF_REV 1 +#define write_eb_member(eb, ptr, type, member, result) ( \ + write_extent_buffer(eb, (char *)(result), \ + ((unsigned long)(ptr)) + \ + offsetof(type, member), \ + sizeof(((type *)0)->member))) + +#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(const struct extent_buffer *eb) \ +{ \ + const struct btrfs_header *h = (struct btrfs_header *)eb->data; \ + return le##bits##_to_cpu(h->member); \ +} \ +static inline void btrfs_set_##name(struct extent_buffer *eb, \ + u##bits val) \ +{ \ + struct btrfs_header *h = (struct btrfs_header *)eb->data; \ + h->member = cpu_to_le##bits(val); \ +} + +#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(const struct extent_buffer *eb, \ + const type *s) \ +{ \ + unsigned long offset = (unsigned long)s; \ + const type *p = (type *) (eb->data + offset); \ + return get_unaligned_le##bits(&p->member); \ +} \ +static inline void btrfs_set_##name(struct extent_buffer *eb, \ + type *s, u##bits val) \ +{ \ + unsigned long offset = (unsigned long)s; \ + type *p = (type *) (eb->data + offset); \ + put_unaligned_le##bits(val, &p->member); \ +} + +#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(const type *s) \ +{ \ + return le##bits##_to_cpu(s->member); \ +} \ +static inline void btrfs_set_##name(type *s, u##bits val) \ +{ \ + s->member = cpu_to_le##bits(val); \ +} + +BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64); +BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64); +BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64); +BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32); +BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32); +BTRFS_SETGET_FUNCS(device_start_offset, struct btrfs_dev_item, + start_offset, 64); +BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32); +BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); +BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32); +BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8); +BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8); +BTRFS_SETGET_FUNCS(device_generation, struct btrfs_dev_item, generation, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item, + bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item, + io_align, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item, + io_width, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item, + sector_size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct 
btrfs_dev_item, devid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item, + dev_group, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item, + seek_speed, 8); +BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item, + bandwidth, 8); +BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item, + generation, 64); + +static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) +{ + return (char *)d + offsetof(struct btrfs_dev_item, uuid); +} + +static inline char *btrfs_device_fsid(struct btrfs_dev_item *d) +{ + return (char *)d + offsetof(struct btrfs_dev_item, fsid); +} + +BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64); +BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64); +BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); +BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32); +BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32); +BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32); +BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64); +BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); +BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16); +BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64); +BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64); + +static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s) +{ + return (char *)s + offsetof(struct btrfs_stripe, dev_uuid); +} + +BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk, + stripe_len, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk, + io_align, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk, + io_width, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk, + sector_size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk, + num_stripes, 16); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk, + sub_stripes, 16); +BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64); + +static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c, + int nr) +{ + unsigned long offset = (unsigned long)c; + offset += offsetof(struct btrfs_chunk, stripe); + offset += nr * sizeof(struct btrfs_stripe); + return (struct btrfs_stripe *)offset; +} + +static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr)); +} + +static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr)); +} + +static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr, + u64 val) +{ + btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val); +} + +static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr)); +} + +static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr, + u64 val) +{ + 
btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val); +} + +/* struct btrfs_block_group_item */ +BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, + used, 64); +BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, + used, 64); +BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid, + struct btrfs_block_group_item, chunk_objectid, 64); + +BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid, + struct btrfs_block_group_item, chunk_objectid, 64); +BTRFS_SETGET_FUNCS(disk_block_group_flags, + struct btrfs_block_group_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(block_group_flags, + struct btrfs_block_group_item, flags, 64); + +/* struct btrfs_free_space_info */ +BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info, + extent_count, 32); +BTRFS_SETGET_FUNCS(free_space_flags, struct btrfs_free_space_info, flags, 32); + +/* struct btrfs_inode_ref */ +BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); +BTRFS_SETGET_STACK_FUNCS(stack_inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); +BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); + +/* struct btrfs_inode_extref */ +BTRFS_SETGET_FUNCS(inode_extref_parent, struct btrfs_inode_extref, + parent_objectid, 64); +BTRFS_SETGET_FUNCS(inode_extref_name_len, struct btrfs_inode_extref, + name_len, 16); +BTRFS_SETGET_FUNCS(inode_extref_index, struct btrfs_inode_extref, index, 64); + +/* struct btrfs_inode_item */ +BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); +BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64); +BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64); +BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); +BTRFS_SETGET_FUNCS(inode_nbytes, struct btrfs_inode_item, nbytes, 64); +BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64); +BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); +BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); +BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); +BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); +BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); +BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, + struct btrfs_inode_item, generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, + struct btrfs_inode_item, sequence, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, + struct btrfs_inode_item, transid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_size, + struct btrfs_inode_item, size, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, + struct btrfs_inode_item, nbytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, + struct btrfs_inode_item, block_group, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, + struct btrfs_inode_item, nlink, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, + struct btrfs_inode_item, uid, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, + struct btrfs_inode_item, gid, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, + struct btrfs_inode_item, mode, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, + struct btrfs_inode_item, rdev, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, + struct btrfs_inode_item, flags, 64); + +static inline struct btrfs_timespec * +btrfs_inode_atime(struct btrfs_inode_item *inode_item) +{ + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct 
btrfs_inode_item, atime); + return (struct btrfs_timespec *)ptr; +} + +static inline struct btrfs_timespec * +btrfs_inode_mtime(struct btrfs_inode_item *inode_item) +{ + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, mtime); + return (struct btrfs_timespec *)ptr; +} + +static inline struct btrfs_timespec * +btrfs_inode_ctime(struct btrfs_inode_item *inode_item) +{ + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, ctime); + return (struct btrfs_timespec *)ptr; +} + +static inline struct btrfs_timespec * +btrfs_inode_otime(struct btrfs_inode_item *inode_item) +{ + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, otime); + return (struct btrfs_timespec *)ptr; +} + +BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); +BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); +BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, + sec, 64); +BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, + nsec, 32); + +/* struct btrfs_dev_extent */ +BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent, + chunk_tree, 64); +BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent, + chunk_objectid, 64); +BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent, + chunk_offset, 64); +BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_dev_extent_length, struct btrfs_dev_extent, + length, 64); + +static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) +{ + unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid); + return (u8 *)((unsigned long)dev + ptr); +} + + +/* struct btrfs_extent_item */ +BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64); +BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, refs, 64); +BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item, + generation, 64); +BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(stack_extent_flags, struct btrfs_extent_item, flags, 64); + +BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32); + +BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8); + +static inline void btrfs_tree_block_key(struct extent_buffer *eb, + struct btrfs_tree_block_info *item, + struct btrfs_disk_key *key) +{ + read_eb_member(eb, item, struct btrfs_tree_block_info, key, key); +} + +static inline void btrfs_set_tree_block_key(struct extent_buffer *eb, + struct btrfs_tree_block_info *item, + struct btrfs_disk_key *key) +{ + write_eb_member(eb, item, struct btrfs_tree_block_info, key, key); +} + +BTRFS_SETGET_FUNCS(extent_data_ref_root, struct btrfs_extent_data_ref, + root, 64); +BTRFS_SETGET_FUNCS(extent_data_ref_objectid, struct btrfs_extent_data_ref, + objectid, 64); +BTRFS_SETGET_FUNCS(extent_data_ref_offset, struct btrfs_extent_data_ref, + offset, 64); +BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref, + count, 32); + +BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref, + count, 32); + +BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref, + type, 8); +BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref, + offset, 64); +BTRFS_SETGET_STACK_FUNCS(stack_extent_inline_ref_type, + struct btrfs_extent_inline_ref, type, 8); 
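+
+/*
+ * For reference, each BTRFS_SETGET_FUNCS() line above expands to a pair of
+ * unaligned little-endian accessors. As a sketch, the
+ * BTRFS_SETGET_FUNCS(extent_inline_ref_offset, ...) invocation generates
+ * roughly:
+ *
+ *	static inline u64 btrfs_extent_inline_ref_offset(
+ *			const struct extent_buffer *eb,
+ *			const struct btrfs_extent_inline_ref *s)
+ *	{
+ *		unsigned long offset = (unsigned long)s;
+ *		const struct btrfs_extent_inline_ref *p =
+ *			(struct btrfs_extent_inline_ref *)(eb->data + offset);
+ *
+ *		return get_unaligned_le64(&p->offset);
+ *	}
+ *
+ * i.e. the item "pointer" argument is really a byte offset into eb->data,
+ * which is why callers may pass offsets cast to pointers.
+ */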
+BTRFS_SETGET_STACK_FUNCS(stack_extent_inline_ref_offset, + struct btrfs_extent_inline_ref, offset, 64); + +static inline u32 btrfs_extent_inline_ref_size(int type) +{ + if (type == BTRFS_TREE_BLOCK_REF_KEY || + type == BTRFS_SHARED_BLOCK_REF_KEY) + return sizeof(struct btrfs_extent_inline_ref); + if (type == BTRFS_SHARED_DATA_REF_KEY) + return sizeof(struct btrfs_shared_data_ref) + + sizeof(struct btrfs_extent_inline_ref); + if (type == BTRFS_EXTENT_DATA_REF_KEY) + return sizeof(struct btrfs_extent_data_ref) + + offsetof(struct btrfs_extent_inline_ref, offset); + BUG(); + return 0; +} + +BTRFS_SETGET_FUNCS(ref_root_v0, struct btrfs_extent_ref_v0, root, 64); +BTRFS_SETGET_FUNCS(ref_generation_v0, struct btrfs_extent_ref_v0, + generation, 64); +BTRFS_SETGET_FUNCS(ref_objectid_v0, struct btrfs_extent_ref_v0, objectid, 64); +BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32); + +/* struct btrfs_node */ +BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); +BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64); + +static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) +{ + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr); +} + +static inline void btrfs_set_node_blockptr(struct extent_buffer *eb, + int nr, u64 val) +{ + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val); +} + +static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr) +{ + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr); +} + +static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb, + int nr, u64 val) +{ + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)ptr, val); +} + +static inline unsigned long btrfs_node_key_ptr_offset(int nr) +{ + return offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; +} + +static inline void btrfs_node_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) +{ + unsigned long ptr; + ptr = btrfs_node_key_ptr_offset(nr); + read_eb_member(eb, (struct btrfs_key_ptr *)ptr, + struct btrfs_key_ptr, key, disk_key); +} + +static inline void btrfs_set_node_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) +{ + unsigned long ptr; + ptr = btrfs_node_key_ptr_offset(nr); + write_eb_member(eb, (struct btrfs_key_ptr *)ptr, + struct btrfs_key_ptr, key, disk_key); +} + +/* struct btrfs_item */ +BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32); +BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); + +static inline unsigned long btrfs_item_nr_offset(int nr) +{ + return offsetof(struct btrfs_leaf, items) + + sizeof(struct btrfs_item) * nr; +} + +static inline struct btrfs_item *btrfs_item_nr(int nr) +{ + return (struct btrfs_item *)btrfs_item_nr_offset(nr); +} + +static inline u32 btrfs_item_end(struct extent_buffer *eb, + struct btrfs_item *item) +{ + return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item); +} + +static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr) +{ + return btrfs_item_end(eb, btrfs_item_nr(nr)); +} + +static inline u32 btrfs_item_offset_nr(const 
struct extent_buffer *eb, int nr) +{ + return btrfs_item_offset(eb, btrfs_item_nr(nr)); +} + +static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr) +{ + return btrfs_item_size(eb, btrfs_item_nr(nr)); +} + +static inline void btrfs_item_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) +{ + struct btrfs_item *item = btrfs_item_nr(nr); + read_eb_member(eb, item, struct btrfs_item, key, disk_key); +} + +static inline void btrfs_set_item_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) +{ + struct btrfs_item *item = btrfs_item_nr(nr); + write_eb_member(eb, item, struct btrfs_item, key, disk_key); +} + +BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64); /* - * every tree block (leaf or node) starts with this header. + * struct btrfs_root_ref */ -struct btrfs_header { - /* these first four must match the super block */ - __u8 csum[BTRFS_CSUM_SIZE]; - __u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ - __u64 bytenr; /* which block this node is supposed to live in */ - __u64 flags; +BTRFS_SETGET_FUNCS(root_ref_dirid, struct btrfs_root_ref, dirid, 64); +BTRFS_SETGET_FUNCS(root_ref_sequence, struct btrfs_root_ref, sequence, 64); +BTRFS_SETGET_FUNCS(root_ref_name_len, struct btrfs_root_ref, name_len, 16); - /* allowed to be different from the super from here on down */ - __u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; - __u64 generation; - __u64 owner; - __u32 nritems; - __u8 level; -} __attribute__ ((__packed__)); +BTRFS_SETGET_STACK_FUNCS(stack_root_ref_dirid, struct btrfs_root_ref, dirid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_root_ref_sequence, struct btrfs_root_ref, sequence, 64); +BTRFS_SETGET_STACK_FUNCS(stack_root_ref_name_len, struct btrfs_root_ref, name_len, 16); + +/* struct btrfs_dir_item */ +BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16); +BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); +BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); +BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_dir_data_len, struct btrfs_dir_item, data_len, 16); +BTRFS_SETGET_STACK_FUNCS(stack_dir_type, struct btrfs_dir_item, type, 8); +BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item, name_len, 16); +BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item, transid, 64); + +static inline void btrfs_dir_item_key(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_disk_key *key) +{ + read_eb_member(eb, item, struct btrfs_dir_item, location, key); +} + +static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_disk_key *key) +{ + write_eb_member(eb, item, struct btrfs_dir_item, location, key); +} + +/* struct btrfs_free_space_header */ +BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header, + num_entries, 64); +BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header, + num_bitmaps, 64); +BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header, + generation, 64); + +static inline void btrfs_free_space_key(struct extent_buffer *eb, + struct btrfs_free_space_header *h, + struct btrfs_disk_key *key) +{ + read_eb_member(eb, h, struct btrfs_free_space_header, location, key); +} + +static inline void btrfs_set_free_space_key(struct extent_buffer *eb, + struct btrfs_free_space_header *h, + struct btrfs_disk_key *key) +{ + write_eb_member(eb, h, struct btrfs_free_space_header, location, key); +} 
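+
+/*
+ * Usage sketch for the item helpers above (illustrative only; "leaf" is
+ * assumed to be a level-0 extent buffer): walk the items of a leaf and
+ * dump each disk key and payload size.
+ *
+ *	struct btrfs_disk_key disk_key;
+ *	u32 i;
+ *
+ *	for (i = 0; i < btrfs_header_nritems(leaf); i++) {
+ *		btrfs_item_key(leaf, &disk_key, i);
+ *		printf("item %u key %llu/%u/%llu size %u\n", i,
+ *		       le64_to_cpu(disk_key.objectid), disk_key.type,
+ *		       le64_to_cpu(disk_key.offset),
+ *		       btrfs_item_size_nr(leaf, i));
+ *	}
+ *
+ * btrfs_header_nritems() is the btrfs_header accessor defined below.
+ */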
+ +/* struct btrfs_disk_key */ +BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, + objectid, 64); +BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64); +BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8); + +static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, + struct btrfs_disk_key *disk) +{ + cpu->offset = le64_to_cpu(disk->offset); + cpu->type = disk->type; + cpu->objectid = le64_to_cpu(disk->objectid); +} + +static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, + const struct btrfs_key *cpu) +{ + disk->offset = cpu_to_le64(cpu->offset); + disk->type = cpu->type; + disk->objectid = cpu_to_le64(cpu->objectid); +} + +static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb, + struct btrfs_key *key, int nr) +{ + struct btrfs_disk_key disk_key; + btrfs_node_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); +} + +static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb, + struct btrfs_key *key, int nr) +{ + struct btrfs_disk_key disk_key; + btrfs_item_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); +} + +static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_key *key) +{ + struct btrfs_disk_key disk_key; + btrfs_dir_item_key(eb, item, &disk_key); + btrfs_disk_key_to_cpu(key, &disk_key); +} + +/* struct btrfs_header */ +BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64); +BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, + generation, 64); +BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); +BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32); +BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64); +BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); +BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header, nritems, + 32); +BTRFS_SETGET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64); +BTRFS_SETGET_STACK_FUNCS(stack_header_generation, struct btrfs_header, + generation, 64); + +static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag) +{ + return (btrfs_header_flags(eb) & flag) == flag; +} + +static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag) +{ + u64 flags = btrfs_header_flags(eb); + btrfs_set_header_flags(eb, flags | flag); + return (flags & flag) == flag; +} + +static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag) +{ + u64 flags = btrfs_header_flags(eb); + btrfs_set_header_flags(eb, flags & ~flag); + return (flags & flag) == flag; +} + +static inline int btrfs_header_backref_rev(struct extent_buffer *eb) +{ + u64 flags = btrfs_header_flags(eb); + return flags >> BTRFS_BACKREF_REV_SHIFT; +} + +static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb, + int rev) +{ + u64 flags = btrfs_header_flags(eb); + flags &= ~BTRFS_BACKREF_REV_MASK; + flags |= (u64)rev << BTRFS_BACKREF_REV_SHIFT; + btrfs_set_header_flags(eb, flags); +} + +static inline unsigned long btrfs_header_fsid(void) +{ + return offsetof(struct btrfs_header, fsid); +} + +static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) +{ + return offsetof(struct btrfs_header, chunk_tree_uuid); +} + +static inline u8 *btrfs_header_csum(struct extent_buffer *eb) +{ + unsigned long ptr = offsetof(struct 
btrfs_header, csum); + return (u8 *)ptr; +} + +static inline int btrfs_is_leaf(struct extent_buffer *eb) +{ + return (btrfs_header_level(eb) == 0); +} + +/* struct btrfs_root_item */ +BTRFS_SETGET_FUNCS(disk_root_generation, struct btrfs_root_item, + generation, 64); +BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32); +BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64); +BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8); + +BTRFS_SETGET_STACK_FUNCS(root_generation, struct btrfs_root_item, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8); +BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64); +BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32); +BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); +BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, + last_snapshot, 64); +BTRFS_SETGET_STACK_FUNCS(root_generation_v2, struct btrfs_root_item, + generation_v2, 64); +BTRFS_SETGET_STACK_FUNCS(root_ctransid, struct btrfs_root_item, + ctransid, 64); +BTRFS_SETGET_STACK_FUNCS(root_otransid, struct btrfs_root_item, + otransid, 64); +BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item, + stransid, 64); +BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item, + rtransid, 64); + +static inline struct btrfs_timespec* btrfs_root_ctime( + struct btrfs_root_item *root_item) +{ + unsigned long ptr = (unsigned long)root_item; + ptr += offsetof(struct btrfs_root_item, ctime); + return (struct btrfs_timespec *)ptr; +} + +static inline struct btrfs_timespec* btrfs_root_otime( + struct btrfs_root_item *root_item) +{ + unsigned long ptr = (unsigned long)root_item; + ptr += offsetof(struct btrfs_root_item, otime); + return (struct btrfs_timespec *)ptr; +} + +static inline struct btrfs_timespec* btrfs_root_stime( + struct btrfs_root_item *root_item) +{ + unsigned long ptr = (unsigned long)root_item; + ptr += offsetof(struct btrfs_root_item, stime); + return (struct btrfs_timespec *)ptr; +} + +static inline struct btrfs_timespec* btrfs_root_rtime( + struct btrfs_root_item *root_item) +{ + unsigned long ptr = (unsigned long)root_item; + ptr += offsetof(struct btrfs_root_item, rtime); + return (struct btrfs_timespec *)ptr; +} + +/* struct btrfs_root_backup */ +BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup, + tree_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_tree_root_gen, struct btrfs_root_backup, + tree_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_tree_root_level, struct btrfs_root_backup, + tree_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_chunk_root, struct btrfs_root_backup, + chunk_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_gen, struct btrfs_root_backup, + chunk_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_level, struct btrfs_root_backup, + chunk_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_extent_root, struct btrfs_root_backup, + extent_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_extent_root_gen, struct btrfs_root_backup, + extent_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_extent_root_level, struct btrfs_root_backup, + extent_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_fs_root, struct btrfs_root_backup, + fs_root, 
64); +BTRFS_SETGET_STACK_FUNCS(backup_fs_root_gen, struct btrfs_root_backup, + fs_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_fs_root_level, struct btrfs_root_backup, + fs_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_dev_root, struct btrfs_root_backup, + dev_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_dev_root_gen, struct btrfs_root_backup, + dev_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_dev_root_level, struct btrfs_root_backup, + dev_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_csum_root, struct btrfs_root_backup, + csum_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_csum_root_gen, struct btrfs_root_backup, + csum_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_csum_root_level, struct btrfs_root_backup, + csum_root_level, 8); +BTRFS_SETGET_STACK_FUNCS(backup_total_bytes, struct btrfs_root_backup, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup, + bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup, + num_devices, 64); + +/* struct btrfs_super_block */ + +BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64); +BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); +BTRFS_SETGET_STACK_FUNCS(super_sys_array_size, + struct btrfs_super_block, sys_chunk_array_size, 32); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root_generation, + struct btrfs_super_block, chunk_root_generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, + root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block, + chunk_root, 64); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block, + chunk_root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block, + log_root, 64); +BTRFS_SETGET_STACK_FUNCS(super_log_root_transid, struct btrfs_super_block, + log_root_transid, 64); +BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block, + log_root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, + bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, + sectorsize, 32); +BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, + nodesize, 32); +BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, + stripesize, 32); +BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, + root_dir_objectid, 64); +BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block, + num_devices, 64); +BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block, + compat_flags, 64); +BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block, + compat_ro_flags, 64); +BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block, + incompat_flags, 64); +BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, + csum_type, 16); +BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, + cache_generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block, + uuid_tree_generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64); + +static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) +{ + return offsetof(struct 
btrfs_leaf, items); +} + +/* struct btrfs_file_extent_item */ +BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item, type, 8); + +static inline unsigned long btrfs_file_extent_inline_start(struct + btrfs_file_extent_item *e) +{ + unsigned long offset = (unsigned long)e; + offset += offsetof(struct btrfs_file_extent_item, disk_bytenr); + return offset; +} + +static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) +{ + return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize; +} + +BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, + disk_bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr, struct btrfs_file_extent_item, + disk_bytenr, 64); +BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation, struct btrfs_file_extent_item, + generation, 64); +BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item, + disk_num_bytes, 64); +BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, + offset, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset, struct btrfs_file_extent_item, + offset, 64); +BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, + num_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes, struct btrfs_file_extent_item, + num_bytes, 64); +BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item, + ram_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_ram_bytes, struct btrfs_file_extent_item, + ram_bytes, 64); +BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item, + compression, 8); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression, struct btrfs_file_extent_item, + compression, 8); +BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item, + encryption, 8); +BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item, + other_encoding, 16); + +/* btrfs_qgroup_status_item */ +BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item, + version, 64); +BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item, + generation, 64); +BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item, + flags, 64); +BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item, + rescan, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_version, + struct btrfs_qgroup_status_item, version, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_generation, + struct btrfs_qgroup_status_item, generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_flags, + struct btrfs_qgroup_status_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_rescan, + struct btrfs_qgroup_status_item, rescan, 64); + +/* btrfs_qgroup_info_item */ +BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item, + generation, 64); +BTRFS_SETGET_FUNCS(qgroup_info_referenced, struct btrfs_qgroup_info_item, + rfer, 64); +BTRFS_SETGET_FUNCS(qgroup_info_referenced_compressed, + struct btrfs_qgroup_info_item, rfer_cmpr, 64); +BTRFS_SETGET_FUNCS(qgroup_info_exclusive, struct btrfs_qgroup_info_item, + excl, 64); +BTRFS_SETGET_FUNCS(qgroup_info_exclusive_compressed, + struct btrfs_qgroup_info_item, excl_cmpr, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation, + struct btrfs_qgroup_info_item, generation, 64); 
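+
+/*
+ * Note the two flavours above: the plain btrfs_qgroup_info_* accessors take
+ * an extent_buffer plus an item "pointer" that is really an offset into
+ * eb->data, while the stack_* variants operate on a structure already
+ * copied into ordinary memory. A sketch of the same field read both ways
+ * ("eb" and "slot" are illustrative):
+ *
+ *	struct btrfs_qgroup_info_item info;
+ *	struct btrfs_qgroup_info_item *ip;
+ *	u64 gen;
+ *
+ *	read_extent_buffer(eb, &info, btrfs_item_ptr_offset(eb, slot),
+ *			   sizeof(info));
+ *	gen = btrfs_stack_qgroup_info_generation(&info);
+ *
+ *	ip = btrfs_item_ptr(eb, slot, struct btrfs_qgroup_info_item);
+ *	gen = btrfs_qgroup_info_generation(eb, ip);
+ */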
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_referenced, + struct btrfs_qgroup_info_item, rfer, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_referenced_compressed, + struct btrfs_qgroup_info_item, rfer_cmpr, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_exclusive, + struct btrfs_qgroup_info_item, excl, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_exclusive_compressed, + struct btrfs_qgroup_info_item, excl_cmpr, 64); + +/* btrfs_qgroup_limit_item */ +BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item, + flags, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_max_referenced, struct btrfs_qgroup_limit_item, + max_rfer, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_max_exclusive, struct btrfs_qgroup_limit_item, + max_excl, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_rsv_referenced, struct btrfs_qgroup_limit_item, + rsv_rfer, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_rsv_exclusive, struct btrfs_qgroup_limit_item, + rsv_excl, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_flags, + struct btrfs_qgroup_limit_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_max_referenced, + struct btrfs_qgroup_limit_item, max_rfer, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_max_exclusive, + struct btrfs_qgroup_limit_item, max_excl, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_rsv_referenced, + struct btrfs_qgroup_limit_item, rsv_rfer, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_rsv_exclusive, + struct btrfs_qgroup_limit_item, rsv_excl, 64); + +/* btrfs_balance_item */ +BTRFS_SETGET_FUNCS(balance_item_flags, struct btrfs_balance_item, flags, 64); + +static inline struct btrfs_disk_balance_args* btrfs_balance_item_data( + struct extent_buffer *eb, struct btrfs_balance_item *bi) +{ + unsigned long offset = (unsigned long)bi; + struct btrfs_balance_item *p; + p = (struct btrfs_balance_item *)(eb->data + offset); + return &p->data; +} + +static inline struct btrfs_disk_balance_args* btrfs_balance_item_meta( + struct extent_buffer *eb, struct btrfs_balance_item *bi) +{ + unsigned long offset = (unsigned long)bi; + struct btrfs_balance_item *p; + p = (struct btrfs_balance_item *)(eb->data + offset); + return &p->meta; +} + +static inline struct btrfs_disk_balance_args* btrfs_balance_item_sys( + struct extent_buffer *eb, struct btrfs_balance_item *bi) +{ + unsigned long offset = (unsigned long)bi; + struct btrfs_balance_item *p; + p = (struct btrfs_balance_item *)(eb->data + offset); + return &p->sys; +} + +static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb, + const struct btrfs_dev_stats_item *ptr, + int index) +{ + u64 val; + + read_extent_buffer(eb, &val, + offsetof(struct btrfs_dev_stats_item, values) + + ((unsigned long)ptr) + (index * sizeof(u64)), + sizeof(val)); + return val; +} /* - * this is a very generous portion of the super block, giving us - * room to translate 14 chunks with 3 stripes each. + * this returns the number of bytes used by the item on disk, minus the + * size of any extent headers. 
If a file is compressed on disk, this is + * the compressed size */ -#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048 +static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, + struct btrfs_item *e) +{ + unsigned long offset; + offset = offsetof(struct btrfs_file_extent_item, disk_bytenr); + return btrfs_item_size(eb, e) - offset; +} + +#define btrfs_fs_incompat(fs_info, opt) \ + __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt) + +static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag) +{ + struct btrfs_super_block *disk_super; + disk_super = fs_info->super_copy; + return !!(btrfs_super_incompat_flags(disk_super) & flag); +} + +#define btrfs_fs_compat_ro(fs_info, opt) \ + __btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt) + +static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag) +{ + struct btrfs_super_block *disk_super; + disk_super = fs_info->super_copy; + return !!(btrfs_super_compat_ro_flags(disk_super) & flag); +} + +/* helper function to cast into the data area of the leaf. */ +#define btrfs_item_ptr(leaf, slot, type) \ + ((type *)(btrfs_leaf_data(leaf) + \ + btrfs_item_offset_nr(leaf, slot))) + +#define btrfs_item_ptr_offset(leaf, slot) \ + ((unsigned long)(btrfs_leaf_data(leaf) + \ + btrfs_item_offset_nr(leaf, slot))) + +static inline u64 btrfs_name_hash(const char *name, int len) +{ + return (u64)crc32c((u32)~1, (u8 *)name, len); +} /* - * just in case we somehow lose the roots and are not able to mount, - * we store an array of the roots from previous transactions - * in the super. + * Figure the key offset of an extended inode ref */ -#define BTRFS_NUM_BACKUP_ROOTS 4 -struct btrfs_root_backup { - __u64 tree_root; - __u64 tree_root_gen; - - __u64 chunk_root; - __u64 chunk_root_gen; - - __u64 extent_root; - __u64 extent_root_gen; - - __u64 fs_root; - __u64 fs_root_gen; - - __u64 dev_root; - __u64 dev_root_gen; - - __u64 csum_root; - __u64 csum_root_gen; - - __u64 total_bytes; - __u64 bytes_used; - __u64 num_devices; - /* future */ - __u64 unused_64[4]; - - __u8 tree_root_level; - __u8 chunk_root_level; - __u8 extent_root_level; - __u8 fs_root_level; - __u8 dev_root_level; - __u8 csum_root_level; - /* future and to align */ - __u8 unused_8[10]; -} __attribute__ ((__packed__)); - -/* - * the super block basically lists the main trees of the FS - * it currently lacks any block count etc etc - */ -struct btrfs_super_block { - __u8 csum[BTRFS_CSUM_SIZE]; - /* the first 4 fields must match struct btrfs_header */ - __u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ - __u64 bytenr; /* this block number */ - __u64 flags; - - /* allowed to be different from the btrfs_header from here own down */ - __u64 magic; - __u64 generation; - __u64 root; - __u64 chunk_root; - __u64 log_root; - - /* this will help find the new super based on the log root */ - __u64 log_root_transid; - __u64 total_bytes; - __u64 bytes_used; - __u64 root_dir_objectid; - __u64 num_devices; - __u32 sectorsize; - __u32 nodesize; - __u32 __unused_leafsize; - __u32 stripesize; - __u32 sys_chunk_array_size; - __u64 chunk_root_generation; - __u64 compat_flags; - __u64 compat_ro_flags; - __u64 incompat_flags; - __u16 csum_type; - __u8 root_level; - __u8 chunk_root_level; - __u8 log_root_level; - struct btrfs_dev_item dev_item; - - char label[BTRFS_LABEL_SIZE]; - - __u64 cache_generation; - __u64 uuid_tree_generation; - - /* future expansion */ - __u64 reserved[30]; - __u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; - struct 
btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; -} __attribute__ ((__packed__)); - -/* - * Compat flags that we support. If any incompat flags are set other than the - * ones specified below then we will fail to mount - */ -#define BTRFS_FEATURE_COMPAT_SUPP 0ULL -#define BTRFS_FEATURE_COMPAT_SAFE_SET 0ULL -#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL - -#define BTRFS_FEATURE_COMPAT_RO_SUPP \ - (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | \ - BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID) - -#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL -#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL - -#define BTRFS_FEATURE_INCOMPAT_SUPP \ - (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ - BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ - BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ - BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ - BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ - BTRFS_FEATURE_INCOMPAT_RAID56 | \ - BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ - BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \ - BTRFS_FEATURE_INCOMPAT_NO_HOLES) - -#define BTRFS_FEATURE_INCOMPAT_SAFE_SET \ - (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) -#define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR 0ULL - -/* - * A leaf is full of items. offset and size tell us where to find - * the item in the leaf (relative to the start of the data area) - */ -struct btrfs_item { - struct btrfs_key key; - __u32 offset; - __u32 size; -} __attribute__ ((__packed__)); - -/* - * leaves have an item area and a data area: - * [item0, item1....itemN] [free space] [dataN...data1, data0] - * - * The data is separate from the items to get the keys closer together - * during searches. - */ -struct btrfs_leaf { - struct btrfs_header header; - struct btrfs_item items[]; -} __attribute__ ((__packed__)); - -/* - * all non-leaf blocks are nodes, they hold only keys and pointers to - * other blocks - */ -struct btrfs_key_ptr { - struct btrfs_key key; - __u64 blockptr; - __u64 generation; -} __attribute__ ((__packed__)); - -struct btrfs_node { - struct btrfs_header header; - struct btrfs_key_ptr ptrs[]; -} __attribute__ ((__packed__)); +static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name, + int len) +{ + return crc32(parent_objectid, (u8 *)name, len); +} union btrfs_tree_node { struct btrfs_header header; @@ -257,77 +1204,95 @@ union btrfs_tree_node { struct btrfs_node node; }; -typedef __u8 u8; -typedef __u16 u16; -typedef __u32 u32; -typedef __u64 u64; - -struct btrfs_path { - union btrfs_tree_node *nodes[BTRFS_MAX_LEVEL]; - u32 slots[BTRFS_MAX_LEVEL]; -}; - -struct btrfs_root { - u64 objectid; - u64 bytenr; - u64 root_dirid; -}; - -int btrfs_comp_keys(struct btrfs_key *, struct btrfs_key *); -int btrfs_comp_keys_type(struct btrfs_key *, struct btrfs_key *); -int btrfs_bin_search(union btrfs_tree_node *, struct btrfs_key *, int *); -void btrfs_free_path(struct btrfs_path *); -int btrfs_search_tree(const struct btrfs_root *, struct btrfs_key *, - struct btrfs_path *); -int btrfs_prev_slot(struct btrfs_path *); -int btrfs_next_slot(struct btrfs_path *); - -static inline struct btrfs_key *btrfs_path_leaf_key(struct btrfs_path *p) { - return &p->nodes[0]->leaf.items[p->slots[0]].key; -} - -static inline struct btrfs_key * -btrfs_search_tree_key_type(const struct btrfs_root *root, u64 objectid, - u8 type, struct btrfs_path *path) -{ - struct btrfs_key key, *res; - - key.objectid = objectid; - key.type = type; - key.offset = 0; - - if (btrfs_search_tree(root, &key, path)) - return NULL; - - res = btrfs_path_leaf_key(path); - if (btrfs_comp_keys_type(&key, res)) { - 
btrfs_free_path(path); - return NULL; - } - - return res; -} - -static inline u32 btrfs_path_item_size(struct btrfs_path *p) -{ - return p->nodes[0]->leaf.items[p->slots[0]].size; -} - -static inline void *btrfs_leaf_data(struct btrfs_leaf *leaf, u32 slot) -{ - return ((u8 *) leaf) + sizeof(struct btrfs_header) - + leaf->items[slot].offset; -} - -static inline void *btrfs_path_leaf_data(struct btrfs_path *p) -{ - return btrfs_leaf_data(&p->nodes[0]->leaf, p->slots[0]); -} - -#define btrfs_item_ptr(l,s,t) \ - ((t *) btrfs_leaf_data((l),(s))) - #define btrfs_path_item_ptr(p,t) \ ((t *) btrfs_path_leaf_data((p))) +u16 btrfs_super_csum_size(const struct btrfs_super_block *s); +const char *btrfs_super_csum_name(u16 csum_type); +u16 btrfs_csum_type_size(u16 csum_type); +size_t btrfs_super_num_csums(void); + +/* root-tree.c */ +int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, + struct btrfs_root_item *item, struct btrfs_key *key); + +/* dir-item.c */ +struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + const char *name, int name_len, + int mod); +typedef int (*btrfs_iter_dir_callback_t)(struct btrfs_root *root, + struct extent_buffer *eb, + struct btrfs_dir_item *di); +int btrfs_iter_dir(struct btrfs_root *root, u64 ino, + btrfs_iter_dir_callback_t callback); +/* inode.c */ +int btrfs_lookup_path(struct btrfs_root *root, u64 ino, const char *filename, + struct btrfs_root **root_ret, u64 *ino_ret, + u8 *type_ret, int symlink_limit); +int btrfs_read_extent_inline(struct btrfs_path *path, + struct btrfs_file_extent_item *fi, char *dest); +int btrfs_read_extent_reg(struct btrfs_path *path, + struct btrfs_file_extent_item *fi, u64 offset, + int len, char *dest); + +/* ctree.c */ +int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2); +enum btrfs_tree_block_status +btrfs_check_node(struct btrfs_fs_info *fs_info, + struct btrfs_disk_key *parent_key, struct extent_buffer *buf); +enum btrfs_tree_block_status +btrfs_check_leaf(struct btrfs_fs_info *fs_info, + struct btrfs_disk_key *parent_key, struct extent_buffer *buf); +struct extent_buffer *read_node_slot(struct btrfs_fs_info *fs_info, + struct extent_buffer *parent, int slot); +int btrfs_previous_item(struct btrfs_root *root, + struct btrfs_path *path, u64 min_objectid, + int type); +int btrfs_next_sibling_tree_block(struct btrfs_fs_info *fs_info, + struct btrfs_path *path); +/* + * Walk up the tree as far as necessary to find the next leaf. + * + * returns 0 if it found something or 1 if there are no greater leaves. + * returns < 0 on io errors. 
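+ *
+ * (Editor's sketch, not part of the original patch: a typical item
+ * walk built on these helpers; `root`, `ino` and the key handling
+ * below are placeholders.)
+ *
+ *	struct btrfs_path path;
+ *	struct btrfs_key key;
+ *	int ret;
+ *
+ *	btrfs_init_path(&path);
+ *	key.objectid = ino;
+ *	key.type = BTRFS_DIR_INDEX_KEY;
+ *	key.offset = 0;
+ *	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+ *	if (ret < 0)
+ *		return ret;
+ *	do {
+ *		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+ *		if (key.objectid != ino || key.type != BTRFS_DIR_INDEX_KEY)
+ *			break;
+ *		... use the item at path.slots[0] of path.nodes[0] ...
+ *	} while (!(ret = btrfs_next_item(root, &path)));
+ *	btrfs_release_path(&path);
+ *	return ret < 0 ? ret : 0;
+ *
+ * (After a non-exact match btrfs_search_slot() can leave slots[0] ==
+ * nritems; real callers such as btrfs_iter_dir() step to the next
+ * leaf first in that case.)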
+ */
+static inline int btrfs_next_leaf(struct btrfs_root *root,
+				  struct btrfs_path *path)
+{
+	path->lowest_level = 0;
+	return btrfs_next_sibling_tree_block(root->fs_info, path);
+}
+
+static inline int btrfs_next_item(struct btrfs_root *root,
+				  struct btrfs_path *p)
+{
+	++p->slots[0];
+	if (p->slots[0] >= btrfs_header_nritems(p->nodes[0]))
+		return btrfs_next_leaf(root, p);
+	return 0;
+}
+
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
+int btrfs_leaf_free_space(struct extent_buffer *leaf);
+int btrfs_search_slot(struct btrfs_trans_handle *trans,
+		struct btrfs_root *root, const struct btrfs_key *key,
+		struct btrfs_path *p, int ins_len, int cow);
+int btrfs_search_slot_for_read(struct btrfs_root *root,
+			       const struct btrfs_key *key,
+			       struct btrfs_path *p, int find_higher,
+			       int return_any);
+void btrfs_release_path(struct btrfs_path *p);
+struct btrfs_path *btrfs_alloc_path(void);
+void btrfs_free_path(struct btrfs_path *p);
+static inline void btrfs_init_path(struct btrfs_path *p)
+{
+	memset(p, 0, sizeof(*p));
+}
+int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
+		     int *slot);
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+		u64 iobjectid, u64 ioff, u8 key_type,
+		struct btrfs_key *found_key);
 #endif /* __BTRFS_CTREE_H__ */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 63b5bf0a86..aab197a6d5 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -6,119 +6,163 @@
  */
 
 #include "btrfs.h"
+#include "disk-io.h"
 
-static int verify_dir_item(struct btrfs_dir_item *item, u32 start, u32 total)
+static int verify_dir_item(struct btrfs_root *root,
+			   struct extent_buffer *leaf,
+			   struct btrfs_dir_item *dir_item)
 {
-	u16 max_len = BTRFS_NAME_LEN;
-	u32 end;
+	u16 namelen = BTRFS_NAME_LEN;
+	u8 type = btrfs_dir_type(leaf, dir_item);
 
-	if (item->type >= BTRFS_FT_MAX) {
-		printf("%s: invalid dir item type: %i\n", __func__, item->type);
+	if (type == BTRFS_FT_XATTR)
+		namelen = XATTR_NAME_MAX;
+
+	if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
+		fprintf(stderr, "invalid dir item name len: %u\n",
+			(unsigned)btrfs_dir_name_len(leaf, dir_item));
 		return 1;
 	}
 
-	if (item->type == BTRFS_FT_XATTR)
-		max_len = 255; /* XATTR_NAME_MAX */
-
-	end = start + sizeof(*item) + item->name_len;
-	if (item->name_len > max_len || end > total) {
-		printf("%s: invalid dir item name len: %u\n", __func__,
-		       item->name_len);
+	/* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
+	if ((btrfs_dir_data_len(leaf, dir_item) +
+	     btrfs_dir_name_len(leaf, dir_item)) >
+	    BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
+		fprintf(stderr, "invalid dir item name + data len: %u + %u\n",
+			(unsigned)btrfs_dir_name_len(leaf, dir_item),
+			(unsigned)btrfs_dir_data_len(leaf, dir_item));
 		return 1;
 	}
 
 	return 0;
 }
 
-static struct btrfs_dir_item *
-btrfs_match_dir_item_name(struct btrfs_path *path, const char *name,
-			  int name_len)
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+						 struct btrfs_path *path,
+						 const char *name, int name_len)
 {
-	struct btrfs_dir_item *item;
-	u32 total_len, cur = 0, this_len;
-	const char *name_ptr;
+	struct btrfs_dir_item *dir_item;
+	unsigned long name_ptr;
+	u32 total_len;
+	u32 cur = 0;
+	u32 this_len;
+	struct extent_buffer *leaf;
 
-	item = btrfs_path_item_ptr(path, struct btrfs_dir_item);
+	leaf = path->nodes[0];
+	dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+	total_len = btrfs_item_size_nr(leaf, path->slots[0]);
+	if (verify_dir_item(root, leaf,
dir_item)) + return NULL; - total_len = btrfs_path_item_size(path); - - while (cur < total_len) { - btrfs_dir_item_to_cpu(item); - this_len = sizeof(*item) + item->name_len + item->data_len; - name_ptr = (const char *) (item + 1); - - if (verify_dir_item(item, cur, total_len)) + while(cur < total_len) { + this_len = sizeof(*dir_item) + + btrfs_dir_name_len(leaf, dir_item) + + btrfs_dir_data_len(leaf, dir_item); + if (this_len > (total_len - cur)) { + fprintf(stderr, "invalid dir item size\n"); return NULL; - if (item->name_len == name_len && !memcmp(name_ptr, name, - name_len)) - return item; + } + + name_ptr = (unsigned long)(dir_item + 1); + + if (btrfs_dir_name_len(leaf, dir_item) == name_len && + memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) + return dir_item; cur += this_len; - item = (struct btrfs_dir_item *) ((u8 *) item + this_len); + dir_item = (struct btrfs_dir_item *)((char *)dir_item + + this_len); } - return NULL; } -int btrfs_lookup_dir_item(const struct btrfs_root *root, u64 dir, - const char *name, int name_len, - struct btrfs_dir_item *item) +struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + const char *name, int name_len, + int mod) { - struct btrfs_path path; + int ret; struct btrfs_key key; - struct btrfs_dir_item *res = NULL; + int ins_len = mod < 0 ? -1 : 0; + int cow = mod != 0; + struct btrfs_key found_key; + struct extent_buffer *leaf; key.objectid = dir; key.type = BTRFS_DIR_ITEM_KEY; + key.offset = btrfs_name_hash(name, name_len); - if (btrfs_search_tree(root, &key, &path)) - return -1; + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); + if (ret < 0) + return ERR_PTR(ret); + if (ret > 0) { + if (path->slots[0] == 0) + return NULL; + path->slots[0]--; + } - if (btrfs_comp_keys_type(&key, btrfs_path_leaf_key(&path))) - goto out; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - res = btrfs_match_dir_item_name(&path, name, name_len); - if (res) - *item = *res; -out: - btrfs_free_path(&path); - return res ? 
0 : -1; + if (found_key.objectid != dir || + found_key.type != BTRFS_DIR_ITEM_KEY || + found_key.offset != key.offset) + return NULL; + + return btrfs_match_dir_item_name(root, path, name, name_len); } -int btrfs_readdir(const struct btrfs_root *root, u64 dir, - btrfs_readdir_callback_t callback) +int btrfs_iter_dir(struct btrfs_root *root, u64 ino, + btrfs_iter_dir_callback_t callback) { struct btrfs_path path; - struct btrfs_key key, *found_key; - struct btrfs_dir_item *item; - int res = 0; + struct btrfs_key key; + int ret; - key.objectid = dir; + btrfs_init_path(&path); + key.objectid = ino; key.type = BTRFS_DIR_INDEX_KEY; key.offset = 0; - if (btrfs_search_tree(root, &key, &path)) - return -1; - + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) + return ret; + /* Should not happen */ + if (ret == 0) { + ret = -EUCLEAN; + goto out; + } + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(root, &path); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + goto out; + } + } do { - found_key = btrfs_path_leaf_key(&path); - if (btrfs_comp_keys_type(&key, found_key)) + struct btrfs_dir_item *di; + + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + if (key.objectid != ino || key.type != BTRFS_DIR_INDEX_KEY) break; + di = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_dir_item); + if (verify_dir_item(root, path.nodes[0], di)) { + ret = -EUCLEAN; + goto out; + } + ret = callback(root, path.nodes[0], di); + if (ret < 0) + goto out; + } while (!(ret = btrfs_next_item(root, &path))); - item = btrfs_path_item_ptr(&path, struct btrfs_dir_item); - btrfs_dir_item_to_cpu(item); - - if (verify_dir_item(item, 0, sizeof(*item) + item->name_len)) - continue; - if (item->type == BTRFS_FT_XATTR) - continue; - - if (callback(root, item)) - break; - } while (!(res = btrfs_next_slot(&path))); - - btrfs_free_path(&path); - - return res < 0 ? 
-1 : 0; + if (ret > 0) + ret = 0; +out: + btrfs_release_path(&path); + return ret; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c new file mode 100644 index 0000000000..01e7cee520 --- /dev/null +++ b/fs/btrfs/disk-io.c @@ -0,0 +1,1062 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include +#include +#include +#include +#include "kernel-shared/btrfs_tree.h" +#include "common/rbtree-utils.h" +#include "disk-io.h" +#include "ctree.h" +#include "btrfs.h" +#include "volumes.h" +#include "extent-io.h" +#include "crypto/hash.h" + +/* specified errno for check_tree_block */ +#define BTRFS_BAD_BYTENR (-1) +#define BTRFS_BAD_FSID (-2) +#define BTRFS_BAD_LEVEL (-3) +#define BTRFS_BAD_NRITEMS (-4) + +/* Calculate max possible nritems for a leaf/node */ +static u32 max_nritems(u8 level, u32 nodesize) +{ + + if (level == 0) + return ((nodesize - sizeof(struct btrfs_header)) / + sizeof(struct btrfs_item)); + return ((nodesize - sizeof(struct btrfs_header)) / + sizeof(struct btrfs_key_ptr)); +} + +static int check_tree_block(struct btrfs_fs_info *fs_info, + struct extent_buffer *buf) +{ + + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + u32 nodesize = fs_info->nodesize; + bool fsid_match = false; + int ret = BTRFS_BAD_FSID; + + if (buf->start != btrfs_header_bytenr(buf)) + return BTRFS_BAD_BYTENR; + if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL) + return BTRFS_BAD_LEVEL; + if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf), + nodesize)) + return BTRFS_BAD_NRITEMS; + + /* Only leaf can be empty */ + if (btrfs_header_nritems(buf) == 0 && + btrfs_header_level(buf) != 0) + return BTRFS_BAD_NRITEMS; + + while (fs_devices) { + /* + * Checking the incompat flag is only valid for the current + * fs. For seed devices it's forbidden to have their uuid + * changed so reading ->fsid in this case is fine + */ + if (fs_devices == fs_info->fs_devices && + btrfs_fs_incompat(fs_info, METADATA_UUID)) + fsid_match = !memcmp_extent_buffer(buf, + fs_devices->metadata_uuid, + btrfs_header_fsid(), + BTRFS_FSID_SIZE); + else + fsid_match = !memcmp_extent_buffer(buf, + fs_devices->fsid, + btrfs_header_fsid(), + BTRFS_FSID_SIZE); + + + if (fsid_match) { + ret = 0; + break; + } + fs_devices = fs_devices->seed; + } + return ret; +} + +static void print_tree_block_error(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, + int err) +{ + char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'}; + char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'}; + u8 buf[BTRFS_UUID_SIZE]; + + if (!err) + return; + + fprintf(stderr, "bad tree block %llu, ", eb->start); + switch (err) { + case BTRFS_BAD_FSID: + read_extent_buffer(eb, buf, btrfs_header_fsid(), + BTRFS_UUID_SIZE); + uuid_unparse(buf, found_uuid); + uuid_unparse(fs_info->fs_devices->metadata_uuid, fs_uuid); + fprintf(stderr, "fsid mismatch, want=%s, have=%s\n", + fs_uuid, found_uuid); + break; + case BTRFS_BAD_BYTENR: + fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n", + eb->start, btrfs_header_bytenr(eb)); + break; + case BTRFS_BAD_LEVEL: + fprintf(stderr, "bad level, %u > %d\n", + btrfs_header_level(eb), BTRFS_MAX_LEVEL); + break; + case BTRFS_BAD_NRITEMS: + fprintf(stderr, "invalid nr_items: %u\n", + btrfs_header_nritems(eb)); + break; + } +} + +int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len) +{ + memset(out, 0, BTRFS_CSUM_SIZE); + + switch (csum_type) { + case BTRFS_CSUM_TYPE_CRC32: + return hash_crc32c(data, len, out); + case BTRFS_CSUM_TYPE_XXHASH: + return hash_xxhash(data, len, out); + case BTRFS_CSUM_TYPE_SHA256: 
+ return hash_sha256(data, len, out); + default: + printf("Unknown csum type %d\n", csum_type); + return -EINVAL; + } +} + +/* + * Check if the super is valid: + * - nodesize/sectorsize - minimum, maximum, alignment + * - tree block starts - alignment + * - number of devices - something sane + * - sys array size - maximum + */ +static int btrfs_check_super(struct btrfs_super_block *sb) +{ + u8 result[BTRFS_CSUM_SIZE]; + u16 csum_type; + int csum_size; + u8 *metadata_uuid; + + if (btrfs_super_magic(sb) != BTRFS_MAGIC) + return -EIO; + + csum_type = btrfs_super_csum_type(sb); + if (csum_type >= btrfs_super_num_csums()) { + error("unsupported checksum algorithm %u", csum_type); + return -EIO; + } + csum_size = btrfs_super_csum_size(sb); + + btrfs_csum_data(csum_type, (u8 *)sb + BTRFS_CSUM_SIZE, + result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + + if (memcmp(result, sb->csum, csum_size)) { + error("superblock checksum mismatch"); + return -EIO; + } + if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { + error("tree_root level too big: %d >= %d", + btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); + goto error_out; + } + if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) { + error("chunk_root level too big: %d >= %d", + btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL); + goto error_out; + } + if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) { + error("log_root level too big: %d >= %d", + btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL); + goto error_out; + } + + if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) { + error("tree_root block unaligned: %llu", btrfs_super_root(sb)); + goto error_out; + } + if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) { + error("chunk_root block unaligned: %llu", + btrfs_super_chunk_root(sb)); + goto error_out; + } + if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) { + error("log_root block unaligned: %llu", + btrfs_super_log_root(sb)); + goto error_out; + } + if (btrfs_super_nodesize(sb) < 4096) { + error("nodesize too small: %u < 4096", + btrfs_super_nodesize(sb)); + goto error_out; + } + if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) { + error("nodesize unaligned: %u", btrfs_super_nodesize(sb)); + goto error_out; + } + if (btrfs_super_sectorsize(sb) < 4096) { + error("sectorsize too small: %u < 4096", + btrfs_super_sectorsize(sb)); + goto error_out; + } + if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) { + error("sectorsize unaligned: %u", btrfs_super_sectorsize(sb)); + goto error_out; + } + if (btrfs_super_total_bytes(sb) == 0) { + error("invalid total_bytes 0"); + goto error_out; + } + if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) { + error("invalid bytes_used %llu", btrfs_super_bytes_used(sb)); + goto error_out; + } + if ((btrfs_super_stripesize(sb) != 4096) + && (btrfs_super_stripesize(sb) != btrfs_super_sectorsize(sb))) { + error("invalid stripesize %u", btrfs_super_stripesize(sb)); + goto error_out; + } + + if (btrfs_super_incompat_flags(sb) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID) + metadata_uuid = sb->metadata_uuid; + else + metadata_uuid = sb->fsid; + + if (memcmp(metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) { + char fsid[BTRFS_UUID_UNPARSED_SIZE]; + char dev_fsid[BTRFS_UUID_UNPARSED_SIZE]; + + uuid_unparse(sb->metadata_uuid, fsid); + uuid_unparse(sb->dev_item.fsid, dev_fsid); + error("dev_item UUID does not match fsid: %s != %s", + dev_fsid, fsid); + goto error_out; + } + + /* + * Hint to catch really bogus numbers, bitflips or so + */ + if (btrfs_super_num_devices(sb) > (1UL << 31)) { + error("suspicious 
number of devices: %llu", + btrfs_super_num_devices(sb)); + } + + if (btrfs_super_num_devices(sb) == 0) { + error("number of devices is 0"); + goto error_out; + } + + /* + * Obvious sys_chunk_array corruptions, it must hold at least one key + * and one chunk + */ + if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { + error("system chunk array too big %u > %u", + btrfs_super_sys_array_size(sb), + BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); + goto error_out; + } + if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) + + sizeof(struct btrfs_chunk)) { + error("system chunk array too small %u < %zu", + btrfs_super_sys_array_size(sb), + sizeof(struct btrfs_disk_key) + + sizeof(struct btrfs_chunk)); + goto error_out; + } + + return 0; + +error_out: + error("superblock checksum matches but it has invalid members"); + return -EIO; +} + +/* + * btrfs_read_dev_super - read a valid primary superblock from a block device + * @desc,@part: file descriptor of the device + * @sb: buffer where the superblock is going to be read in + * + * Unlike the btrfs-progs/kernel version, here we ony care about the first + * super block, thus it's much simpler. + */ +int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part, + struct btrfs_super_block *sb) +{ + char tmp[BTRFS_SUPER_INFO_SIZE]; + struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp; + int ret; + + ret = __btrfs_devread(desc, part, tmp, BTRFS_SUPER_INFO_SIZE, + BTRFS_SUPER_INFO_OFFSET); + if (ret < BTRFS_SUPER_INFO_SIZE) + return -EIO; + + if (btrfs_super_bytenr(buf) != BTRFS_SUPER_INFO_OFFSET) + return -EIO; + + if (btrfs_check_super(buf)) + return -EIO; + + memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE); + return 0; +} + +static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, + int verify, int silent, u16 csum_type) +{ + u8 result[BTRFS_CSUM_SIZE]; + u32 len; + + len = buf->len - BTRFS_CSUM_SIZE; + btrfs_csum_data(csum_type, (u8 *)buf->data + BTRFS_CSUM_SIZE, + result, len); + + if (verify) { + if (memcmp_extent_buffer(buf, result, 0, csum_size)) { + /* FIXME: format */ + if (!silent) + printk("checksum verify failed on %llu found %08X wanted %08X\n", + (unsigned long long)buf->start, + result[0], + buf->data[0]); + return 1; + } + } else { + write_extent_buffer(buf, result, 0, csum_size); + } + return 0; +} + +int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify, + u16 csum_type) +{ + return __csum_tree_block_size(buf, csum_size, verify, 0, csum_type); +} + +static int csum_tree_block(struct btrfs_fs_info *fs_info, + struct extent_buffer *buf, int verify) +{ + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); + u16 csum_type = btrfs_super_csum_type(fs_info->super_copy); + + return csum_tree_block_size(buf, csum_size, verify, csum_type); +} + +struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, + u64 bytenr, u32 blocksize) +{ + return find_extent_buffer(&fs_info->extent_cache, + bytenr, blocksize); +} + +struct extent_buffer* btrfs_find_create_tree_block( + struct btrfs_fs_info *fs_info, u64 bytenr) +{ + return alloc_extent_buffer(fs_info, bytenr, fs_info->nodesize); +} + +static int verify_parent_transid(struct extent_io_tree *io_tree, + struct extent_buffer *eb, u64 parent_transid, + int ignore) +{ + int ret; + + if (!parent_transid || btrfs_header_generation(eb) == parent_transid) + return 0; + + if (extent_buffer_uptodate(eb) && + btrfs_header_generation(eb) == parent_transid) { + ret = 0; + goto out; + } + printk("parent transid 
verify failed on %llu wanted %llu found %llu\n", + (unsigned long long)eb->start, + (unsigned long long)parent_transid, + (unsigned long long)btrfs_header_generation(eb)); + if (ignore) { + eb->flags |= EXTENT_BAD_TRANSID; + printk("Ignoring transid failure\n"); + return 0; + } + + ret = 1; +out: + clear_extent_buffer_uptodate(eb); + return ret; + +} + +int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror) +{ + unsigned long offset = 0; + struct btrfs_multi_bio *multi = NULL; + struct btrfs_device *device; + int ret = 0; + u64 read_len; + unsigned long bytes_left = eb->len; + + while (bytes_left) { + read_len = bytes_left; + device = NULL; + + ret = btrfs_map_block(info, READ, eb->start + offset, + &read_len, &multi, mirror, NULL); + if (ret) { + printk("Couldn't map the block %Lu\n", eb->start + offset); + kfree(multi); + return -EIO; + } + device = multi->stripes[0].dev; + + if (!device->desc || !device->part) { + kfree(multi); + return -EIO; + } + + if (read_len > bytes_left) + read_len = bytes_left; + + ret = read_extent_from_disk(device->desc, device->part, + multi->stripes[0].physical, eb, + offset, read_len); + kfree(multi); + multi = NULL; + + if (ret) + return -EIO; + offset += read_len; + bytes_left -= read_len; + } + return 0; +} + +struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, + u64 parent_transid) +{ + int ret; + struct extent_buffer *eb; + u64 best_transid = 0; + u32 sectorsize = fs_info->sectorsize; + int mirror_num = 1; + int good_mirror = 0; + int candidate_mirror = 0; + int num_copies; + int ignore = 0; + + /* + * Don't even try to create tree block for unaligned tree block + * bytenr. + * Such unaligned tree block will free overlapping extent buffer, + * causing use-after-free bugs for fuzzed images. + */ + if (bytenr < sectorsize || !IS_ALIGNED(bytenr, sectorsize)) { + error("tree block bytenr %llu is not aligned to sectorsize %u", + bytenr, sectorsize); + return ERR_PTR(-EIO); + } + + eb = btrfs_find_create_tree_block(fs_info, bytenr); + if (!eb) + return ERR_PTR(-ENOMEM); + + if (btrfs_buffer_uptodate(eb, parent_transid)) + return eb; + + num_copies = btrfs_num_copies(fs_info, eb->start, eb->len); + while (1) { + ret = read_whole_eb(fs_info, eb, mirror_num); + if (ret == 0 && csum_tree_block(fs_info, eb, 1) == 0 && + check_tree_block(fs_info, eb) == 0 && + verify_parent_transid(&fs_info->extent_cache, eb, + parent_transid, ignore) == 0) { + /* + * check_tree_block() is less strict to allow btrfs + * check to get raw eb with bad key order and fix it. + * But we still need to try to get a good copy if + * possible, or bad key order can go into tools like + * btrfs ins dump-tree. 
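+			 *
+			 * (Editor's note: the retry loop walks mirror_num
+			 * through every copy reported by btrfs_num_copies();
+			 * only once all copies have failed does it set
+			 * `ignore` and fall back to the candidate or the
+			 * best-generation mirror, so a single bad mirror
+			 * never fails the read by itself.)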
+			 */
+			if (btrfs_header_level(eb))
+				ret = btrfs_check_node(fs_info, NULL, eb);
+			else
+				ret = btrfs_check_leaf(fs_info, NULL, eb);
+			if (!ret || candidate_mirror == mirror_num) {
+				btrfs_set_buffer_uptodate(eb);
+				return eb;
+			}
+			if (candidate_mirror <= 0)
+				candidate_mirror = mirror_num;
+		}
+		if (ignore) {
+			if (candidate_mirror > 0) {
+				mirror_num = candidate_mirror;
+				continue;
+			}
+			if (check_tree_block(fs_info, eb))
+				print_tree_block_error(fs_info, eb,
+						check_tree_block(fs_info, eb));
+			else
+				fprintf(stderr, "Csum didn't match\n");
+			ret = -EIO;
+			break;
+		}
+		if (num_copies == 1) {
+			ignore = 1;
+			continue;
+		}
+		if (btrfs_header_generation(eb) > best_transid) {
+			best_transid = btrfs_header_generation(eb);
+			good_mirror = mirror_num;
+		}
+		mirror_num++;
+		if (mirror_num > num_copies) {
+			if (candidate_mirror > 0)
+				mirror_num = candidate_mirror;
+			else
+				mirror_num = good_mirror;
+			ignore = 1;
+			continue;
+		}
+	}
+	/*
+	 * We failed to read this tree block; it should be deleted right
+	 * away so that a stale entry does not populate the cache.
+	 */
+	free_extent_buffer(eb);
+	return ERR_PTR(ret);
+}
+
+int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical,
+		     u64 *len, int mirror)
+{
+	u64 offset = 0;
+	struct btrfs_multi_bio *multi = NULL;
+	struct btrfs_device *device;
+	int ret = 0;
+	u64 max_len = *len;
+
+	ret = btrfs_map_block(fs_info, READ, logical, len, &multi, mirror,
+			      NULL);
+	if (ret) {
+		fprintf(stderr, "Couldn't map the block %llu\n",
+			logical + offset);
+		goto err;
+	}
+	device = multi->stripes[0].dev;
+
+	if (*len > max_len)
+		*len = max_len;
+	if (!device->desc || !device->part) {
+		ret = -EIO;
+		goto err;
+	}
+
+	ret = __btrfs_devread(device->desc, device->part, data, *len,
+			      multi->stripes[0].physical);
+	if (ret != *len)
+		ret = -EIO;
+	else
+		ret = 0;
+err:
+	kfree(multi);
+	return ret;
+}
+
+void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
+		      u64 objectid)
+{
+	root->node = NULL;
+	root->track_dirty = 0;
+
+	root->fs_info = fs_info;
+	root->objectid = objectid;
+	root->last_trans = 0;
+	root->last_inode_alloc = 0;
+
+	memset(&root->root_key, 0, sizeof(root->root_key));
+	memset(&root->root_item, 0, sizeof(root->root_item));
+	root->root_key.objectid = objectid;
+}
+
+static int find_and_setup_root(struct btrfs_root *tree_root,
+			       struct btrfs_fs_info *fs_info,
+			       u64 objectid, struct btrfs_root *root)
+{
+	int ret;
+	u64 generation;
+
+	btrfs_setup_root(root, fs_info, objectid);
+	ret = btrfs_find_last_root(tree_root, objectid,
+				   &root->root_item, &root->root_key);
+	if (ret)
+		return ret;
+
+	generation = btrfs_root_generation(&root->root_item);
+	root->node = read_tree_block(fs_info,
+			btrfs_root_bytenr(&root->root_item), generation);
+	if (!extent_buffer_uptodate(root->node))
+		return -EIO;
+
+	return 0;
+}
+
+int btrfs_free_fs_root(struct btrfs_root *root)
+{
+	if (root->node)
+		free_extent_buffer(root->node);
+	kfree(root);
+	return 0;
+}
+
+static void __free_fs_root(struct rb_node *node)
+{
+	struct btrfs_root *root;
+
+	root = container_of(node, struct btrfs_root, rb_node);
+	btrfs_free_fs_root(root);
+}
+
+FREE_RB_BASED_TREE(fs_roots, __free_fs_root);
+
+struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
+					       struct btrfs_key *location)
+{
+	struct btrfs_root *root;
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_path *path;
+	struct extent_buffer *l;
+	u64 generation;
+	int ret = 0;
+
+	root = calloc(1, sizeof(*root));
+	if (!root)
+		return ERR_PTR(-ENOMEM);
+	if
(location->offset == (u64)-1) { + ret = find_and_setup_root(tree_root, fs_info, + location->objectid, root); + if (ret) { + free(root); + return ERR_PTR(ret); + } + goto insert; + } + + btrfs_setup_root(root, fs_info, + location->objectid); + + path = btrfs_alloc_path(); + if (!path) { + free(root); + return ERR_PTR(-ENOMEM); + } + + ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); + if (ret != 0) { + if (ret > 0) + ret = -ENOENT; + goto out; + } + l = path->nodes[0]; + read_extent_buffer(l, &root->root_item, + btrfs_item_ptr_offset(l, path->slots[0]), + sizeof(root->root_item)); + memcpy(&root->root_key, location, sizeof(*location)); + + /* If this root is already an orphan, no need to read */ + if (btrfs_root_refs(&root->root_item) == 0) { + ret = -ENOENT; + goto out; + } + ret = 0; +out: + btrfs_free_path(path); + if (ret) { + free(root); + return ERR_PTR(ret); + } + generation = btrfs_root_generation(&root->root_item); + root->node = read_tree_block(fs_info, + btrfs_root_bytenr(&root->root_item), generation); + if (!extent_buffer_uptodate(root->node)) { + free(root); + return ERR_PTR(-EIO); + } +insert: + root->ref_cows = 1; + return root; +} + +static int btrfs_fs_roots_compare_objectids(struct rb_node *node, + void *data) +{ + u64 objectid = *((u64 *)data); + struct btrfs_root *root; + + root = rb_entry(node, struct btrfs_root, rb_node); + if (objectid > root->objectid) + return 1; + else if (objectid < root->objectid) + return -1; + else + return 0; +} + +int btrfs_fs_roots_compare_roots(struct rb_node *node1, struct rb_node *node2) +{ + struct btrfs_root *root; + + root = rb_entry(node2, struct btrfs_root, rb_node); + return btrfs_fs_roots_compare_objectids(node1, (void *)&root->objectid); +} + +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) +{ + struct btrfs_root *root; + struct rb_node *node; + int ret; + u64 objectid = location->objectid; + + if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) + return fs_info->tree_root; + if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID) + return fs_info->chunk_root; + if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) + return fs_info->csum_root; + BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID || + location->offset != (u64)-1); + + node = rb_search(&fs_info->fs_root_tree, (void *)&objectid, + btrfs_fs_roots_compare_objectids, NULL); + if (node) + return container_of(node, struct btrfs_root, rb_node); + + root = btrfs_read_fs_root_no_cache(fs_info, location); + if (IS_ERR(root)) + return root; + + ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node, + btrfs_fs_roots_compare_roots); + BUG_ON(ret); + return root; +} + +void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) +{ + free(fs_info->tree_root); + free(fs_info->chunk_root); + free(fs_info->csum_root); + free(fs_info->super_copy); + free(fs_info); +} + +struct btrfs_fs_info *btrfs_new_fs_info(void) +{ + struct btrfs_fs_info *fs_info; + + fs_info = calloc(1, sizeof(struct btrfs_fs_info)); + if (!fs_info) + return NULL; + + fs_info->tree_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->chunk_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->csum_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE); + + if (!fs_info->tree_root || !fs_info->chunk_root || + !fs_info->csum_root || !fs_info->super_copy) + goto free_all; + + extent_io_tree_init(&fs_info->extent_cache); + + fs_info->fs_root_tree = RB_ROOT; + 
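+	/*
+	 * Editor's note: extent_cache caches read tree blocks (extent
+	 * buffers) by bytenr, fs_root_tree caches btrfs_root structures
+	 * by objectid, and the mapping_tree initialized below holds the
+	 * logical-to-physical chunk map consulted by btrfs_map_block().
+	 */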
cache_tree_init(&fs_info->mapping_tree.cache_tree); + + mutex_init(&fs_info->fs_mutex); + + return fs_info; +free_all: + btrfs_free_fs_info(fs_info); + return NULL; +} + +static int setup_root_or_create_block(struct btrfs_fs_info *fs_info, + struct btrfs_root *info_root, + u64 objectid, char *str) +{ + struct btrfs_root *root = fs_info->tree_root; + int ret; + + ret = find_and_setup_root(root, fs_info, objectid, info_root); + if (ret) { + error("could not setup %s tree", str); + return -EIO; + } + + return 0; +} + +int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info) +{ + struct btrfs_super_block *sb = fs_info->super_copy; + struct btrfs_root *root; + struct btrfs_key key; + u64 root_tree_bytenr; + u64 generation; + int ret; + + root = fs_info->tree_root; + btrfs_setup_root(root, fs_info, BTRFS_ROOT_TREE_OBJECTID); + generation = btrfs_super_generation(sb); + + root_tree_bytenr = btrfs_super_root(sb); + + root->node = read_tree_block(fs_info, root_tree_bytenr, generation); + if (!extent_buffer_uptodate(root->node)) { + fprintf(stderr, "Couldn't read tree root\n"); + return -EIO; + } + + ret = setup_root_or_create_block(fs_info, fs_info->csum_root, + BTRFS_CSUM_TREE_OBJECTID, "csum"); + if (ret) + return ret; + fs_info->csum_root->track_dirty = 1; + + fs_info->last_trans_committed = generation; + + key.objectid = BTRFS_FS_TREE_OBJECTID; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + fs_info->fs_root = btrfs_read_fs_root(fs_info, &key); + + if (IS_ERR(fs_info->fs_root)) + return -EIO; + return 0; +} + +void btrfs_release_all_roots(struct btrfs_fs_info *fs_info) +{ + if (fs_info->csum_root) + free_extent_buffer(fs_info->csum_root->node); + if (fs_info->tree_root) + free_extent_buffer(fs_info->tree_root->node); + if (fs_info->chunk_root) + free_extent_buffer(fs_info->chunk_root->node); +} + +static void free_map_lookup(struct cache_extent *ce) +{ + struct map_lookup *map; + + map = container_of(ce, struct map_lookup, ce); + kfree(map); +} + +FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup); + +void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info) +{ + free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree); + extent_io_tree_cleanup(&fs_info->extent_cache); +} + +static int btrfs_scan_fs_devices(struct blk_desc *desc, + struct disk_partition *part, + struct btrfs_fs_devices **fs_devices) +{ + u64 total_devs; + int ret; + + if (round_up(BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET, + desc->blksz) > (part->size << desc->log2blksz)) { + error("superblock end %u is larger than device size " LBAFU, + BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET, + part->size << desc->log2blksz); + return -EINVAL; + } + + ret = btrfs_scan_one_device(desc, part, fs_devices, &total_devs); + if (ret) { + fprintf(stderr, "No valid Btrfs found\n"); + return ret; + } + return 0; +} + +int btrfs_check_fs_compatibility(struct btrfs_super_block *sb) +{ + u64 features; + + features = btrfs_super_incompat_flags(sb) & + ~BTRFS_FEATURE_INCOMPAT_SUPP; + if (features) { + printk("couldn't open because of unsupported " + "option features (%llx).\n", + (unsigned long long)features); + return -ENOTSUPP; + } + + features = btrfs_super_incompat_flags(sb); + if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { + features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; + btrfs_set_super_incompat_flags(sb, features); + } + + return 0; +} + +static int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info) +{ + struct btrfs_super_block *sb = fs_info->super_copy; + u64 
chunk_root_bytenr; + u64 generation; + int ret; + + btrfs_setup_root(fs_info->chunk_root, fs_info, + BTRFS_CHUNK_TREE_OBJECTID); + + ret = btrfs_read_sys_array(fs_info); + if (ret) + return ret; + + generation = btrfs_super_chunk_root_generation(sb); + chunk_root_bytenr = btrfs_super_chunk_root(sb); + + fs_info->chunk_root->node = read_tree_block(fs_info, + chunk_root_bytenr, + generation); + if (!extent_buffer_uptodate(fs_info->chunk_root->node)) { + error("cannot read chunk root"); + return -EIO; + } + + ret = btrfs_read_chunk_tree(fs_info); + if (ret) { + fprintf(stderr, "Couldn't read chunk tree\n"); + return ret; + } + return 0; +} + +struct btrfs_fs_info *open_ctree_fs_info(struct blk_desc *desc, + struct disk_partition *part) +{ + struct btrfs_fs_info *fs_info; + struct btrfs_super_block *disk_super; + struct btrfs_fs_devices *fs_devices = NULL; + struct extent_buffer *eb; + int ret; + + fs_info = btrfs_new_fs_info(); + if (!fs_info) { + fprintf(stderr, "Failed to allocate memory for fs_info\n"); + return NULL; + } + + ret = btrfs_scan_fs_devices(desc, part, &fs_devices); + if (ret) + goto out; + + fs_info->fs_devices = fs_devices; + + ret = btrfs_open_devices(fs_devices); + if (ret) + goto out; + + disk_super = fs_info->super_copy; + ret = btrfs_read_dev_super(desc, part, disk_super); + if (ret) { + printk("No valid btrfs found\n"); + goto out_devices; + } + + if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID) { + fprintf(stderr, "ERROR: Filesystem UUID change in progress\n"); + goto out_devices; + } + + ASSERT(!memcmp(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE)); + if (btrfs_fs_incompat(fs_info, METADATA_UUID)) + ASSERT(!memcmp(disk_super->metadata_uuid, + fs_devices->metadata_uuid, BTRFS_FSID_SIZE)); + + fs_info->sectorsize = btrfs_super_sectorsize(disk_super); + fs_info->nodesize = btrfs_super_nodesize(disk_super); + fs_info->stripesize = btrfs_super_stripesize(disk_super); + + ret = btrfs_check_fs_compatibility(fs_info->super_copy); + if (ret) + goto out_devices; + + ret = btrfs_setup_chunk_tree_and_device_map(fs_info); + if (ret) + goto out_chunk; + + /* Chunk tree root is unable to read, return directly */ + if (!fs_info->chunk_root) + return fs_info; + + eb = fs_info->chunk_root->node; + read_extent_buffer(eb, fs_info->chunk_tree_uuid, + btrfs_header_chunk_tree_uuid(eb), + BTRFS_UUID_SIZE); + + ret = btrfs_setup_all_roots(fs_info); + if (ret) + goto out_chunk; + + return fs_info; + +out_chunk: + btrfs_release_all_roots(fs_info); + btrfs_cleanup_all_caches(fs_info); +out_devices: + btrfs_close_devices(fs_devices); +out: + btrfs_free_fs_info(fs_info); + return NULL; +} + +int close_ctree_fs_info(struct btrfs_fs_info *fs_info) +{ + int ret; + int err = 0; + + free_fs_roots_tree(&fs_info->fs_root_tree); + + btrfs_release_all_roots(fs_info); + ret = btrfs_close_devices(fs_info->fs_devices); + btrfs_cleanup_all_caches(fs_info); + btrfs_free_fs_info(fs_info); + if (!err) + err = ret; + return err; +} + +int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) +{ + int ret; + + ret = extent_buffer_uptodate(buf); + if (!ret) + return ret; + + ret = verify_parent_transid(&buf->fs_info->extent_cache, buf, + parent_transid, 1); + return !ret; +} + +int btrfs_set_buffer_uptodate(struct extent_buffer *eb) +{ + return set_extent_buffer_uptodate(eb); +} diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h new file mode 100644 index 0000000000..a347912078 --- /dev/null +++ b/fs/btrfs/disk-io.h @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0+ 
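+
+/*
+ * Editor's sketch (not part of the original patch): the intended
+ * lifecycle of this API; `desc` and `part` come from U-Boot's generic
+ * block layer, exactly as btrfs_probe() obtains them.
+ *
+ *	struct btrfs_fs_info *fs_info;
+ *
+ *	fs_info = open_ctree_fs_info(desc, part);
+ *	if (!fs_info)
+ *		return -1;
+ *	... access trees via fs_info->fs_root, fs_info->tree_root ...
+ *	close_ctree_fs_info(fs_info);
+ */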
+#ifndef __BTRFS_DISK_IO_H__ +#define __BTRFS_DISK_IO_H__ + +#include +#include +#include "ctree.h" +#include "disk-io.h" + +#define BTRFS_SUPER_INFO_OFFSET SZ_64K +#define BTRFS_SUPER_INFO_SIZE SZ_4K + +/* From btrfs-progs */ +int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror); +struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, + u64 parent_transid); + +int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical, + u64 *len, int mirror); +struct extent_buffer* btrfs_find_create_tree_block( + struct btrfs_fs_info *fs_info, u64 bytenr); +struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, + u64 bytenr, u32 blocksize); +struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info, + struct btrfs_key *location); +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location); + +void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, + u64 objectid); + +void btrfs_free_fs_info(struct btrfs_fs_info *fs_info); +struct btrfs_fs_info *btrfs_new_fs_info(void); +int btrfs_check_fs_compatibility(struct btrfs_super_block *sb); +int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info); +void btrfs_release_all_roots(struct btrfs_fs_info *fs_info); +void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info); + +struct btrfs_fs_info *open_ctree_fs_info(struct blk_desc *desc, + struct disk_partition *part); +int close_ctree_fs_info(struct btrfs_fs_info *fs_info); + +int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part, + struct btrfs_super_block *sb); +int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); +int btrfs_set_buffer_uptodate(struct extent_buffer *buf); +int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len); +int csum_tree_block_size(struct extent_buffer *buf, u16 csum_sectorsize, + int verify, u16 csum_type); +#endif diff --git a/fs/btrfs/extent-cache.c b/fs/btrfs/extent-cache.c new file mode 100644 index 0000000000..bc8cf3a522 --- /dev/null +++ b/fs/btrfs/extent-cache.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Crossported from the same named file of btrfs-progs. + * + * Minor modification to include headers. 
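+ *
+ * Editor's note: the rb-tree comparators below treat two ranges as
+ * equal whenever they overlap, so an rb_search() over the cache
+ * returns any cached extent intersecting the queried range.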
+ */ +#include +#include +#include +#include +#include +#include "extent-cache.h" +#include "common/rbtree-utils.h" + +struct cache_extent_search_range { + u64 objectid; + u64 start; + u64 size; +}; + +static int cache_tree_comp_range(struct rb_node *node, void *data) +{ + struct cache_extent *entry; + struct cache_extent_search_range *range; + + range = (struct cache_extent_search_range *)data; + entry = rb_entry(node, struct cache_extent, rb_node); + + if (entry->start + entry->size <= range->start) + return 1; + else if (range->start + range->size <= entry->start) + return -1; + else + return 0; +} + +static int cache_tree_comp_nodes(struct rb_node *node1, struct rb_node *node2) +{ + struct cache_extent *entry; + struct cache_extent_search_range range; + + entry = rb_entry(node2, struct cache_extent, rb_node); + range.start = entry->start; + range.size = entry->size; + + return cache_tree_comp_range(node1, (void *)&range); +} + +static int cache_tree_comp_range2(struct rb_node *node, void *data) +{ + struct cache_extent *entry; + struct cache_extent_search_range *range; + + range = (struct cache_extent_search_range *)data; + entry = rb_entry(node, struct cache_extent, rb_node); + + if (entry->objectid < range->objectid) + return 1; + else if (entry->objectid > range->objectid) + return -1; + else if (entry->start + entry->size <= range->start) + return 1; + else if (range->start + range->size <= entry->start) + return -1; + else + return 0; +} + +static int cache_tree_comp_nodes2(struct rb_node *node1, struct rb_node *node2) +{ + struct cache_extent *entry; + struct cache_extent_search_range range; + + entry = rb_entry(node2, struct cache_extent, rb_node); + range.objectid = entry->objectid; + range.start = entry->start; + range.size = entry->size; + + return cache_tree_comp_range2(node1, (void *)&range); +} + +void cache_tree_init(struct cache_tree *tree) +{ + tree->root = RB_ROOT; +} + +static struct cache_extent *alloc_cache_extent(u64 start, u64 size) +{ + struct cache_extent *pe = malloc(sizeof(*pe)); + + if (!pe) + return pe; + + pe->objectid = 0; + pe->start = start; + pe->size = size; + return pe; +} + +int add_cache_extent(struct cache_tree *tree, u64 start, u64 size) +{ + struct cache_extent *pe = alloc_cache_extent(start, size); + int ret; + + if (!pe) + return -ENOMEM; + + ret = insert_cache_extent(tree, pe); + if (ret) + free(pe); + + return ret; +} + +int insert_cache_extent(struct cache_tree *tree, struct cache_extent *pe) +{ + return rb_insert(&tree->root, &pe->rb_node, cache_tree_comp_nodes); +} + +int insert_cache_extent2(struct cache_tree *tree, struct cache_extent *pe) +{ + return rb_insert(&tree->root, &pe->rb_node, cache_tree_comp_nodes2); +} + +struct cache_extent *lookup_cache_extent(struct cache_tree *tree, + u64 start, u64 size) +{ + struct rb_node *node; + struct cache_extent *entry; + struct cache_extent_search_range range; + + range.start = start; + range.size = size; + node = rb_search(&tree->root, &range, cache_tree_comp_range, NULL); + if (!node) + return NULL; + + entry = rb_entry(node, struct cache_extent, rb_node); + return entry; +} + +struct cache_extent *lookup_cache_extent2(struct cache_tree *tree, + u64 objectid, u64 start, u64 size) +{ + struct rb_node *node; + struct cache_extent *entry; + struct cache_extent_search_range range; + + range.objectid = objectid; + range.start = start; + range.size = size; + node = rb_search(&tree->root, &range, cache_tree_comp_range2, NULL); + if (!node) + return NULL; + + entry = rb_entry(node, struct 
cache_extent, rb_node); + return entry; +} + +struct cache_extent *search_cache_extent(struct cache_tree *tree, u64 start) +{ + struct rb_node *next; + struct rb_node *node; + struct cache_extent *entry; + struct cache_extent_search_range range; + + range.start = start; + range.size = 1; + node = rb_search(&tree->root, &range, cache_tree_comp_range, &next); + if (!node) + node = next; + if (!node) + return NULL; + + entry = rb_entry(node, struct cache_extent, rb_node); + return entry; +} + +struct cache_extent *search_cache_extent2(struct cache_tree *tree, + u64 objectid, u64 start) +{ + struct rb_node *next; + struct rb_node *node; + struct cache_extent *entry; + struct cache_extent_search_range range; + + range.objectid = objectid; + range.start = start; + range.size = 1; + node = rb_search(&tree->root, &range, cache_tree_comp_range2, &next); + if (!node) + node = next; + if (!node) + return NULL; + + entry = rb_entry(node, struct cache_extent, rb_node); + return entry; +} + +struct cache_extent *first_cache_extent(struct cache_tree *tree) +{ + struct rb_node *node = rb_first(&tree->root); + + if (!node) + return NULL; + return rb_entry(node, struct cache_extent, rb_node); +} + +struct cache_extent *last_cache_extent(struct cache_tree *tree) +{ + struct rb_node *node = rb_last(&tree->root); + + if (!node) + return NULL; + return rb_entry(node, struct cache_extent, rb_node); +} + +struct cache_extent *prev_cache_extent(struct cache_extent *pe) +{ + struct rb_node *node = rb_prev(&pe->rb_node); + + if (!node) + return NULL; + return rb_entry(node, struct cache_extent, rb_node); +} + +struct cache_extent *next_cache_extent(struct cache_extent *pe) +{ + struct rb_node *node = rb_next(&pe->rb_node); + + if (!node) + return NULL; + return rb_entry(node, struct cache_extent, rb_node); +} + +void remove_cache_extent(struct cache_tree *tree, struct cache_extent *pe) +{ + rb_erase(&pe->rb_node, &tree->root); +} + +void cache_tree_free_extents(struct cache_tree *tree, + free_cache_extent free_func) +{ + struct cache_extent *ce; + + while ((ce = first_cache_extent(tree))) { + remove_cache_extent(tree, ce); + free_func(ce); + } +} + +static void free_extent_cache(struct cache_extent *pe) +{ + free(pe); +} + +void free_extent_cache_tree(struct cache_tree *tree) +{ + cache_tree_free_extents(tree, free_extent_cache); +} + +int add_merge_cache_extent(struct cache_tree *tree, u64 start, u64 size) +{ + struct cache_extent *cache; + struct cache_extent *next = NULL; + struct cache_extent *prev = NULL; + int next_merged = 0; + int prev_merged = 0; + int ret = 0; + + if (cache_tree_empty(tree)) + goto insert; + + cache = search_cache_extent(tree, start); + if (!cache) { + /* + * Either the tree is completely empty, or the no range after + * start. + * Either way, the last cache_extent should be prev. 
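+		 *
+		 * Editor's example: with cached ranges [0, 4) and [8, 12),
+		 * inserting [4, 8) merges all three into [0, 12), while
+		 * inserting [16, 20) touches neither neighbour and is
+		 * simply added as a new entry.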
+		 */
+		prev = last_cache_extent(tree);
+	} else if (start <= cache->start) {
+		next = cache;
+		prev = prev_cache_extent(cache);
+	} else {
+		prev = cache;
+		next = next_cache_extent(cache);
+	}
+
+	/*
+	 * Ensure the range to be inserted doesn't overlap with any
+	 * existing range, or we would need an extra loop to do the merge
+	 */
+	BUG_ON(next && start + size > next->start);
+	BUG_ON(prev && prev->start + prev->size > start);
+
+	if (next && start + size == next->start) {
+		next_merged = 1;
+		next->size = next->start + next->size - start;
+		next->start = start;
+	}
+	if (prev && prev->start + prev->size == start) {
+		prev_merged = 1;
+		if (next_merged) {
+			next->size = next->start + next->size - prev->start;
+			next->start = prev->start;
+			remove_cache_extent(tree, prev);
+			free(prev);
+		} else {
+			prev->size = start + size - prev->start;
+		}
+	}
+insert:
+	if (!prev_merged && !next_merged)
+		ret = add_cache_extent(tree, start, size);
+	return ret;
+}
diff --git a/fs/btrfs/extent-cache.h b/fs/btrfs/extent-cache.h
new file mode 100644
index 0000000000..2fee81a66e
--- /dev/null
+++ b/fs/btrfs/extent-cache.h
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Crossported from the same named file of btrfs-progs.
+ *
+ * Minor modification to include headers.
+ */
+#ifndef __BTRFS_EXTENT_CACHE_H__
+#define __BTRFS_EXTENT_CACHE_H__
+
+#include
+#include
+
+struct cache_tree {
+	struct rb_root root;
+};
+
+struct cache_extent {
+	struct rb_node rb_node;
+	u64 objectid;
+	u64 start;
+	u64 size;
+};
+
+void cache_tree_init(struct cache_tree *tree);
+
+struct cache_extent *first_cache_extent(struct cache_tree *tree);
+struct cache_extent *last_cache_extent(struct cache_tree *tree);
+struct cache_extent *prev_cache_extent(struct cache_extent *pe);
+struct cache_extent *next_cache_extent(struct cache_extent *pe);
+
+/*
+ * Find a cache_extent which covers start.
+ *
+ * If not found, return next cache_extent if possible.
+ */
+struct cache_extent *search_cache_extent(struct cache_tree *tree, u64 start);
+
+/*
+ * Find a cache_extent which strictly covers start.
+ *
+ * If not found, return NULL.
+ */
+struct cache_extent *lookup_cache_extent(struct cache_tree *tree,
+					 u64 start, u64 size);
+
+/*
+ * Add a non-overlapping extent into the cache tree
+ *
+ * If [start, start + size) overlaps with an existing one, -EEXIST is
+ * returned.
+ */
+int add_cache_extent(struct cache_tree *tree, u64 start, u64 size);
+
+/*
+ * Same as add_cache_extent, but takes a cache_extent struct.
+ */
+int insert_cache_extent(struct cache_tree *tree, struct cache_extent *pe);
+void remove_cache_extent(struct cache_tree *tree, struct cache_extent *pe);
+
+static inline int cache_tree_empty(struct cache_tree *tree)
+{
+	return RB_EMPTY_ROOT(&tree->root);
+}
+
+typedef void (*free_cache_extent)(struct cache_extent *pe);
+
+void cache_tree_free_extents(struct cache_tree *tree,
+			     free_cache_extent free_func);
+
+#define FREE_EXTENT_CACHE_BASED_TREE(name, free_func) \
+static void free_##name##_tree(struct cache_tree *tree) \
+{ \
+	cache_tree_free_extents(tree, free_func); \
+}
+
+void free_extent_cache_tree(struct cache_tree *tree);
+
+/*
+ * Search for a cache_extent with the same objectid which covers start.
+ *
+ * If not found, return the next one if possible.
+ */
+struct cache_extent *search_cache_extent2(struct cache_tree *tree,
+					  u64 objectid, u64 start);
+/*
+ * Search for a cache_extent with the same objectid covering the range
+ * [start, start + size)
+ *
+ * If not found, return next cache_extent if possible.
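+ *
+ * Editor's usage sketch for the single-key variants declared above,
+ * with made-up byte ranges:
+ *
+ *	struct cache_tree tree;
+ *	struct cache_extent *ce;
+ *
+ *	cache_tree_init(&tree);
+ *	if (add_cache_extent(&tree, 4096, 4096))	caches [4096, 8192)
+ *		return;
+ *	ce = lookup_cache_extent(&tree, 4096, 1);	finds the entry
+ *	ce = lookup_cache_extent(&tree, 8192, 1);	NULL, not covered
+ *	free_extent_cache_tree(&tree);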
+ */ +struct cache_extent *lookup_cache_extent2(struct cache_tree *tree, + u64 objectid, u64 start, u64 size); +int insert_cache_extent2(struct cache_tree *tree, struct cache_extent *pe); + +/* + * Insert a cache_extent range [start, start + size). + * + * This function may merge with existing cache_extent. + * NOTE: caller must ensure the inserted range won't cover with any existing + * range. + */ +int add_merge_cache_extent(struct cache_tree *tree, u64 start, u64 size); + +#endif diff --git a/fs/btrfs/extent-io.c b/fs/btrfs/extent-io.c index 2e4599cf64..774e29eb60 100644 --- a/fs/btrfs/extent-io.c +++ b/fs/btrfs/extent-io.c @@ -5,122 +5,805 @@ * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz */ -#include "btrfs.h" +#include +#include #include #include +#include "btrfs.h" +#include "ctree.h" +#include "extent-io.h" +#include "disk-io.h" -u64 btrfs_read_extent_inline(struct btrfs_path *path, - struct btrfs_file_extent_item *extent, u64 offset, - u64 size, char *out) +void extent_io_tree_init(struct extent_io_tree *tree) { - u32 clen, dlen, orig_size = size, res; - const char *cbuf; - char *dbuf; - const int data_off = offsetof(struct btrfs_file_extent_item, - disk_bytenr); + cache_tree_init(&tree->state); + cache_tree_init(&tree->cache); + tree->cache_size = 0; +} - clen = btrfs_path_item_size(path) - data_off; - cbuf = (const char *) extent + data_off; - dlen = extent->ram_bytes; +static struct extent_state *alloc_extent_state(void) +{ + struct extent_state *state; - if (offset > dlen) - return -1ULL; + state = malloc(sizeof(*state)); + if (!state) + return NULL; + state->cache_node.objectid = 0; + state->refs = 1; + state->state = 0; + state->xprivate = 0; + return state; +} - if (size > dlen - offset) - size = dlen - offset; +static void btrfs_free_extent_state(struct extent_state *state) +{ + state->refs--; + BUG_ON(state->refs < 0); + if (state->refs == 0) + free(state); +} - if (extent->compression == BTRFS_COMPRESS_NONE) { - memcpy(out, cbuf + offset, size); - return size; +static void free_extent_state_func(struct cache_extent *cache) +{ + struct extent_state *es; + + es = container_of(cache, struct extent_state, cache_node); + btrfs_free_extent_state(es); +} + +static void free_extent_buffer_final(struct extent_buffer *eb); +void extent_io_tree_cleanup(struct extent_io_tree *tree) +{ + cache_tree_free_extents(&tree->state, free_extent_state_func); +} + +static inline void update_extent_state(struct extent_state *state) +{ + state->cache_node.start = state->start; + state->cache_node.size = state->end + 1 - state->start; +} + +/* + * Utility function to look for merge candidates inside a given range. + * Any extents with matching state are merged together into a single + * extent in the tree. 
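+ * (Editor's example: adjacent states [0, 4095] and [4096, 8191] with
+ * identical state bits collapse into a single [0, 8191] entry.)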
Extents with EXTENT_IO in their state field are + * not merged + */ +static int merge_state(struct extent_io_tree *tree, + struct extent_state *state) +{ + struct extent_state *other; + struct cache_extent *other_node; + + if (state->state & EXTENT_IOBITS) + return 0; + + other_node = prev_cache_extent(&state->cache_node); + if (other_node) { + other = container_of(other_node, struct extent_state, + cache_node); + if (other->end == state->start - 1 && + other->state == state->state) { + state->start = other->start; + update_extent_state(state); + remove_cache_extent(&tree->state, &other->cache_node); + btrfs_free_extent_state(other); + } } + other_node = next_cache_extent(&state->cache_node); + if (other_node) { + other = container_of(other_node, struct extent_state, + cache_node); + if (other->start == state->end + 1 && + other->state == state->state) { + other->start = state->start; + update_extent_state(other); + remove_cache_extent(&tree->state, &state->cache_node); + btrfs_free_extent_state(state); + } + } + return 0; +} - if (dlen > orig_size) { - dbuf = malloc(dlen); - if (!dbuf) - return -1ULL; +/* + * insert an extent_state struct into the tree. 'bits' are set on the + * struct before it is inserted. + */ +static int insert_state(struct extent_io_tree *tree, + struct extent_state *state, u64 start, u64 end, + int bits) +{ + int ret; + + BUG_ON(end < start); + state->state |= bits; + state->start = start; + state->end = end; + update_extent_state(state); + ret = insert_cache_extent(&tree->state, &state->cache_node); + BUG_ON(ret); + merge_state(tree, state); + return 0; +} + +/* + * split a given extent state struct in two, inserting the preallocated + * struct 'prealloc' as the newly created second half. 'split' indicates an + * offset inside 'orig' where it should be split. + */ +static int split_state(struct extent_io_tree *tree, struct extent_state *orig, + struct extent_state *prealloc, u64 split) +{ + int ret; + prealloc->start = orig->start; + prealloc->end = split - 1; + prealloc->state = orig->state; + update_extent_state(prealloc); + orig->start = split; + update_extent_state(orig); + ret = insert_cache_extent(&tree->state, &prealloc->cache_node); + BUG_ON(ret); + return 0; +} + +/* + * clear some bits on a range in the tree. 
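+ *
+ * Editor's note: despite the generic summary above, this helper acts
+ * on a single extent_state: when no state bits remain it removes and
+ * frees the entry, otherwise it tries to re-merge it with its
+ * neighbours.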
+ */ +static int clear_state_bit(struct extent_io_tree *tree, + struct extent_state *state, int bits) +{ + int ret = state->state & bits; + + state->state &= ~bits; + if (state->state == 0) { + remove_cache_extent(&tree->state, &state->cache_node); + btrfs_free_extent_state(state); } else { - dbuf = out; + merge_state(tree, state); } - - res = btrfs_decompress(extent->compression, cbuf, clen, dbuf, dlen); - if (res == -1 || res != dlen) - goto err; - - if (dlen > orig_size) { - memcpy(out, dbuf + offset, size); - free(dbuf); - } else if (offset) { - memmove(out, dbuf + offset, size); - } - - return size; - -err: - if (dlen > orig_size) - free(dbuf); - return -1ULL; + return ret; } -u64 btrfs_read_extent_reg(struct btrfs_path *path, - struct btrfs_file_extent_item *extent, u64 offset, - u64 size, char *out) +/* + * extent_buffer_bitmap_set - set an area of a bitmap + * @eb: the extent buffer + * @start: offset of the bitmap item in the extent buffer + * @pos: bit number of the first bit + * @len: number of bits to set + */ +void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start, + unsigned long pos, unsigned long len) { - u64 physical, clen, dlen, orig_size = size; - u32 res; - char *cbuf, *dbuf; + u8 *p = (u8 *)eb->data + start + BIT_BYTE(pos); + const unsigned int size = pos + len; + int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE); + u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos); - clen = extent->disk_num_bytes; - dlen = extent->num_bytes; - - if (offset > dlen) - return -1ULL; - - if (size > dlen - offset) - size = dlen - offset; - - /* sparse extent */ - if (extent->disk_bytenr == 0) { - memset(out, 0, size); - return size; + while (len >= bits_to_set) { + *p |= mask_to_set; + len -= bits_to_set; + bits_to_set = BITS_PER_BYTE; + mask_to_set = ~0; + p++; } - - physical = btrfs_map_logical_to_physical(extent->disk_bytenr); - if (physical == -1ULL) - return -1ULL; - - if (extent->compression == BTRFS_COMPRESS_NONE) { - physical += extent->offset + offset; - if (!btrfs_devread(physical, size, out)) - return -1ULL; - - return size; + if (len) { + mask_to_set &= BITMAP_LAST_BYTE_MASK(size); + *p |= mask_to_set; } - - cbuf = malloc_cache_aligned(dlen > size ? clen + dlen : clen); - if (!cbuf) - return -1ULL; - - if (dlen > orig_size) - dbuf = cbuf + clen; - else - dbuf = out; - - if (!btrfs_devread(physical, clen, cbuf)) - goto err; - - res = btrfs_decompress(extent->compression, cbuf, clen, dbuf, dlen); - if (res == -1) - goto err; - - if (dlen > orig_size) - memcpy(out, dbuf + offset, size); - else - memmove(out, dbuf + offset, size); - - free(cbuf); - return res; - -err: - free(cbuf); - return -1ULL; +} + +/* + * extent_buffer_bitmap_clear - clear an area of a bitmap + * @eb: the extent buffer + * @start: offset of the bitmap item in the extent buffer + * @pos: bit number of the first bit + * @len: number of bits to clear + */ +void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start, + unsigned long pos, unsigned long len) +{ + u8 *p = (u8 *)eb->data + start + BIT_BYTE(pos); + const unsigned int size = pos + len; + int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE); + u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos); + + while (len >= bits_to_clear) { + *p &= ~mask_to_clear; + len -= bits_to_clear; + bits_to_clear = BITS_PER_BYTE; + mask_to_clear = ~0; + p++; + } + if (len) { + mask_to_clear &= BITMAP_LAST_BYTE_MASK(size); + *p &= ~mask_to_clear; + } +} + +/* + * clear some bits on a range in the tree. 
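+ *
+ * Both @start and @end are inclusive. An illustrative call, clearing the
+ * dirty bit on a single sector at @start, would be:
+ *
+ *	clear_extent_bits(tree, start, start + sectorsize - 1, EXTENT_DIRTY);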
+ */ +int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct cache_extent *node; + u64 last_end; + int err; + int set = 0; + +again: + if (!prealloc) { + prealloc = alloc_extent_state(); + if (!prealloc) + return -ENOMEM; + } + + /* + * this search will find the extents that end after + * our range starts + */ + node = search_cache_extent(&tree->state, start); + if (!node) + goto out; + state = container_of(node, struct extent_state, cache_node); + if (state->start > end) + goto out; + last_end = state->end; + + /* + * | ---- desired range ---- | + * | state | or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip + * bits on second half. + * + * If the extent we found extends past our range, we + * just split and search again. It'll get split again + * the next time though. + * + * If the extent we found is inside our range, we clear + * the desired bit on it. + */ + if (state->start < start) { + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + set |= clear_state_bit(tree, state, bits); + if (last_end == (u64)-1) + goto out; + start = last_end + 1; + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and clear the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + set |= clear_state_bit(tree, prealloc, bits); + prealloc = NULL; + goto out; + } + + start = state->end + 1; + set |= clear_state_bit(tree, state, bits); + if (last_end == (u64)-1) + goto out; + start = last_end + 1; + goto search_again; +out: + if (prealloc) + btrfs_free_extent_state(prealloc); + return set; + +search_again: + if (start > end) + goto out; + goto again; +} + +/* + * set some bits on a range in the tree. + */ +int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct cache_extent *node; + int err = 0; + u64 last_start; + u64 last_end; +again: + if (!prealloc) { + prealloc = alloc_extent_state(); + if (!prealloc) + return -ENOMEM; + } + + /* + * this search will find the extents that end after + * our range starts + */ + node = search_cache_extent(&tree->state, start); + if (!node) { + err = insert_state(tree, prealloc, start, end, bits); + BUG_ON(err == -EEXIST); + prealloc = NULL; + goto out; + } + + state = container_of(node, struct extent_state, cache_node); + last_start = state->start; + last_end = state->end; + + /* + * | ---- desired range ---- | + * | state | + * + * Just lock what we found and keep going + */ + if (state->start == start && state->end <= end) { + state->state |= bits; + merge_state(tree, state); + if (last_end == (u64)-1) + goto out; + start = last_end + 1; + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip bits on + * second half. + * + * If the extent we found extends past our + * range, we just split and search again. It'll get split + * again the next time though. + * + * If the extent we found is inside our range, we set the + * desired bit on it. 
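+	 *
+	 * Either way @prealloc is consumed by split_state() here, so a
+	 * new one is allocated on the next pass through 'again'.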
+ */ + if (state->start < start) { + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + state->state |= bits; + start = state->end + 1; + merge_state(tree, state); + if (last_end == (u64)-1) + goto out; + start = last_end + 1; + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | or | state | + * + * There's a hole, we need to insert something in it and + * ignore the extent we found. + */ + if (state->start > start) { + u64 this_end; + if (end < last_start) + this_end = end; + else + this_end = last_start -1; + err = insert_state(tree, prealloc, start, this_end, + bits); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + start = this_end + 1; + goto search_again; + } + /* + * | ---- desired range ---- | + * | ---------- state ---------- | + * We need to split the extent, and set the bit + * on the first half + */ + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + state->state |= bits; + merge_state(tree, prealloc); + prealloc = NULL; +out: + if (prealloc) + btrfs_free_extent_state(prealloc); + return err; +search_again: + if (start > end) + goto out; + goto again; +} + +int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end) +{ + return set_extent_bits(tree, start, end, EXTENT_DIRTY); +} + +int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end) +{ + return clear_extent_bits(tree, start, end, EXTENT_DIRTY); +} + +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits) +{ + struct cache_extent *node; + struct extent_state *state; + int ret = 1; + + /* + * this search will find all the extents that end after + * our range starts. 
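+	 * The first state found may not carry @bits, so the loop below
+	 * walks forward until one does, or the tree is exhausted.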
+ */ + node = search_cache_extent(&tree->state, start); + if (!node) + goto out; + + while(1) { + state = container_of(node, struct extent_state, cache_node); + if (state->end >= start && (state->state & bits)) { + *start_ret = state->start; + *end_ret = state->end; + ret = 0; + break; + } + node = next_cache_extent(node); + if (!node) + break; + } +out: + return ret; +} + +int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int filled) +{ + struct extent_state *state = NULL; + struct cache_extent *node; + int bitset = 0; + + node = search_cache_extent(&tree->state, start); + while (node && start <= end) { + state = container_of(node, struct extent_state, cache_node); + + if (filled && state->start > start) { + bitset = 0; + break; + } + if (state->start > end) + break; + if (state->state & bits) { + bitset = 1; + if (!filled) + break; + } else if (filled) { + bitset = 0; + break; + } + start = state->end + 1; + if (start > end) + break; + node = next_cache_extent(node); + if (!node) { + if (filled) + bitset = 0; + break; + } + } + return bitset; +} + +int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) +{ + struct cache_extent *node; + struct extent_state *state; + int ret = 0; + + node = search_cache_extent(&tree->state, start); + if (!node) { + ret = -ENOENT; + goto out; + } + state = container_of(node, struct extent_state, cache_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + state->xprivate = private; +out: + return ret; +} + +int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) +{ + struct cache_extent *node; + struct extent_state *state; + int ret = 0; + + node = search_cache_extent(&tree->state, start); + if (!node) { + ret = -ENOENT; + goto out; + } + state = container_of(node, struct extent_state, cache_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + *private = state->xprivate; +out: + return ret; +} + +static struct extent_buffer *__alloc_extent_buffer(struct btrfs_fs_info *info, + u64 bytenr, u32 blocksize) +{ + struct extent_buffer *eb; + + eb = calloc(1, sizeof(struct extent_buffer)); + if (!eb) + return NULL; + eb->data = malloc_cache_aligned(blocksize); + if (!eb->data) { + free(eb); + return NULL; + } + + eb->start = bytenr; + eb->len = blocksize; + eb->refs = 1; + eb->flags = 0; + eb->cache_node.start = bytenr; + eb->cache_node.size = blocksize; + eb->fs_info = info; + memset_extent_buffer(eb, 0, 0, blocksize); + + return eb; +} + +struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) +{ + struct extent_buffer *new; + + new = __alloc_extent_buffer(src->fs_info, src->start, src->len); + if (!new) + return NULL; + + copy_extent_buffer(new, src, 0, 0, src->len); + new->flags |= EXTENT_BUFFER_DUMMY; + + return new; +} + +static void free_extent_buffer_final(struct extent_buffer *eb) +{ + BUG_ON(eb->refs); + if (!(eb->flags & EXTENT_BUFFER_DUMMY)) { + struct extent_io_tree *tree = &eb->fs_info->extent_cache; + + remove_cache_extent(&tree->cache, &eb->cache_node); + BUG_ON(tree->cache_size < eb->len); + tree->cache_size -= eb->len; + } + free(eb->data); + free(eb); +} + +static void free_extent_buffer_internal(struct extent_buffer *eb, bool free_now) +{ + if (!eb || IS_ERR(eb)) + return; + + eb->refs--; + BUG_ON(eb->refs < 0); + if (eb->refs == 0) { + if (eb->flags & EXTENT_DIRTY) { + error( + "dirty eb leak (aborted trans): start %llu len %u", + eb->start, eb->len); + } + if (eb->flags & EXTENT_BUFFER_DUMMY || free_now) + 
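+			/* Dummy ebs are not cached, free them right away */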
free_extent_buffer_final(eb); + } +} + +void free_extent_buffer(struct extent_buffer *eb) +{ + free_extent_buffer_internal(eb, 1); +} + +struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, + u64 bytenr, u32 blocksize) +{ + struct extent_buffer *eb = NULL; + struct cache_extent *cache; + + cache = lookup_cache_extent(&tree->cache, bytenr, blocksize); + if (cache && cache->start == bytenr && + cache->size == blocksize) { + eb = container_of(cache, struct extent_buffer, cache_node); + eb->refs++; + } + return eb; +} + +struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree, + u64 start) +{ + struct extent_buffer *eb = NULL; + struct cache_extent *cache; + + cache = search_cache_extent(&tree->cache, start); + if (cache) { + eb = container_of(cache, struct extent_buffer, cache_node); + eb->refs++; + } + return eb; +} + +struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, + u64 bytenr, u32 blocksize) +{ + struct extent_buffer *eb; + struct extent_io_tree *tree = &fs_info->extent_cache; + struct cache_extent *cache; + + cache = lookup_cache_extent(&tree->cache, bytenr, blocksize); + if (cache && cache->start == bytenr && + cache->size == blocksize) { + eb = container_of(cache, struct extent_buffer, cache_node); + eb->refs++; + } else { + int ret; + + if (cache) { + eb = container_of(cache, struct extent_buffer, + cache_node); + free_extent_buffer(eb); + } + eb = __alloc_extent_buffer(fs_info, bytenr, blocksize); + if (!eb) + return NULL; + ret = insert_cache_extent(&tree->cache, &eb->cache_node); + if (ret) { + free(eb); + return NULL; + } + tree->cache_size += blocksize; + } + return eb; +} + +/* + * Allocate a dummy extent buffer which won't be inserted into extent buffer + * cache. + * + * This mostly allows super block read write using existing eb infrastructure + * without pulluting the eb cache. + * + * This is especially important to avoid injecting eb->start == SZ_64K, as + * fuzzed image could have invalid tree bytenr covers super block range, + * and cause ref count underflow. 
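+ *
+ * An illustrative use, reading one super block copy through a dummy eb
+ * (offset/size constants as defined by the btrfs disk format headers):
+ *
+ *	eb = alloc_dummy_extent_buffer(fs_info, BTRFS_SUPER_INFO_OFFSET,
+ *				       BTRFS_SUPER_INFO_SIZE);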
+ */ +struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, + u64 bytenr, u32 blocksize) +{ + struct extent_buffer *ret; + + ret = __alloc_extent_buffer(fs_info, bytenr, blocksize); + if (!ret) + return NULL; + + ret->flags |= EXTENT_BUFFER_DUMMY; + + return ret; +} + +int read_extent_from_disk(struct blk_desc *desc, struct disk_partition *part, + u64 physical, struct extent_buffer *eb, + unsigned long offset, unsigned long len) +{ + int ret; + + ret = __btrfs_devread(desc, part, eb->data + offset, len, physical); + if (ret < 0) + goto out; + if (ret != len) { + ret = -EIO; + goto out; + } + ret = 0; +out: + return ret; +} + +int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv, + unsigned long start, unsigned long len) +{ + return memcmp(eb->data + start, ptrv, len); +} + +void read_extent_buffer(const struct extent_buffer *eb, void *dst, + unsigned long start, unsigned long len) +{ + memcpy(dst, eb->data + start, len); +} + +void write_extent_buffer(struct extent_buffer *eb, const void *src, + unsigned long start, unsigned long len) +{ + memcpy(eb->data + start, src, len); +} + +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len) +{ + memcpy(dst->data + dst_offset, src->data + src_offset, len); +} + +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + memmove(dst->data + dst_offset, dst->data + src_offset, len); +} + +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len) +{ + memset(eb->data + start, c, len); +} + +int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start, + unsigned long nr) +{ + return le_test_bit(nr, (u8 *)eb->data + start); +} + +int set_extent_buffer_dirty(struct extent_buffer *eb) +{ + struct extent_io_tree *tree = &eb->fs_info->extent_cache; + if (!(eb->flags & EXTENT_DIRTY)) { + eb->flags |= EXTENT_DIRTY; + set_extent_dirty(tree, eb->start, eb->start + eb->len - 1); + extent_buffer_get(eb); + } + return 0; +} + +int clear_extent_buffer_dirty(struct extent_buffer *eb) +{ + struct extent_io_tree *tree = &eb->fs_info->extent_cache; + if (eb->flags & EXTENT_DIRTY) { + eb->flags &= ~EXTENT_DIRTY; + clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1); + free_extent_buffer(eb); + } + return 0; } diff --git a/fs/btrfs/extent-io.h b/fs/btrfs/extent-io.h new file mode 100644 index 0000000000..6b0c87da96 --- /dev/null +++ b/fs/btrfs/extent-io.h @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Crossported from btrfs-progs/extent_io.h + * + * Modification includes: + * - extent_buffer:data + * Use pointer to provide better alignment. + * - Remove max_cache_size related interfaces + * Includes free_extent_buffer_nocache() + * As we don't cache eb in U-boot. + * - Include headers + * + * Write related functions are kept as we still need to modify dummy extent + * buffers even in RO environment. 
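+ *
+ * Disk access here goes through (blk_desc, disk_partition) pairs, which
+ * is how U-Boot addresses a block device; see read_extent_from_disk().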
+ */ +#ifndef __BTRFS_EXTENT_IO_H__ +#define __BTRFS_EXTENT_IO_H__ + +#include +#include +#include +#include +#include +#include "extent-cache.h" + +#define EXTENT_DIRTY (1U << 0) +#define EXTENT_WRITEBACK (1U << 1) +#define EXTENT_UPTODATE (1U << 2) +#define EXTENT_LOCKED (1U << 3) +#define EXTENT_NEW (1U << 4) +#define EXTENT_DELALLOC (1U << 5) +#define EXTENT_DEFRAG (1U << 6) +#define EXTENT_DEFRAG_DONE (1U << 7) +#define EXTENT_BUFFER_FILLED (1U << 8) +#define EXTENT_CSUM (1U << 9) +#define EXTENT_BAD_TRANSID (1U << 10) +#define EXTENT_BUFFER_DUMMY (1U << 11) +#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) + +#define BLOCK_GROUP_DATA (1U << 1) +#define BLOCK_GROUP_METADATA (1U << 2) +#define BLOCK_GROUP_SYSTEM (1U << 4) + +/* + * The extent buffer bitmap operations are done with byte granularity instead of + * word granularity for two reasons: + * 1. The bitmaps must be little-endian on disk. + * 2. Bitmap items are not guaranteed to be aligned to a word and therefore a + * single word in a bitmap may straddle two pages in the extent buffer. + */ +#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE) +#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1) +#define BITMAP_FIRST_BYTE_MASK(start) \ + ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK) +#define BITMAP_LAST_BYTE_MASK(nbits) \ + (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1))) + +static inline int le_test_bit(int nr, const u8 *addr) +{ + return 1U & (addr[BIT_BYTE(nr)] >> (nr & (BITS_PER_BYTE-1))); +} + +struct btrfs_fs_info; + +struct extent_io_tree { + struct cache_tree state; + struct cache_tree cache; + u64 cache_size; +}; + +struct extent_state { + struct cache_extent cache_node; + u64 start; + u64 end; + int refs; + unsigned long state; + u64 xprivate; +}; + +struct extent_buffer { + struct cache_extent cache_node; + u64 start; + u32 len; + int refs; + u32 flags; + struct btrfs_fs_info *fs_info; + char *data; +}; + +static inline void extent_buffer_get(struct extent_buffer *eb) +{ + eb->refs++; +} + +void extent_io_tree_init(struct extent_io_tree *tree); +void extent_io_tree_cleanup(struct extent_io_tree *tree); +int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits); +int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits); +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits); +int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int filled); +int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end); +int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end); +static inline int set_extent_buffer_uptodate(struct extent_buffer *eb) +{ + eb->flags |= EXTENT_UPTODATE; + return 0; +} + +static inline int clear_extent_buffer_uptodate(struct extent_buffer *eb) +{ + eb->flags &= ~EXTENT_UPTODATE; + return 0; +} + +static inline int extent_buffer_uptodate(struct extent_buffer *eb) +{ + if (!eb || IS_ERR(eb)) + return 0; + if (eb->flags & EXTENT_UPTODATE) + return 1; + return 0; +} + +int set_state_private(struct extent_io_tree *tree, u64 start, u64 xprivate); +int get_state_private(struct extent_io_tree *tree, u64 start, u64 *xprivate); +struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, + u64 bytenr, u32 blocksize); +struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree, + u64 start); +struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, + u64 bytenr, u32 blocksize); +struct extent_buffer 
*btrfs_clone_extent_buffer(struct extent_buffer *src); +struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, + u64 bytenr, u32 blocksize); +void free_extent_buffer(struct extent_buffer *eb); +int read_extent_from_disk(struct blk_desc *desc, struct disk_partition *part, + u64 physical, struct extent_buffer *eb, + unsigned long offset, unsigned long len); +int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv, + unsigned long start, unsigned long len); +void read_extent_buffer(const struct extent_buffer *eb, void *dst, + unsigned long start, unsigned long len); +void write_extent_buffer(struct extent_buffer *eb, const void *src, + unsigned long start, unsigned long len); +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len); +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len); +int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start, + unsigned long nr); +int set_extent_buffer_dirty(struct extent_buffer *eb); +int clear_extent_buffer_dirty(struct extent_buffer *eb); +void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start, + unsigned long pos, unsigned long len); +void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start, + unsigned long pos, unsigned long len); + +#endif diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c deleted file mode 100644 index 52a8ceaf0c..0000000000 --- a/fs/btrfs/hash.c +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * BTRFS filesystem implementation for U-Boot - * - * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz - */ - -#include "btrfs.h" -#include -#include - -static u32 btrfs_crc32c_table[256]; - -void btrfs_hash_init(void) -{ - static int inited = 0; - - if (!inited) { - crc32c_init(btrfs_crc32c_table, 0x82F63B78); - inited = 1; - } -} - -u32 btrfs_crc32c(u32 crc, const void *data, size_t length) -{ - return crc32c_cal(crc, (const char *) data, length, - btrfs_crc32c_table); -} - -u32 btrfs_csum_data(char *data, u32 seed, size_t len) -{ - return btrfs_crc32c(seed, data, len); -} - -void btrfs_csum_final(u32 crc, void *result) -{ - put_unaligned(cpu_to_le32(~crc), (u32 *)result); -} diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 991c2f68c3..ff330280e0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5,193 +5,187 @@ * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz */ -#include "btrfs.h" +#include #include +#include +#include "btrfs.h" +#include "disk-io.h" +#include "volumes.h" -u64 btrfs_lookup_inode_ref(struct btrfs_root *root, u64 inr, - struct btrfs_inode_ref *refp, char *name) -{ - struct btrfs_path path; - struct btrfs_key *key; - struct btrfs_inode_ref *ref; - u64 res = -1ULL; - - key = btrfs_search_tree_key_type(root, inr, BTRFS_INODE_REF_KEY, - &path); - - if (!key) - return -1ULL; - - ref = btrfs_path_item_ptr(&path, struct btrfs_inode_ref); - btrfs_inode_ref_to_cpu(ref); - - if (refp) - *refp = *ref; - - if (name) { - if (ref->name_len > BTRFS_NAME_MAX) { - printf("%s: inode name too long: %u\n", __func__, - ref->name_len); - goto out; - } - - memcpy(name, ref + 1, ref->name_len); - } - - res = key->offset; -out: - btrfs_free_path(&path); - return res; -} - -int btrfs_lookup_inode(const struct btrfs_root *root, - struct btrfs_key *location, 
- struct btrfs_inode_item *item, - struct btrfs_root *new_root) -{ - struct btrfs_root tmp_root = *root; - struct btrfs_path path; - int res = -1; - - if (location->type == BTRFS_ROOT_ITEM_KEY) { - if (btrfs_find_root(location->objectid, &tmp_root, NULL)) - return -1; - - location->objectid = tmp_root.root_dirid; - location->type = BTRFS_INODE_ITEM_KEY; - location->offset = 0; - } - - if (btrfs_search_tree(&tmp_root, location, &path)) - return res; - - if (btrfs_comp_keys(location, btrfs_path_leaf_key(&path))) - goto out; - - if (item) { - *item = *btrfs_path_item_ptr(&path, struct btrfs_inode_item); - btrfs_inode_item_to_cpu(item); - } - - if (new_root) - *new_root = tmp_root; - - res = 0; - -out: - btrfs_free_path(&path); - return res; -} - -int btrfs_readlink(const struct btrfs_root *root, u64 inr, char *target) +/* + * Read the content of symlink inode @ino of @root, into @target. + * NOTE: @target will not be \0 termiated, caller should handle it properly. + * + * Return the number of read data. + * Return <0 for error. + */ +int btrfs_readlink(struct btrfs_root *root, u64 ino, char *target) { struct btrfs_path path; struct btrfs_key key; - struct btrfs_file_extent_item *extent; - const char *data_ptr; - int res = -1; + struct btrfs_file_extent_item *fi; + int ret; - key.objectid = inr; + key.objectid = ino; key.type = BTRFS_EXTENT_DATA_KEY; key.offset = 0; + btrfs_init_path(&path); - if (btrfs_search_tree(root, &key, &path)) - return -1; - - if (btrfs_comp_keys(&key, btrfs_path_leaf_key(&path))) - goto out; - - extent = btrfs_path_item_ptr(&path, struct btrfs_file_extent_item); - if (extent->type != BTRFS_FILE_EXTENT_INLINE) { - printf("%s: Extent for symlink %llu not of INLINE type\n", - __func__, inr); + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) + return ret; + if (ret > 0) { + ret = -ENOENT; goto out; } - - btrfs_file_extent_item_to_cpu_inl(extent); - - if (extent->compression != BTRFS_COMPRESS_NONE) { - printf("%s: Symlink %llu extent data compressed!\n", __func__, - inr); - goto out; - } else if (extent->encryption != 0) { - printf("%s: Symlink %llu extent data encrypted!\n", __func__, - inr); - goto out; - } else if (extent->ram_bytes >= btrfs_info.sb.sectorsize) { - printf("%s: Symlink %llu extent data too long (%llu)!\n", - __func__, inr, extent->ram_bytes); + fi = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(path.nodes[0], fi) != + BTRFS_FILE_EXTENT_INLINE) { + ret = -EUCLEAN; + error("Extent for symlink %llu must be INLINE type!", ino); goto out; } - - data_ptr = (const char *) extent - + offsetof(struct btrfs_file_extent_item, disk_bytenr); - - memcpy(target, data_ptr, extent->ram_bytes); - target[extent->ram_bytes] = '\0'; - res = 0; + if (btrfs_file_extent_compression(path.nodes[0], fi) != + BTRFS_COMPRESS_NONE) { + ret = -EUCLEAN; + error("Extent for symlink %llu must not be compressed!", ino); + goto out; + } + if (btrfs_file_extent_ram_bytes(path.nodes[0], fi) >= + root->fs_info->sectorsize) { + ret = -EUCLEAN; + error("Symlink %llu extent data too large (%llu)!\n", + ino, btrfs_file_extent_ram_bytes(path.nodes[0], fi)); + goto out; + } + read_extent_buffer(path.nodes[0], target, + btrfs_file_extent_inline_start(fi), + btrfs_file_extent_ram_bytes(path.nodes[0], fi)); + ret = btrfs_file_extent_ram_bytes(path.nodes[0], fi); out: - btrfs_free_path(&path); - return res; + btrfs_release_path(&path); + return ret; } -/* inr must be a directory (for regular files with multiple hard 
links this - function returns only one of the parents of the file) */ -static u64 get_parent_inode(struct btrfs_root *root, u64 inr, - struct btrfs_inode_item *inode_item) +static int lookup_root_ref(struct btrfs_fs_info *fs_info, + u64 rootid, u64 *root_ret, u64 *dir_ret) { + struct btrfs_root *root = fs_info->tree_root; + struct btrfs_root_ref *root_ref; + struct btrfs_path path; struct btrfs_key key; - u64 res; + int ret; - if (inr == BTRFS_FIRST_FREE_OBJECTID) { - if (root->objectid != btrfs_info.fs_root.objectid) { - u64 parent; - struct btrfs_root_ref ref; + btrfs_init_path(&path); + key.objectid = rootid; + key.type = BTRFS_ROOT_BACKREF_KEY; + key.offset = (u64)-1; - parent = btrfs_lookup_root_ref(root->objectid, &ref, - NULL); - if (parent == -1ULL) - return -1ULL; + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) + return ret; + /* Should not happen */ + if (ret == 0) { + ret = -EUCLEAN; + goto out; + } + ret = btrfs_previous_item(root, &path, rootid, BTRFS_ROOT_BACKREF_KEY); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + root_ref = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_root_ref); + *root_ret = key.offset; + *dir_ret = btrfs_root_ref_dirid(path.nodes[0], root_ref); +out: + btrfs_release_path(&path); + return ret; +} - if (btrfs_find_root(parent, root, NULL)) - return -1ULL; +/* + * To get the parent inode of @ino of @root. + * + * @root_ret and @ino_ret will be filled. + * + * NOTE: This function is not reliable. It can only get one parent inode. + * The get the proper parent inode, we need a full VFS inodes stack to + * resolve properly. + */ +static int get_parent_inode(struct btrfs_root *root, u64 ino, + struct btrfs_root **root_ret, u64 *ino_ret) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_path path; + struct btrfs_key key; + int ret; - inr = ref.dirid; + if (ino == BTRFS_FIRST_FREE_OBJECTID) { + u64 parent_root = -1; + + /* It's top level already, no more parent */ + if (root->root_key.objectid == BTRFS_FS_TREE_OBJECTID) { + *root_ret = fs_info->fs_root; + *ino_ret = BTRFS_FIRST_FREE_OBJECTID; + return 0; } - if (inode_item) { - key.objectid = inr; - key.type = BTRFS_INODE_ITEM_KEY; - key.offset = 0; + ret = lookup_root_ref(fs_info, root->root_key.objectid, + &parent_root, ino_ret); + if (ret < 0) + return ret; - if (btrfs_lookup_inode(root, &key, inode_item, NULL)) - return -1ULL; - } + key.objectid = parent_root; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + *root_ret = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(*root_ret)) + return PTR_ERR(*root_ret); - return inr; + return 0; } - res = btrfs_lookup_inode_ref(root, inr, NULL, NULL); - if (res == -1ULL) - return -1ULL; + btrfs_init_path(&path); + key.objectid = ino; + key.type = BTRFS_INODE_REF_KEY; + key.offset = (u64)-1; - if (inode_item) { - key.objectid = res; - key.type = BTRFS_INODE_ITEM_KEY; - key.offset = 0; - - if (btrfs_lookup_inode(root, &key, inode_item, NULL)) - return -1ULL; + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) + return ret; + /* Should not happen */ + if (ret == 0) { + ret = -EUCLEAN; + goto out; } - - return res; + ret = btrfs_previous_item(root, &path, ino, BTRFS_INODE_REF_KEY); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + *root_ret = root; + *ino_ret = key.offset; +out: + btrfs_release_path(&path); + 
return ret; } static inline int next_length(const char *path) { int res = 0; - while (*path != '\0' && *path != '/' && res <= BTRFS_NAME_LEN) - ++res, ++path; + while (*path != '\0' && *path != '/') { + ++res; + ++path; + if (res > BTRFS_NAME_LEN) + break; + } return res; } @@ -209,175 +203,549 @@ static inline const char *skip_current_directories(const char *cur) return cur; } -u64 btrfs_lookup_path(struct btrfs_root *root, u64 inr, const char *path, - u8 *type_p, struct btrfs_inode_item *inode_item_p, - int symlink_limit) +/* + * Resolve one filename of @ino of @root. + * + * key_ret: The child key (either INODE_ITEM or ROOT_ITEM type) + * type_ret: BTRFS_FT_* of the child inode. + * + * Return 0 with above members filled. + * Return <0 for error. + */ +static int resolve_one_filename(struct btrfs_root *root, u64 ino, + const char *name, int namelen, + struct btrfs_key *key_ret, u8 *type_ret) { - struct btrfs_dir_item item; - struct btrfs_inode_item inode_item; - u8 type = BTRFS_FT_DIR; - int len, have_inode = 0; - const char *cur = path; + struct btrfs_dir_item *dir_item; + struct btrfs_path path; + int ret = 0; - if (*cur == '/') { - ++cur; - inr = root->root_dirid; + btrfs_init_path(&path); + + dir_item = btrfs_lookup_dir_item(NULL, root, &path, ino, name, + namelen, 0); + if (IS_ERR(dir_item)) { + ret = PTR_ERR(dir_item); + goto out; } - do { + btrfs_dir_item_key_to_cpu(path.nodes[0], dir_item, key_ret); + *type_ret = btrfs_dir_type(path.nodes[0], dir_item); +out: + btrfs_release_path(&path); + return ret; +} + +/* + * Resolve a full path @filename. The start point is @ino of @root. + * + * The result will be filled into @root_ret, @ino_ret and @type_ret. + */ +int btrfs_lookup_path(struct btrfs_root *root, u64 ino, const char *filename, + struct btrfs_root **root_ret, u64 *ino_ret, + u8 *type_ret, int symlink_limit) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_root *next_root; + struct btrfs_key key; + const char *cur = filename; + u64 next_ino; + u8 next_type; + u8 type; + int len; + int ret = 0; + + /* If the path is absolute path, also search from fs root */ + if (*cur == '/') { + root = fs_info->fs_root; + ino = btrfs_root_dirid(&root->root_item); + type = BTRFS_FT_DIR; + } + + while (*cur != '\0') { cur = skip_current_directories(cur); len = next_length(cur); if (len > BTRFS_NAME_LEN) { - printf("%s: Name too long at \"%.*s\"\n", __func__, + error("%s: Name too long at \"%.*s\"", __func__, BTRFS_NAME_LEN, cur); - return -1ULL; + return -ENAMETOOLONG; } if (len == 1 && cur[0] == '.') break; if (len == 2 && cur[0] == '.' 
&& cur[1] == '.') { - cur += 2; - inr = get_parent_inode(root, inr, &inode_item); - if (inr == -1ULL) - return -1ULL; - - type = BTRFS_FT_DIR; - continue; + /* Go one level up */ + ret = get_parent_inode(root, ino, &next_root, &next_ino); + if (ret < 0) + return ret; + root = next_root; + ino = next_ino; + goto next; } if (!*cur) break; - - if (btrfs_lookup_dir_item(root, inr, cur, len, &item)) - return -1ULL; - type = item.type; - have_inode = 1; - if (btrfs_lookup_inode(root, &item.location, &inode_item, root)) - return -1ULL; + ret = resolve_one_filename(root, ino, cur, len, &key, &type); + if (ret < 0) + return ret; + + if (key.type == BTRFS_ROOT_ITEM_KEY) { + /* Child inode is a subvolume */ + + next_root = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(next_root)) + return PTR_ERR(next_root); + root = next_root; + ino = btrfs_root_dirid(&root->root_item); + } else if (type == BTRFS_FT_SYMLINK && symlink_limit >= 0) { + /* Child inode is a symlink */ - if (item.type == BTRFS_FT_SYMLINK && symlink_limit >= 0) { char *target; - if (!symlink_limit) { - printf("%s: Too much symlinks!\n", __func__); - return -1ULL; + if (symlink_limit == 0) { + error("%s: Too much symlinks!", __func__); + return -EMLINK; } - - target = malloc(min(inode_item.size + 1, - (u64) btrfs_info.sb.sectorsize)); + target = malloc(fs_info->sectorsize); if (!target) - return -1ULL; - - if (btrfs_readlink(root, item.location.objectid, - target)) { + return -ENOMEM; + ret = btrfs_readlink(root, key.objectid, target); + if (ret < 0) { free(target); - return -1ULL; + return ret; } + target[ret] = '\0'; - inr = btrfs_lookup_path(root, inr, target, &type, - &inode_item, symlink_limit - 1); - - free(target); - - if (inr == -1ULL) - return -1ULL; - } else if (item.type != BTRFS_FT_DIR && cur[len]) { - printf("%s: \"%.*s\" not a directory\n", __func__, - (int) (cur - path + len), path); - return -1ULL; + ret = btrfs_lookup_path(root, ino, target, &next_root, + &next_ino, &next_type, + symlink_limit); + if (ret < 0) + return ret; + root = next_root; + ino = next_ino; + type = next_type; } else { - inr = item.location.objectid; + /* Child inode is an inode */ + ino = key.objectid; } - +next: cur += len; - } while (*cur); - - if (type_p) - *type_p = type; - - if (inode_item_p) { - if (!have_inode) { - struct btrfs_key key; - - key.objectid = inr; - key.type = BTRFS_INODE_ITEM_KEY; - key.offset = 0; - - if (btrfs_lookup_inode(root, &key, &inode_item, NULL)) - return -1ULL; - } - - *inode_item_p = inode_item; } - return inr; + if (!ret) { + *root_ret = root; + *ino_ret = ino; + *type_ret = type; + } + + return ret; } -u64 btrfs_file_read(const struct btrfs_root *root, u64 inr, u64 offset, - u64 size, char *buf) +/* + * Read out inline extent. + * + * Since inline extent should only exist for offset 0, no need for extra + * parameters. + * Truncating should be handled by the caller. + * + * Return the number of bytes read. + * Return <0 for error. 
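+ *
+ * @dest must be able to hold the uncompressed extent; callers in this
+ * port size such buffers at sectorsize, see read_and_truncate_page().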
+ */ +int btrfs_read_extent_inline(struct btrfs_path *path, + struct btrfs_file_extent_item *fi, char *dest) { + struct extent_buffer *leaf = path->nodes[0]; + int slot = path->slots[0]; + char *cbuf = NULL; + char *dbuf = NULL; + u32 csize; + u32 dsize; + int ret; + + csize = btrfs_file_extent_inline_item_len(leaf, btrfs_item_nr(slot)); + if (btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE) { + /* Uncompressed, just read it out */ + read_extent_buffer(leaf, dest, + btrfs_file_extent_inline_start(fi), + csize); + return csize; + } + + /* Compressed extent, prepare the compressed and data buffer */ + dsize = btrfs_file_extent_ram_bytes(leaf, fi); + cbuf = malloc(csize); + dbuf = malloc(dsize); + if (!cbuf || !dbuf) { + ret = -ENOMEM; + goto out; + } + read_extent_buffer(leaf, cbuf, btrfs_file_extent_inline_start(fi), + csize); + ret = btrfs_decompress(btrfs_file_extent_compression(leaf, fi), + cbuf, csize, dbuf, dsize); + if (ret < 0 || ret != dsize) { + ret = -EIO; + goto out; + } + memcpy(dest, dbuf, dsize); + ret = dsize; +out: + free(cbuf); + free(dbuf); + return ret; +} + +/* + * Read out regular extent. + * + * Truncating should be handled by the caller. + * + * @offset and @len should not cross the extent boundary. + * Return the number of bytes read. + * Return <0 for error. + */ +int btrfs_read_extent_reg(struct btrfs_path *path, + struct btrfs_file_extent_item *fi, u64 offset, + int len, char *dest) +{ + struct extent_buffer *leaf = path->nodes[0]; + struct btrfs_fs_info *fs_info = leaf->fs_info; + struct btrfs_key key; + u64 extent_num_bytes; + u64 disk_bytenr; + u64 read; + char *cbuf = NULL; + char *dbuf = NULL; + u32 csize; + u32 dsize; + bool finished = false; + int num_copies; + int i; + int slot = path->slots[0]; + int ret; + + btrfs_item_key_to_cpu(leaf, &key, slot); + extent_num_bytes = btrfs_file_extent_num_bytes(leaf, fi); + ASSERT(IS_ALIGNED(offset, fs_info->sectorsize) && + IS_ALIGNED(len, fs_info->sectorsize)); + ASSERT(offset >= key.offset && + offset + len <= key.offset + extent_num_bytes); + + /* Preallocated or hole , fill @dest with zero */ + if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_PREALLOC || + btrfs_file_extent_disk_bytenr(leaf, fi) == 0) { + memset(dest, 0, len); + return len; + } + + if (btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE) { + u64 logical; + + logical = btrfs_file_extent_disk_bytenr(leaf, fi) + + btrfs_file_extent_offset(leaf, fi) + + offset - key.offset; + read = len; + + num_copies = btrfs_num_copies(fs_info, logical, len); + for (i = 1; i <= num_copies; i++) { + ret = read_extent_data(fs_info, dest, logical, &read, i); + if (ret < 0 || read != len) + continue; + finished = true; + break; + } + if (!finished) + return -EIO; + return len; + } + + csize = btrfs_file_extent_disk_num_bytes(leaf, fi); + dsize = btrfs_file_extent_ram_bytes(leaf, fi); + disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + num_copies = btrfs_num_copies(fs_info, disk_bytenr, csize); + + cbuf = malloc_cache_aligned(csize); + dbuf = malloc_cache_aligned(dsize); + if (!cbuf || !dbuf) { + ret = -ENOMEM; + goto out; + } + /* For compressed extent, we must read the whole on-disk extent */ + for (i = 1; i <= num_copies; i++) { + read = csize; + ret = read_extent_data(fs_info, cbuf, disk_bytenr, + &read, i); + if (ret < 0 || read != csize) + continue; + finished = true; + break; + } + if (!finished) { + ret = -EIO; + goto out; + } + + ret = btrfs_decompress(btrfs_file_extent_compression(leaf, fi), cbuf, + csize, dbuf, dsize); 
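+	/* A short or failed decompression is reported as -EIO below */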
+ if (ret != dsize) { + ret = -EIO; + goto out; + } + /* Then copy the needed part */ + memcpy(dest, dbuf + btrfs_file_extent_offset(leaf, fi), len); + ret = len; +out: + free(cbuf); + free(dbuf); + return ret; +} + +/* + * Get the first file extent that covers bytenr @file_offset. + * + * @file_offset must be aligned to sectorsize. + * + * return 0 for found, and path points to the file extent. + * return >0 for not found, and fill @next_offset. + * @next_offset can be 0 if there is no next file extent. + * return <0 for error. + */ +static int lookup_data_extent(struct btrfs_root *root, struct btrfs_path *path, + u64 ino, u64 file_offset, u64 *next_offset) +{ + struct btrfs_key key; + struct btrfs_file_extent_item *fi; + u8 extent_type; + int ret = 0; + + ASSERT(IS_ALIGNED(file_offset, root->fs_info->sectorsize)); + key.objectid = ino; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = file_offset; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + /* Error or we're already at the file extent */ + if (ret <= 0) + return ret; + if (ret > 0) { + /* Check previous file extent */ + ret = btrfs_previous_item(root, path, ino, + BTRFS_EXTENT_DATA_KEY); + if (ret < 0) + return ret; + if (ret > 0) + goto check_next; + } + /* Now the key.offset must be smaller than @file_offset */ + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid != ino || + key.type != BTRFS_EXTENT_DATA_KEY) + goto check_next; + + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + extent_type = btrfs_file_extent_type(path->nodes[0], fi); + if (extent_type == BTRFS_FILE_EXTENT_INLINE) { + if (file_offset == 0) + return 0; + /* Inline extent should be the only extent, no next extent. */ + *next_offset = 0; + return 1; + } + + /* This file extent covers @file_offset */ + if (key.offset <= file_offset && key.offset + + btrfs_file_extent_num_bytes(path->nodes[0], fi) > file_offset) + return 0; +check_next: + ret = btrfs_next_item(root, path); + if (ret < 0) + return ret; + if (ret > 0) { + *next_offset = 0; + return 1; + } + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + /* Next next data extent */ + if (key.objectid != ino || + key.type != BTRFS_EXTENT_DATA_KEY) { + *next_offset = 0; + return 1; + } + /* Current file extent already beyond @file_offset */ + if (key.offset > file_offset) { + *next_offset = key.offset; + return 1; + } + /* This file extent covers @file_offset */ + if (key.offset <= file_offset && key.offset + + btrfs_file_extent_num_bytes(path->nodes[0], fi) > file_offset) + return 0; + /* This file extent ends before @file_offset, check next */ + ret = btrfs_next_item(root, path); + if (ret < 0) + return ret; + if (ret > 0) { + *next_offset = 0; + return 1; + } + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.type != BTRFS_EXTENT_DATA_KEY || key.objectid != ino) { + *next_offset = 0; + return 1; + } + *next_offset = key.offset; + return 1; +} + +static int read_and_truncate_page(struct btrfs_path *path, + struct btrfs_file_extent_item *fi, + int start, int len, char *dest) +{ + struct extent_buffer *leaf = path->nodes[0]; + struct btrfs_fs_info *fs_info = leaf->fs_info; + u64 aligned_start = round_down(start, fs_info->sectorsize); + u8 extent_type; + char *buf; + int page_off = start - aligned_start; + int page_len = fs_info->sectorsize - page_off; + int ret; + + ASSERT(start + len <= aligned_start + 
fs_info->sectorsize); + buf = malloc_cache_aligned(fs_info->sectorsize); + if (!buf) + return -ENOMEM; + + extent_type = btrfs_file_extent_type(leaf, fi); + if (extent_type == BTRFS_FILE_EXTENT_INLINE) { + ret = btrfs_read_extent_inline(path, fi, buf); + memcpy(dest, buf + page_off, min(page_len, ret)); + free(buf); + return len; + } + + ret = btrfs_read_extent_reg(path, fi, + round_down(start, fs_info->sectorsize), + fs_info->sectorsize, buf); + if (ret < 0) { + free(buf); + return ret; + } + memcpy(dest, buf + page_off, page_len); + free(buf); + return len; +} + +int btrfs_file_read(struct btrfs_root *root, u64 ino, u64 file_offset, u64 len, + char *dest) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_file_extent_item *fi; struct btrfs_path path; struct btrfs_key key; - struct btrfs_file_extent_item *extent; - int res = 0; - u64 rd, rd_all = -1ULL; + u64 aligned_start = round_down(file_offset, fs_info->sectorsize); + u64 aligned_end = round_down(file_offset + len, fs_info->sectorsize); + u64 next_offset; + u64 cur = aligned_start; + int ret = 0; - key.objectid = inr; - key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = offset; + btrfs_init_path(&path); - if (btrfs_search_tree(root, &key, &path)) - return -1ULL; + /* Set the whole dest all zero, so we won't need to bother holes */ + memset(dest, 0, len); - if (btrfs_comp_keys(&key, btrfs_path_leaf_key(&path)) < 0) { - if (btrfs_prev_slot(&path)) - goto out; - - if (btrfs_comp_keys_type(&key, btrfs_path_leaf_key(&path))) + /* Read out the leading unaligned part */ + if (aligned_start != file_offset) { + ret = lookup_data_extent(root, &path, ino, aligned_start, + &next_offset); + if (ret < 0) goto out; + if (ret == 0) { + /* Read the unaligned part out*/ + fi = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_file_extent_item); + ret = read_and_truncate_page(&path, fi, file_offset, + round_up(file_offset, fs_info->sectorsize) - + file_offset, dest); + if (ret < 0) + goto out; + cur += fs_info->sectorsize; + } else { + /* The whole file is a hole */ + if (!next_offset) { + memset(dest, 0, len); + return len; + } + cur = next_offset; + } } - rd_all = 0; + /* Read the aligned part */ + while (cur < aligned_end) { + u64 extent_num_bytes; + u8 type; - do { - if (btrfs_comp_keys_type(&key, btrfs_path_leaf_key(&path))) - break; - - extent = btrfs_path_item_ptr(&path, - struct btrfs_file_extent_item); - - if (extent->type == BTRFS_FILE_EXTENT_INLINE) { - btrfs_file_extent_item_to_cpu_inl(extent); - rd = btrfs_read_extent_inline(&path, extent, offset, - size, buf); - } else { - btrfs_file_extent_item_to_cpu(extent); - rd = btrfs_read_extent_reg(&path, extent, offset, size, - buf); + btrfs_release_path(&path); + ret = lookup_data_extent(root, &path, ino, cur, &next_offset); + if (ret < 0) + goto out; + if (ret > 0) { + /* No next, direct exit */ + if (!next_offset) { + ret = 0; + goto out; + } } - - if (rd == -1ULL) { - printf("%s: Error reading extent\n", __func__); - rd_all = -1; + fi = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_file_extent_item); + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + type = btrfs_file_extent_type(path.nodes[0], fi); + if (type == BTRFS_FILE_EXTENT_INLINE) { + ret = btrfs_read_extent_inline(&path, fi, dest); goto out; } + /* Skip holes, as we have zeroed the dest */ + if (type == BTRFS_FILE_EXTENT_PREALLOC || + btrfs_file_extent_disk_bytenr(path.nodes[0], fi) == 0) { + cur = key.offset + btrfs_file_extent_num_bytes( + path.nodes[0], fi); + continue; + } - offset 
= 0; - buf += rd; - rd_all += rd; - size -= rd; - - if (!size) - break; - } while (!(res = btrfs_next_slot(&path))); - - if (res) - return -1ULL; + /* Read the remaining part of the extent */ + extent_num_bytes = btrfs_file_extent_num_bytes(path.nodes[0], + fi); + ret = btrfs_read_extent_reg(&path, fi, cur, + min(extent_num_bytes, aligned_end - cur), + dest + cur - file_offset); + if (ret < 0) + goto out; + cur += min(extent_num_bytes, aligned_end - cur); + } + /* Read the tailing unaligned part*/ + if (file_offset + len != aligned_end) { + btrfs_release_path(&path); + ret = lookup_data_extent(root, &path, ino, aligned_end, + &next_offset); + /* <0 is error, >0 means no extent */ + if (ret) + goto out; + fi = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_file_extent_item); + ret = read_and_truncate_page(&path, fi, aligned_end, + file_offset + len - aligned_end, + dest + aligned_end - file_offset); + } out: - btrfs_free_path(&path); - return rd_all; + btrfs_release_path(&path); + if (ret < 0) + return ret; + return len; } diff --git a/fs/btrfs/kernel-shared/btrfs_tree.h b/fs/btrfs/kernel-shared/btrfs_tree.h new file mode 100644 index 0000000000..6a76d1e456 --- /dev/null +++ b/fs/btrfs/kernel-shared/btrfs_tree.h @@ -0,0 +1,1333 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copied from kernel/include/uapi/linux/btrfs_btree.h. + * + * Only modified the header. + */ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __BTRFS_TREE_H__ +#define __BTRFS_TREE_H__ + +#include + +#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */ + +/* + * The max metadata block size (node size). + * + * This limit is somewhat artificial. The memmove and tree block locking cost + * go up with larger node size. + */ +#define BTRFS_MAX_METADATA_BLOCKSIZE 65536 + +/* + * We can actually store much bigger names, but lets not confuse the rest + * of linux. + * + * btrfs_dir_item::name_len follows this limitation. + */ +#define BTRFS_NAME_LEN 255 + +/* + * Objectids start from here. + * + * Check btrfs_disk_key for the meaning of objectids. + */ + +/* + * Root tree holds pointers to all of the tree roots. + * Without special mention, the root tree contains the root bytenr of all other + * trees, except the chunk tree and the log tree. + * + * The super block contains the root bytenr of this tree. + */ +#define BTRFS_ROOT_TREE_OBJECTID 1ULL + +/* + * Extent tree stores information about which extents are in use, and backrefs + * for each extent. + */ +#define BTRFS_EXTENT_TREE_OBJECTID 2ULL + +/* + * Chunk tree stores btrfs logical address -> physical address mapping. + * + * The super block contains part of chunk tree for bootstrap, and contains + * the root bytenr of this tree. + */ +#define BTRFS_CHUNK_TREE_OBJECTID 3ULL + +/* + * Device tree stores info about which areas of a given device are in use, + * and physical address -> btrfs logical address mapping. + */ +#define BTRFS_DEV_TREE_OBJECTID 4ULL + +/* The fs tree is the first subvolume tree, storing files and directories. */ +#define BTRFS_FS_TREE_OBJECTID 5ULL + +/* Shows the directory objectid inside the root tree. */ +#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL + +/* Csum tree holds checksums of all the data extents. */ +#define BTRFS_CSUM_TREE_OBJECTID 7ULL + +/* Quota tree holds quota configuration and tracking. */ +#define BTRFS_QUOTA_TREE_OBJECTID 8ULL + +/* UUID tree stores items that use the BTRFS_UUID_KEY* types. 
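+ * It is used, e.g., to map a subvolume UUID back to its subvolume id.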
 */
+#define BTRFS_UUID_TREE_OBJECTID 9ULL
+
+/* Free space cache tree (v2 space cache) tracks free space in block groups. */
+#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
+
+/* Indicates device stats in the device tree. */
+#define BTRFS_DEV_STATS_OBJECTID 0ULL
+
+/* For storing balance parameters in the root tree. */
+#define BTRFS_BALANCE_OBJECTID -4ULL
+
+/* Orphan objectid for tracking unlinked/truncated files. */
+#define BTRFS_ORPHAN_OBJECTID -5ULL
+
+/* Does write ahead logging to speed up fsyncs. */
+#define BTRFS_TREE_LOG_OBJECTID -6ULL
+#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
+
+/* For space balancing. */
+#define BTRFS_TREE_RELOC_OBJECTID -8ULL
+#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
+
+/* Extent checksums, shared between the csum tree and log trees. */
+#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
+
+/* For storing free space cache (v1 space cache). */
+#define BTRFS_FREE_SPACE_OBJECTID -11ULL
+
+/* The inode number assigned to the special inode for storing free ino cache. */
+#define BTRFS_FREE_INO_OBJECTID -12ULL
+
+/* Dummy objectid represents multiple objectids. */
+#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
+
+/* All files have objectids in this range. */
+#define BTRFS_FIRST_FREE_OBJECTID 256ULL
+#define BTRFS_LAST_FREE_OBJECTID -256ULL
+#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
+
+
+/*
+ * The device items go into the chunk tree.
+ *
+ * The key is in the form
+ * (BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY, )
+ */
+#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
+
+#define BTRFS_BTREE_INODE_OBJECTID 1
+
+#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
+
+#define BTRFS_DEV_REPLACE_DEVID 0ULL
+
+/*
+ * Types start from here.
+ *
+ * Check btrfs_disk_key for details about types.
+ */
+
+/*
+ * Inode items have the data typically returned from stat and store other
+ * info about object characteristics.
+ *
+ * There is one for every file and dir in the FS.
+ */
+#define BTRFS_INODE_ITEM_KEY 1
+/* reserve 2-11 close to the inode for later flexibility */
+#define BTRFS_INODE_REF_KEY 12
+#define BTRFS_INODE_EXTREF_KEY 13
+#define BTRFS_XATTR_ITEM_KEY 24
+#define BTRFS_ORPHAN_ITEM_KEY 48
+
+/*
+ * Dir items are the name -> inode pointers in a directory.
+ *
+ * There is one for every name in a directory.
+ */
+#define BTRFS_DIR_LOG_ITEM_KEY 60
+#define BTRFS_DIR_LOG_INDEX_KEY 72
+#define BTRFS_DIR_ITEM_KEY 84
+#define BTRFS_DIR_INDEX_KEY 96
+
+/* Stores info (position, size ...) about a data extent of a file */
+#define BTRFS_EXTENT_DATA_KEY 108
+
+/*
+ * Extent csums are stored in a separate tree and hold csums for
+ * an entire extent on disk.
+ */
+#define BTRFS_EXTENT_CSUM_KEY 128
+
+/*
+ * Root items point to tree roots.
+ *
+ * They are typically in the root tree used by the super block to find all the
+ * other trees.
+ */
+#define BTRFS_ROOT_ITEM_KEY 132
+
+/*
+ * Root backrefs tie subvols and snapshots to the directory entries that
+ * reference them.
+ */
+#define BTRFS_ROOT_BACKREF_KEY 144
+
+/*
+ * Root refs make a fast index for listing all of the snapshots and
+ * subvolumes referenced by a given root. They point directly to the
+ * directory item in the root that references the subvol.
+ */
+#define BTRFS_ROOT_REF_KEY 156
+
+/*
+ * Extent items are in the extent tree.
+ *
+ * These record which blocks are used, and how many references there are.
+ */
+#define BTRFS_EXTENT_ITEM_KEY 168
+
+/*
+ * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know
+ * the length, so we save the level in key->offset instead of the length.
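+ *
+ * I.e. the key is (tree block bytenr, BTRFS_METADATA_ITEM_KEY, level).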
+ */ +#define BTRFS_METADATA_ITEM_KEY 169 + +#define BTRFS_TREE_BLOCK_REF_KEY 176 + +#define BTRFS_EXTENT_DATA_REF_KEY 178 + +#define BTRFS_EXTENT_REF_V0_KEY 180 + +#define BTRFS_SHARED_BLOCK_REF_KEY 182 + +#define BTRFS_SHARED_DATA_REF_KEY 184 + +/* + * Block groups give us hints into the extent allocation trees. + * + * Stores how many free space there is in a block group. + */ +#define BTRFS_BLOCK_GROUP_ITEM_KEY 192 + +/* + * Every block group is represented in the free space tree by a free space info + * item, which stores some accounting information. It is keyed on + * (block_group_start, FREE_SPACE_INFO, block_group_length). + */ +#define BTRFS_FREE_SPACE_INFO_KEY 198 + +/* + * A free space extent tracks an extent of space that is free in a block group. + * It is keyed on (start, FREE_SPACE_EXTENT, length). + */ +#define BTRFS_FREE_SPACE_EXTENT_KEY 199 + +/* + * When a block group becomes very fragmented, we convert it to use bitmaps + * instead of extents. + * + * A free space bitmap is keyed on (start, FREE_SPACE_BITMAP, length). + * The corresponding item is a bitmap with (length / sectorsize) bits. + */ +#define BTRFS_FREE_SPACE_BITMAP_KEY 200 + +#define BTRFS_DEV_EXTENT_KEY 204 +#define BTRFS_DEV_ITEM_KEY 216 +#define BTRFS_CHUNK_ITEM_KEY 228 + +/* + * Records the overall state of the qgroups. + * + * There's only one instance of this key present, + * (0, BTRFS_QGROUP_STATUS_KEY, 0) + */ +#define BTRFS_QGROUP_STATUS_KEY 240 +/* + * Records the currently used space of the qgroup. + * + * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid). + */ +#define BTRFS_QGROUP_INFO_KEY 242 + +/* + * Contains the user configured limits for the qgroup. + * + * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid). + */ +#define BTRFS_QGROUP_LIMIT_KEY 244 + +/* + * Records the child-parent relationship of qgroups. For + * each relation, 2 keys are present: + * (childid, BTRFS_QGROUP_RELATION_KEY, parentid) + * (parentid, BTRFS_QGROUP_RELATION_KEY, childid) + */ +#define BTRFS_QGROUP_RELATION_KEY 246 + +/* Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY. */ +#define BTRFS_BALANCE_ITEM_KEY 248 + +/* + * The key type for tree items that are stored persistently, but do not need to + * exist for extended period of time. The items can exist in any tree. + * + * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data] + * + * Existing items: + * + * - balance status item + * (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0) + */ +#define BTRFS_TEMPORARY_ITEM_KEY 248 + +/* Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY */ +#define BTRFS_DEV_STATS_KEY 249 + +/* + * The key type for tree items that are stored persistently and usually exist + * for a long period, eg. filesystem lifetime. The item kinds can be status + * information, stats or preference values. The item can exist in any tree. + * + * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data] + * + * Existing items: + * + * - device statistics, store IO stats in the device tree, one key for all + * stats + * (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0) + */ +#define BTRFS_PERSISTENT_ITEM_KEY 249 + +/* + * Persistently stores the device replace state in the device tree. + * + * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0). + */ +#define BTRFS_DEV_REPLACE_KEY 250 + +/* + * Stores items that allow to quickly map UUIDs to something else. + * + * These items are part of the filesystem UUID tree. + * The key is built like this: + * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits). 
+ */ +#define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */ +#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to + * received subvols */ + +/* + * String items are for debugging. + * + * They just store a short string of data in the FS. + */ +#define BTRFS_STRING_ITEM_KEY 253 + + + +/* 32 bytes in various csum fields */ +#define BTRFS_CSUM_SIZE 32 + +/* Csum types */ +enum btrfs_csum_type { + BTRFS_CSUM_TYPE_CRC32 = 0, + BTRFS_CSUM_TYPE_XXHASH = 1, + BTRFS_CSUM_TYPE_SHA256 = 2, + BTRFS_CSUM_TYPE_BLAKE2 = 3, +}; + +/* + * Flag definitions for the directory entry item type. + * + * Used by: + * struct btrfs_dir_item.type + * + * Values 0..7 must match common file type values in fs_types.h. + */ +#define BTRFS_FT_UNKNOWN 0 +#define BTRFS_FT_REG_FILE 1 +#define BTRFS_FT_DIR 2 +#define BTRFS_FT_CHRDEV 3 +#define BTRFS_FT_BLKDEV 4 +#define BTRFS_FT_FIFO 5 +#define BTRFS_FT_SOCK 6 +#define BTRFS_FT_SYMLINK 7 +#define BTRFS_FT_XATTR 8 +#define BTRFS_FT_MAX 9 + +#define BTRFS_FSID_SIZE 16 +#define BTRFS_UUID_SIZE 16 + +/* + * The key defines the order in the tree, and so it also defines (optimal) + * block layout. + * + * Objectid and offset are interpreted based on type. + * Normally, the objectid represents either a root number or an + * inode number. + * + * Type tells us things about the object, and is a kind of stream selector. + * Check the following URL for full references about btrfs_disk_key/btrfs_key: + * https://btrfs.wiki.kernel.org/index.php/Btree_Items + * + * btrfs_disk_key is in disk byte order. struct btrfs_key is always + * in cpu native order. Otherwise they are identical and their sizes + * should be the same (i.e. both packed). + */ +struct btrfs_disk_key { + __le64 objectid; + __u8 type; + __le64 offset; +} __attribute__ ((__packed__)); + +struct btrfs_key { + __u64 objectid; + __u8 type; + __u64 offset; +} __attribute__ ((__packed__)); + +struct btrfs_dev_item { + /* The internal btrfs device id */ + __le64 devid; + + /* Size of the device */ + __le64 total_bytes; + + /* Bytes used */ + __le64 bytes_used; + + /* Optimal io alignment for this device */ + __le32 io_align; + + /* Optimal io width for this device */ + __le32 io_width; + + /* Minimal io size for this device */ + __le32 sector_size; + + /* Type and info about this device */ + __le64 type; + + /* Expected generation for this device */ + __le64 generation; + + /* + * Starting byte of this partition on the device, + * to allow for stripe alignment in the future. + */ + __le64 start_offset; + + /* Grouping information for allocation decisions */ + __le32 dev_group; + + /* Optimal seek speed 0-100 where 100 is fastest */ + __u8 seek_speed; + + /* Optimal bandwidth 0-100 where 100 is fastest */ + __u8 bandwidth; + + /* Btrfs generated uuid for this device */ + __u8 uuid[BTRFS_UUID_SIZE]; + + /* UUID of FS who owns this device */ + __u8 fsid[BTRFS_UUID_SIZE]; +} __attribute__ ((__packed__)); + +struct btrfs_stripe { + __le64 devid; + __le64 offset; + __u8 dev_uuid[BTRFS_UUID_SIZE]; +} __attribute__ ((__packed__)); + +struct btrfs_chunk { + /* Size of this chunk in bytes */ + __le64 length; + + /* Objectid of the root referencing this chunk */ + __le64 owner; + + __le64 stripe_len; + __le64 type; + + /* Optimal io alignment for this chunk */ + __le32 io_align; + + /* Optimal io width for this chunk */ + __le32 io_width; + + /* Minimal io size for this chunk */ + __le32 sector_size; + + /* + * 2^16 stripes is quite a lot; a second limit is the size of a single + * item in the btree.
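+ * + * The stripes follow the struct directly, so the on-disk item size is + * sizeof(struct btrfs_chunk) + (num_stripes - 1) * sizeof(struct btrfs_stripe).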
+ */ + __le16 num_stripes; + + /* Sub stripes only matter for raid10 */ + __le16 sub_stripes; + struct btrfs_stripe stripe; + /* additional stripes go here */ +} __attribute__ ((__packed__)); + +#define BTRFS_FREE_SPACE_EXTENT 1 +#define BTRFS_FREE_SPACE_BITMAP 2 + +struct btrfs_free_space_entry { + __le64 offset; + __le64 bytes; + __u8 type; +} __attribute__ ((__packed__)); + +struct btrfs_free_space_header { + struct btrfs_disk_key location; + __le64 generation; + __le64 num_entries; + __le64 num_bitmaps; +} __attribute__ ((__packed__)); + +#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) +#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) + +/* Super block flags */ +/* Errors detected */ +#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) + +#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) +#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) +#define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34) +#define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35) +#define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36) + + +/* + * Items in the extent tree are used to record the objectid of the + * owner of the block and the number of references. + */ +struct btrfs_extent_item { + __le64 refs; + __le64 generation; + __le64 flags; +} __attribute__ ((__packed__)); + +struct btrfs_extent_item_v0 { + __le32 refs; +} __attribute__ ((__packed__)); + + +#define BTRFS_EXTENT_FLAG_DATA (1ULL << 0) +#define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1) + +/* Use full backrefs for extent pointers in the block */ +#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8) + +/* + * This flag is only used internally by scrub and may be changed at any time; + * it is only declared here to avoid collisions. + */ +#define BTRFS_EXTENT_FLAG_SUPER (1ULL << 48) + +struct btrfs_tree_block_info { + struct btrfs_disk_key key; + __u8 level; +} __attribute__ ((__packed__)); + +struct btrfs_extent_data_ref { + __le64 root; + __le64 objectid; + __le64 offset; + __le32 count; +} __attribute__ ((__packed__)); + +struct btrfs_shared_data_ref { + __le32 count; +} __attribute__ ((__packed__)); + +struct btrfs_extent_inline_ref { + __u8 type; + __le64 offset; +} __attribute__ ((__packed__)); + +/* Old style backrefs item */ +struct btrfs_extent_ref_v0 { + __le64 root; + __le64 generation; + __le64 objectid; + __le32 count; +} __attribute__ ((__packed__)); + + +/* Dev extents record used space on individual devices. + * + * The owner field points back to the chunk allocation mapping tree that + * allocated the extent. + * The chunk tree uuid field is a way to double check the owner.
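+ * Dev extent items live in the device tree and are keyed + * (devid, BTRFS_DEV_EXTENT_KEY, physical_offset).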
+ */ +struct btrfs_dev_extent { + __le64 chunk_tree; + __le64 chunk_objectid; + __le64 chunk_offset; + __le64 length; + __u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; +} __attribute__ ((__packed__)); + +struct btrfs_inode_ref { + __le64 index; + __le16 name_len; + /* Name goes here */ +} __attribute__ ((__packed__)); + +struct btrfs_inode_extref { + __le64 parent_objectid; + __le64 index; + __le16 name_len; + __u8 name[0]; + /* Name goes here */ +} __attribute__ ((__packed__)); + +struct btrfs_timespec { + __le64 sec; + __le32 nsec; +} __attribute__ ((__packed__)); + +/* Inode flags */ +#define BTRFS_INODE_NODATASUM (1 << 0) +#define BTRFS_INODE_NODATACOW (1 << 1) +#define BTRFS_INODE_READONLY (1 << 2) +#define BTRFS_INODE_NOCOMPRESS (1 << 3) +#define BTRFS_INODE_PREALLOC (1 << 4) +#define BTRFS_INODE_SYNC (1 << 5) +#define BTRFS_INODE_IMMUTABLE (1 << 6) +#define BTRFS_INODE_APPEND (1 << 7) +#define BTRFS_INODE_NODUMP (1 << 8) +#define BTRFS_INODE_NOATIME (1 << 9) +#define BTRFS_INODE_DIRSYNC (1 << 10) +#define BTRFS_INODE_COMPRESS (1 << 11) + +#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31) + +#define BTRFS_INODE_FLAG_MASK \ + (BTRFS_INODE_NODATASUM | \ + BTRFS_INODE_NODATACOW | \ + BTRFS_INODE_READONLY | \ + BTRFS_INODE_NOCOMPRESS | \ + BTRFS_INODE_PREALLOC | \ + BTRFS_INODE_SYNC | \ + BTRFS_INODE_IMMUTABLE | \ + BTRFS_INODE_APPEND | \ + BTRFS_INODE_NODUMP | \ + BTRFS_INODE_NOATIME | \ + BTRFS_INODE_DIRSYNC | \ + BTRFS_INODE_COMPRESS | \ + BTRFS_INODE_ROOT_ITEM_INIT) + +struct btrfs_inode_item { + /* NFS style generation number */ + __le64 generation; + /* Transid that last touched this inode */ + __le64 transid; + __le64 size; + __le64 nbytes; + __le64 block_group; + __le32 nlink; + __le32 uid; + __le32 gid; + __le32 mode; + __le64 rdev; + __le64 flags; + + /* Modification sequence number for NFS */ + __le64 sequence; + + /* + * A little future expansion; for more than this we can just grow the + * inode item and version it. + */ + __le64 reserved[4]; + struct btrfs_timespec atime; + struct btrfs_timespec ctime; + struct btrfs_timespec mtime; + struct btrfs_timespec otime; +} __attribute__ ((__packed__)); + +struct btrfs_dir_log_item { + __le64 end; +} __attribute__ ((__packed__)); + +struct btrfs_dir_item { + struct btrfs_disk_key location; + __le64 transid; + __le16 data_len; + __le16 name_len; + __u8 type; +} __attribute__ ((__packed__)); + +#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) + +/* + * Internal in-memory flag that a subvolume has been marked for deletion but + * is still visible as a directory + */ +#define BTRFS_ROOT_SUBVOL_DEAD (1ULL << 48) + +struct btrfs_root_item { + struct btrfs_inode_item inode; + __le64 generation; + __le64 root_dirid; + __le64 bytenr; + __le64 byte_limit; + __le64 bytes_used; + __le64 last_snapshot; + __le64 flags; + __le32 refs; + struct btrfs_disk_key drop_progress; + __u8 drop_level; + __u8 level; + + /* + * The following fields appear after subvol_uuids+subvol_times + * were introduced. + */ + + /* + * This generation number is used to test if the new fields are valid + * and up to date while reading the root item. Every time the root item + * is written out, the "generation" field is copied into this field. If + * anyone ever mounted the fs with an older kernel, we will have + * mismatching generation values here and thus must invalidate the + * new fields. See btrfs_update_root and btrfs_find_last_root for + * details. + * The offset of generation_v2 is also used as the start for the memset + * when invalidating the fields.
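+ * The invalidation amounts to: + * memset(&item->generation_v2, 0, + * sizeof(*item) - offsetof(struct btrfs_root_item, generation_v2));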
+ */ + __le64 generation_v2; + __u8 uuid[BTRFS_UUID_SIZE]; + __u8 parent_uuid[BTRFS_UUID_SIZE]; + __u8 received_uuid[BTRFS_UUID_SIZE]; + __le64 ctransid; /* Updated when an inode changes */ + __le64 otransid; /* Trans when created */ + __le64 stransid; /* Trans when sent. Non-zero for received subvol. */ + __le64 rtransid; /* Trans when received. Non-zero for received subvol.*/ + struct btrfs_timespec ctime; + struct btrfs_timespec otime; + struct btrfs_timespec stime; + struct btrfs_timespec rtime; + __le64 reserved[8]; /* For future */ +} __attribute__ ((__packed__)); + +/* This is used for both forward and backward root refs */ +struct btrfs_root_ref { + __le64 dirid; + __le64 sequence; + __le16 name_len; +} __attribute__ ((__packed__)); + +struct btrfs_disk_balance_args { + /* + * Profiles to operate on. + * + * SINGLE is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE. + */ + __le64 profiles; + + /* + * Usage filter + * BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N' + * BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max + */ + union { + __le64 usage; + struct { + __le32 usage_min; + __le32 usage_max; + }; + }; + + /* Devid filter */ + __le64 devid; + + /* Devid subset filter [pstart..pend) */ + __le64 pstart; + __le64 pend; + + /* Btrfs virtual address space subset filter [vstart..vend) */ + __le64 vstart; + __le64 vend; + + /* + * Profile to convert to. + * + * SINGLE is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE. + */ + __le64 target; + + /* BTRFS_BALANCE_ARGS_* */ + __le64 flags; + + /* + * BTRFS_BALANCE_ARGS_LIMIT with value 'limit'. + * BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extended version can use a + * minimum and maximum. + */ + union { + __le64 limit; + struct { + __le32 limit_min; + __le32 limit_max; + }; + }; + + /* + * Process chunks that cross stripes_min..stripes_max devices, + * BTRFS_BALANCE_ARGS_STRIPES_RANGE. + */ + __le32 stripes_min; + __le32 stripes_max; + + __le64 unused[6]; +} __attribute__ ((__packed__)); + +/* + * Stores balance parameters to disk so that balance can be properly + * resumed after crash or unmount. + */ +struct btrfs_balance_item { + /* BTRFS_BALANCE_* */ + __le64 flags; + + struct btrfs_disk_balance_args data; + struct btrfs_disk_balance_args meta; + struct btrfs_disk_balance_args sys; + + __le64 unused[4]; +} __attribute__ ((__packed__)); + +enum { + BTRFS_FILE_EXTENT_INLINE = 0, + BTRFS_FILE_EXTENT_REG = 1, + BTRFS_FILE_EXTENT_PREALLOC = 2, + BTRFS_NR_FILE_EXTENT_TYPES = 3, +}; + +enum btrfs_compression_type { + BTRFS_COMPRESS_NONE = 0, + BTRFS_COMPRESS_ZLIB = 1, + BTRFS_COMPRESS_LZO = 2, + BTRFS_COMPRESS_ZSTD = 3, + BTRFS_NR_COMPRESS_TYPES = 4, +}; + +struct btrfs_file_extent_item { + /* Transaction id that created this extent */ + __le64 generation; + /* + * Max number of bytes to hold this extent in ram. + * + * When we split a compressed extent we can't know how big each of the + * resulting pieces will be. So, this is an upper limit on the size of + * the extent in ram instead of an exact limit. + */ + __le64 ram_bytes; + + /* + * 32 bits for the various ways we might encode the data, + * including compression and encryption. If any of these + * are set to something a given disk format doesn't understand + * it is treated like an incompat flag for reading and writing, + * but not for stat. + */ + __u8 compression; + __u8 encryption; + __le16 other_encoding; /* Spare for later use */ + + /* Are we inline data or a real extent?
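+ * (One of BTRFS_FILE_EXTENT_INLINE, _REG or _PREALLOC from the enum above.)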
*/ + __u8 type; + + /* + * Disk space consumed by the extent; checksum blocks are not included + * in these numbers. + * + * At this offset in the structure, the inline extent data starts. + */ + __le64 disk_bytenr; + __le64 disk_num_bytes; + + /* + * The logical offset inside the file extent. + * + * This allows a file extent to point into the middle of an existing + * extent on disk, sharing it between two snapshots (useful if some + * bytes in the middle of the extent have changed). + */ + __le64 offset; + + /* + * The logical number of bytes this file extent is referencing (no + * csums included). + * + * This always reflects the size uncompressed and without encoding. + */ + __le64 num_bytes; + +} __attribute__ ((__packed__)); + +struct btrfs_csum_item { + __u8 csum; +} __attribute__ ((__packed__)); + +enum btrfs_dev_stat_values { + /* Disk I/O failure stats */ + BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */ + BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */ + BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */ + + /* Stats for indirect indications of I/O failures */ + BTRFS_DEV_STAT_CORRUPTION_ERRS, /* Checksum error, bytenr error or + * contents are illegal: this is an + * indication that the block was damaged + * during read or write, or written to + * wrong location or read from wrong + * location */ + BTRFS_DEV_STAT_GENERATION_ERRS, /* An indication that blocks have not + * been written */ + + BTRFS_DEV_STAT_VALUES_MAX +}; + +struct btrfs_dev_stats_item { + /* + * Grow this item struct at the end for future enhancements and keep + * the existing values unchanged. + */ + __le64 values[BTRFS_DEV_STAT_VALUES_MAX]; +} __attribute__ ((__packed__)); + +#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 +#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 + +struct btrfs_dev_replace_item { + /* + * Grow this item struct at the end for future enhancements and keep + * the existing values unchanged.
+ */ + __le64 src_devid; + __le64 cursor_left; + __le64 cursor_right; + __le64 cont_reading_from_srcdev_mode; + + __le64 replace_state; + __le64 time_started; + __le64 time_stopped; + __le64 num_write_errors; + __le64 num_uncorrectable_read_errors; +} __attribute__ ((__packed__)); + +/* Different types of block groups (and chunks) */ +#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) +#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) +#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2) +#define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3) +#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4) +#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) +#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) +#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7) +#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8) +#define BTRFS_BLOCK_GROUP_RAID1C3 (1ULL << 9) +#define BTRFS_BLOCK_GROUP_RAID1C4 (1ULL << 10) +#define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \ + BTRFS_SPACE_INFO_GLOBAL_RSV) + +enum btrfs_raid_types { + BTRFS_RAID_RAID10, + BTRFS_RAID_RAID1, + BTRFS_RAID_DUP, + BTRFS_RAID_RAID0, + BTRFS_RAID_SINGLE, + BTRFS_RAID_RAID5, + BTRFS_RAID_RAID6, + BTRFS_RAID_RAID1C3, + BTRFS_RAID_RAID1C4, + BTRFS_NR_RAID_TYPES +}; + +#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \ + BTRFS_BLOCK_GROUP_SYSTEM | \ + BTRFS_BLOCK_GROUP_METADATA) + +#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \ + BTRFS_BLOCK_GROUP_RAID1 | \ + BTRFS_BLOCK_GROUP_RAID1C3 | \ + BTRFS_BLOCK_GROUP_RAID1C4 | \ + BTRFS_BLOCK_GROUP_RAID5 | \ + BTRFS_BLOCK_GROUP_RAID6 | \ + BTRFS_BLOCK_GROUP_DUP | \ + BTRFS_BLOCK_GROUP_RAID10) +#define BTRFS_BLOCK_GROUP_RAID56_MASK (BTRFS_BLOCK_GROUP_RAID5 | \ + BTRFS_BLOCK_GROUP_RAID6) + +#define BTRFS_BLOCK_GROUP_RAID1_MASK (BTRFS_BLOCK_GROUP_RAID1 | \ + BTRFS_BLOCK_GROUP_RAID1C3 | \ + BTRFS_BLOCK_GROUP_RAID1C4) + +/* + * We need a bit for restriper to be able to tell when chunks of type + * SINGLE are available. This "extended" profile format is used in + * fs_info->avail_*_alloc_bits (in-memory) and balance item fields + * (on-disk). The corresponding on-disk bit in chunk.type is reserved + * to avoid remappings between two formats in future. + */ +#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48) + +/* + * A fake block group type that is used to communicate global block reserve + * size to userspace via the SPACE_INFO ioctl. + */ +#define BTRFS_SPACE_INFO_GLOBAL_RSV (1ULL << 49) + +#define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \ + BTRFS_AVAIL_ALLOC_BIT_SINGLE) + +static inline __u64 chunk_to_extended(__u64 flags) +{ + if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0) + flags |= BTRFS_AVAIL_ALLOC_BIT_SINGLE; + + return flags; +} +static inline __u64 extended_to_chunk(__u64 flags) +{ + return flags & ~BTRFS_AVAIL_ALLOC_BIT_SINGLE; +} + +struct btrfs_block_group_item { + __le64 used; + __le64 chunk_objectid; + __le64 flags; +} __attribute__ ((__packed__)); + +struct btrfs_free_space_info { + __le32 extent_count; + __le32 flags; +} __attribute__ ((__packed__)); + +#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0) + +#define BTRFS_QGROUP_LEVEL_SHIFT 48 +static inline __u64 btrfs_qgroup_level(__u64 qgroupid) +{ + return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT; +} + +/* Is subvolume quota turned on? */ +#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0) + +/* Is qgroup rescan running? 
*/ +#define BTRFS_QGROUP_STATUS_FLAG_RESCAN (1ULL << 1) + +/* + * Some qgroup entries are known to be out of date, either because the + * configuration has changed in a way that makes a rescan necessary, or + * because the fs has been mounted with a non-qgroup-aware version. + */ +#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2) + +#define BTRFS_QGROUP_STATUS_VERSION 1 + +struct btrfs_qgroup_status_item { + __le64 version; + /* + * The generation is updated during every commit. As older + * versions of btrfs are not aware of qgroups, it will be + * possible to detect inconsistencies by checking the + * generation at mount time. + */ + __le64 generation; + + /* Flag definitions, see above */ + __le64 flags; + + /* + * Only used during scanning to record the progress of the scan. + * It contains a logical address. + */ + __le64 rescan; +} __attribute__ ((__packed__)); + +struct btrfs_qgroup_info_item { + __le64 generation; + __le64 rfer; + __le64 rfer_cmpr; + __le64 excl; + __le64 excl_cmpr; +} __attribute__ ((__packed__)); + +/* + * Flag definitions for qgroup limits + * + * Used by: + * struct btrfs_qgroup_limit.flags + * struct btrfs_qgroup_limit_item.flags + */ +#define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0) +#define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1) +#define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2) +#define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3) +#define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4) +#define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5) + +struct btrfs_qgroup_limit_item { + /* Only updated when any of the other values change. */ + __le64 flags; + __le64 max_rfer; + __le64 max_excl; + __le64 rsv_rfer; + __le64 rsv_excl; +} __attribute__ ((__packed__)); + +/* + * Just in case we somehow lose the roots and are not able to mount, + * we store an array of the roots from previous transactions in the super. + */ +#define BTRFS_NUM_BACKUP_ROOTS 4 +struct btrfs_root_backup { + __le64 tree_root; + __le64 tree_root_gen; + + __le64 chunk_root; + __le64 chunk_root_gen; + + __le64 extent_root; + __le64 extent_root_gen; + + __le64 fs_root; + __le64 fs_root_gen; + + __le64 dev_root; + __le64 dev_root_gen; + + __le64 csum_root; + __le64 csum_root_gen; + + __le64 total_bytes; + __le64 bytes_used; + __le64 num_devices; + /* future */ + __le64 unused_64[4]; + + u8 tree_root_level; + u8 chunk_root_level; + u8 extent_root_level; + u8 fs_root_level; + u8 dev_root_level; + u8 csum_root_level; + /* future and to align */ + u8 unused_8[10]; +} __attribute__ ((__packed__)); + +/* + * This is a very generous portion of the super block, giving us room to + * translate 14 chunks with 3 stripes each. + */ +#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048 + +#define BTRFS_LABEL_SIZE 256 + +/* The super block basically lists the main trees of the FS. */ +struct btrfs_super_block { + /* The first 4 fields must match struct btrfs_header */ + u8 csum[BTRFS_CSUM_SIZE]; + /* FS specific UUID, visible to user */ + u8 fsid[BTRFS_FSID_SIZE]; + __le64 bytenr; /* this block number */ + __le64 flags; + + /* Allowed to be different from the btrfs_header from here on down. */ + __le64 magic; + __le64 generation; + __le64 root; + __le64 chunk_root; + __le64 log_root; + + /* This will help find the new super based on the log root.
*/ + __le64 log_root_transid; + __le64 total_bytes; + __le64 bytes_used; + __le64 root_dir_objectid; + __le64 num_devices; + __le32 sectorsize; + __le32 nodesize; + __le32 __unused_leafsize; + __le32 stripesize; + __le32 sys_chunk_array_size; + __le64 chunk_root_generation; + __le64 compat_flags; + __le64 compat_ro_flags; + __le64 incompat_flags; + __le16 csum_type; + u8 root_level; + u8 chunk_root_level; + u8 log_root_level; + struct btrfs_dev_item dev_item; + + char label[BTRFS_LABEL_SIZE]; + + __le64 cache_generation; + __le64 uuid_tree_generation; + + /* The UUID written into btree blocks */ + u8 metadata_uuid[BTRFS_FSID_SIZE]; + + /* Future expansion */ + __le64 reserved[28]; + u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; + struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; +} __attribute__ ((__packed__)); + +/* + * Feature flags + * + * Used by: + * struct btrfs_super_block::(compat|compat_ro|incompat)_flags + * struct btrfs_ioctl_feature_flags + */ +#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE (1ULL << 0) + +/* + * Older kernels (< 4.9) on big-endian systems produced broken free space tree + * bitmaps, and btrfs-progs also used to corrupt the free space tree (versions + * < 4.7.3). If this bit is clear, then the free space tree cannot be trusted. + * btrfs-progs can also intentionally clear this bit to ask the kernel to + * rebuild the free space tree; however, this might not work on older kernels + * that do not know about this bit. If not sure, clear the cache manually on + * first mount when booting older kernel versions. + */ +#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID (1ULL << 1) + +#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) +#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) +#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) +#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) +#define BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD (1ULL << 4) + +/* + * Older kernels tried to do bigger metadata blocks, but the + * code was pretty buggy. Let's not let them try anymore. + */ +#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5) + +#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6) +#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7) +#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8) +#define BTRFS_FEATURE_INCOMPAT_NO_HOLES (1ULL << 9) +#define BTRFS_FEATURE_INCOMPAT_METADATA_UUID (1ULL << 10) +#define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11) + +/* + * Compat flags that we support. + * + * If any incompat flags are set other than the ones specified below then we + * will fail to mount.
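+ * + * I.e. a mount must be refused whenever + * (incompat_flags & ~BTRFS_FEATURE_INCOMPAT_SUPP) != 0.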
+ */ +#define BTRFS_FEATURE_COMPAT_SUPP 0ULL +#define BTRFS_FEATURE_COMPAT_SAFE_SET 0ULL +#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL + +#define BTRFS_FEATURE_COMPAT_RO_SUPP \ + (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | \ + BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID) + +#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL +#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL + +#define BTRFS_FEATURE_INCOMPAT_SUPP \ + (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ + BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ + BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ + BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ + BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ + BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD | \ + BTRFS_FEATURE_INCOMPAT_RAID56 | \ + BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \ + BTRFS_FEATURE_INCOMPAT_NO_HOLES | \ + BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \ + BTRFS_FEATURE_INCOMPAT_RAID1C34) + +#define BTRFS_FEATURE_INCOMPAT_SAFE_SET \ + (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) +#define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR 0ULL + +#define BTRFS_BACKREF_REV_MAX 256 +#define BTRFS_BACKREF_REV_SHIFT 56 +#define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \ + BTRFS_BACKREF_REV_SHIFT) + +#define BTRFS_OLD_BACKREF_REV 0 +#define BTRFS_MIXED_BACKREF_REV 1 + +#define BTRFS_MAX_LEVEL 8 + +/* Every tree block (leaf or node) starts with this header. */ +struct btrfs_header { + /* These first four must match the super block */ + u8 csum[BTRFS_CSUM_SIZE]; + u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ + __le64 bytenr; /* Which block this node is supposed to live in */ + __le64 flags; + + /* Allowed to be different from the super from here on down. */ + u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; + __le64 generation; + __le64 owner; + __le32 nritems; + u8 level; +} __attribute__ ((__packed__)); + +/* + * A leaf is full of items. Offset and size tell us where to find + * the item in the leaf (relative to the start of the data area). + */ +struct btrfs_item { + struct btrfs_disk_key key; + __le32 offset; + __le32 size; +} __attribute__ ((__packed__)); + +/* + * Leaves have an item area and a data area: + * [item0, item1....itemN] [free space] [dataN...data1, data0] + * + * The data is separate from the items to get the keys closer together + * during searches. + */ +struct btrfs_leaf { + struct btrfs_header header; + struct btrfs_item items[]; +} __attribute__ ((__packed__)); + +/* + * All non-leaf blocks are nodes; they hold only keys and pointers to child + * blocks.
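+ * + * A lookup walks down from the root, at each node following the blockptr of + * the last key that is <= the search key, until it reaches a leaf.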
+ */ +struct btrfs_key_ptr { + struct btrfs_disk_key key; + __le64 blockptr; + __le64 generation; +} __attribute__ ((__packed__)); + +struct btrfs_node { + struct btrfs_header header; + struct btrfs_key_ptr ptrs[]; +} __attribute__ ((__packed__)); + +#endif /* __BTRFS_TREE_H__ */ diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c new file mode 100644 index 0000000000..a39ad72067 --- /dev/null +++ b/fs/btrfs/root-tree.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include "ctree.h" + +int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, + struct btrfs_root_item *item, struct btrfs_key *key) +{ + struct btrfs_path *path; + struct btrfs_key search_key; + struct btrfs_key found_key; + struct extent_buffer *l; + int ret; + int slot; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + search_key.objectid = objectid; + search_key.type = BTRFS_ROOT_ITEM_KEY; + search_key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); + if (ret < 0) + goto out; + if (path->slots[0] == 0) { + ret = -ENOENT; + goto out; + } + + BUG_ON(ret == 0); + l = path->nodes[0]; + slot = path->slots[0] - 1; + btrfs_item_key_to_cpu(l, &found_key, slot); + if (found_key.type != BTRFS_ROOT_ITEM_KEY || + found_key.objectid != objectid) { + ret = -ENOENT; + goto out; + } + read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), + sizeof(*item)); + memcpy(key, &found_key, sizeof(found_key)); + ret = 0; +out: + btrfs_free_path(path); + return ret; +} diff --git a/fs/btrfs/root.c b/fs/btrfs/root.c deleted file mode 100644 index 127b67fd1c..0000000000 --- a/fs/btrfs/root.c +++ /dev/null @@ -1,92 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * BTRFS filesystem implementation for U-Boot - * - * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz - */ - -#include "btrfs.h" - -static void read_root_item(struct btrfs_path *p, struct btrfs_root_item *item) -{ - u32 len; - int reset = 0; - - len = btrfs_path_item_size(p); - memcpy(item, btrfs_path_item_ptr(p, struct btrfs_root_item), len); - btrfs_root_item_to_cpu(item); - - if (len < sizeof(*item)) - reset = 1; - if (!reset && item->generation != item->generation_v2) { - if (item->generation_v2 != 0) - printf("%s: generation != generation_v2 in root item", - __func__); - reset = 1; - } - if (reset) { - memset(&item->generation_v2, 0, - sizeof(*item) - offsetof(struct btrfs_root_item, - generation_v2)); - } -} - -int btrfs_find_root(u64 objectid, struct btrfs_root *root, - struct btrfs_root_item *root_item) -{ - struct btrfs_path path; - struct btrfs_root_item my_root_item; - - if (!btrfs_search_tree_key_type(&btrfs_info.tree_root, objectid, - BTRFS_ROOT_ITEM_KEY, &path)) - return -1; - - if (!root_item) - root_item = &my_root_item; - read_root_item(&path, root_item); - - if (root) { - root->objectid = objectid; - root->bytenr = root_item->bytenr; - root->root_dirid = root_item->root_dirid; - } - - btrfs_free_path(&path); - return 0; -} - -u64 btrfs_lookup_root_ref(u64 subvolid, struct btrfs_root_ref *refp, char *name) -{ - struct btrfs_path path; - struct btrfs_key *key; - struct btrfs_root_ref *ref; - u64 res = -1ULL; - - key = btrfs_search_tree_key_type(&btrfs_info.tree_root, subvolid, - BTRFS_ROOT_BACKREF_KEY, &path); - - if (!key) - return -1ULL; - - ref = btrfs_path_item_ptr(&path, struct btrfs_root_ref); - btrfs_root_ref_to_cpu(ref); - - if (refp) - *refp = *ref; - - if (name) { - if (ref->name_len > BTRFS_VOL_NAME_MAX) { - printf("%s: volume name too long: %u\n", __func__, - ref->name_len); - goto out; 
- } - - memcpy(name, ref + 1, ref->name_len); - } - - res = key->offset; -out: - btrfs_free_path(&path); - return res; -} - diff --git a/fs/btrfs/subvolume.c b/fs/btrfs/subvolume.c index 06e54f3310..2815673bcd 100644 --- a/fs/btrfs/subvolume.c +++ b/fs/btrfs/subvolume.c @@ -5,126 +5,230 @@ * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz */ -#include "btrfs.h" #include +#include "ctree.h" +#include "btrfs.h" +#include "disk-io.h" -static int get_subvol_name(u64 subvolid, char *name, int max_len) +/* + * Resolve the path of ino inside subvolume @root into @path_ret. + * + * @path_ret must be at least PATH_MAX size. + */ +static int get_path_in_subvol(struct btrfs_root *root, u64 ino, char *path_ret) { - struct btrfs_root_ref rref; - struct btrfs_inode_ref iref; - struct btrfs_root root; - u64 dir; - char tmp[max(BTRFS_VOL_NAME_MAX, BTRFS_NAME_MAX)]; - char *ptr; + struct btrfs_path path; + struct btrfs_key key; + char *tmp; + u64 cur = ino; + int ret = 0; - ptr = name + max_len - 1; - *ptr = '\0'; + tmp = malloc(PATH_MAX); + if (!tmp) + return -ENOMEM; + tmp[0] = '\0'; - while (subvolid != BTRFS_FS_TREE_OBJECTID) { - subvolid = btrfs_lookup_root_ref(subvolid, &rref, tmp); + btrfs_init_path(&path); + while (cur != BTRFS_FIRST_FREE_OBJECTID) { + struct btrfs_inode_ref *iref; + int name_len; - if (subvolid == -1ULL) - return -1; + btrfs_release_path(&path); + key.objectid = cur; + key.type = BTRFS_INODE_REF_KEY; + key.offset = (u64)-1; - ptr -= rref.name_len + 1; - if (ptr < name) - goto too_long; + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + /* Impossible */ + if (ret == 0) + ret = -EUCLEAN; + if (ret < 0) + goto out; + ret = btrfs_previous_item(root, &path, cur, + BTRFS_INODE_REF_KEY); + if (ret > 0) + ret = -ENOENT; + if (ret < 0) + goto out; - memcpy(ptr + 1, tmp, rref.name_len); - *ptr = '/'; + strncpy(tmp, path_ret, PATH_MAX); + iref = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_inode_ref); + name_len = btrfs_inode_ref_name_len(path.nodes[0], + iref); + if (name_len > BTRFS_NAME_LEN) { + ret = -ENAMETOOLONG; + goto out; + } + read_extent_buffer(path.nodes[0], path_ret, + (unsigned long)(iref + 1), name_len); + path_ret[name_len] = '/'; + path_ret[name_len + 1] = '\0'; + strncat(path_ret, tmp, PATH_MAX); - if (btrfs_find_root(subvolid, &root, NULL)) - return -1; + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + cur = key.offset; + } +out: + btrfs_release_path(&path); + free(tmp); + return ret; +} - dir = rref.dirid; +static int list_one_subvol(struct btrfs_root *root, char *path_ret) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_path path; + struct btrfs_key key; + char *tmp; + u64 cur = root->root_key.objectid; + int ret = 0; - while (dir != BTRFS_FIRST_FREE_OBJECTID) { - dir = btrfs_lookup_inode_ref(&root, dir, &iref, tmp); + tmp = malloc(PATH_MAX); + if (!tmp) + return -ENOMEM; + tmp[0] = '\0'; + path_ret[0] = '\0'; + btrfs_init_path(&path); + while (cur != BTRFS_FS_TREE_OBJECTID) { + struct btrfs_root_ref *rr; + struct btrfs_key location; + int name_len; + u64 ino; - if (dir == -1ULL) - return -1; + key.objectid = cur; + key.type = BTRFS_ROOT_BACKREF_KEY; + key.offset = (u64)-1; + btrfs_release_path(&path); - ptr -= iref.name_len + 1; - if (ptr < name) - goto too_long; + ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0); + if (ret == 0) + ret = -EUCLEAN; + if (ret < 0) + goto out; + ret = btrfs_previous_item(tree_root, &path, cur, + BTRFS_ROOT_BACKREF_KEY); + if 
(ret > 0) + ret = -ENOENT; + if (ret < 0) + goto out; - memcpy(ptr + 1, tmp, iref.name_len); - *ptr = '/'; + /* Get the subvolume name */ + rr = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_root_ref); + strncpy(tmp, path_ret, PATH_MAX); + name_len = btrfs_root_ref_name_len(path.nodes[0], rr); + if (name_len > BTRFS_NAME_LEN) { + ret = -ENAMETOOLONG; + goto out; + } + ino = btrfs_root_ref_dirid(path.nodes[0], rr); + read_extent_buffer(path.nodes[0], path_ret, + (unsigned long)(rr + 1), name_len); + path_ret[name_len] = '/'; + path_ret[name_len + 1] = '\0'; + strncat(path_ret, tmp, PATH_MAX); + + /* Get the path inside the parent subvolume */ + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + location.objectid = key.offset; + location.type = BTRFS_ROOT_ITEM_KEY; + location.offset = (u64)-1; + root = btrfs_read_fs_root(fs_info, &location); + if (IS_ERR(root)) { + ret = PTR_ERR(root); + goto out; + } + ret = get_path_in_subvol(root, ino, path_ret); + if (ret < 0) + goto out; + cur = key.offset; + } + /* Add the leading '/' */ + strncpy(tmp, path_ret, PATH_MAX); + strncpy(path_ret, "/", PATH_MAX); + strncat(path_ret, tmp, PATH_MAX); +out: + btrfs_release_path(&path); + free(tmp); + return ret; +} + +static int list_subvolums(struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *root; + struct btrfs_path path; + struct btrfs_key key; + char *result; + int ret = 0; + + result = malloc(PATH_MAX); + if (!result) + return -ENOMEM; + + ret = list_one_subvol(fs_info->fs_root, result); + if (ret < 0) + goto out; + root = fs_info->fs_root; + printf("ID %llu gen %llu path %.*s\n", + root->root_key.objectid, btrfs_root_generation(&root->root_item), + PATH_MAX, result); + + key.objectid = BTRFS_FIRST_FREE_OBJECTID; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = 0; + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0); + if (ret < 0) + goto out; + while (1) { + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) + goto next; + + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + if (key.objectid > BTRFS_LAST_FREE_OBJECTID) + break; + if (key.objectid < BTRFS_FIRST_FREE_OBJECTID || + key.type != BTRFS_ROOT_ITEM_KEY) + goto next; + key.offset = (u64)-1; + root = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(root)) { + ret = PTR_ERR(root); + if (ret == -ENOENT) + goto next; + } + ret = list_one_subvol(root, result); + if (ret < 0) + goto out; + printf("ID %llu gen %llu path %.*s\n", + root->root_key.objectid, + btrfs_root_generation(&root->root_item), + PATH_MAX, result); +next: + ret = btrfs_next_item(tree_root, &path); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + break; } } - - if (ptr == name + max_len - 1) { - name[0] = '/'; - name[1] = '\0'; - } else { - memmove(name, ptr, name + max_len - ptr); - } - - return 0; - -too_long: - printf("%s: subvolume name too long\n", __func__); - return -1; -} - -u64 btrfs_get_default_subvol_objectid(void) -{ - struct btrfs_dir_item item; - - if (btrfs_lookup_dir_item(&btrfs_info.tree_root, - btrfs_info.sb.root_dir_objectid, "default", 7, - &item)) - return BTRFS_FS_TREE_OBJECTID; - return item.location.objectid; -} - -static void list_subvols(u64 tree, char *nameptr, int max_name_len, int level) -{ - struct btrfs_key key, *found_key; - struct btrfs_path path; - struct btrfs_root_ref *ref; - int res; - - key.objectid = tree; - key.type = BTRFS_ROOT_REF_KEY; - key.offset = 0; - - if (btrfs_search_tree(&btrfs_info.tree_root, 
&key, &path)) - return; - - do { - found_key = btrfs_path_leaf_key(&path); - if (btrfs_comp_keys_type(&key, found_key)) - break; - - ref = btrfs_path_item_ptr(&path, struct btrfs_root_ref); - btrfs_root_ref_to_cpu(ref); - - printf("ID %llu parent %llu name ", found_key->offset, tree); - if (nameptr && !get_subvol_name(found_key->offset, nameptr, - max_name_len)) - printf("%s\n", nameptr); - else - printf("%.*s\n", (int) ref->name_len, - (const char *) (ref + 1)); - - if (level > 0) - list_subvols(found_key->offset, nameptr, max_name_len, - level - 1); - else - printf("%s: Too much recursion, maybe skipping some " - "subvolumes\n", __func__); - } while (!(res = btrfs_next_slot(&path))); - - btrfs_free_path(&path); +out: + free(result); + return ret; } void btrfs_list_subvols(void) { - char *nameptr = malloc(4096); + struct btrfs_fs_info *fs_info = current_fs_info; + int ret; - list_subvols(BTRFS_FS_TREE_OBJECTID, nameptr, nameptr ? 4096 : 0, 40); - - if (nameptr) - free(nameptr); + if (!fs_info) + return; + ret = list_subvolums(fs_info); + if (ret < 0) + error("failed to list subvolume: %d", ret); } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c deleted file mode 100644 index 913a4d402e..0000000000 --- a/fs/btrfs/super.c +++ /dev/null @@ -1,257 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * BTRFS filesystem implementation for U-Boot - * - * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz - */ - -#include -#include -#include -#include -#include -#include "btrfs.h" - -#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN \ - | BTRFS_HEADER_FLAG_RELOC \ - | BTRFS_SUPER_FLAG_ERROR \ - | BTRFS_SUPER_FLAG_SEEDING \ - | BTRFS_SUPER_FLAG_METADUMP) - -#define BTRFS_SUPER_INFO_SIZE 4096 - -/* - * checks if a valid root backup is present. - * considers the case when all root backups empty valid. - * returns -1 in case of invalid root backup and 0 for valid. 
- */ -static int btrfs_check_super_roots(struct btrfs_super_block *sb) -{ - struct btrfs_root_backup *root_backup; - int i, newest = -1; - int num_empty = 0; - - for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; ++i) { - root_backup = sb->super_roots + i; - - if (root_backup->tree_root == 0 && root_backup->tree_root_gen == 0) - num_empty++; - - if (root_backup->tree_root_gen == sb->generation) - newest = i; - } - - if (num_empty == BTRFS_NUM_BACKUP_ROOTS) { - return 0; - } else if (newest >= 0) { - return 0; - } - - return -1; -} - -static inline int is_power_of_2(u64 x) -{ - return !(x & (x - 1)); -} - -static int btrfs_check_super_csum(char *raw_disk_sb) -{ - struct btrfs_super_block *disk_sb = - (struct btrfs_super_block *) raw_disk_sb; - u16 csum_type = le16_to_cpu(disk_sb->csum_type); - - if (csum_type == BTRFS_CSUM_TYPE_CRC32) { - u32 crc = ~(u32) 0; - const int csum_size = sizeof(crc); - char result[csum_size]; - - crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE, crc, - BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); - btrfs_csum_final(crc, result); - - if (memcmp(raw_disk_sb, result, csum_size)) - return -1; - } else { - return -1; - } - - return 0; -} - -static int btrfs_check_super(struct btrfs_super_block *sb) -{ - int ret = 0; - - if (sb->flags & ~BTRFS_SUPER_FLAG_SUPP) { - printf("%s: Unsupported flags: %llu\n", __func__, - sb->flags & ~BTRFS_SUPER_FLAG_SUPP); - } - - if (sb->root_level > BTRFS_MAX_LEVEL) { - printf("%s: tree_root level too big: %d >= %d\n", __func__, - sb->root_level, BTRFS_MAX_LEVEL); - ret = -1; - } - - if (sb->chunk_root_level > BTRFS_MAX_LEVEL) { - printf("%s: chunk_root level too big: %d >= %d\n", __func__, - sb->chunk_root_level, BTRFS_MAX_LEVEL); - ret = -1; - } - - if (sb->log_root_level > BTRFS_MAX_LEVEL) { - printf("%s: log_root level too big: %d >= %d\n", __func__, - sb->log_root_level, BTRFS_MAX_LEVEL); - ret = -1; - } - - if (!is_power_of_2(sb->sectorsize) || sb->sectorsize < 4096 || - sb->sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) { - printf("%s: invalid sectorsize %u\n", __func__, - sb->sectorsize); - ret = -1; - } - - if (!is_power_of_2(sb->nodesize) || sb->nodesize < sb->sectorsize || - sb->nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) { - printf("%s: invalid nodesize %u\n", __func__, sb->nodesize); - ret = -1; - } - - if (sb->nodesize != sb->__unused_leafsize) { - printf("%s: invalid leafsize %u, should be %u\n", __func__, - sb->__unused_leafsize, sb->nodesize); - ret = -1; - } - - if (!IS_ALIGNED(sb->root, sb->sectorsize)) { - printf("%s: tree_root block unaligned: %llu\n", __func__, - sb->root); - ret = -1; - } - - if (!IS_ALIGNED(sb->chunk_root, sb->sectorsize)) { - printf("%s: chunk_root block unaligned: %llu\n", __func__, - sb->chunk_root); - ret = -1; - } - - if (!IS_ALIGNED(sb->log_root, sb->sectorsize)) { - printf("%s: log_root block unaligned: %llu\n", __func__, - sb->log_root); - ret = -1; - } - - if (memcmp(sb->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { - printf("%s: dev_item UUID does not match fsid\n", __func__); - ret = -1; - } - - if (sb->bytes_used < 6*sb->nodesize) { - printf("%s: bytes_used is too small %llu\n", __func__, - sb->bytes_used); - ret = -1; - } - - if (!is_power_of_2(sb->stripesize)) { - printf("%s: invalid stripesize %u\n", __func__, sb->stripesize); - ret = -1; - } - - if (sb->sys_chunk_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { - printf("%s: system chunk array too big %u > %u\n", __func__, - sb->sys_chunk_array_size, BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); - ret = -1; - } - - if (sb->sys_chunk_array_size < sizeof(struct 
btrfs_key) + - sizeof(struct btrfs_chunk)) { - printf("%s: system chunk array too small %u < %zu\n", __func__, - sb->sys_chunk_array_size, sizeof(struct btrfs_key) - + sizeof(struct btrfs_chunk)); - ret = -1; - } - - return ret; -} - -int btrfs_read_superblock(void) -{ - const u64 superblock_offsets[4] = { - 0x10000ull, - 0x4000000ull, - 0x4000000000ull, - 0x4000000000000ull - }; - ALLOC_CACHE_ALIGN_BUFFER(char, raw_sb, BTRFS_SUPER_INFO_SIZE); - struct btrfs_super_block *sb = (struct btrfs_super_block *) raw_sb; - u64 dev_total_bytes; - int i; - - dev_total_bytes = (u64) btrfs_part_info->size * btrfs_part_info->blksz; - - btrfs_info.sb.generation = 0; - - for (i = 0; i < 4; ++i) { - if (superblock_offsets[i] + sizeof(sb) > dev_total_bytes) - break; - - if (!btrfs_devread(superblock_offsets[i], BTRFS_SUPER_INFO_SIZE, - raw_sb)) - break; - - if (btrfs_check_super_csum(raw_sb)) { - debug("%s: invalid checksum at superblock mirror %i\n", - __func__, i); - continue; - } - - btrfs_super_block_to_cpu(sb); - - if (sb->magic != BTRFS_MAGIC) { - debug("%s: invalid BTRFS magic 0x%016llX at " - "superblock mirror %i\n", __func__, sb->magic, i); - } else if (sb->bytenr != superblock_offsets[i]) { - printf("%s: invalid bytenr 0x%016llX (expected " - "0x%016llX) at superblock mirror %i\n", - __func__, sb->bytenr, superblock_offsets[i], i); - } else if (btrfs_check_super(sb)) { - printf("%s: Checking superblock mirror %i failed\n", - __func__, i); - } else if (sb->generation > btrfs_info.sb.generation) { - memcpy(&btrfs_info.sb, sb, sizeof(*sb)); - } else { - /* Nothing */ - } - } - - if (!btrfs_info.sb.generation) { - debug("%s: No valid BTRFS superblock found!\n", __func__); - return -1; - } - - if (btrfs_check_super_roots(&btrfs_info.sb)) { - printf("%s: No valid root_backup found!\n", __func__); - return -1; - } - - if (sb->sectorsize != PAGE_SIZE) { - printf( - "%s: Unsupported sector size (%u), only supports %u as sector size\n", - __func__, sb->sectorsize, PAGE_SIZE); - return -1; - } - - if (btrfs_info.sb.num_devices != 1) { - printf("%s: Unsupported number of devices (%lli). 
This driver " - "only supports filesystem on one device.\n", __func__, - btrfs_info.sb.num_devices); - return -1; - } - - debug("Chosen superblock with generation = %llu\n", - btrfs_info.sb.generation); - - return 0; -} diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c new file mode 100644 index 0000000000..fcf52d4b0f --- /dev/null +++ b/fs/btrfs/volumes.c @@ -0,0 +1,1173 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include +#include +#include +#include "ctree.h" +#include "disk-io.h" +#include "volumes.h" +#include "extent-io.h" + +const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { + [BTRFS_RAID_RAID10] = { + .sub_stripes = 2, + .dev_stripes = 1, + .devs_max = 0, /* 0 == as many as possible */ + .devs_min = 4, + .tolerated_failures = 1, + .devs_increment = 2, + .ncopies = 2, + .nparity = 0, + .raid_name = "raid10", + .bg_flag = BTRFS_BLOCK_GROUP_RAID10, + }, + [BTRFS_RAID_RAID1] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 2, + .devs_min = 2, + .tolerated_failures = 1, + .devs_increment = 2, + .ncopies = 2, + .nparity = 0, + .raid_name = "raid1", + .bg_flag = BTRFS_BLOCK_GROUP_RAID1, + }, + [BTRFS_RAID_RAID1C3] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 3, + .devs_min = 3, + .tolerated_failures = 2, + .devs_increment = 3, + .ncopies = 3, + .raid_name = "raid1c3", + .bg_flag = BTRFS_BLOCK_GROUP_RAID1C3, + }, + [BTRFS_RAID_RAID1C4] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 4, + .devs_min = 4, + .tolerated_failures = 3, + .devs_increment = 4, + .ncopies = 4, + .raid_name = "raid1c4", + .bg_flag = BTRFS_BLOCK_GROUP_RAID1C4, + }, + [BTRFS_RAID_DUP] = { + .sub_stripes = 1, + .dev_stripes = 2, + .devs_max = 1, + .devs_min = 1, + .tolerated_failures = 0, + .devs_increment = 1, + .ncopies = 2, + .nparity = 0, + .raid_name = "dup", + .bg_flag = BTRFS_BLOCK_GROUP_DUP, + }, + [BTRFS_RAID_RAID0] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 0, + .devs_min = 2, + .tolerated_failures = 0, + .devs_increment = 1, + .ncopies = 1, + .nparity = 0, + .raid_name = "raid0", + .bg_flag = BTRFS_BLOCK_GROUP_RAID0, + }, + [BTRFS_RAID_SINGLE] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 1, + .devs_min = 1, + .tolerated_failures = 0, + .devs_increment = 1, + .ncopies = 1, + .nparity = 0, + .raid_name = "single", + .bg_flag = 0, + }, + [BTRFS_RAID_RAID5] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 0, + .devs_min = 2, + .tolerated_failures = 1, + .devs_increment = 1, + .ncopies = 1, + .nparity = 1, + .raid_name = "raid5", + .bg_flag = BTRFS_BLOCK_GROUP_RAID5, + }, + [BTRFS_RAID_RAID6] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 0, + .devs_min = 3, + .tolerated_failures = 2, + .devs_increment = 1, + .ncopies = 1, + .nparity = 2, + .raid_name = "raid6", + .bg_flag = BTRFS_BLOCK_GROUP_RAID6, + }, +}; + +struct stripe { + struct btrfs_device *dev; + u64 physical; +}; + +static inline int nr_parity_stripes(struct map_lookup *map) +{ + if (map->type & BTRFS_BLOCK_GROUP_RAID5) + return 1; + else if (map->type & BTRFS_BLOCK_GROUP_RAID6) + return 2; + else + return 0; +} + +static inline int nr_data_stripes(struct map_lookup *map) +{ + return map->num_stripes - nr_parity_stripes(map); +} + +#define is_parity_stripe(x) ( ((x) == BTRFS_RAID5_P_STRIPE) || ((x) == BTRFS_RAID6_Q_STRIPE) ) + +static LIST_HEAD(fs_uuids); + +/* + * Find a device specified by @devid or @uuid in the list of @fs_devices, or + * return NULL. 
+ * + * If devid and uuid are both specified, the match must be exact, otherwise + * only devid is used. + */ +static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices, + u64 devid, u8 *uuid) +{ + struct list_head *head = &fs_devices->devices; + struct btrfs_device *dev; + + list_for_each_entry(dev, head, dev_list) { + if (dev->devid == devid && + (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) { + return dev; + } + } + return NULL; +} + +static struct btrfs_fs_devices *find_fsid(u8 *fsid, u8 *metadata_uuid) +{ + struct btrfs_fs_devices *fs_devices; + + list_for_each_entry(fs_devices, &fs_uuids, list) { + if (metadata_uuid && (memcmp(fsid, fs_devices->fsid, + BTRFS_FSID_SIZE) == 0) && + (memcmp(metadata_uuid, fs_devices->metadata_uuid, + BTRFS_FSID_SIZE) == 0)) { + return fs_devices; + } else if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0){ + return fs_devices; + } + } + return NULL; +} + +static int device_list_add(struct btrfs_super_block *disk_super, + u64 devid, struct blk_desc *desc, + struct disk_partition *part, + struct btrfs_fs_devices **fs_devices_ret) +{ + struct btrfs_device *device; + struct btrfs_fs_devices *fs_devices; + u64 found_transid = btrfs_super_generation(disk_super); + bool metadata_uuid = (btrfs_super_incompat_flags(disk_super) & + BTRFS_FEATURE_INCOMPAT_METADATA_UUID); + + if (metadata_uuid) + fs_devices = find_fsid(disk_super->fsid, + disk_super->metadata_uuid); + else + fs_devices = find_fsid(disk_super->fsid, NULL); + + if (!fs_devices) { + fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); + if (!fs_devices) + return -ENOMEM; + INIT_LIST_HEAD(&fs_devices->devices); + list_add(&fs_devices->list, &fs_uuids); + memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); + if (metadata_uuid) + memcpy(fs_devices->metadata_uuid, + disk_super->metadata_uuid, BTRFS_FSID_SIZE); + else + memcpy(fs_devices->metadata_uuid, fs_devices->fsid, + BTRFS_FSID_SIZE); + + fs_devices->latest_devid = devid; + fs_devices->latest_trans = found_transid; + fs_devices->lowest_devid = (u64)-1; + device = NULL; + } else { + device = find_device(fs_devices, devid, + disk_super->dev_item.uuid); + } + if (!device) { + device = kzalloc(sizeof(*device), GFP_NOFS); + if (!device) { + /* we can safely leave the fs_devices entry around */ + return -ENOMEM; + } + device->devid = devid; + device->desc = desc; + device->part = part; + device->generation = found_transid; + memcpy(device->uuid, disk_super->dev_item.uuid, + BTRFS_UUID_SIZE); + device->total_devs = btrfs_super_num_devices(disk_super); + device->super_bytes_used = btrfs_super_bytes_used(disk_super); + device->total_bytes = + btrfs_stack_device_total_bytes(&disk_super->dev_item); + device->bytes_used = + btrfs_stack_device_bytes_used(&disk_super->dev_item); + list_add(&device->dev_list, &fs_devices->devices); + device->fs_devices = fs_devices; + } else if (!device->desc || !device->part) { + /* + * The existing device has newer generation, so this one could + * be a stale one, don't add it. 
+ */ + if (found_transid < device->generation) { + error( + "adding devid %llu gen %llu but found an existing device gen %llu", + device->devid, found_transid, + device->generation); + return -EEXIST; + } else { + device->desc = desc; + device->part = part; + } + } + + + if (found_transid > fs_devices->latest_trans) { + fs_devices->latest_devid = devid; + fs_devices->latest_trans = found_transid; + } + if (fs_devices->lowest_devid > devid) { + fs_devices->lowest_devid = devid; + } + *fs_devices_ret = fs_devices; + return 0; +} + +int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) +{ + struct btrfs_fs_devices *seed_devices; + struct btrfs_device *device; + int ret = 0; + +again: + if (!fs_devices) + return 0; + while (!list_empty(&fs_devices->devices)) { + device = list_entry(fs_devices->devices.next, + struct btrfs_device, dev_list); + list_del(&device->dev_list); + /* free the memory */ + free(device); + } + + seed_devices = fs_devices->seed; + fs_devices->seed = NULL; + if (seed_devices) { + struct btrfs_fs_devices *orig; + + orig = fs_devices; + fs_devices = seed_devices; + list_del(&orig->list); + free(orig); + goto again; + } else { + list_del(&fs_devices->list); + free(fs_devices); + } + + return ret; +} + +void btrfs_close_all_devices(void) +{ + struct btrfs_fs_devices *fs_devices; + + while (!list_empty(&fs_uuids)) { + fs_devices = list_entry(fs_uuids.next, struct btrfs_fs_devices, + list); + btrfs_close_devices(fs_devices); + } +} + +int btrfs_open_devices(struct btrfs_fs_devices *fs_devices) +{ + struct btrfs_device *device; + + list_for_each_entry(device, &fs_devices->devices, dev_list) { + if (!device->desc || !device->part) { + printf("no device found for devid %llu, skip it \n", + device->devid); + continue; + } + } + return 0; +} + +int btrfs_scan_one_device(struct blk_desc *desc, struct disk_partition *part, + struct btrfs_fs_devices **fs_devices_ret, + u64 *total_devs) +{ + struct btrfs_super_block *disk_super; + char buf[BTRFS_SUPER_INFO_SIZE]; + int ret; + u64 devid; + + disk_super = (struct btrfs_super_block *)buf; + ret = btrfs_read_dev_super(desc, part, disk_super); + if (ret < 0) + return -EIO; + devid = btrfs_stack_device_id(&disk_super->dev_item); + if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_METADUMP) + *total_devs = 1; + else + *total_devs = btrfs_super_num_devices(disk_super); + + ret = device_list_add(disk_super, devid, desc, part, fs_devices_ret); + + return ret; +} + +struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, + u8 *uuid, u8 *fsid) +{ + struct btrfs_device *device; + struct btrfs_fs_devices *cur_devices; + + cur_devices = fs_info->fs_devices; + while (cur_devices) { + if (!fsid || + !memcmp(cur_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) { + device = find_device(cur_devices, devid, uuid); + if (device) + return device; + } + cur_devices = cur_devices->seed; + } + return NULL; +} + +static struct btrfs_device *fill_missing_device(u64 devid) +{ + struct btrfs_device *device; + + device = kzalloc(sizeof(*device), GFP_NOFS); + return device; +} + +/* + * slot == -1: SYSTEM chunk + * return -EIO on error, otherwise return 0 + */ +int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk, + int slot, u64 logical) +{ + u64 length; + u64 stripe_len; + u16 num_stripes; + u16 sub_stripes; + u64 type; + u32 chunk_ondisk_size; + u32 sectorsize = fs_info->sectorsize; + + /* + * Basic chunk item size check. 
Note that btrfs_chunk already contains + * one stripe, so no "==" check. + */ + if (slot >= 0 && + btrfs_item_size_nr(leaf, slot) < sizeof(struct btrfs_chunk)) { + error("invalid chunk item size, have %u expect [%zu, %zu)", + btrfs_item_size_nr(leaf, slot), + sizeof(struct btrfs_chunk), + BTRFS_LEAF_DATA_SIZE(fs_info)); + return -EUCLEAN; + } + length = btrfs_chunk_length(leaf, chunk); + stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); + type = btrfs_chunk_type(leaf, chunk); + + if (num_stripes == 0) { + error("invalid num_stripes, have %u expect non-zero", + num_stripes); + return -EUCLEAN; + } + if (slot >= 0 && btrfs_chunk_item_size(num_stripes) != + btrfs_item_size_nr(leaf, slot)) { + error("invalid chunk item size, have %u expect %lu", + btrfs_item_size_nr(leaf, slot), + btrfs_chunk_item_size(num_stripes)); + return -EUCLEAN; + } + + /* + * These validity checks may be insufficient to cover every corner case. + */ + if (!IS_ALIGNED(logical, sectorsize)) { + error("invalid chunk logical %llu", logical); + return -EIO; + } + if (btrfs_chunk_sector_size(leaf, chunk) != sectorsize) { + error("invalid chunk sectorsize %llu", + (unsigned long long)btrfs_chunk_sector_size(leaf, chunk)); + return -EIO; + } + if (!length || !IS_ALIGNED(length, sectorsize)) { + error("invalid chunk length %llu", length); + return -EIO; + } + if (stripe_len != BTRFS_STRIPE_LEN) { + error("invalid chunk stripe length: %llu", stripe_len); + return -EIO; + } + /* Check on chunk item type */ + if (slot == -1 && (type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) { + error("invalid chunk type %llu", type); + return -EIO; + } + if (type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK | + BTRFS_BLOCK_GROUP_PROFILE_MASK)) { + error("unrecognized chunk type: %llu", + ~(BTRFS_BLOCK_GROUP_TYPE_MASK | + BTRFS_BLOCK_GROUP_PROFILE_MASK) & type); + return -EIO; + } + if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { + error("missing chunk type flag: %llu", type); + return -EIO; + } + if (!(is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) || + (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)) { + error("conflicting chunk type detected: %llu", type); + return -EIO; + } + if ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) && + !is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK)) { + error("conflicting chunk profile detected: %llu", type); + return -EIO; + } + + chunk_ondisk_size = btrfs_chunk_item_size(num_stripes); + /* + * Btrfs_chunk contains at least one stripe, and for a sys_chunk + * it can't exceed the system chunk array size. + * For a normal chunk, it should match its chunk item size.
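+ * (A sys_chunk array entry is a btrfs_disk_key followed by the chunk item, + * and the whole array must fit in BTRFS_SYSTEM_CHUNK_ARRAY_SIZE bytes.)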
+
+/*
+ * Slot is used to verify that the chunk item is valid
+ *
+ * For a sys chunk in the superblock, pass -1 to indicate a sys chunk.
+ */
+static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
+			  struct extent_buffer *leaf,
+			  struct btrfs_chunk *chunk, int slot)
+{
+	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+	struct map_lookup *map;
+	struct cache_extent *ce;
+	u64 logical;
+	u64 length;
+	u64 devid;
+	u8 uuid[BTRFS_UUID_SIZE];
+	int num_stripes;
+	int ret;
+	int i;
+
+	logical = key->offset;
+	length = btrfs_chunk_length(leaf, chunk);
+	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+	/* Validation check */
+	ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, slot, logical);
+	if (ret) {
+		error("%s checksums match, but it has an invalid chunk, %s",
+		      (slot == -1) ? "Superblock" : "Metadata",
+		      (slot == -1) ? "try btrfsck --repair -s <superblock>, i.e. 0,1,2" : "");
+		return ret;
+	}
+
+	ce = search_cache_extent(&map_tree->cache_tree, logical);
+
+	/* already mapped? */
+	if (ce && ce->start <= logical && ce->start + ce->size > logical)
+		return 0;
+
+	map = kmalloc(btrfs_map_lookup_size(num_stripes), GFP_NOFS);
+	if (!map)
+		return -ENOMEM;
+
+	map->ce.start = logical;
+	map->ce.size = length;
+	map->num_stripes = num_stripes;
+	map->io_width = btrfs_chunk_io_width(leaf, chunk);
+	map->io_align = btrfs_chunk_io_align(leaf, chunk);
+	map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
+	map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+	map->type = btrfs_chunk_type(leaf, chunk);
+	map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+
+	for (i = 0; i < num_stripes; i++) {
+		map->stripes[i].physical =
+			btrfs_stripe_offset_nr(leaf, chunk, i);
+		devid = btrfs_stripe_devid_nr(leaf, chunk, i);
+		read_extent_buffer(leaf, uuid, (unsigned long)
+				   btrfs_stripe_dev_uuid_nr(chunk, i),
+				   BTRFS_UUID_SIZE);
+		map->stripes[i].dev = btrfs_find_device(fs_info, devid, uuid,
+							NULL);
+		if (!map->stripes[i].dev) {
+			map->stripes[i].dev = fill_missing_device(devid);
+			printf("warning, device %llu is missing\n",
+			       (unsigned long long)devid);
+			list_add(&map->stripes[i].dev->dev_list,
+				 &fs_info->fs_devices->devices);
+		}
+	}
+	ret = insert_cache_extent(&map_tree->cache_tree, &map->ce);
+	if (ret < 0) {
+		errno = -ret;
+		error("failed to add chunk map start=%llu len=%llu: %d (%m)",
+		      map->ce.start, map->ce.size, ret);
+	}
+
+	return ret;
+}
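+
+/*
+ * The chunk map inserted above is what the read path consumes later.
+ * Resolving a logical address works roughly like this (illustrative
+ * sketch, error handling elided):
+ *
+ *	ce = search_cache_extent(&map_tree->cache_tree, logical);
+ *	map = container_of(ce, struct map_lookup, ce);
+ *	physical = map->stripes[n].physical + offset_within_stripe;
+ *
+ * See __btrfs_map_block() below for the complete version.
+ */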
+
+static int fill_device_from_item(struct extent_buffer *leaf,
+				 struct btrfs_dev_item *dev_item,
+				 struct btrfs_device *device)
+{
+	unsigned long ptr;
+
+	device->devid = btrfs_device_id(leaf, dev_item);
+	device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+	device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
+	device->type = btrfs_device_type(leaf, dev_item);
+	device->io_align = btrfs_device_io_align(leaf, dev_item);
+	device->io_width = btrfs_device_io_width(leaf, dev_item);
+	device->sector_size = btrfs_device_sector_size(leaf, dev_item);
+
+	ptr = (unsigned long)btrfs_device_uuid(dev_item);
+	read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
+
+	return 0;
+}
+
+static int read_one_dev(struct btrfs_fs_info *fs_info,
+			struct extent_buffer *leaf,
+			struct btrfs_dev_item *dev_item)
+{
+	struct btrfs_device *device;
+	u64 devid;
+	int ret = 0;
+	u8 fs_uuid[BTRFS_UUID_SIZE];
+	u8 dev_uuid[BTRFS_UUID_SIZE];
+
+	devid = btrfs_device_id(leaf, dev_item);
+	read_extent_buffer(leaf, dev_uuid,
+			   (unsigned long)btrfs_device_uuid(dev_item),
+			   BTRFS_UUID_SIZE);
+	read_extent_buffer(leaf, fs_uuid,
+			   (unsigned long)btrfs_device_fsid(dev_item),
+			   BTRFS_FSID_SIZE);
+
+	if (memcmp(fs_uuid, fs_info->fs_devices->fsid, BTRFS_UUID_SIZE)) {
+		error("seed device is not yet supported");
+		return -ENOTSUPP;
+	}
+
+	device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid);
+	if (!device) {
+		device = kzalloc(sizeof(*device), GFP_NOFS);
+		if (!device)
+			return -ENOMEM;
+		list_add(&device->dev_list,
+			 &fs_info->fs_devices->devices);
+	}
+
+	fill_device_from_item(leaf, dev_item, device);
+	fs_info->fs_devices->total_rw_bytes +=
+		btrfs_device_total_bytes(leaf, dev_item);
+	return ret;
+}
+
+int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_super_block *super_copy = fs_info->super_copy;
+	struct extent_buffer *sb;
+	struct btrfs_disk_key *disk_key;
+	struct btrfs_chunk *chunk;
+	u8 *array_ptr;
+	unsigned long sb_array_offset;
+	int ret = 0;
+	u32 num_stripes;
+	u32 array_size;
+	u32 len = 0;
+	u32 cur_offset;
+	struct btrfs_key key;
+
+	if (fs_info->nodesize < 
BTRFS_SUPER_INFO_SIZE) { + printf("ERROR: nodesize %u too small to read superblock\n", + fs_info->nodesize); + return -EINVAL; + } + sb = alloc_dummy_extent_buffer(fs_info, BTRFS_SUPER_INFO_OFFSET, + BTRFS_SUPER_INFO_SIZE); + if (!sb) + return -ENOMEM; + btrfs_set_buffer_uptodate(sb); + write_extent_buffer(sb, super_copy, 0, sizeof(*super_copy)); + array_size = btrfs_super_sys_array_size(super_copy); + + array_ptr = super_copy->sys_chunk_array; + sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array); + cur_offset = 0; + + while (cur_offset < array_size) { + disk_key = (struct btrfs_disk_key *)array_ptr; + len = sizeof(*disk_key); + if (cur_offset + len > array_size) + goto out_short_read; + + btrfs_disk_key_to_cpu(&key, disk_key); + + array_ptr += len; + sb_array_offset += len; + cur_offset += len; + + if (key.type == BTRFS_CHUNK_ITEM_KEY) { + chunk = (struct btrfs_chunk *)sb_array_offset; + /* + * At least one btrfs_chunk with one stripe must be + * present, exact stripe count check comes afterwards + */ + len = btrfs_chunk_item_size(1); + if (cur_offset + len > array_size) + goto out_short_read; + + num_stripes = btrfs_chunk_num_stripes(sb, chunk); + if (!num_stripes) { + printk( + "ERROR: invalid number of stripes %u in sys_array at offset %u\n", + num_stripes, cur_offset); + ret = -EIO; + break; + } + + len = btrfs_chunk_item_size(num_stripes); + if (cur_offset + len > array_size) + goto out_short_read; + + ret = read_one_chunk(fs_info, &key, sb, chunk, -1); + if (ret) + break; + } else { + printk( + "ERROR: unexpected item type %u in sys_array at offset %u\n", + (u32)key.type, cur_offset); + ret = -EIO; + break; + } + array_ptr += len; + sb_array_offset += len; + cur_offset += len; + } + free_extent_buffer(sb); + return ret; + +out_short_read: + printk("ERROR: sys_array too short to read %u bytes at offset %u\n", + len, cur_offset); + free_extent_buffer(sb); + return -EIO; +} + +int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) +{ + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_root *root = fs_info->chunk_root; + int ret; + int slot; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + /* + * Read all device items, and then all the chunk items. All + * device items are found before any chunk item (their object id + * is smaller than the lowest possible object id for a chunk + * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID). 
+ */ + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.offset = 0; + key.type = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto error; + while(1) { + leaf = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto error; + break; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (found_key.type == BTRFS_DEV_ITEM_KEY) { + struct btrfs_dev_item *dev_item; + dev_item = btrfs_item_ptr(leaf, slot, + struct btrfs_dev_item); + ret = read_one_dev(fs_info, leaf, dev_item); + if (ret < 0) + goto error; + } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { + struct btrfs_chunk *chunk; + chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); + ret = read_one_chunk(fs_info, &found_key, leaf, chunk, + slot); + if (ret < 0) + goto error; + } + path->slots[0]++; + } + + ret = 0; +error: + btrfs_free_path(path); + return ret; +} + +/* + * Get stripe length from chunk item and its stripe items + * + * Caller should only call this function after validating the chunk item + * by using btrfs_check_chunk_valid(). + */ +u64 btrfs_stripe_length(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk) +{ + u64 stripe_len; + u64 chunk_len; + u32 num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + u64 profile = btrfs_chunk_type(leaf, chunk) & + BTRFS_BLOCK_GROUP_PROFILE_MASK; + + chunk_len = btrfs_chunk_length(leaf, chunk); + + switch (profile) { + case 0: /* Single profile */ + case BTRFS_BLOCK_GROUP_RAID1: + case BTRFS_BLOCK_GROUP_RAID1C3: + case BTRFS_BLOCK_GROUP_RAID1C4: + case BTRFS_BLOCK_GROUP_DUP: + stripe_len = chunk_len; + break; + case BTRFS_BLOCK_GROUP_RAID0: + stripe_len = chunk_len / num_stripes; + break; + case BTRFS_BLOCK_GROUP_RAID5: + stripe_len = chunk_len / (num_stripes - 1); + break; + case BTRFS_BLOCK_GROUP_RAID6: + stripe_len = chunk_len / (num_stripes - 2); + break; + case BTRFS_BLOCK_GROUP_RAID10: + stripe_len = chunk_len / (num_stripes / + btrfs_chunk_sub_stripes(leaf, chunk)); + break; + default: + /* Invalid chunk profile found */ + BUG_ON(1); + } + return stripe_len; +} + +int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) +{ + struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct cache_extent *ce; + struct map_lookup *map; + int ret; + + ce = search_cache_extent(&map_tree->cache_tree, logical); + if (!ce) { + fprintf(stderr, "No mapping for %llu-%llu\n", + (unsigned long long)logical, + (unsigned long long)logical+len); + return 1; + } + if (ce->start > logical || ce->start + ce->size < logical) { + fprintf(stderr, "Invalid mapping for %llu-%llu, got " + "%llu-%llu\n", (unsigned long long)logical, + (unsigned long long)logical+len, + (unsigned long long)ce->start, + (unsigned long long)ce->start + ce->size); + return 1; + } + map = container_of(ce, struct map_lookup, ce); + + if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4)) + ret = map->num_stripes; + else if (map->type & BTRFS_BLOCK_GROUP_RAID10) + ret = map->sub_stripes; + else if (map->type & BTRFS_BLOCK_GROUP_RAID5) + ret = 2; + else if (map->type & BTRFS_BLOCK_GROUP_RAID6) + ret = 3; + else + ret = 1; + return ret; +} + +int btrfs_next_bg(struct btrfs_fs_info *fs_info, u64 *logical, + u64 *size, u64 type) +{ + struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct cache_extent *ce; + struct map_lookup *map; + u64 
cur = *logical;
+
+	ce = search_cache_extent(&map_tree->cache_tree, cur);
+
+	while (ce) {
+		/*
+		 * Only jump to the next bg if our cur is not 0.  As the
+		 * initial logical for btrfs_next_bg() is 0, jumping to the
+		 * next bg right away would skip a valid bg.
+		 */
+		if (cur) {
+			ce = next_cache_extent(ce);
+			if (!ce)
+				return -ENOENT;
+		}
+
+		cur = ce->start;
+		map = container_of(ce, struct map_lookup, ce);
+		if (map->type & type) {
+			*logical = ce->start;
+			*size = ce->size;
+			return 0;
+		}
+		if (!cur)
+			ce = next_cache_extent(ce);
+	}
+
+	return -ENOENT;
+}
+
+static inline int parity_smaller(u64 a, u64 b)
+{
+	return a > b;
+}
+
+/* Bubble-sort the stripe set to put the parity/syndrome stripes last */
+static void sort_parity_stripes(struct btrfs_multi_bio *bbio, u64 *raid_map)
+{
+	struct btrfs_bio_stripe s;
+	int i;
+	u64 l;
+	int again = 1;
+
+	while (again) {
+		again = 0;
+		for (i = 0; i < bbio->num_stripes - 1; i++) {
+			if (parity_smaller(raid_map[i], raid_map[i+1])) {
+				s = bbio->stripes[i];
+				l = raid_map[i];
+				bbio->stripes[i] = bbio->stripes[i+1];
+				raid_map[i] = raid_map[i+1];
+				bbio->stripes[i+1] = s;
+				raid_map[i+1] = l;
+				again = 1;
+			}
+		}
+	}
+}
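+
+/*
+ * For instance (addresses made up for illustration): a three-stripe RAID5
+ * full stripe whose raid_map comes out as { 2G, 1G, BTRFS_RAID5_P_STRIPE }
+ * is reordered to { 1G, 2G, BTRFS_RAID5_P_STRIPE }, i.e. data stripes in
+ * ascending logical order first and the parity stripe last, with
+ * bbio->stripes[] permuted the same way.
+ */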
+
+int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+		      u64 logical, u64 *length, u64 *type,
+		      struct btrfs_multi_bio **multi_ret, int mirror_num,
+		      u64 **raid_map_ret)
+{
+	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+	struct cache_extent *ce;
+	struct map_lookup *map;
+	u64 offset;
+	u64 stripe_offset;
+	u64 *raid_map = NULL;
+	int stripe_nr;
+	int stripes_allocated = 8;
+	int stripes_required = 1;
+	int stripe_index;
+	int i;
+	struct btrfs_multi_bio *multi = NULL;
+
+	if (multi_ret && rw == READ)
+		stripes_allocated = 1;
+again:
+	ce = search_cache_extent(&map_tree->cache_tree, logical);
+	if (!ce) {
+		kfree(multi);
+		*length = (u64)-1;
+		return -ENOENT;
+	}
+	if (ce->start > logical) {
+		kfree(multi);
+		*length = ce->start - logical;
+		return -ENOENT;
+	}
+
+	if (multi_ret) {
+		multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
+				GFP_NOFS);
+		if (!multi)
+			return -ENOMEM;
+	}
+	map = container_of(ce, struct map_lookup, ce);
+	offset = logical - ce->start;
+
+	if (rw == WRITE) {
+		if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+				 BTRFS_BLOCK_GROUP_RAID1C3 |
+				 BTRFS_BLOCK_GROUP_RAID1C4 |
+				 BTRFS_BLOCK_GROUP_DUP)) {
+			stripes_required = map->num_stripes;
+		} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+			stripes_required = map->sub_stripes;
+		}
+	}
+	if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)
+	    && multi_ret && ((rw & WRITE) || mirror_num > 1) && raid_map_ret) {
+		/* RAID[56] write or recovery.  Return all stripes */
+		stripes_required = map->num_stripes;
+
+		/* Only allocate the map if we've already got a large enough multi_ret */
+		if (stripes_allocated >= stripes_required) {
+			raid_map = kmalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
+			if (!raid_map) {
+				kfree(multi);
+				return -ENOMEM;
+			}
+		}
+	}
+
+	/* if our multi bio struct is too small, back off and try again */
+	if (multi_ret && stripes_allocated < stripes_required) {
+		stripes_allocated = stripes_required;
+		kfree(multi);
+		multi = NULL;
+		goto again;
+	}
+	stripe_nr = offset;
+	/*
+	 * stripe_nr counts the total number of stripes we have to stride
+	 * to get to this block
+	 */
+	stripe_nr = stripe_nr / map->stripe_len;
+
+	stripe_offset = stripe_nr * map->stripe_len;
+	BUG_ON(offset < stripe_offset);
+
+	/* stripe_offset is the offset of this block in its stripe */
+	stripe_offset = offset - stripe_offset;
+
+	if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+			 BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4 |
+			 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
+			 BTRFS_BLOCK_GROUP_RAID10 |
+			 BTRFS_BLOCK_GROUP_DUP)) {
+		/* we limit the length of each bio to what fits in a stripe */
+		*length = min_t(u64, ce->size - offset,
+				map->stripe_len - stripe_offset);
+	} else {
+		*length = ce->size - offset;
+	}
+
+	if (!multi_ret)
+		goto out;
+
+	multi->num_stripes = 1;
+	stripe_index = 0;
+	if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+			 BTRFS_BLOCK_GROUP_RAID1C3 |
+			 BTRFS_BLOCK_GROUP_RAID1C4)) {
+		if (rw == WRITE)
+			multi->num_stripes = map->num_stripes;
+		else if (mirror_num)
+			stripe_index = mirror_num - 1;
+		else
+			stripe_index = stripe_nr % map->num_stripes;
+	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+		int factor = map->num_stripes / map->sub_stripes;
+
+		stripe_index = stripe_nr % factor;
+		stripe_index *= map->sub_stripes;
+
+		if (rw == WRITE)
+			multi->num_stripes = map->sub_stripes;
+		else if (mirror_num)
+			stripe_index += mirror_num - 1;
+
+		stripe_nr = stripe_nr / factor;
+	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
+		if (rw == WRITE)
+			multi->num_stripes = map->num_stripes;
+		else if (mirror_num)
+			stripe_index = mirror_num - 1;
+	} else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+				BTRFS_BLOCK_GROUP_RAID6)) {
+		if (raid_map) {
+			int rot;
+			u64 tmp;
+			u64 raid56_full_stripe_start;
+			u64 full_stripe_len = nr_data_stripes(map) * map->stripe_len;
+
+			/*
+			 * align the start of our data stripe in the logical
+			 * address space
+			 */
+			raid56_full_stripe_start = offset / full_stripe_len;
+			raid56_full_stripe_start *= full_stripe_len;
+
+			/* get the data stripe number */
+			stripe_nr = raid56_full_stripe_start / map->stripe_len;
+			stripe_nr = stripe_nr / nr_data_stripes(map);
+
+			/* Work out the disk rotation on this stripe-set */
+			rot = stripe_nr % map->num_stripes;
+
+			/* Fill in the logical address of each stripe */
+			tmp = stripe_nr * nr_data_stripes(map);
+
+			for (i = 0; i < nr_data_stripes(map); i++)
+				raid_map[(i+rot) % map->num_stripes] =
+					ce->start + (tmp + i) * map->stripe_len;
+
+			raid_map[(i+rot) % map->num_stripes] = BTRFS_RAID5_P_STRIPE;
+			if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+				raid_map[(i+rot+1) % map->num_stripes] = BTRFS_RAID6_Q_STRIPE;
+
+			*length = map->stripe_len;
+			stripe_index = 0;
+			stripe_offset = 0;
+			multi->num_stripes = map->num_stripes;
+		} else {
+			stripe_index = stripe_nr % nr_data_stripes(map);
+			stripe_nr = stripe_nr / nr_data_stripes(map);
+
+			/*
+			 * Mirror #0 or #1 means the original data block.
+			 * Mirror #2 is RAID5 parity block.
+			 * Mirror #3 is RAID6 Q block.
+			 */
+			if (mirror_num > 1)
+				stripe_index = nr_data_stripes(map) + mirror_num - 2;
+
+			/* We distribute the parity blocks across stripes */
+			stripe_index = (stripe_nr + stripe_index) % map->num_stripes;
+		}
+	} else {
+		/*
+		 * after this division, stripe_nr is the number of stripes
+		 * on this device we have to walk to find the data, and
+		 * stripe_index is the number of our device in the stripe array
+		 */
+		stripe_index = stripe_nr % map->num_stripes;
+		stripe_nr = stripe_nr / map->num_stripes;
+	}
+	BUG_ON(stripe_index >= map->num_stripes);
+
+	for (i = 0; i < multi->num_stripes; i++) {
+		multi->stripes[i].physical =
+			map->stripes[stripe_index].physical + stripe_offset +
+			stripe_nr * map->stripe_len;
+		multi->stripes[i].dev = map->stripes[stripe_index].dev;
+		stripe_index++;
+	}
+	*multi_ret = multi;
+
+	if (type)
+		*type = map->type;
+
+	if (raid_map) {
+		sort_parity_stripes(multi, raid_map);
+		*raid_map_ret = raid_map;
+	}
+out:
+	return 0;
+}
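+
+/*
+ * Typical read-path call (illustrative sketch, error handling elided;
+ * "logical" and "len" are caller-provided):
+ *
+ *	struct btrfs_multi_bio *multi = NULL;
+ *
+ *	ret = btrfs_map_block(fs_info, READ, logical, &len, &multi, 0, NULL);
+ *
+ * On success, read "len" bytes from multi->stripes[0].dev at physical
+ * offset multi->stripes[0].physical, then kfree(multi).
+ */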
+
+int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+		    u64 logical, u64 *length,
+		    struct btrfs_multi_bio **multi_ret, int mirror_num,
+		    u64 **raid_map_ret)
+{
+	return __btrfs_map_block(fs_info, rw, logical, length, NULL,
+				 multi_ret, mirror_num, raid_map_ret);
+}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
new file mode 100644
index 0000000000..9d1a07ae78
--- /dev/null
+++ b/fs/btrfs/volumes.h
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#ifndef __BTRFS_VOLUMES_H__
+#define __BTRFS_VOLUMES_H__
+
+#include 
+#include "ctree.h"
+
+#define BTRFS_STRIPE_LEN	SZ_64K
+
+struct btrfs_device {
+	struct list_head dev_list;
+	struct btrfs_root *dev_root;
+	struct btrfs_fs_devices *fs_devices;
+
+	struct blk_desc *desc;
+	struct disk_partition *part;
+
+	u64 total_devs;
+	u64 super_bytes_used;
+
+	u64 generation;
+
+	/* the internal btrfs device id */
+	u64 devid;
+
+	/* size of the device */
+	u64 total_bytes;
+
+	/* bytes used */
+	u64 bytes_used;
+
+	/* optimal io alignment for this device */
+	u32 io_align;
+
+	/* optimal io width for this device */
+	u32 io_width;
+
+	/* minimal io size for this device */
+	u32 sector_size;
+
+	/* type and info about this device */
+	u64 type;
+
+	/* physical drive uuid (or lvm uuid) */
+	u8 uuid[BTRFS_UUID_SIZE];
+};
+
+struct btrfs_fs_devices {
+	u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+	u8 metadata_uuid[BTRFS_FSID_SIZE]; /* FS metadata uuid */
+
+	u64 latest_devid;
+	u64 lowest_devid;
+	u64 latest_trans;
+
+	u64 total_rw_bytes;
+
+	struct list_head devices;
+	struct list_head list;
+
+	int seeding;
+	struct btrfs_fs_devices *seed;
+};
+
+struct btrfs_bio_stripe {
+	struct btrfs_device *dev;
+	u64 physical;
+};
+
+struct btrfs_multi_bio {
+	int error;
+	int num_stripes;
+	struct btrfs_bio_stripe stripes[];
+};
+
+struct map_lookup {
+	struct cache_extent ce;
+	u64 type;
+	int io_align;
+	int io_width;
+	int stripe_len;
+	int sector_size;
+	int num_stripes;
+	int sub_stripes;
+	struct btrfs_bio_stripe stripes[];
+};
+
+struct btrfs_raid_attr {
+	int sub_stripes;	/* sub_stripes info for map */
+	int dev_stripes;	/* stripes per dev */
+	int devs_max;		/* max devs to use */
+	int devs_min;		/* min devs needed */
+	int tolerated_failures; /* max tolerated fail devs */
+	int devs_increment;	/* ndevs has to be a multiple of this */
+	int ncopies;		/* how many copies the data has */
+	int nparity;		/* number of stripes worth of bytes to store
+				 * parity information */
+	const char raid_name[8]; /* name of the raid */
+	u64 bg_flag;		/* block group flag of the raid */
+};
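+
+/*
+ * As an example (field values shown for illustration only; the
+ * authoritative table is btrfs_raid_array[] in the implementation),
+ * a RAID1 entry keeping two copies on two devices would look like:
+ *
+ *	[BTRFS_RAID_RAID1] = { .sub_stripes = 1, .dev_stripes = 1,
+ *			       .devs_max = 2, .devs_min = 2,
+ *			       .tolerated_failures = 1, .devs_increment = 2,
+ *			       .ncopies = 2, .nparity = 0,
+ *			       .raid_name = "raid1",
+ *			       .bg_flag = BTRFS_BLOCK_GROUP_RAID1 },
+ */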
+
+extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES];
+
+static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
+{
+	if (flags & BTRFS_BLOCK_GROUP_RAID10)
+		return BTRFS_RAID_RAID10;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID1)
+		return BTRFS_RAID_RAID1;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID1C3)
+		return BTRFS_RAID_RAID1C3;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID1C4)
+		return BTRFS_RAID_RAID1C4;
+	else if (flags & BTRFS_BLOCK_GROUP_DUP)
+		return BTRFS_RAID_DUP;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID0)
+		return BTRFS_RAID_RAID0;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID5)
+		return BTRFS_RAID_RAID5;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID6)
+		return BTRFS_RAID_RAID6;
+
+	return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
+}
+
+#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \
+				 (sizeof(struct btrfs_bio_stripe) * (n)))
+#define btrfs_map_lookup_size(n) (sizeof(struct map_lookup) + \
+				  (sizeof(struct btrfs_bio_stripe) * (n)))
+
+#define BTRFS_RAID5_P_STRIPE ((u64)-2)
+#define BTRFS_RAID6_Q_STRIPE ((u64)-1)
+
+static inline u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
+{
+	u64 stripe_size;
+
+	if (type & BTRFS_BLOCK_GROUP_RAID0) {
+		stripe_size = length;
+		stripe_size /= num_stripes;
+	} else if (type & BTRFS_BLOCK_GROUP_RAID10) {
+		stripe_size = length * 2;
+		stripe_size /= num_stripes;
+	} else if (type & BTRFS_BLOCK_GROUP_RAID5) {
+		stripe_size = length;
+		stripe_size /= (num_stripes - 1);
+	} else if (type & BTRFS_BLOCK_GROUP_RAID6) {
+		stripe_size = length;
+		stripe_size /= (num_stripes - 2);
+	} else {
+		stripe_size = length;
+	}
+	return stripe_size;
+}
+
+#ifndef READ
+#define READ 0
+#define WRITE 1
+#define READA 2
+#endif
+
+int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+		      u64 logical, u64 *length, u64 *type,
+		      struct btrfs_multi_bio **multi_ret, int mirror_num,
+		      u64 **raid_map_ret);
+int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+		    u64 logical, u64 *length,
+		    struct btrfs_multi_bio **multi_ret, int mirror_num,
+		    u64 **raid_map_ret);
+int btrfs_next_bg(struct btrfs_fs_info *fs_info, u64 *logical,
+		  u64 *size, u64 type);
+static inline int btrfs_next_bg_metadata(struct btrfs_fs_info *fs_info,
+					 u64 *logical, u64 *size)
+{
+	return btrfs_next_bg(fs_info, logical, size,
+			     BTRFS_BLOCK_GROUP_METADATA);
+}
+static inline int btrfs_next_bg_system(struct btrfs_fs_info *fs_info,
+				       u64 *logical, u64 *size)
+{
+	return btrfs_next_bg(fs_info, logical, size,
+			     BTRFS_BLOCK_GROUP_SYSTEM);
+}
+int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
+int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
+int btrfs_open_devices(struct btrfs_fs_devices *fs_devices);
+int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
+void btrfs_close_all_devices(void);
+int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
+int btrfs_scan_one_device(struct blk_desc *desc, struct disk_partition *part,
+			  struct btrfs_fs_devices **fs_devices_ret,
+			  u64 *total_devs);
+struct list_head *btrfs_scanned_uuids(void);
+struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
+				       u8 *uuid, u8 *fsid);
+int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
+			    struct extent_buffer *leaf,
+			    struct btrfs_chunk *chunk,
+			    int slot, u64 logical);
+u64 btrfs_stripe_length(struct btrfs_fs_info *fs_info,
+			struct extent_buffer *leaf,
+			struct btrfs_chunk *chunk);
+#endif /* __BTRFS_VOLUMES_H__ */
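+
+/*
+ * Worked example for calc_stripe_length() (illustrative): a RAID6 chunk
+ * of length 4GiB with num_stripes = 6 keeps two stripes worth of parity,
+ * so each device holds 4GiB / (6 - 2) = 1GiB.
+ */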