1/* extent-scan.c -- core functions for scanning extents 2 Copyright (C) 2010-2015 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation, either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. 16 17 Written by Jie Liu (jeff.liu@oracle.com). */ 18 19#include <config.h> 20#include <sys/types.h> 21#include <sys/ioctl.h> 22#include <sys/utsname.h> 23#include <assert.h> 24 25#include "system.h" 26#include "extent-scan.h" 27#include "fiemap.h" 28#include "xstrtol.h" 29 30 31/* Work around Linux kernel issues on BTRFS and EXT4. */ 32static bool 33extent_need_sync (void) 34{ 35 /* For now always return true, to be on the safe side. 36 If/when FIEMAP semantics are well defined (before SEEK_HOLE support 37 is usable) and kernels implementing them are in use, we may relax 38 this once again. */ 39 return true; 40 41#if FIEMAP_BEHAVIOR_IS_DEFINED_AND_USABLE 42 static int need_sync = -1; 43 44 if (need_sync == -1) 45 { 46 struct utsname name; 47 need_sync = 0; /* No workaround by default. */ 48 49# ifdef __linux__ 50 if (uname (&name) != -1 && STRNCMP_LIT (name.release, "2.6.") == 0) 51 { 52 unsigned long val; 53 if (xstrtoul (name.release + 4, NULL, 10, &val, NULL) == LONGINT_OK) 54 { 55 if (val < 39) 56 need_sync = 1; 57 } 58 } 59# endif 60 } 61 62 return need_sync; 63#endif 64} 65 66/* Allocate space for struct extent_scan, initialize the entries if 67 necessary and return it as the input argument of extent_scan_read(). */ 68extern void 69extent_scan_init (int src_fd, struct extent_scan *scan) 70{ 71 scan->fd = src_fd; 72 scan->ei_count = 0; 73 scan->ext_info = NULL; 74 scan->scan_start = 0; 75 scan->initial_scan_failed = false; 76 scan->hit_final_extent = false; 77 scan->fm_flags = extent_need_sync () ? FIEMAP_FLAG_SYNC : 0; 78} 79 80#ifdef __linux__ 81# ifndef FS_IOC_FIEMAP 82# define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap) 83# endif 84/* Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to 85 obtain a map of file extents excluding holes. */ 86extern bool 87extent_scan_read (struct extent_scan *scan) 88{ 89 unsigned int si = 0; 90 struct extent_info *last_ei = scan->ext_info; 91 92 while (true) 93 { 94 union { struct fiemap f; char c[4096]; } fiemap_buf; 95 struct fiemap *fiemap = &fiemap_buf.f; 96 struct fiemap_extent *fm_extents = &fiemap->fm_extents[0]; 97 enum { count = (sizeof fiemap_buf - sizeof *fiemap)/sizeof *fm_extents }; 98 verify (count > 1); 99 100 /* This is required at least to initialize fiemap->fm_start, 101 but also serves (in mid 2010) to appease valgrind, which 102 appears not to know the semantics of the FIEMAP ioctl. */ 103 memset (&fiemap_buf, 0, sizeof fiemap_buf); 104 105 fiemap->fm_start = scan->scan_start; 106 fiemap->fm_flags = scan->fm_flags; 107 fiemap->fm_extent_count = count; 108 fiemap->fm_length = FIEMAP_MAX_OFFSET - scan->scan_start; 109 110 /* Fall back to the standard copy if call ioctl(2) failed for 111 the first time. */ 112 if (ioctl (scan->fd, FS_IOC_FIEMAP, fiemap) < 0) 113 { 114 if (scan->scan_start == 0) 115 scan->initial_scan_failed = true; 116 return false; 117 } 118 119 /* If 0 extents are returned, then no more scans are needed. */ 120 if (fiemap->fm_mapped_extents == 0) 121 { 122 scan->hit_final_extent = true; 123 return scan->scan_start != 0; 124 } 125 126 assert (scan->ei_count <= SIZE_MAX - fiemap->fm_mapped_extents); 127 scan->ei_count += fiemap->fm_mapped_extents; 128 { 129 /* last_ei points into a buffer that may be freed via xnrealloc. 130 Record its offset and adjust after allocation. */ 131 size_t prev_idx = last_ei - scan->ext_info; 132 scan->ext_info = xnrealloc (scan->ext_info, scan->ei_count, 133 sizeof (struct extent_info)); 134 last_ei = scan->ext_info + prev_idx; 135 } 136 137 unsigned int i = 0; 138 for (i = 0; i < fiemap->fm_mapped_extents; i++) 139 { 140 assert (fm_extents[i].fe_logical 141 <= OFF_T_MAX - fm_extents[i].fe_length); 142 143 verify (sizeof last_ei->ext_flags >= sizeof fm_extents->fe_flags); 144 145 if (si && last_ei->ext_flags 146 == (fm_extents[i].fe_flags & ~FIEMAP_EXTENT_LAST) 147 && (last_ei->ext_logical + last_ei->ext_length 148 == fm_extents[i].fe_logical)) 149 { 150 /* Merge previous with last. */ 151 last_ei->ext_length += fm_extents[i].fe_length; 152 /* Copy flags in case different. */ 153 last_ei->ext_flags = fm_extents[i].fe_flags; 154 } 155 else if ((si == 0 && scan->scan_start > fm_extents[i].fe_logical) 156 || (si && (last_ei->ext_logical + last_ei->ext_length 157 > fm_extents[i].fe_logical))) 158 { 159 /* BTRFS before 2.6.38 could return overlapping extents 160 for sparse files. We adjust the returned extents 161 rather than failing, as otherwise it would be inefficient 162 to detect this on the initial scan. */ 163 uint64_t new_logical; 164 uint64_t length_adjust; 165 if (si == 0) 166 new_logical = scan->scan_start; 167 else 168 { 169 /* We could return here if scan->scan_start == 0 170 but don't so as to minimize special cases. */ 171 new_logical = last_ei->ext_logical + last_ei->ext_length; 172 } 173 length_adjust = new_logical - fm_extents[i].fe_logical; 174 /* If an extent is contained within the previous one, fail. */ 175 if (length_adjust < fm_extents[i].fe_length) 176 { 177 if (scan->scan_start == 0) 178 scan->initial_scan_failed = true; 179 return false; 180 } 181 fm_extents[i].fe_logical = new_logical; 182 fm_extents[i].fe_length -= length_adjust; 183 /* Process the adjusted extent again. */ 184 i--; 185 continue; 186 } 187 else 188 { 189 last_ei = scan->ext_info + si; 190 last_ei->ext_logical = fm_extents[i].fe_logical; 191 last_ei->ext_length = fm_extents[i].fe_length; 192 last_ei->ext_flags = fm_extents[i].fe_flags; 193 si++; 194 } 195 } 196 197 if (last_ei->ext_flags & FIEMAP_EXTENT_LAST) 198 scan->hit_final_extent = true; 199 200 /* If we have enough extents, discard the last as it might 201 be merged with one from the next scan. */ 202 if (si > count && !scan->hit_final_extent) 203 last_ei = scan->ext_info + --si - 1; 204 205 /* We don't bother reallocating any trailing slots. */ 206 scan->ei_count = si; 207 208 if (scan->hit_final_extent) 209 break; 210 else 211 scan->scan_start = last_ei->ext_logical + last_ei->ext_length; 212 213 if (si >= count) 214 break; 215 } 216 217 return true; 218} 219#else 220extern bool 221extent_scan_read (struct extent_scan *scan _GL_UNUSED) 222{ 223 scan->initial_scan_failed = true; 224 errno = ENOTSUP; 225 return false; 226} 227#endif 228