dirsnapshot.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. #
  4. # Copyright 2011 Yesudeep Mangalapilly <yesudeep@gmail.com>
  5. # Copyright 2012 Google, Inc.
  6. # Copyright 2014 Thomas Amland <thomas.amland@gmail.com>
  7. #
  8. # Licensed under the Apache License, Version 2.0 (the "License");
  9. # you may not use this file except in compliance with the License.
  10. # You may obtain a copy of the License at
  11. #
  12. # http://www.apache.org/licenses/LICENSE-2.0
  13. #
  14. # Unless required by applicable law or agreed to in writing, software
  15. # distributed under the License is distributed on an "AS IS" BASIS,
  16. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17. # See the License for the specific language governing permissions and
  18. # limitations under the License.
  19. """
  20. :module: watchdog.utils.dirsnapshot
  21. :synopsis: Directory snapshots and comparison.
  22. :author: yesudeep@google.com (Yesudeep Mangalapilly)
  23. .. ADMONITION:: Where are the moved events? They "disappeared"
  24. This implementation does not take partition boundaries
  25. into consideration. It will only work when the directory
  26. tree is entirely on the same file system. More specifically,
  27. any part of the code that depends on inode numbers can
  28. break if partition boundaries are crossed. In these cases,
  29. the snapshot diff will represent file/directory movement as
  30. created and deleted events.
  31. Classes
  32. -------
  33. .. autoclass:: DirectorySnapshot
  34. :members:
  35. :show-inheritance:
  36. .. autoclass:: DirectorySnapshotDiff
  37. :members:
  38. :show-inheritance:
  39. """
  40. import errno
  41. import os
  42. from stat import S_ISDIR
  43. from watchdog.utils import stat as default_stat
  44. try:
  45. from os import scandir
  46. except ImportError:
  47. from os import listdir as scandir
  48. class DirectorySnapshotDiff(object):
  49. """
  50. Compares two directory snapshots and creates an object that represents
  51. the difference between the two snapshots.
  52. :param ref:
  53. The reference directory snapshot.
  54. :type ref:
  55. :class:`DirectorySnapshot`
  56. :param snapshot:
  57. The directory snapshot which will be compared
  58. with the reference snapshot.
  59. :type snapshot:
  60. :class:`DirectorySnapshot`
  61. """
  62. def __init__(self, ref, snapshot):
  63. created = snapshot.paths - ref.paths
  64. deleted = ref.paths - snapshot.paths
  65. # check that all unchanged paths have the same inode
  66. for path in ref.paths & snapshot.paths:
  67. if ref.inode(path) != snapshot.inode(path):
  68. created.add(path)
  69. deleted.add(path)
  70. # find moved paths
  71. moved = set()
  72. for path in set(deleted):
  73. inode = ref.inode(path)
  74. new_path = snapshot.path(inode)
  75. if new_path:
  76. # file is not deleted but moved
  77. deleted.remove(path)
  78. moved.add((path, new_path))
  79. for path in set(created):
  80. inode = snapshot.inode(path)
  81. old_path = ref.path(inode)
  82. if old_path:
  83. created.remove(path)
  84. moved.add((old_path, path))
  85. # find modified paths
  86. # first check paths that have not moved
  87. modified = set()
  88. for path in ref.paths & snapshot.paths:
  89. if ref.inode(path) == snapshot.inode(path):
  90. if ref.mtime(path) != snapshot.mtime(path) or ref.size(path) != snapshot.size(path):
  91. modified.add(path)
  92. for (old_path, new_path) in moved:
  93. if ref.mtime(old_path) != snapshot.mtime(new_path) or ref.size(old_path) != snapshot.size(new_path):
  94. modified.add(old_path)
  95. self._dirs_created = [path for path in created if snapshot.isdir(path)]
  96. self._dirs_deleted = [path for path in deleted if ref.isdir(path)]
  97. self._dirs_modified = [path for path in modified if ref.isdir(path)]
  98. self._dirs_moved = [(frm, to) for (frm, to) in moved if ref.isdir(frm)]
  99. self._files_created = list(created - set(self._dirs_created))
  100. self._files_deleted = list(deleted - set(self._dirs_deleted))
  101. self._files_modified = list(modified - set(self._dirs_modified))
  102. self._files_moved = list(moved - set(self._dirs_moved))
  103. def __str__(self):
  104. return self.__repr__()
  105. def __repr__(self):
  106. fmt = (
  107. '<{0} files(created={1}, deleted={2}, modified={3}, moved={4}),'
  108. ' folders(created={5}, deleted={6}, modified={7}, moved={8})>'
  109. )
  110. return fmt.format(
  111. type(self).__name__,
  112. len(self._files_created),
  113. len(self._files_deleted),
  114. len(self._files_modified),
  115. len(self._files_moved),
  116. len(self._dirs_created),
  117. len(self._dirs_deleted),
  118. len(self._dirs_modified),
  119. len(self._dirs_moved)
  120. )
  121. @property
  122. def files_created(self):
  123. """List of files that were created."""
  124. return self._files_created
  125. @property
  126. def files_deleted(self):
  127. """List of files that were deleted."""
  128. return self._files_deleted
  129. @property
  130. def files_modified(self):
  131. """List of files that were modified."""
  132. return self._files_modified
  133. @property
  134. def files_moved(self):
  135. """
  136. List of files that were moved.
  137. Each event is a two-tuple the first item of which is the path
  138. that has been renamed to the second item in the tuple.
  139. """
  140. return self._files_moved
  141. @property
  142. def dirs_modified(self):
  143. """
  144. List of directories that were modified.
  145. """
  146. return self._dirs_modified
  147. @property
  148. def dirs_moved(self):
  149. """
  150. List of directories that were moved.
  151. Each event is a two-tuple the first item of which is the path
  152. that has been renamed to the second item in the tuple.
  153. """
  154. return self._dirs_moved
  155. @property
  156. def dirs_deleted(self):
  157. """
  158. List of directories that were deleted.
  159. """
  160. return self._dirs_deleted
  161. @property
  162. def dirs_created(self):
  163. """
  164. List of directories that were created.
  165. """
  166. return self._dirs_created
  167. class DirectorySnapshot(object):
  168. """
  169. A snapshot of stat information of files in a directory.
  170. :param path:
  171. The directory path for which a snapshot should be taken.
  172. :type path:
  173. ``str``
  174. :param recursive:
  175. ``True`` if the entire directory tree should be included in the
  176. snapshot; ``False`` otherwise.
  177. :type recursive:
  178. ``bool``
  179. :param walker_callback:
  180. .. deprecated:: 0.7.2
  181. :param stat:
  182. Use custom stat function that returns a stat structure for path.
  183. Currently only st_dev, st_ino, st_mode and st_mtime are needed.
  184. A function with the signature ``walker_callback(path, stat_info)``
  185. which will be called for every entry in the directory tree.
  186. :param listdir:
  187. Use custom listdir function. For details see ``os.scandir`` if available, else ``os.listdir``.
  188. """
  189. def __init__(self, path, recursive=True,
  190. walker_callback=(lambda p, s: None),
  191. stat=default_stat,
  192. listdir=scandir):
  193. self.recursive = recursive
  194. self.walker_callback = walker_callback
  195. self.stat = stat
  196. self.listdir = listdir
  197. self._stat_info = {}
  198. self._inode_to_path = {}
  199. st = stat(path)
  200. self._stat_info[path] = st
  201. self._inode_to_path[(st.st_ino, st.st_dev)] = path
  202. for p, st in self.walk(path):
  203. i = (st.st_ino, st.st_dev)
  204. self._inode_to_path[i] = p
  205. self._stat_info[p] = st
  206. walker_callback(p, st)
  207. def walk(self, root):
  208. try:
  209. paths = [os.path.join(root, entry if isinstance(entry, str) else entry.name)
  210. for entry in self.listdir(root)]
  211. except OSError as e:
  212. # Directory may have been deleted between finding it in the directory
  213. # list of its parent and trying to delete its contents. If this
  214. # happens we treat it as empty. Likewise if the directory was replaced
  215. # with a file of the same name (less likely, but possible).
  216. if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
  217. return
  218. else:
  219. raise
  220. entries = []
  221. for p in paths:
  222. try:
  223. entry = (p, self.stat(p))
  224. entries.append(entry)
  225. yield entry
  226. except OSError:
  227. continue
  228. if self.recursive:
  229. for path, st in entries:
  230. try:
  231. if S_ISDIR(st.st_mode):
  232. for entry in self.walk(path):
  233. yield entry
  234. except (IOError, OSError) as e:
  235. # IOError for Python 2
  236. # OSError for Python 3
  237. # (should be only PermissionError when dropping Python 2 support)
  238. if e.errno != errno.EACCES:
  239. raise
  240. @property
  241. def paths(self):
  242. """
  243. Set of file/directory paths in the snapshot.
  244. """
  245. return set(self._stat_info.keys())
  246. def path(self, id):
  247. """
  248. Returns path for id. None if id is unknown to this snapshot.
  249. """
  250. return self._inode_to_path.get(id)
  251. def inode(self, path):
  252. """ Returns an id for path. """
  253. st = self._stat_info[path]
  254. return (st.st_ino, st.st_dev)
  255. def isdir(self, path):
  256. return S_ISDIR(self._stat_info[path].st_mode)
  257. def mtime(self, path):
  258. return self._stat_info[path].st_mtime
  259. def size(self, path):
  260. return self._stat_info[path].st_size
  261. def stat_info(self, path):
  262. """
  263. Returns a stat information object for the specified path from
  264. the snapshot.
  265. Attached information is subject to change. Do not use unless
  266. you specify `stat` in constructor. Use :func:`inode`, :func:`mtime`,
  267. :func:`isdir` instead.
  268. :param path:
  269. The path for which stat information should be obtained
  270. from a snapshot.
  271. """
  272. return self._stat_info[path]
  273. def __sub__(self, previous_dirsnap):
  274. """Allow subtracting a DirectorySnapshot object instance from
  275. another.
  276. :returns:
  277. A :class:`DirectorySnapshotDiff` object.
  278. """
  279. return DirectorySnapshotDiff(previous_dirsnap, self)
  280. def __str__(self):
  281. return self.__repr__()
  282. def __repr__(self):
  283. return str(self._stat_info)