verify.js 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. 'use strict'
  2. const {
  3. mkdir,
  4. readFile,
  5. rm,
  6. stat,
  7. truncate,
  8. writeFile,
  9. } = require('fs/promises')
  10. const pMap = require('p-map')
  11. const contentPath = require('./content/path')
  12. const fsm = require('fs-minipass')
  13. const glob = require('./util/glob.js')
  14. const index = require('./entry-index')
  15. const path = require('path')
  16. const ssri = require('ssri')
  // Own-property test that goes through Object.prototype directly, so it keeps
  // working for objects that shadow `hasOwnProperty` or have no prototype.
  const hasOwnProperty = (obj, key) => {
    return Object.prototype.hasOwnProperty.call(obj, key)
  }
  // Build the effective options object: start from the defaults (20 concurrent
  // operations, a logger whose `silly` method does nothing) and let any
  // caller-supplied option override them. A fresh object is returned each call.
  const verifyOpts = (opts) => {
    const defaults = {
      concurrency: 20,
      log: { silly () {} },
    }
    return { ...defaults, ...opts }
  }
  24. module.exports = verify
  // Verify the cache rooted at `cache`.
  //
  // The verification steps run strictly one after another. Whatever object a
  // step resolves to is merged into the result, and the wall-clock duration of
  // every step is recorded in milliseconds under `runTime[<step name>]`.
  // `runTime.total` is the difference between the `endTime` and `startTime`
  // values reported by the first and last steps.
  //
  // `opts` is normalised through `verifyOpts`, so it may be omitted.
  // Resolves to the accumulated stats object.
  async function verify (cache, opts) {
    opts = verifyOpts(opts)
    opts.log.silly('verify', 'verifying cache at', cache)

    const pipeline = [
      markStartTime,
      fixPerms,
      garbageCollect,
      rebuildIndex,
      cleanTmp,
      writeVerifile,
      markEndTime,
    ]

    const stats = {}
    for (const step of pipeline) {
      const startedAt = new Date()
      const reported = await step(cache, opts)
      if (reported) {
        for (const [name, value] of Object.entries(reported)) {
          stats[name] = value
        }
      }
      const finishedAt = new Date()

      // Created lazily, after the first step's own stats have been merged.
      if (!stats.runTime) {
        stats.runTime = {}
      }
      stats.runTime[step.name] = finishedAt - startedAt
    }

    stats.runTime.total = stats.endTime - stats.startTime
    opts.log.silly(
      'verify',
      'verification finished for',
      cache,
      'in',
      `${stats.runTime.total}ms`
    )
    return stats
  }
  // Verification step: capture the moment the run began.
  // Both arguments are accepted for step-signature uniformity and are unused.
  // Resolves to `{ startTime: Date }`.
  async function markStartTime (cache, opts) {
    const startTime = new Date()
    return { startTime }
  }
  // Verification step: capture the moment the run finished.
  // Both arguments are accepted for step-signature uniformity and are unused.
  // Resolves to `{ endTime: Date }`.
  async function markEndTime (cache, opts) {
    const endTime = new Date()
    return { endTime }
  }
  // Verification step: make sure the cache root directory exists, creating any
  // missing parent directories along the way.
  // Contributes no stats, so it always resolves to `null`.
  async function fixPerms (cache, opts) {
    const { log } = opts
    log.silly('verify', 'fixing cache permissions')

    const mkdirOptions = { recursive: true }
    await mkdir(cache, mkdirOptions)

    return null
  }
  // Verification step: a simple mark-and-sweep collector for cached content.
  //
  // Mark:  stream every index entry, drop the ones `opts.filter` rejects, and
  //        remember the integrity string of each remaining entry as "live".
  // Sweep: list every file below the content directory. A live file has its
  //        checksum verified (a failing file is removed by `verifyContent`);
  //        a file no entry refers to is removed outright.
  //
  // Resolves to the counters `verifiedContent`, `reclaimedCount`,
  // `reclaimedSize`, `badContentCount` and `keptSize`.
  async function garbageCollect (cache, opts) {
    opts.log.silly('verify', 'garbage collecting content')

    // --- mark ---
    const liveContent = new Set()
    const entryStream = index.lsStream(cache)
    entryStream.on('data', (entry) => {
      const wanted = !opts.filter || opts.filter(entry)
      if (wanted) {
        liveContent.add(entry.integrity.toString())
      }
    })
    await new Promise((resolve, reject) => {
      entryStream.on('end', resolve).on('error', reject)
    })

    // --- sweep ---
    const pattern = path.join(contentPath.contentDir(cache), '**')
    const files = await glob(pattern, {
      follow: false,
      nodir: true,
      nosort: true,
    })

    const stats = {
      verifiedContent: 0,
      reclaimedCount: 0,
      reclaimedSize: 0,
      badContentCount: 0,
      keptSize: 0,
    }

    const sweep = async (file) => {
      // The last three path segments concatenate to the hex digest; the
      // segment just before them names the hash algorithm.
      const segments = file.split(/[/\\]/)
      const count = segments.length
      const digest = segments.slice(count - 3).join('')
      const algo = segments[count - 4]
      const integrity = ssri.fromHex(digest, algo)

      if (!liveContent.has(integrity.toString())) {
        // No entries refer to this content. We can delete.
        stats.reclaimedCount++
        const { size } = await stat(file)
        await rm(file, { recursive: true, force: true })
        stats.reclaimedSize += size
        return stats
      }

      const info = await verifyContent(file, integrity)
      if (info.valid) {
        stats.verifiedContent++
        stats.keptSize += info.size
      } else {
        stats.reclaimedCount++
        stats.badContentCount++
        stats.reclaimedSize += info.size
      }
      return stats
    }

    await pMap(files, sweep, { concurrency: opts.concurrency })
    return stats
  }
  // Check one content file against its expected integrity value.
  //
  // Resolves to `{ size, valid }`:
  //   - checksum matches         -> the file's size, valid: true
  //   - file does not exist      -> size 0, valid: false
  //   - checksum mismatch        -> the file's size, valid: false, and the
  //                                 file is removed
  // Any other error is rethrown unchanged.
  async function verifyContent (filepath, sri) {
    const report = {}
    try {
      const fileStat = await stat(filepath)
      report.size = fileStat.size
      report.valid = true
      await ssri.checkStream(new fsm.ReadStream(filepath), sri)
      return report
    } catch (err) {
      switch (err.code) {
        case 'ENOENT':
          return { size: 0, valid: false }
        case 'EINTEGRITY':
          await rm(filepath, { recursive: true, force: true })
          report.valid = false
          return report
        default:
          throw err
      }
    }
  }
  // Verification step: rewrite the index from the entries currently listed.
  //
  // Entries are grouped by the hash of their key. A bucket is created for
  // every hash that is seen, even when all of its entries are rejected by
  // `opts.filter`, so that the bucket file still gets rewritten (to empty).
  // Each bucket is then handed to `rebuildBucket`, with at most
  // `opts.concurrency` buckets in flight at once.
  //
  // Resolves to the counters `missingContent`, `rejectedEntries` and
  // `totalEntries`; `rebuildBucket` updates the same object.
  async function rebuildIndex (cache, opts) {
    opts.log.silly('verify', 'rebuilding index')
    const entries = await index.ls(cache)
    const stats = {
      missingContent: 0,
      rejectedEntries: 0,
      totalEntries: 0,
    }

    const buckets = {}
    for (const key in entries) {
      /* istanbul ignore else */
      if (hasOwnProperty(entries, key)) {
        const bucketId = index.hashKey(key)
        const entry = entries[key]
        const excluded = opts.filter && !opts.filter(entry)
        if (excluded) {
          stats.rejectedEntries++
        }

        // First entry seen for this hash: start an empty bucket and remember
        // which file it maps to.
        if (!buckets[bucketId]) {
          const fresh = []
          fresh._path = index.bucketPath(cache, key)
          buckets[bucketId] = fresh
        }

        if (!excluded) {
          buckets[bucketId].push(entry)
        }
      }
    }

    await pMap(
      Object.values(buckets),
      (bucket) => rebuildBucket(cache, bucket, stats, opts),
      { concurrency: opts.concurrency }
    )
    return stats
  }
  // Rewrite a single bucket file.
  //
  // The file at `bucket._path` is truncated, then each entry in `bucket` is
  // re-inserted if its content file can be stat'ed. An ENOENT raised while
  // handling an entry counts it as rejected with missing content; any other
  // error is rethrown. `stats` is updated in place and nothing is returned.
  async function rebuildBucket (cache, bucket, stats, opts) {
    await truncate(bucket._path)
    // This needs to be serialized because cacache explicitly
    // lets very racy bucket conflicts clobber each other.
    for (const { key, integrity, metadata, size } of bucket) {
      const contentFile = contentPath(cache, integrity)
      try {
        await stat(contentFile)
        await index.insert(cache, key, integrity, { metadata, size })
        stats.totalEntries++
      } catch (err) {
        if (err.code !== 'ENOENT') {
          throw err
        }
        stats.rejectedEntries++
        stats.missingContent++
      }
    }
  }
  // Verification step: remove the cache's `tmp` directory and everything in
  // it. A missing directory is not an error because removal is forced.
  // Returns the promise produced by `rm`.
  function cleanTmp (cache, opts) {
    opts.log.silly('verify', 'cleaning tmp directory')
    const tmpDir = path.join(cache, 'tmp')
    const removal = { recursive: true, force: true }
    return rm(tmpDir, removal)
  }
  // Verification step: record when verification last ran by writing the
  // current epoch time in milliseconds, as decimal text, to
  // `<cache>/_lastverified`.
  async function writeVerifile (cache, opts) {
    const verifile = path.join(cache, '_lastverified')
    opts.log.silly('verify', `writing verifile to ${verifile}`)
    const timestamp = String(Date.now())
    return writeFile(verifile, timestamp)
  }
  228. module.exports.lastRun = lastRun
  // Report when the cache was last verified.
  // Reads `<cache>/_lastverified` as UTF-8 text, converts it to a number and
  // resolves to the corresponding Date. Rejects if the file cannot be read.
  async function lastRun (cache) {
    const verifile = path.join(cache, '_lastverified')
    const recorded = await readFile(verifile, { encoding: 'utf8' })
    return new Date(Number(recorded))
  }