// entry-index.js
  1. 'use strict'
  2. const crypto = require('crypto')
  3. const {
  4. appendFile,
  5. mkdir,
  6. readFile,
  7. readdir,
  8. rm,
  9. writeFile,
  10. } = require('fs/promises')
  11. const { Minipass } = require('minipass')
  12. const path = require('path')
  13. const ssri = require('ssri')
  14. const uniqueFilename = require('unique-filename')
  15. const contentPath = require('./content/path')
  16. const hashToSegments = require('./util/hash-to-segments')
  17. const indexV = require('../package.json')['cache-version'].index
  18. const { moveFile } = require('@npmcli/fs')
  19. module.exports.NotFoundError = class NotFoundError extends Error {
  20. constructor (cache, key) {
  21. super(`No cache entry for ${key} found in ${cache}`)
  22. this.code = 'ENOENT'
  23. this.cache = cache
  24. this.key = key
  25. }
  26. }
module.exports.compact = compact

// Deduplicate and rewrite the bucket file for `key`, atomically.
// `matchFn(oldEntry, newEntry)` decides whether two entries are duplicates.
// opts.validateEntry (optional) decides whether an entry survives; when
// absent, a null integrity is treated as a tombstone that deletes
// everything older than it. opts.tmpPrefix names the temporary file.
// Returns the surviving entries, newest first, formatted for consumers.
async function compact (cache, key, matchFn, opts = {}) {
  const bucket = bucketPath(cache, key)
  const entries = await bucketEntries(bucket)
  const newEntries = []
  // we loop backwards because the bottom-most result is the newest
  // since we add new entries with appendFile
  for (let i = entries.length - 1; i >= 0; --i) {
    const entry = entries[i]
    // a null integrity could mean either a delete was appended
    // or the user has simply stored an index that does not map
    // to any content. we determine if the user wants to keep the
    // null integrity based on the validateEntry function passed in options.
    // if the integrity is null and no validateEntry is provided, we break
    // as we consider the null integrity to be a deletion of everything
    // that came before it.
    if (entry.integrity === null && !opts.validateEntry) {
      break
    }

    // if this entry is valid, and it is either the first entry or
    // the newEntries array doesn't already include an entry that
    // matches this one based on the provided matchFn, then we add
    // it to the beginning of our list
    if ((!opts.validateEntry || opts.validateEntry(entry) === true) &&
      (newEntries.length === 0 ||
        !newEntries.find((oldEntry) => matchFn(oldEntry, entry)))) {
      newEntries.unshift(entry)
    }
  }

  // Serialize the surviving entries in the same "<hash>\t<json>" line
  // format that insert() appends, preserving the leading newline.
  const newIndex = '\n' + newEntries.map((entry) => {
    const stringified = JSON.stringify(entry)
    const hash = hashEntry(stringified)
    return `${hash}\t${stringified}`
  }).join('\n')

  // Create a unique temp file target under <cache>/tmp for the new index.
  const setup = async () => {
    const target = uniqueFilename(path.join(cache, 'tmp'), opts.tmpPrefix)
    await mkdir(path.dirname(target), { recursive: true })
    return {
      target,
      moved: false,
    }
  }

  // Remove the temp file unless it was already renamed into place.
  const teardown = async (tmp) => {
    if (!tmp.moved) {
      return rm(tmp.target, { recursive: true, force: true })
    }
  }

  // Write the new index to the temp file ('wx' guards against clobbering
  // an existing temp file), then rename it over the live bucket.
  const write = async (tmp) => {
    await writeFile(tmp.target, newIndex, { flag: 'wx' })
    await mkdir(path.dirname(bucket), { recursive: true })
    // we use @npmcli/move-file directly here because we
    // want to overwrite the existing file
    await moveFile(tmp.target, bucket)
    tmp.moved = true
  }

  // write the file atomically
  const tmp = await setup()
  try {
    await write(tmp)
  } finally {
    await teardown(tmp)
  }

  // we reverse the list we generated such that the newest
  // entries come first in order to make looping through them easier
  // the true passed to formatEntry tells it to keep null
  // integrity values, if they made it this far it's because
  // validateEntry returned true, and as such we should return it
  return newEntries.reverse().map((entry) => formatEntry(cache, entry, true))
}
module.exports.insert = insert

// Append a new index entry for `key` to its bucket file and return the
// formatted entry. `integrity` may be null/falsy, which records a deletion
// marker. opts.metadata and opts.size are stored alongside the entry.
async function insert (cache, key, integrity, opts = {}) {
  const { metadata, size } = opts
  const bucket = bucketPath(cache, key)
  const entry = {
    key,
    integrity: integrity && ssri.stringify(integrity),
    time: Date.now(),
    size,
    metadata,
  }
  try {
    await mkdir(path.dirname(bucket), { recursive: true })
    const stringified = JSON.stringify(entry)
    // NOTE - Cleverness ahoy!
    //
    // This works because it's tremendously unlikely for an entry to corrupt
    // another in a way that still produces a matching hash. Each appended
    // line is "<hash>\t<json>"; the hash is recomputed and verified on read
    // (see _bucketEntries) and any line that fails the check is discarded.
    //
    // Thanks to @isaacs for the whiteboarding session that ended up with
    // this.
    await appendFile(bucket, `\n${hashEntry(stringified)}\t${stringified}`)
  } catch (err) {
    // ENOENT here presumably means the cache dir vanished out from under
    // us mid-write; treat the insert as a silent no-op. TODO confirm.
    if (err.code === 'ENOENT') {
      return undefined
    }
    throw err
  }
  return formatEntry(cache, entry)
}
  127. module.exports.find = find
  128. async function find (cache, key) {
  129. const bucket = bucketPath(cache, key)
  130. try {
  131. const entries = await bucketEntries(bucket)
  132. return entries.reduce((latest, next) => {
  133. if (next && next.key === key) {
  134. return formatEntry(cache, next)
  135. } else {
  136. return latest
  137. }
  138. }, null)
  139. } catch (err) {
  140. if (err.code === 'ENOENT') {
  141. return null
  142. } else {
  143. throw err
  144. }
  145. }
  146. }
  147. module.exports.delete = del
  148. function del (cache, key, opts = {}) {
  149. if (!opts.removeFully) {
  150. return insert(cache, key, null, opts)
  151. }
  152. const bucket = bucketPath(cache, key)
  153. return rm(bucket, { recursive: true, force: true })
  154. }
module.exports.lsStream = lsStream

// Stream every entry in the cache index as formatted entry objects on an
// object-mode Minipass stream. Walks the two-level bucket directory tree
// in parallel, parsing each bucket file it finds. Missing directories and
// files are treated as empty; any other error is re-emitted on the stream
// as an 'error' event.
function lsStream (cache) {
  const indexDir = bucketDir(cache)
  const stream = new Minipass({ objectMode: true })

  // Set all this up to run on the stream and then just return the stream
  Promise.resolve().then(async () => {
    const buckets = await readdirOrEmpty(indexDir)
    await Promise.all(buckets.map(async (bucket) => {
      const bucketPath = path.join(indexDir, bucket)
      const subbuckets = await readdirOrEmpty(bucketPath)
      await Promise.all(subbuckets.map(async (subbucket) => {
        const subbucketPath = path.join(bucketPath, subbucket)

        // "/cachename/<bucket 0xFF>/<bucket 0xFF>./*"
        const subbucketEntries = await readdirOrEmpty(subbucketPath)
        await Promise.all(subbucketEntries.map(async (entry) => {
          const entryPath = path.join(subbucketPath, entry)
          try {
            const entries = await bucketEntries(entryPath)
            // a Map deduplicates by key: later lines in the bucket file
            // overwrite earlier ones, so only the newest entry per key
            // survives.
            const reduced = entries.reduce((acc, entry) => {
              acc.set(entry.key, entry)
              return acc
            }, new Map())
            // reduced is a map of key => entry
            for (const entry of reduced.values()) {
              const formatted = formatEntry(cache, entry)
              // formatEntry returns null for tombstones; skip those.
              if (formatted) {
                stream.write(formatted)
              }
            }
          } catch (err) {
            // A bucket file removed between readdir and read is fine.
            if (err.code === 'ENOENT') {
              return undefined
            }
            throw err
          }
        }))
      }))
    }))
    stream.end()
    return stream
  }).catch(err => stream.emit('error', err))

  return stream
}
  200. module.exports.ls = ls
  201. async function ls (cache) {
  202. const entries = await lsStream(cache).collect()
  203. return entries.reduce((acc, xs) => {
  204. acc[xs.key] = xs
  205. return acc
  206. }, {})
  207. }
  208. module.exports.bucketEntries = bucketEntries
  209. async function bucketEntries (bucket, filter) {
  210. const data = await readFile(bucket, 'utf8')
  211. return _bucketEntries(data, filter)
  212. }
  213. function _bucketEntries (data, filter) {
  214. const entries = []
  215. data.split('\n').forEach((entry) => {
  216. if (!entry) {
  217. return
  218. }
  219. const pieces = entry.split('\t')
  220. if (!pieces[1] || hashEntry(pieces[1]) !== pieces[0]) {
  221. // Hash is no good! Corruption or malice? Doesn't matter!
  222. // EJECT EJECT
  223. return
  224. }
  225. let obj
  226. try {
  227. obj = JSON.parse(pieces[1])
  228. } catch (_) {
  229. // eslint-ignore-next-line no-empty-block
  230. }
  231. // coverage disabled here, no need to test with an entry that parses to something falsey
  232. // istanbul ignore else
  233. if (obj) {
  234. entries.push(obj)
  235. }
  236. })
  237. return entries
  238. }
  239. module.exports.bucketDir = bucketDir
  240. function bucketDir (cache) {
  241. return path.join(cache, `index-v${indexV}`)
  242. }
  243. module.exports.bucketPath = bucketPath
  244. function bucketPath (cache, key) {
  245. const hashed = hashKey(key)
  246. return path.join.apply(
  247. path,
  248. [bucketDir(cache)].concat(hashToSegments(hashed))
  249. )
  250. }
  251. module.exports.hashKey = hashKey
  252. function hashKey (key) {
  253. return hash(key, 'sha256')
  254. }
  255. module.exports.hashEntry = hashEntry
  256. function hashEntry (str) {
  257. return hash(str, 'sha1')
  258. }
  259. function hash (str, digest) {
  260. return crypto
  261. .createHash(digest)
  262. .update(str)
  263. .digest('hex')
  264. }
  265. function formatEntry (cache, entry, keepAll) {
  266. // Treat null digests as deletions. They'll shadow any previous entries.
  267. if (!entry.integrity && !keepAll) {
  268. return null
  269. }
  270. return {
  271. key: entry.key,
  272. integrity: entry.integrity,
  273. path: entry.integrity ? contentPath(cache, entry.integrity) : undefined,
  274. size: entry.size,
  275. time: entry.time,
  276. metadata: entry.metadata,
  277. }
  278. }
  279. function readdirOrEmpty (dir) {
  280. return readdir(dir).catch((err) => {
  281. if (err.code === 'ENOENT' || err.code === 'ENOTDIR') {
  282. return []
  283. }
  284. throw err
  285. })
  286. }