fetcher.js 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. // This is the base class that the other fetcher types in lib
  2. // all descend from.
  3. // It handles the unpacking and retry logic that is shared among
  4. // all of the other Fetcher types.
  5. const npa = require('npm-package-arg')
  6. const ssri = require('ssri')
  7. const { promisify } = require('util')
  8. const { basename, dirname } = require('path')
  9. const tar = require('tar')
  10. const log = require('proc-log')
  11. const retry = require('promise-retry')
  12. const fs = require('fs/promises')
  13. const fsm = require('fs-minipass')
  14. const cacache = require('cacache')
  15. const isPackageBin = require('./util/is-package-bin.js')
  16. const removeTrailingSlashes = require('./util/trailing-slashes.js')
  17. const getContents = require('@npmcli/installed-package-contents')
  18. const readPackageJsonFast = require('read-package-json-fast')
  19. const readPackageJson = promisify(require('read-package-json'))
  20. const { Minipass } = require('minipass')
  21. const cacheDir = require('./util/cache-dir.js')
  22. // Private methods.
  23. // Child classes should not have to override these.
  24. // Users should never call them.
  25. const _extract = Symbol('_extract')
  26. const _mkdir = Symbol('_mkdir')
  27. const _empty = Symbol('_empty')
  28. const _toFile = Symbol('_toFile')
  29. const _tarxOptions = Symbol('_tarxOptions')
  30. const _entryMode = Symbol('_entryMode')
  31. const _istream = Symbol('_istream')
  32. const _assertType = Symbol('_assertType')
  33. const _tarballFromCache = Symbol('_tarballFromCache')
  34. const _tarballFromResolved = Symbol.for('pacote.Fetcher._tarballFromResolved')
  35. const _cacheFetches = Symbol.for('pacote.Fetcher._cacheFetches')
  36. const _readPackageJson = Symbol.for('package.Fetcher._readPackageJson')
  37. class FetcherBase {
  38. constructor (spec, opts) {
  39. if (!opts || typeof opts !== 'object') {
  40. throw new TypeError('options object is required')
  41. }
  42. this.spec = npa(spec, opts.where)
  43. this.allowGitIgnore = !!opts.allowGitIgnore
  44. // a bit redundant because presumably the caller already knows this,
  45. // but it makes it easier to not have to keep track of the requested
  46. // spec when we're dispatching thousands of these at once, and normalizing
  47. // is nice. saveSpec is preferred if set, because it turns stuff like
  48. // x/y#committish into github:x/y#committish. use name@rawSpec for
  49. // registry deps so that we turn xyz and xyz@ -> xyz@
  50. this.from = this.spec.registry
  51. ? `${this.spec.name}@${this.spec.rawSpec}` : this.spec.saveSpec
  52. this[_assertType]()
  53. // clone the opts object so that others aren't upset when we mutate it
  54. // by adding/modifying the integrity value.
  55. this.opts = { ...opts }
  56. this.cache = opts.cache || cacheDir()
  57. this.resolved = opts.resolved || null
  58. // default to caching/verifying with sha512, that's what we usually have
  59. // need to change this default, or start overriding it, when sha512
  60. // is no longer strong enough.
  61. this.defaultIntegrityAlgorithm = opts.defaultIntegrityAlgorithm || 'sha512'
  62. if (typeof opts.integrity === 'string') {
  63. this.opts.integrity = ssri.parse(opts.integrity)
  64. }
  65. this.package = null
  66. this.type = this.constructor.name
  67. this.fmode = opts.fmode || 0o666
  68. this.dmode = opts.dmode || 0o777
  69. // we don't need a default umask, because we don't chmod files coming
  70. // out of package tarballs. they're forced to have a mode that is
  71. // valid, regardless of what's in the tarball entry, and then we let
  72. // the process's umask setting do its job. but if configured, we do
  73. // respect it.
  74. this.umask = opts.umask || 0
  75. this.preferOnline = !!opts.preferOnline
  76. this.preferOffline = !!opts.preferOffline
  77. this.offline = !!opts.offline
  78. this.before = opts.before
  79. this.fullMetadata = this.before ? true : !!opts.fullMetadata
  80. this.fullReadJson = !!opts.fullReadJson
  81. if (this.fullReadJson) {
  82. this[_readPackageJson] = readPackageJson
  83. } else {
  84. this[_readPackageJson] = readPackageJsonFast
  85. }
  86. // rrh is a registry hostname or 'never' or 'always'
  87. // defaults to registry.npmjs.org
  88. this.replaceRegistryHost = (!opts.replaceRegistryHost || opts.replaceRegistryHost === 'npmjs') ?
  89. 'registry.npmjs.org' : opts.replaceRegistryHost
  90. this.defaultTag = opts.defaultTag || 'latest'
  91. this.registry = removeTrailingSlashes(opts.registry || 'https://registry.npmjs.org')
  92. // command to run 'prepare' scripts on directories and git dirs
  93. // To use pacote with yarn, for example, set npmBin to 'yarn'
  94. // and npmCliConfig with yarn's equivalents.
  95. this.npmBin = opts.npmBin || 'npm'
  96. // command to install deps for preparing
  97. this.npmInstallCmd = opts.npmInstallCmd || ['install', '--force']
  98. // XXX fill more of this in based on what we know from this.opts
  99. // we explicitly DO NOT fill in --tag, though, since we are often
  100. // going to be packing in the context of a publish, which may set
  101. // a dist-tag, but certainly wants to keep defaulting to latest.
  102. this.npmCliConfig = opts.npmCliConfig || [
  103. `--cache=${dirname(this.cache)}`,
  104. `--prefer-offline=${!!this.preferOffline}`,
  105. `--prefer-online=${!!this.preferOnline}`,
  106. `--offline=${!!this.offline}`,
  107. ...(this.before ? [`--before=${this.before.toISOString()}`] : []),
  108. '--no-progress',
  109. '--no-save',
  110. '--no-audit',
  111. // override any omit settings from the environment
  112. '--include=dev',
  113. '--include=peer',
  114. '--include=optional',
  115. // we need the actual things, not just the lockfile
  116. '--no-package-lock-only',
  117. '--no-dry-run',
  118. ]
  119. }
  120. get integrity () {
  121. return this.opts.integrity || null
  122. }
  123. set integrity (i) {
  124. if (!i) {
  125. return
  126. }
  127. i = ssri.parse(i)
  128. const current = this.opts.integrity
  129. // do not ever update an existing hash value, but do
  130. // merge in NEW algos and hashes that we don't already have.
  131. if (current) {
  132. current.merge(i)
  133. } else {
  134. this.opts.integrity = i
  135. }
  136. }
  137. get notImplementedError () {
  138. return new Error('not implemented in this fetcher type: ' + this.type)
  139. }
  140. // override in child classes
  141. // Returns a Promise that resolves to this.resolved string value
  142. resolve () {
  143. return this.resolved ? Promise.resolve(this.resolved)
  144. : Promise.reject(this.notImplementedError)
  145. }
  146. packument () {
  147. return Promise.reject(this.notImplementedError)
  148. }
  149. // override in child class
  150. // returns a manifest containing:
  151. // - name
  152. // - version
  153. // - _resolved
  154. // - _integrity
  155. // - plus whatever else was in there (corgi, full metadata, or pj file)
  156. manifest () {
  157. return Promise.reject(this.notImplementedError)
  158. }
  159. // private, should be overridden.
  160. // Note that they should *not* calculate or check integrity or cache,
  161. // but *just* return the raw tarball data stream.
  162. [_tarballFromResolved] () {
  163. throw this.notImplementedError
  164. }
  165. // public, should not be overridden
  166. tarball () {
  167. return this.tarballStream(stream => stream.concat().then(data => {
  168. data.integrity = this.integrity && String(this.integrity)
  169. data.resolved = this.resolved
  170. data.from = this.from
  171. return data
  172. }))
  173. }
  174. // private
  175. // Note: cacache will raise a EINTEGRITY error if the integrity doesn't match
  176. [_tarballFromCache] () {
  177. return cacache.get.stream.byDigest(this.cache, this.integrity, this.opts)
  178. }
  179. get [_cacheFetches] () {
  180. return true
  181. }
  182. [_istream] (stream) {
  183. // if not caching this, just return it
  184. if (!this.opts.cache || !this[_cacheFetches]) {
  185. // instead of creating a new integrity stream, we only piggyback on the
  186. // provided stream's events
  187. if (stream.hasIntegrityEmitter) {
  188. stream.on('integrity', i => this.integrity = i)
  189. return stream
  190. }
  191. const istream = ssri.integrityStream(this.opts)
  192. istream.on('integrity', i => this.integrity = i)
  193. stream.on('error', err => istream.emit('error', err))
  194. return stream.pipe(istream)
  195. }
  196. // we have to return a stream that gets ALL the data, and proxies errors,
  197. // but then pipe from the original tarball stream into the cache as well.
  198. // To do this without losing any data, and since the cacache put stream
  199. // is not a passthrough, we have to pipe from the original stream into
  200. // the cache AFTER we pipe into the middleStream. Since the cache stream
  201. // has an asynchronous flush to write its contents to disk, we need to
  202. // defer the middleStream end until the cache stream ends.
  203. const middleStream = new Minipass()
  204. stream.on('error', err => middleStream.emit('error', err))
  205. stream.pipe(middleStream, { end: false })
  206. const cstream = cacache.put.stream(
  207. this.opts.cache,
  208. `pacote:tarball:${this.from}`,
  209. this.opts
  210. )
  211. cstream.on('integrity', i => this.integrity = i)
  212. cstream.on('error', err => stream.emit('error', err))
  213. stream.pipe(cstream)
  214. // eslint-disable-next-line promise/catch-or-return
  215. cstream.promise().catch(() => {}).then(() => middleStream.end())
  216. return middleStream
  217. }
  218. pickIntegrityAlgorithm () {
  219. return this.integrity ? this.integrity.pickAlgorithm(this.opts)
  220. : this.defaultIntegrityAlgorithm
  221. }
  222. // TODO: check error class, once those are rolled out to our deps
  223. isDataCorruptionError (er) {
  224. return er.code === 'EINTEGRITY' || er.code === 'Z_DATA_ERROR'
  225. }
  226. // override the types getter
  227. get types () {
  228. return false
  229. }
  230. [_assertType] () {
  231. if (this.types && !this.types.includes(this.spec.type)) {
  232. throw new TypeError(`Wrong spec type (${
  233. this.spec.type
  234. }) for ${
  235. this.constructor.name
  236. }. Supported types: ${this.types.join(', ')}`)
  237. }
  238. }
  239. // We allow ENOENTs from cacache, but not anywhere else.
  240. // An ENOENT trying to read a tgz file, for example, is Right Out.
  241. isRetriableError (er) {
  242. // TODO: check error class, once those are rolled out to our deps
  243. return this.isDataCorruptionError(er) ||
  244. er.code === 'ENOENT' ||
  245. er.code === 'EISDIR'
  246. }
  247. // Mostly internal, but has some uses
  248. // Pass in a function which returns a promise
  249. // Function will be called 1 or more times with streams that may fail.
  250. // Retries:
  251. // Function MUST handle errors on the stream by rejecting the promise,
  252. // so that retry logic can pick it up and either retry or fail whatever
  253. // promise it was making (ie, failing extraction, etc.)
  254. //
  255. // The return value of this method is a Promise that resolves the same
  256. // as whatever the streamHandler resolves to.
  257. //
  258. // This should never be overridden by child classes, but it is public.
  259. tarballStream (streamHandler) {
  260. // Only short-circuit via cache if we have everything else we'll need,
  261. // and the user has not expressed a preference for checking online.
  262. const fromCache = (
  263. !this.preferOnline &&
  264. this.integrity &&
  265. this.resolved
  266. ) ? streamHandler(this[_tarballFromCache]()).catch(er => {
  267. if (this.isDataCorruptionError(er)) {
  268. log.warn('tarball', `cached data for ${
  269. this.spec
  270. } (${this.integrity}) seems to be corrupted. Refreshing cache.`)
  271. return this.cleanupCached().then(() => {
  272. throw er
  273. })
  274. } else {
  275. throw er
  276. }
  277. }) : null
  278. const fromResolved = er => {
  279. if (er) {
  280. if (!this.isRetriableError(er)) {
  281. throw er
  282. }
  283. log.silly('tarball', `no local data for ${
  284. this.spec
  285. }. Extracting by manifest.`)
  286. }
  287. return this.resolve().then(() => retry(tryAgain =>
  288. streamHandler(this[_istream](this[_tarballFromResolved]()))
  289. .catch(streamErr => {
  290. // Most likely data integrity. A cache ENOENT error is unlikely
  291. // here, since we're definitely not reading from the cache, but it
  292. // IS possible that the fetch subsystem accessed the cache, and the
  293. // entry got blown away or something. Try one more time to be sure.
  294. if (this.isRetriableError(streamErr)) {
  295. log.warn('tarball', `tarball data for ${
  296. this.spec
  297. } (${this.integrity}) seems to be corrupted. Trying again.`)
  298. return this.cleanupCached().then(() => tryAgain(streamErr))
  299. }
  300. throw streamErr
  301. }), { retries: 1, minTimeout: 0, maxTimeout: 0 }))
  302. }
  303. return fromCache ? fromCache.catch(fromResolved) : fromResolved()
  304. }
  305. cleanupCached () {
  306. return cacache.rm.content(this.cache, this.integrity, this.opts)
  307. }
  308. [_empty] (path) {
  309. return getContents({ path, depth: 1 }).then(contents => Promise.all(
  310. contents.map(entry => fs.rm(entry, { recursive: true, force: true }))))
  311. }
  312. async [_mkdir] (dest) {
  313. await this[_empty](dest)
  314. return await fs.mkdir(dest, { recursive: true })
  315. }
  316. // extraction is always the same. the only difference is where
  317. // the tarball comes from.
  318. async extract (dest) {
  319. await this[_mkdir](dest)
  320. return this.tarballStream((tarball) => this[_extract](dest, tarball))
  321. }
  322. [_toFile] (dest) {
  323. return this.tarballStream(str => new Promise((res, rej) => {
  324. const writer = new fsm.WriteStream(dest)
  325. str.on('error', er => writer.emit('error', er))
  326. writer.on('error', er => rej(er))
  327. writer.on('close', () => res({
  328. integrity: this.integrity && String(this.integrity),
  329. resolved: this.resolved,
  330. from: this.from,
  331. }))
  332. str.pipe(writer)
  333. }))
  334. }
  335. // don't use this[_mkdir] because we don't want to rimraf anything
  336. async tarballFile (dest) {
  337. const dir = dirname(dest)
  338. await fs.mkdir(dir, { recursive: true })
  339. return this[_toFile](dest)
  340. }
  341. [_extract] (dest, tarball) {
  342. const extractor = tar.x(this[_tarxOptions]({ cwd: dest }))
  343. const p = new Promise((resolve, reject) => {
  344. extractor.on('end', () => {
  345. resolve({
  346. resolved: this.resolved,
  347. integrity: this.integrity && String(this.integrity),
  348. from: this.from,
  349. })
  350. })
  351. extractor.on('error', er => {
  352. log.warn('tar', er.message)
  353. log.silly('tar', er)
  354. reject(er)
  355. })
  356. tarball.on('error', er => reject(er))
  357. })
  358. tarball.pipe(extractor)
  359. return p
  360. }
  361. // always ensure that entries are at least as permissive as our configured
  362. // dmode/fmode, but never more permissive than the umask allows.
  363. [_entryMode] (path, mode, type) {
  364. const m = /Directory|GNUDumpDir/.test(type) ? this.dmode
  365. : /File$/.test(type) ? this.fmode
  366. : /* istanbul ignore next - should never happen in a pkg */ 0
  367. // make sure package bins are executable
  368. const exe = isPackageBin(this.package, path) ? 0o111 : 0
  369. // always ensure that files are read/writable by the owner
  370. return ((mode | m) & ~this.umask) | exe | 0o600
  371. }
  372. [_tarxOptions] ({ cwd }) {
  373. const sawIgnores = new Set()
  374. return {
  375. cwd,
  376. noChmod: true,
  377. noMtime: true,
  378. filter: (name, entry) => {
  379. if (/Link$/.test(entry.type)) {
  380. return false
  381. }
  382. entry.mode = this[_entryMode](entry.path, entry.mode, entry.type)
  383. // this replicates the npm pack behavior where .gitignore files
  384. // are treated like .npmignore files, but only if a .npmignore
  385. // file is not present.
  386. if (/File$/.test(entry.type)) {
  387. const base = basename(entry.path)
  388. if (base === '.npmignore') {
  389. sawIgnores.add(entry.path)
  390. } else if (base === '.gitignore' && !this.allowGitIgnore) {
  391. // rename, but only if there's not already a .npmignore
  392. const ni = entry.path.replace(/\.gitignore$/, '.npmignore')
  393. if (sawIgnores.has(ni)) {
  394. return false
  395. }
  396. entry.path = ni
  397. }
  398. return true
  399. }
  400. },
  401. strip: 1,
  402. onwarn: /* istanbul ignore next - we can trust that tar logs */
  403. (code, msg, data) => {
  404. log.warn('tar', code, msg)
  405. log.silly('tar', code, msg, data)
  406. },
  407. umask: this.umask,
  408. // always ignore ownership info from tarball metadata
  409. preserveOwner: false,
  410. }
  411. }
  412. }
  413. module.exports = FetcherBase
  414. // Child classes
  415. const GitFetcher = require('./git.js')
  416. const RegistryFetcher = require('./registry.js')
  417. const FileFetcher = require('./file.js')
  418. const DirFetcher = require('./dir.js')
  419. const RemoteFetcher = require('./remote.js')
  420. // Get an appropriate fetcher object from a spec and options
  421. FetcherBase.get = (rawSpec, opts = {}) => {
  422. const spec = npa(rawSpec, opts.where)
  423. switch (spec.type) {
  424. case 'git':
  425. return new GitFetcher(spec, opts)
  426. case 'remote':
  427. return new RemoteFetcher(spec, opts)
  428. case 'version':
  429. case 'range':
  430. case 'tag':
  431. case 'alias':
  432. return new RegistryFetcher(spec.subSpec || spec, opts)
  433. case 'file':
  434. return new FileFetcher(spec, opts)
  435. case 'directory':
  436. return new DirFetcher(spec, opts)
  437. default:
  438. throw new TypeError('Unknown spec type: ' + spec.type)
  439. }
  440. }