uri.js 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595
  1. /*---------------------------------------------------------------------------------------------
  2. * Copyright (c) Microsoft Corporation. All rights reserved.
  3. * Licensed under the MIT License. See License.txt in the project root for license information.
  4. *--------------------------------------------------------------------------------------------*/
  5. 'use strict';
  6. Object.defineProperty(exports, "__esModule", { value: true });
  7. exports.uriToFsPath = exports.URI = void 0;
  8. const platform_1 = require("./platform");
  9. const _schemePattern = /^\w[\w\d+.-]*$/;
  10. const _singleSlashStart = /^\//;
  11. const _doubleSlashStart = /^\/\//;
  12. function _validateUri(ret, _strict) {
  13. // scheme, must be set
  14. if (!ret.scheme && _strict) {
  15. throw new Error(`[UriError]: Scheme is missing: {scheme: "", authority: "${ret.authority}", path: "${ret.path}", query: "${ret.query}", fragment: "${ret.fragment}"}`);
  16. }
  17. // scheme, https://tools.ietf.org/html/rfc3986#section-3.1
  18. // ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  19. if (ret.scheme && !_schemePattern.test(ret.scheme)) {
  20. throw new Error('[UriError]: Scheme contains illegal characters.');
  21. }
  22. // path, http://tools.ietf.org/html/rfc3986#section-3.3
  23. // If a URI contains an authority component, then the path component
  24. // must either be empty or begin with a slash ("/") character. If a URI
  25. // does not contain an authority component, then the path cannot begin
  26. // with two slash characters ("//").
  27. if (ret.path) {
  28. if (ret.authority) {
  29. if (!_singleSlashStart.test(ret.path)) {
  30. throw new Error('[UriError]: If a URI contains an authority component, then the path component must either be empty or begin with a slash ("/") character');
  31. }
  32. }
  33. else {
  34. if (_doubleSlashStart.test(ret.path)) {
  35. throw new Error('[UriError]: If a URI does not contain an authority component, then the path cannot begin with two slash characters ("//")');
  36. }
  37. }
  38. }
  39. }
  40. // for a while we allowed uris *without* schemes and this is the migration
  41. // for them, e.g. an uri without scheme and without strict-mode warns and falls
  42. // back to the file-scheme. that should cause the least carnage and still be a
  43. // clear warning
  44. function _schemeFix(scheme, _strict) {
  45. if (!scheme && !_strict) {
  46. return 'file';
  47. }
  48. return scheme;
  49. }
  50. // implements a bit of https://tools.ietf.org/html/rfc3986#section-5
  51. function _referenceResolution(scheme, path) {
  52. // the slash-character is our 'default base' as we don't
  53. // support constructing URIs relative to other URIs. This
  54. // also means that we alter and potentially break paths.
  55. // see https://tools.ietf.org/html/rfc3986#section-5.1.4
  56. switch (scheme) {
  57. case 'https':
  58. case 'http':
  59. case 'file':
  60. if (!path) {
  61. path = _slash;
  62. }
  63. else if (path[0] !== _slash) {
  64. path = _slash + path;
  65. }
  66. break;
  67. }
  68. return path;
  69. }
  70. const _empty = '';
  71. const _slash = '/';
  72. const _regexp = /^(([^:/?#]+?):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/;
  73. /**
  74. * Uniform Resource Identifier (URI) http://tools.ietf.org/html/rfc3986.
  75. * This class is a simple parser which creates the basic component parts
  76. * (http://tools.ietf.org/html/rfc3986#section-3) with minimal validation
  77. * and encoding.
  78. *
  79. * ```txt
  80. * foo://example.com:8042/over/there?name=ferret#nose
  81. * \_/ \______________/\_________/ \_________/ \__/
  82. * | | | | |
  83. * scheme authority path query fragment
  84. * | _____________________|__
  85. * / \ / \
  86. * urn:example:animal:ferret:nose
  87. * ```
  88. */
  89. class URI {
  90. static isUri(thing) {
  91. if (thing instanceof URI) {
  92. return true;
  93. }
  94. if (!thing) {
  95. return false;
  96. }
  97. return typeof thing.authority === 'string'
  98. && typeof thing.fragment === 'string'
  99. && typeof thing.path === 'string'
  100. && typeof thing.query === 'string'
  101. && typeof thing.scheme === 'string'
  102. && typeof thing.fsPath === 'string'
  103. && typeof thing.with === 'function'
  104. && typeof thing.toString === 'function';
  105. }
  106. /**
  107. * scheme is the 'http' part of 'http://www.example.com/some/path?query#fragment'.
  108. * The part before the first colon.
  109. */
  110. scheme;
  111. /**
  112. * authority is the 'www.example.com' part of 'http://www.example.com/some/path?query#fragment'.
  113. * The part between the first double slashes and the next slash.
  114. */
  115. authority;
  116. /**
  117. * path is the '/some/path' part of 'http://www.example.com/some/path?query#fragment'.
  118. */
  119. path;
  120. /**
  121. * query is the 'query' part of 'http://www.example.com/some/path?query#fragment'.
  122. */
  123. query;
  124. /**
  125. * fragment is the 'fragment' part of 'http://www.example.com/some/path?query#fragment'.
  126. */
  127. fragment;
  128. /**
  129. * @internal
  130. */
  131. constructor(schemeOrData, authority, path, query, fragment, _strict = false) {
  132. if (typeof schemeOrData === 'object') {
  133. this.scheme = schemeOrData.scheme || _empty;
  134. this.authority = schemeOrData.authority || _empty;
  135. this.path = schemeOrData.path || _empty;
  136. this.query = schemeOrData.query || _empty;
  137. this.fragment = schemeOrData.fragment || _empty;
  138. // no validation because it's this URI
  139. // that creates uri components.
  140. // _validateUri(this);
  141. }
  142. else {
  143. this.scheme = _schemeFix(schemeOrData, _strict);
  144. this.authority = authority || _empty;
  145. this.path = _referenceResolution(this.scheme, path || _empty);
  146. this.query = query || _empty;
  147. this.fragment = fragment || _empty;
  148. _validateUri(this, _strict);
  149. }
  150. }
  151. // ---- filesystem path -----------------------
  152. /**
  153. * Returns a string representing the corresponding file system path of this URI.
  154. * Will handle UNC paths, normalizes windows drive letters to lower-case, and uses the
  155. * platform specific path separator.
  156. *
  157. * * Will *not* validate the path for invalid characters and semantics.
  158. * * Will *not* look at the scheme of this URI.
  159. * * The result shall *not* be used for display purposes but for accessing a file on disk.
  160. *
  161. *
  162. * The *difference* to `URI#path` is the use of the platform specific separator and the handling
  163. * of UNC paths. See the below sample of a file-uri with an authority (UNC path).
  164. *
  165. * ```ts
  166. const u = URI.parse('file://server/c$/folder/file.txt')
  167. u.authority === 'server'
  168. u.path === '/shares/c$/file.txt'
  169. u.fsPath === '\\server\c$\folder\file.txt'
  170. ```
  171. *
  172. * Using `URI#path` to read a file (using fs-apis) would not be enough because parts of the path,
  173. * namely the server name, would be missing. Therefore `URI#fsPath` exists - it's sugar to ease working
  174. * with URIs that represent files on disk (`file` scheme).
  175. */
  176. get fsPath() {
  177. // if (this.scheme !== 'file') {
  178. // console.warn(`[UriError] calling fsPath with scheme ${this.scheme}`);
  179. // }
  180. return uriToFsPath(this, false);
  181. }
  182. // ---- modify to new -------------------------
  183. with(change) {
  184. if (!change) {
  185. return this;
  186. }
  187. let { scheme, authority, path, query, fragment } = change;
  188. if (scheme === undefined) {
  189. scheme = this.scheme;
  190. }
  191. else if (scheme === null) {
  192. scheme = _empty;
  193. }
  194. if (authority === undefined) {
  195. authority = this.authority;
  196. }
  197. else if (authority === null) {
  198. authority = _empty;
  199. }
  200. if (path === undefined) {
  201. path = this.path;
  202. }
  203. else if (path === null) {
  204. path = _empty;
  205. }
  206. if (query === undefined) {
  207. query = this.query;
  208. }
  209. else if (query === null) {
  210. query = _empty;
  211. }
  212. if (fragment === undefined) {
  213. fragment = this.fragment;
  214. }
  215. else if (fragment === null) {
  216. fragment = _empty;
  217. }
  218. if (scheme === this.scheme
  219. && authority === this.authority
  220. && path === this.path
  221. && query === this.query
  222. && fragment === this.fragment) {
  223. return this;
  224. }
  225. return new Uri(scheme, authority, path, query, fragment);
  226. }
  227. // ---- parse & validate ------------------------
  228. /**
  229. * Creates a new URI from a string, e.g. `http://www.example.com/some/path`,
  230. * `file:///usr/home`, or `scheme:with/path`.
  231. *
  232. * @param value A string which represents an URI (see `URI#toString`).
  233. */
  234. static parse(value, _strict = false) {
  235. const match = _regexp.exec(value);
  236. if (!match) {
  237. return new Uri(_empty, _empty, _empty, _empty, _empty);
  238. }
  239. return new Uri(match[2] || _empty, percentDecode(match[4] || _empty), percentDecode(match[5] || _empty), percentDecode(match[7] || _empty), percentDecode(match[9] || _empty), _strict);
  240. }
  241. /**
  242. * Creates a new URI from a file system path, e.g. `c:\my\files`,
  243. * `/usr/home`, or `\\server\share\some\path`.
  244. *
  245. * The *difference* between `URI#parse` and `URI#file` is that the latter treats the argument
  246. * as path, not as stringified-uri. E.g. `URI.file(path)` is **not the same as**
  247. * `URI.parse('file://' + path)` because the path might contain characters that are
  248. * interpreted (# and ?). See the following sample:
  249. * ```ts
  250. const good = URI.file('/coding/c#/project1');
  251. good.scheme === 'file';
  252. good.path === '/coding/c#/project1';
  253. good.fragment === '';
  254. const bad = URI.parse('file://' + '/coding/c#/project1');
  255. bad.scheme === 'file';
  256. bad.path === '/coding/c'; // path is now broken
  257. bad.fragment === '/project1';
  258. ```
  259. *
  260. * @param path A file system path (see `URI#fsPath`)
  261. */
  262. static file(path) {
  263. let authority = _empty;
  264. // normalize to fwd-slashes on windows,
  265. // on other systems bwd-slashes are valid
  266. // filename character, eg /f\oo/ba\r.txt
  267. if (platform_1.isWindows) {
  268. path = path.replace(/\\/g, _slash);
  269. }
  270. // check for authority as used in UNC shares
  271. // or use the path as given
  272. if (path[0] === _slash && path[1] === _slash) {
  273. const idx = path.indexOf(_slash, 2);
  274. if (idx === -1) {
  275. authority = path.substring(2);
  276. path = _slash;
  277. }
  278. else {
  279. authority = path.substring(2, idx);
  280. path = path.substring(idx) || _slash;
  281. }
  282. }
  283. return new Uri('file', authority, path, _empty, _empty);
  284. }
  285. static from(components) {
  286. const result = new Uri(components.scheme, components.authority, components.path, components.query, components.fragment);
  287. _validateUri(result, true);
  288. return result;
  289. }
  290. // ---- printing/externalize ---------------------------
  291. /**
  292. * Creates a string representation for this URI. It's guaranteed that calling
  293. * `URI.parse` with the result of this function creates an URI which is equal
  294. * to this URI.
  295. *
  296. * * The result shall *not* be used for display purposes but for externalization or transport.
  297. * * The result will be encoded using the percentage encoding and encoding happens mostly
  298. * ignore the scheme-specific encoding rules.
  299. *
  300. * @param skipEncoding Do not encode the result, default is `false`
  301. */
  302. toString(skipEncoding = false) {
  303. return _asFormatted(this, skipEncoding);
  304. }
  305. toJSON() {
  306. return this;
  307. }
  308. static revive(data) {
  309. if (!data) {
  310. return data;
  311. }
  312. else if (data instanceof URI) {
  313. return data;
  314. }
  315. else {
  316. const result = new Uri(data);
  317. result._formatted = data.external;
  318. result._fsPath = data._sep === _pathSepMarker ? data.fsPath : null;
  319. return result;
  320. }
  321. }
  322. }
  323. exports.URI = URI;
  324. const _pathSepMarker = platform_1.isWindows ? 1 : undefined;
  325. // This class exists so that URI is compatible with vscode.Uri (API).
  326. class Uri extends URI {
  327. _formatted = null;
  328. _fsPath = null;
  329. get fsPath() {
  330. if (!this._fsPath) {
  331. this._fsPath = uriToFsPath(this, false);
  332. }
  333. return this._fsPath;
  334. }
  335. toString(skipEncoding = false) {
  336. if (!skipEncoding) {
  337. if (!this._formatted) {
  338. this._formatted = _asFormatted(this, false);
  339. }
  340. return this._formatted;
  341. }
  342. else {
  343. // we don't cache that
  344. return _asFormatted(this, true);
  345. }
  346. }
  347. toJSON() {
  348. const res = {
  349. $mid: 1
  350. };
  351. // cached state
  352. if (this._fsPath) {
  353. res.fsPath = this._fsPath;
  354. res._sep = _pathSepMarker;
  355. }
  356. if (this._formatted) {
  357. res.external = this._formatted;
  358. }
  359. // uri components
  360. if (this.path) {
  361. res.path = this.path;
  362. }
  363. if (this.scheme) {
  364. res.scheme = this.scheme;
  365. }
  366. if (this.authority) {
  367. res.authority = this.authority;
  368. }
  369. if (this.query) {
  370. res.query = this.query;
  371. }
  372. if (this.fragment) {
  373. res.fragment = this.fragment;
  374. }
  375. return res;
  376. }
  377. }
  378. // reserved characters: https://tools.ietf.org/html/rfc3986#section-2.2
  379. const encodeTable = {
  380. [58 /* CharCode.Colon */]: '%3A',
  381. [47 /* CharCode.Slash */]: '%2F',
  382. [63 /* CharCode.QuestionMark */]: '%3F',
  383. [35 /* CharCode.Hash */]: '%23',
  384. [91 /* CharCode.OpenSquareBracket */]: '%5B',
  385. [93 /* CharCode.CloseSquareBracket */]: '%5D',
  386. [64 /* CharCode.AtSign */]: '%40',
  387. [33 /* CharCode.ExclamationMark */]: '%21',
  388. [36 /* CharCode.DollarSign */]: '%24',
  389. [38 /* CharCode.Ampersand */]: '%26',
  390. [39 /* CharCode.SingleQuote */]: '%27',
  391. [40 /* CharCode.OpenParen */]: '%28',
  392. [41 /* CharCode.CloseParen */]: '%29',
  393. [42 /* CharCode.Asterisk */]: '%2A',
  394. [43 /* CharCode.Plus */]: '%2B',
  395. [44 /* CharCode.Comma */]: '%2C',
  396. [59 /* CharCode.Semicolon */]: '%3B',
  397. [61 /* CharCode.Equals */]: '%3D',
  398. [32 /* CharCode.Space */]: '%20',
  399. };
  400. function encodeURIComponentFast(uriComponent, isPath, isAuthority) {
  401. let res = undefined;
  402. let nativeEncodePos = -1;
  403. for (let pos = 0; pos < uriComponent.length; pos++) {
  404. const code = uriComponent.charCodeAt(pos);
  405. // unreserved characters: https://tools.ietf.org/html/rfc3986#section-2.3
  406. if ((code >= 97 /* CharCode.a */ && code <= 122 /* CharCode.z */)
  407. || (code >= 65 /* CharCode.A */ && code <= 90 /* CharCode.Z */)
  408. || (code >= 48 /* CharCode.Digit0 */ && code <= 57 /* CharCode.Digit9 */)
  409. || code === 45 /* CharCode.Dash */
  410. || code === 46 /* CharCode.Period */
  411. || code === 95 /* CharCode.Underline */
  412. || code === 126 /* CharCode.Tilde */
  413. || (isPath && code === 47 /* CharCode.Slash */)
  414. || (isAuthority && code === 91 /* CharCode.OpenSquareBracket */)
  415. || (isAuthority && code === 93 /* CharCode.CloseSquareBracket */)
  416. || (isAuthority && code === 58 /* CharCode.Colon */)) {
  417. // check if we are delaying native encode
  418. if (nativeEncodePos !== -1) {
  419. res += encodeURIComponent(uriComponent.substring(nativeEncodePos, pos));
  420. nativeEncodePos = -1;
  421. }
  422. // check if we write into a new string (by default we try to return the param)
  423. if (res !== undefined) {
  424. res += uriComponent.charAt(pos);
  425. }
  426. }
  427. else {
  428. // encoding needed, we need to allocate a new string
  429. if (res === undefined) {
  430. res = uriComponent.substr(0, pos);
  431. }
  432. // check with default table first
  433. const escaped = encodeTable[code];
  434. if (escaped !== undefined) {
  435. // check if we are delaying native encode
  436. if (nativeEncodePos !== -1) {
  437. res += encodeURIComponent(uriComponent.substring(nativeEncodePos, pos));
  438. nativeEncodePos = -1;
  439. }
  440. // append escaped variant to result
  441. res += escaped;
  442. }
  443. else if (nativeEncodePos === -1) {
  444. // use native encode only when needed
  445. nativeEncodePos = pos;
  446. }
  447. }
  448. }
  449. if (nativeEncodePos !== -1) {
  450. res += encodeURIComponent(uriComponent.substring(nativeEncodePos));
  451. }
  452. return res !== undefined ? res : uriComponent;
  453. }
  454. function encodeURIComponentMinimal(path) {
  455. let res = undefined;
  456. for (let pos = 0; pos < path.length; pos++) {
  457. const code = path.charCodeAt(pos);
  458. if (code === 35 /* CharCode.Hash */ || code === 63 /* CharCode.QuestionMark */) {
  459. if (res === undefined) {
  460. res = path.substr(0, pos);
  461. }
  462. res += encodeTable[code];
  463. }
  464. else {
  465. if (res !== undefined) {
  466. res += path[pos];
  467. }
  468. }
  469. }
  470. return res !== undefined ? res : path;
  471. }
  472. /**
  473. * Compute `fsPath` for the given uri
  474. */
  475. function uriToFsPath(uri, keepDriveLetterCasing) {
  476. let value;
  477. if (uri.authority && uri.path.length > 1 && uri.scheme === 'file') {
  478. // unc path: file://shares/c$/far/boo
  479. value = `//${uri.authority}${uri.path}`;
  480. }
  481. else if (uri.path.charCodeAt(0) === 47 /* CharCode.Slash */
  482. && (uri.path.charCodeAt(1) >= 65 /* CharCode.A */ && uri.path.charCodeAt(1) <= 90 /* CharCode.Z */ || uri.path.charCodeAt(1) >= 97 /* CharCode.a */ && uri.path.charCodeAt(1) <= 122 /* CharCode.z */)
  483. && uri.path.charCodeAt(2) === 58 /* CharCode.Colon */) {
  484. if (!keepDriveLetterCasing) {
  485. // windows drive letter: file:///c:/far/boo
  486. value = uri.path[1].toLowerCase() + uri.path.substr(2);
  487. }
  488. else {
  489. value = uri.path.substr(1);
  490. }
  491. }
  492. else {
  493. // other path
  494. value = uri.path;
  495. }
  496. if (platform_1.isWindows) {
  497. value = value.replace(/\//g, '\\');
  498. }
  499. return value;
  500. }
  501. exports.uriToFsPath = uriToFsPath;
  502. /**
  503. * Create the external version of a uri
  504. */
  505. function _asFormatted(uri, skipEncoding) {
  506. const encoder = !skipEncoding
  507. ? encodeURIComponentFast
  508. : encodeURIComponentMinimal;
  509. let res = '';
  510. let { scheme, authority, path, query, fragment } = uri;
  511. if (scheme) {
  512. res += scheme;
  513. res += ':';
  514. }
  515. if (authority || scheme === 'file') {
  516. res += _slash;
  517. res += _slash;
  518. }
  519. if (authority) {
  520. let idx = authority.indexOf('@');
  521. if (idx !== -1) {
  522. // <user>@<auth>
  523. const userinfo = authority.substr(0, idx);
  524. authority = authority.substr(idx + 1);
  525. idx = userinfo.lastIndexOf(':');
  526. if (idx === -1) {
  527. res += encoder(userinfo, false, false);
  528. }
  529. else {
  530. // <user>:<pass>@<auth>
  531. res += encoder(userinfo.substr(0, idx), false, false);
  532. res += ':';
  533. res += encoder(userinfo.substr(idx + 1), false, true);
  534. }
  535. res += '@';
  536. }
  537. authority = authority.toLowerCase();
  538. idx = authority.lastIndexOf(':');
  539. if (idx === -1) {
  540. res += encoder(authority, false, true);
  541. }
  542. else {
  543. // <auth>:<port>
  544. res += encoder(authority.substr(0, idx), false, true);
  545. res += authority.substr(idx);
  546. }
  547. }
  548. if (path) {
  549. // lower-case windows drive letters in /C:/fff or C:/fff
  550. if (path.length >= 3 && path.charCodeAt(0) === 47 /* CharCode.Slash */ && path.charCodeAt(2) === 58 /* CharCode.Colon */) {
  551. const code = path.charCodeAt(1);
  552. if (code >= 65 /* CharCode.A */ && code <= 90 /* CharCode.Z */) {
  553. path = `/${String.fromCharCode(code + 32)}:${path.substr(3)}`; // "/c:".length === 3
  554. }
  555. }
  556. else if (path.length >= 2 && path.charCodeAt(1) === 58 /* CharCode.Colon */) {
  557. const code = path.charCodeAt(0);
  558. if (code >= 65 /* CharCode.A */ && code <= 90 /* CharCode.Z */) {
  559. path = `${String.fromCharCode(code + 32)}:${path.substr(2)}`; // "/c:".length === 3
  560. }
  561. }
  562. // encode the rest of the path
  563. res += encoder(path, true, false);
  564. }
  565. if (query) {
  566. res += '?';
  567. res += encoder(query, false, false);
  568. }
  569. if (fragment) {
  570. res += '#';
  571. res += !skipEncoding ? encodeURIComponentFast(fragment, false, false) : fragment;
  572. }
  573. return res;
  574. }
  575. // --- decode
  576. function decodeURIComponentGraceful(str) {
  577. try {
  578. return decodeURIComponent(str);
  579. }
  580. catch {
  581. if (str.length > 3) {
  582. return str.substr(0, 3) + decodeURIComponentGraceful(str.substr(3));
  583. }
  584. else {
  585. return str;
  586. }
  587. }
  588. }
  589. const _rEncodedAsHex = /(%[0-9A-Za-z][0-9A-Za-z])+/g;
  590. function percentDecode(str) {
  591. if (!str.match(_rEncodedAsHex)) {
  592. return str;
  593. }
  594. return str.replace(_rEncodedAsHex, (match) => decodeURIComponentGraceful(match));
  595. }