S3UriParser.php 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. <?php
  2. namespace Aws\S3;
  3. use Aws\Arn\Exception\InvalidArnException;
  4. use Aws\Arn\S3\AccessPointArn;
  5. use Aws\Arn\ArnParser;
  6. use GuzzleHttp\Psr7;
  7. use Psr\Http\Message\UriInterface;
  /**
   * Extracts a region, bucket, key, and whether a URI is in path-style.
   */
  class S3UriParser
  {
      /**
       * Matches S3 hostnames. Group 1 is the optional "<bucket>." prefix of a
       * virtual-hosted URI; group 2 is the label that follows "s3." / "s3-"
       * (a region, or "amazonaws" for the classic endpoint).
       */
      private $pattern = '/^(.+\\.)?s3[.-]([A-Za-z0-9-]+)\\./';

      /** Scheme used by S3 StreamWrapper URIs (s3://bucket/key). */
      private $streamWrapperScheme = 's3';

      /** Template copied by every parse helper before it fills in values. */
      private static $defaultResult = [
          'path_style' => true,
          'bucket' => null,
          'key' => null,
          'region' => null
      ];

      /**
       * Parses a URL or S3 StreamWrapper Uri (s3://) into an associative array
       * of Amazon S3 data including:
       *
       * - bucket: The Amazon S3 bucket (null if none)
       * - key: The Amazon S3 key (null if none)
       * - path_style: Set to true if using path style, or false if not
       * - region: Set to a string if a non-classic endpoint is used or null.
       *
       * When the host component is an ARN, "bucket" is the ARN string itself
       * and "region" is taken from the parsed access point ARN.
       *
       * @param string|UriInterface $uri
       *
       * @return array
       * @throws \InvalidArgumentException|InvalidArnException
       */
      public function parse($uri)
      {
          // Attempt to parse host component of uri as an ARN
          $components = $this->parseS3UrlComponents($uri);
          if (!empty($components) && ArnParser::isArn($components['host'])) {
              $arn = new AccessPointArn($components['host']);

              return [
                  'bucket' => $components['host'],
                  'key' => $components['path'],
                  'path_style' => false,
                  'region' => $arn->getRegion()
              ];
          }

          $url = Psr7\Utils::uriFor($uri);

          if ($url->getScheme() === $this->streamWrapperScheme) {
              return $this->parseStreamWrapper($url);
          }

          if (!$url->getHost()) {
              throw new \InvalidArgumentException('No hostname found in URI: '
                  . $uri);
          }

          // Hosts that do not look like an S3 endpoint are treated as custom
          // endpoints and parsed path-style.
          if (!preg_match($this->pattern, $url->getHost(), $matches)) {
              return $this->parseCustomEndpoint($url);
          }

          // Parse the URI based on the matched format (path / virtual)
          $result = empty($matches[1])
              ? $this->parsePathStyle($url)
              : $this->parseVirtualHosted($url, $matches);

          // Add the region if one was found and not the classic endpoint
          $result['region'] = $matches[2] === 'amazonaws' ? null : $matches[2];

          return $result;
      }

      /**
       * Splits a "scheme://host/path" string with a regular expression so that
       * hosts containing ":" (such as ARNs) can be recognised.
       *
       * @param string|UriInterface $uri
       *
       * @return array Keys "scheme", "host" and "path", or an empty array when
       *               the input does not match the expected shape.
       */
      private function parseS3UrlComponents($uri)
      {
          // Cast explicitly: $uri may be a UriInterface object rather than a string.
          if (!preg_match("/^([a-zA-Z0-9]*):\/\/([a-zA-Z0-9:-]*)\/(.*)/", (string) $uri, $components)) {
              return [];
          }

          return [
              'scheme' => $components[1],
              'host' => $components[2],
              'path' => $components[3],
          ];
      }

      /**
       * Parses an s3://bucket/key StreamWrapper URI: the host is the bucket and
       * the path, minus leading slashes and spaces, is the key.
       *
       * @param UriInterface $url
       *
       * @return array
       */
      private function parseStreamWrapper(UriInterface $url)
      {
          $result = self::$defaultResult;
          $result['path_style'] = false;
          $result['bucket'] = $url->getHost();

          $key = ltrim($url->getPath(), '/ ');
          // Compare against '' rather than using empty(): the key "0" is a
          // non-empty key and must not be reported as null.
          if ($key !== '') {
              $result['key'] = $key;
          }

          return $result;
      }

      /**
       * Parses a URI whose host did not match the S3 hostname pattern. The
       * first path segment is the bucket and the remainder, if any, is the key.
       *
       * @param UriInterface $url
       *
       * @return array
       */
      private function parseCustomEndpoint(UriInterface $url)
      {
          $result = self::$defaultResult;
          $segments = explode('/', ltrim($url->getPath(), '/ '), 2);

          // explode() always yields at least one element, so the bucket is an
          // empty string (not null) when the path is empty.
          $result['bucket'] = $segments[0];
          if (isset($segments[1])) {
              $result['key'] = $segments[1];
          }

          return $result;
      }

      /**
       * Parses a path-style URI such as https://s3.amazonaws.com/bucket/key.
       *
       * @param UriInterface $url
       *
       * @return array
       */
      private function parsePathStyle(UriInterface $url)
      {
          $result = self::$defaultResult;

          $path = ltrim($url->getPath(), '/');
          if ($path === '') {
              // https://s3.amazonaws.com or https://s3.amazonaws.com/
              return $result;
          }

          // "bucket", "bucket/" and "bucket/key" all split on the first slash.
          $segments = explode('/', $path, 2);
          $result['bucket'] = $segments[0];

          // Only an empty remainder means "no key"; a key of "0" is kept.
          if (isset($segments[1]) && $segments[1] !== '') {
              $result['key'] = $segments[1];
          }

          return $result;
      }

      /**
       * Parses a virtual-hosted URI such as https://bucket.s3.amazonaws.com/key.
       *
       * @param UriInterface $url
       * @param array        $matches Result of matching $this->pattern on the
       *                              host; index 1 holds the "<bucket>." prefix.
       *
       * @return array
       */
      private function parseVirtualHosted(UriInterface $url, array $matches)
      {
          $result = self::$defaultResult;
          $result['path_style'] = false;
          // Remove trailing "." from the prefix to get the bucket
          $result['bucket'] = substr($matches[1], 0, -1);

          // Drop the leading "/" from the path; an empty remainder means no key.
          $key = (string) substr($url->getPath(), 1);
          if ($key !== '') {
              $result['key'] = $key;
          }

          return $result;
      }
  }