LogFileIterator.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. <?php
  2. namespace Aws\CloudTrail;
  3. use Aws\S3\S3Client;
  4. use Aws\CloudTrail\Exception\CloudTrailException;
  5. /**
  6. * The `Aws\CloudTrail\LogFileIterator` provides an easy way to iterate over
  7. * log files generated by AWS CloudTrail.
  8. *
  9. * CloudTrail log files contain data about your AWS API calls and are stored in
  10. * Amazon S3 at a predictable path based on a bucket name, a key prefix, an
  11. * account ID, a region, and date information. This class allows you to specify
  12. * options, including a date range, and emits each log file that matches the
  13. * provided options.
  14. *
  15. * Yields an array containing the Amazon S3 bucket and key of the log file.
  16. */
  class LogFileIterator extends \IteratorIterator
  {
      // For internal use
      const DEFAULT_TRAIL_NAME = 'Default';
      const PREFIX_TEMPLATE = 'prefix/AWSLogs/account/CloudTrail/region/date/';
      const PREFIX_WILDCARD = '*';

      // Option names used internally or externally
      const TRAIL_NAME = 'trail_name';
      const KEY_PREFIX = 'key_prefix';
      const START_DATE = 'start_date';
      const END_DATE = 'end_date';
      const ACCOUNT_ID = 'account_id';
      const LOG_REGION = 'log_region';

      /** @var S3Client S3 client used to perform ListObjects operations */
      private $s3Client;

      /** @var string S3 bucket that contains the log files */
      private $s3BucketName;

      /**
       * Constructs a LogFileIterator. This factory method is used if the name
       * of the S3 bucket containing your logs is not known. It uses a
       * CloudTrail client and the trail name (or "Default") to look up the
       * bucket name and key prefix of the trail, then delegates to the
       * constructor.
       *
       * Any `key_prefix` present in $options is overwritten with the value
       * reported by the trail.
       *
       * @param S3Client         $s3Client
       * @param CloudTrailClient $cloudTrailClient
       * @param array            $options
       *
       * @return LogFileIterator
       * @throws \InvalidArgumentException if the bucket name cannot be
       *                                   determined from the trail
       * @see LogFileIterator::__construct
       */
      public static function forTrail(
          S3Client $s3Client,
          CloudTrailClient $cloudTrailClient,
          array $options = []
      ) {
          $trailName = isset($options[self::TRAIL_NAME])
              ? $options[self::TRAIL_NAME]
              : self::DEFAULT_TRAIL_NAME;

          $s3BucketName = null;
          $previous = null;

          // Use the CloudTrail client to get information about the trail,
          // including the bucket name.
          try {
              $result = $cloudTrailClient->describeTrails([
                  'trailNameList' => [$trailName]
              ]);
              $s3BucketName = $result->search('trailList[0].S3BucketName');
              $options[self::KEY_PREFIX] = $result->search(
                  'trailList[0].S3KeyPrefix'
              );
          } catch (CloudTrailException $e) {
              // Remember the failure so it can be chained to the exception
              // thrown below; the missing bucket name is the real signal.
              $previous = $e;
          }

          // If the bucket name is still unknown, then throw an exception
          if (!$s3BucketName) {
              throw new \InvalidArgumentException('The bucket name could not '
                  . 'be determined from the trail.', 0, $previous);
          }

          return new self($s3Client, $s3BucketName, $options);
      }

      /**
       * Constructs a LogFileIterator using the specified options:
       *
       * - trail_name: The name of the trail that is generating our logs. If
       *   none is provided, then "Default" will be used, since that is the name
       *   of the trail created in the AWS Management Console.
       * - key_prefix: The S3 key prefix of your log files. This value will be
       *   overwritten when using the `forTrail()` method. However, if you are
       *   using the constructor, then this value will be used.
       * - start_date: The timestamp of the beginning of date range of the log
       *   records you want to read. You can pass this in as a `DateTime` object,
       *   integer (unix timestamp), or a string compatible with `strtotime()`.
       * - end_date: The timestamp of the end of date range of the log records
       *   you want to read. You can pass this in as a `DateTime` object, integer
       *   (unix timestamp), or a string compatible with `strtotime()`.
       * - account_id: This is your AWS account ID, which is the 12-digit number
       *   found on the *Account Identifiers* section of the *AWS Security
       *   Credentials* page. See https://console.aws.amazon.com/iam/home?#security_credential
       * - log_region: Region of the services of the log records you want to read.
       *
       * @param S3Client $s3Client
       * @param string   $s3BucketName
       * @param array    $options
       *
       * @throws \InvalidArgumentException if a date option cannot be converted
       *                                   to a timestamp
       */
      public function __construct(
          S3Client $s3Client,
          $s3BucketName,
          array $options = []
      ) {
          $this->s3Client = $s3Client;
          $this->s3BucketName = $s3BucketName;
          parent::__construct($this->buildListObjectsIterator($options));
      }

      /**
       * An override of the typical current behavior of \IteratorIterator to
       * format the output such that the bucket and key are returned in an array
       *
       * @return array|bool Array with `Bucket` and `Key`, or false when the
       *                    inner iterator has no current element.
       */
      #[\ReturnTypeWillChange]
      public function current()
      {
          $object = parent::current();
          if (!$object) {
              return false;
          }

          return [
              'Bucket' => $this->s3BucketName,
              'Key'    => $object['Key']
          ];
      }

      /**
       * Constructs an S3 ListObjects iterator, optionally decorated with
       * FilterIterators, based on the provided options.
       *
       * @param array $options
       *
       * @return \Iterator
       */
      private function buildListObjectsIterator(array $options)
      {
          // Extract and normalize the date values from the options
          $startDate = isset($options[self::START_DATE])
              ? $this->normalizeDateValue($options[self::START_DATE])
              : null;
          $endDate = isset($options[self::END_DATE])
              ? $this->normalizeDateValue($options[self::END_DATE])
              : null;

          // Determine the parts of the key prefix of the log files being read
          $parts = [
              'prefix' => isset($options[self::KEY_PREFIX])
                  ? $options[self::KEY_PREFIX]
                  : '',
              'account' => isset($options[self::ACCOUNT_ID])
                  ? $options[self::ACCOUNT_ID]
                  : self::PREFIX_WILDCARD,
              'region' => isset($options[self::LOG_REGION])
                  ? $options[self::LOG_REGION]
                  : self::PREFIX_WILDCARD,
              'date' => $this->determineDateForPrefix($startDate, $endDate),
          ];

          // Determine the longest key prefix that can be used to retrieve all
          // of the relevant log files: everything up to the first wildcard.
          $candidatePrefix = ltrim(strtr(self::PREFIX_TEMPLATE, $parts), '/');
          $logKeyPrefix = $candidatePrefix;
          $index = strpos($candidatePrefix, self::PREFIX_WILDCARD);
          if ($index !== false) {
              $logKeyPrefix = substr($candidatePrefix, 0, $index);
          }

          // Create an iterator that will emit all of the objects matching the
          // key prefix.
          $objectsIterator = $this->s3Client->getIterator('ListObjects', [
              'Bucket' => $this->s3BucketName,
              'Prefix' => $logKeyPrefix,
          ]);

          // Apply regex and/or date filters to the objects iterator to emit only
          // log files matching the options.
          $objectsIterator = $this->applyRegexFilter(
              $objectsIterator,
              $logKeyPrefix,
              $candidatePrefix
          );

          return $this->applyDateFilter($objectsIterator, $startDate, $endDate);
      }

      /**
       * Normalizes a date value to a unix timestamp
       *
       * @param int|string|\DateTimeInterface $date
       *
       * @return int
       * @throws \InvalidArgumentException if the value cannot be converted to
       *                                   a timestamp
       */
      private function normalizeDateValue($date)
      {
          if ($date instanceof \DateTimeInterface) {
              // format('U') yields a numeric string; cast so callers always
              // receive the documented int.
              return (int) $date->format('U');
          }

          if (is_int($date)) {
              return $date;
          }

          if (is_string($date)) {
              $timestamp = strtotime($date);
              // strtotime() signals failure with false; letting that through
              // would silently disable date filtering.
              if ($timestamp === false) {
                  throw new \InvalidArgumentException('The date string "'
                      . $date . '" could not be converted to a timestamp.');
              }

              return $timestamp;
          }

          throw new \InvalidArgumentException('Date values must be a '
              . 'string, an int, or a DateTime object.');
      }

      /**
       * Uses the provided date values to determine the date portion of the
       * prefix. Year, month and day are filled in from left to right for as
       * long as the start and end dates agree; the remainder stays wildcarded.
       *
       * The comparison is done in UTC so the result does not depend on the
       * default timezone of the PHP process.
       *
       * @param int|null $startDate Unix timestamp or null when not provided
       * @param int|null $endDate   Unix timestamp or null when not provided
       *
       * @return string A "Y/m/d" style string, e.g. "2013/11/*" or "*\/*\/*"
       *                (written here with escaped slashes only to keep this
       *                comment open).
       */
      private function determineDateForPrefix($startDate, $endDate)
      {
          // The default date value should look like "*/*/*" after joining
          $dateParts = array_fill_keys(['Y', 'm', 'd'], self::PREFIX_WILDCARD);

          // Narrow down the date by replacing the WILDCARDs with values if they
          // are the same for the start and end date.
          if ($startDate !== null && $endDate !== null) {
              foreach (array_keys($dateParts) as $format) {
                  $candidateValue = gmdate($format, $startDate);
                  if ($candidateValue !== gmdate($format, $endDate)) {
                      // A coarser part differs, so finer parts cannot be fixed
                      break;
                  }
                  $dateParts[$format] = $candidateValue;
              }
          }

          return implode('/', $dateParts);
      }

      /**
       * Applies a regex iterator filter that limits the ListObjects result set
       * based on the provided options.
       *
       * @param \Iterator $objectsIterator
       * @param string    $logKeyPrefix    Literal prefix sent to ListObjects
       * @param string    $candidatePrefix Full prefix that may contain WILDCARDs
       *
       * @return \Iterator
       */
      private function applyRegexFilter(
          $objectsIterator,
          $logKeyPrefix,
          $candidatePrefix
      ) {
          // If the prefix and candidate prefix are the same, then there were
          // no WILDCARDs and the ListObjects prefix already did the filtering.
          if ($logKeyPrefix === $candidatePrefix) {
              return $objectsIterator;
          }

          // Trailing WILDCARD segments match anything, so drop them.
          $pattern = rtrim($candidatePrefix, '/' . self::PREFIX_WILDCARD) . '/';

          // After trimming, if nothing but the literal prefix is left, then no
          // regex is needed.
          if ($logKeyPrefix === $pattern) {
              return $objectsIterator;
          }

          // Quote the literal pieces so that characters such as "." or "#" in
          // the key prefix, account or region are matched literally, and join
          // them with the regex equivalent of a single-segment WILDCARD.
          $literals = explode(self::PREFIX_WILDCARD, $pattern);
          $quoted = [];
          foreach ($literals as $literal) {
              $quoted[] = preg_quote($literal, '#');
          }
          $regex = '#^' . implode('[^/]+', $quoted) . '#';

          // Apply a regex filter iterator to remove files that don't match the
          // provided options.
          return new \CallbackFilterIterator(
              $objectsIterator,
              static function ($object) use ($regex) {
                  return preg_match($regex, $object['Key']) === 1;
              }
          );
      }

      /**
       * Applies an iterator filter to restrict the ListObjects result set to the
       * specified date range. The date of each object is read from the
       * "YYYYMMDDTHHMMZ" stamp in its key; keys without one are dropped.
       *
       * @param \Iterator $objectsIterator
       * @param int|null  $startDate Inclusive lower bound, or null for none
       * @param int|null  $endDate   Inclusive upper bound, or null for none
       *
       * @return \Iterator
       */
      private function applyDateFilter($objectsIterator, $startDate, $endDate)
      {
          // Without any bound there is nothing to filter.
          if ($startDate === null && $endDate === null) {
              return $objectsIterator;
          }

          return new \CallbackFilterIterator(
              $objectsIterator,
              static function ($object) use ($startDate, $endDate) {
                  if (!preg_match('/[0-9]{8}T[0-9]{4}Z/', $object['Key'], $m)) {
                      return false;
                  }

                  $date = strtotime($m[0]);
                  // Digits that do not form a real date cannot be in range.
                  if ($date === false) {
                      return false;
                  }

                  return ($startDate === null || $date >= $startDate)
                      && ($endDate === null || $date <= $endDate);
              }
          );
      }
  }