| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325 | <?phpnamespace Aws\CloudTrail;use Aws\S3\S3Client;use Aws\CloudTrail\Exception\CloudTrailException;/** * The `Aws\CloudTrail\LogFileIterator` provides an easy way to iterate over * log file generated by AWS CloudTrail. * * CloudTrail log files contain data about your AWS API calls and are stored in * Amazon S3 at a predictable path based on a bucket name, a key prefix, an * account ID, a region, and date information. This class allows you to specify * options, including a date range, and emits each log file that match the * provided options. * * Yields an array containing the Amazon S3 bucket and key of the log file. */class LogFileIterator extends \IteratorIterator{    // For internal use    const DEFAULT_TRAIL_NAME = 'Default';    const PREFIX_TEMPLATE = 'prefix/AWSLogs/account/CloudTrail/region/date/';    const PREFIX_WILDCARD = '*';    // Option names used internally or externally    const TRAIL_NAME = 'trail_name';    const KEY_PREFIX = 'key_prefix';    const START_DATE = 'start_date';    const END_DATE = 'end_date';    const ACCOUNT_ID = 'account_id';    const LOG_REGION = 'log_region';    /** @var S3Client S3 client used to perform ListObjects operations */    private $s3Client;    /** @var string S3 bucket that contains the log files */    private $s3BucketName;    /**     * Constructs a LogRecordIterator. This factory method is used if the name     * of the S3 bucket containing your logs is not known. This factory method     * uses a CloudTrail client and the trail name (or "Default") to find the     * information about the trail necessary for constructing the     * LogRecordIterator.     *     * @param S3Client         $s3Client     * @param CloudTrailClient $cloudTrailClient     * @param array            $options     *     * @return LogRecordIterator     * @throws \InvalidArgumentException     * @see LogRecordIterator::__contruct     */    public static function forTrail(        S3Client $s3Client,        CloudTrailClient $cloudTrailClient,        array $options = []    ) {        $trailName = isset($options[self::TRAIL_NAME])            ? $options[self::TRAIL_NAME]            : self::DEFAULT_TRAIL_NAME;        $s3BucketName = null;        // Use the CloudTrail client to get information about the trail,        // including the bucket name.        try {            $result = $cloudTrailClient->describeTrails([                'trailNameList' => [$trailName]            ]);            $s3BucketName = $result->search('trailList[0].S3BucketName');            $options[self::KEY_PREFIX] = $result->search(                'trailList[0].S3KeyPrefix'            );        } catch (CloudTrailException $e) {            // There was an error describing the trail        }        // If the bucket name is still unknown, then throw an exception        if (!$s3BucketName) {            $prev = isset($e) ? $e : null;            throw new \InvalidArgumentException('The bucket name could not '                . 'be determined from the trail.', 0, $prev);        }        return new self($s3Client, $s3BucketName, $options);    }    /**     * Constructs a LogFileIterator using the specified options:     *     * - trail_name: The name of the trail that is generating our logs. If     *   none is provided, then "Default" will be used, since that is the name     *   of the trail created in the AWS Management Console.     * - key_prefix: The S3 key prefix of your log files. This value will be     *   overwritten when using the `fromTrail()` method. However, if you are     *   using the constructor, then this value will be used.     * - start_date: The timestamp of the beginning of date range of the log     *   records you want to read. You can pass this in as a `DateTime` object,     *   integer (unix timestamp), or a string compatible with `strtotime()`.     * - end_date: The timestamp of the end of date range of the log records     *   you want to read. You can pass this in as a `DateTime` object, integer     *   (unix timestamp), or a string compatible with `strtotime()`.     * - account_id: This is your AWS account ID, which is the 12-digit number     *   found on the *Account Identifiers* section of the *AWS Security     *   Credentials* page. See https://console.aws.amazon.com/iam/home?#security_credential     * - log_region: Region of the services of the log records you want to read.     *     * @param S3Client $s3Client     * @param string   $s3BucketName     * @param array    $options     */    public function __construct(        S3Client $s3Client,        $s3BucketName,        array $options = []    ) {        $this->s3Client = $s3Client;        $this->s3BucketName = $s3BucketName;        parent::__construct($this->buildListObjectsIterator($options));    }    /**     * An override of the typical current behavior of \IteratorIterator to     * format the output such that the bucket and key are returned in an array     *     * @return array|bool     */    #[\ReturnTypeWillChange]    public function current()    {        if ($object = parent::current()) {            return [                'Bucket' => $this->s3BucketName,                'Key'    => $object['Key']            ];        }        return false;    }    /**     * Constructs an S3 ListObjects iterator, optionally decorated with     * FilterIterators, based on the provided options.     *     * @param array $options     *     * @return \Iterator     */    private function buildListObjectsIterator(array $options)    {        // Extract and normalize the date values from the options        $startDate = isset($options[self::START_DATE])            ? $this->normalizeDateValue($options[self::START_DATE])            : null;        $endDate = isset($options[self::END_DATE])            ? $this->normalizeDateValue($options[self::END_DATE])            : null;        // Determine the parts of the key prefix of the log files being read        $parts = [            'prefix' => isset($options[self::KEY_PREFIX])                    ? $options[self::KEY_PREFIX]                    : null,            'account' => isset($options[self::ACCOUNT_ID])                    ? $options[self::ACCOUNT_ID]                    : self::PREFIX_WILDCARD,            'region' => isset($options[self::LOG_REGION])                    ? $options[self::LOG_REGION]                    : self::PREFIX_WILDCARD,            'date' => $this->determineDateForPrefix($startDate, $endDate),        ];        // Determine the longest key prefix that can be used to retrieve all        // of the relevant log files.        $candidatePrefix = ltrim(strtr(self::PREFIX_TEMPLATE, $parts), '/');        $logKeyPrefix = $candidatePrefix;        $index = strpos($candidatePrefix, self::PREFIX_WILDCARD);        if ($index !== false) {            $logKeyPrefix = substr($candidatePrefix, 0, $index);        }        // Create an iterator that will emit all of the objects matching the        // key prefix.        $objectsIterator = $this->s3Client->getIterator('ListObjects', [            'Bucket' => $this->s3BucketName,            'Prefix' => $logKeyPrefix,        ]);        // Apply regex and/or date filters to the objects iterator to emit only        // log files matching the options.        $objectsIterator = $this->applyRegexFilter(            $objectsIterator,            $logKeyPrefix,            $candidatePrefix        );        $objectsIterator = $this->applyDateFilter(            $objectsIterator,            $startDate,            $endDate        );        return $objectsIterator;    }    /**     * Normalizes a date value to a unix timestamp     *     * @param int|string|\DateTimeInterface $date     *     * @return int     * @throws \InvalidArgumentException if the value cannot be converted to     *     a timestamp     */    private function normalizeDateValue($date)    {        if (is_string($date)) {            $date = strtotime($date);        } elseif ($date instanceof \DateTimeInterface) {            $date = $date->format('U');        } elseif (!is_int($date)) {            throw new \InvalidArgumentException('Date values must be a '                . 'string, an int, or a DateTime object.');        }        return $date;    }    /**     * Uses the provided date values to determine the date portion of the prefix     */    private function determineDateForPrefix($startDate, $endDate)    {        // The default date value should look like "*/*/*" after joining        $dateParts = array_fill_keys(['Y', 'm', 'd'], self::PREFIX_WILDCARD);        // Narrow down the date by replacing the WILDCARDs with values if they        // are the same for the start and end date.        if ($startDate && $endDate) {            foreach ($dateParts as $key => &$value) {                $candidateValue = date($key, $startDate);                if ($candidateValue === date($key, $endDate)) {                    $value = $candidateValue;                } else {                    break;                }            }        }        return join('/', $dateParts);    }    /**     * Applies a regex iterator filter that limits the ListObjects result set     * based on the provided options.     *     * @param \Iterator $objectsIterator     * @param string    $logKeyPrefix     * @param string    $candidatePrefix     *     * @return \Iterator     */    private function applyRegexFilter(        $objectsIterator,        $logKeyPrefix,        $candidatePrefix    ) {        // If the prefix and candidate prefix are not the same, then there were        // WILDCARDs.        if ($logKeyPrefix !== $candidatePrefix) {            // Turn the candidate prefix into a regex by trimming and            // converting WILDCARDs to regex notation.            $regex = rtrim($candidatePrefix, '/' . self::PREFIX_WILDCARD) . '/';            $regex = strtr($regex, [self::PREFIX_WILDCARD => '[^/]+']);            // After trimming WILDCARDs or the end, if the regex is the same as            // the prefix, then no regex is needed.            if ($logKeyPrefix !== $regex) {                // Apply a regex filter iterator to remove files that don't                // match the provided options.                $objectsIterator = new \CallbackFilterIterator(                    $objectsIterator,                    function ($object) use ($regex) {                        return preg_match("#{$regex}#", $object['Key']);                    }                );            }        }        return $objectsIterator;    }    /**     * Applies an iterator filter to restrict the ListObjects result set to the     * specified date range.     *     * @param \Iterator $objectsIterator     * @param int       $startDate     * @param int       $endDate     *     * @return \Iterator     */    private function applyDateFilter($objectsIterator, $startDate, $endDate)    {        // If either a start or end date was provided, filter out dates that        // don't match the date range.        if ($startDate || $endDate) {            $fn = function ($object) use ($startDate, $endDate) {                if (!preg_match('/[0-9]{8}T[0-9]{4}Z/', $object['Key'], $m)) {                    return false;                }                $date = strtotime($m[0]);                return (!$startDate || $date >= $startDate)                    && (!$endDate || $date <= $endDate);            };            $objectsIterator = new \CallbackFilterIterator($objectsIterator, $fn);        }        return $objectsIterator;    }}
 |