Reducing Clojure Lambda Cold Starts Part 9 - AWS SDK V3 With Webpack

Reducing Clojure Lambda Cold Starts Part 9 - AWS SDK V3 With Webpack

ClojureScript is still looking very promising, with init durations and durations nearly identical to JavaScript ones with the same dependencies and equivalent code. I've been itching to see what the numbers look like with the "bare-bones", "modular" AWS SDK V3, but the '@aws-sdk/client-s3' dependency is not built into the Lambda Node runtime like 'aws-sdk' is. I tried including it as a dependency in package.json, but this ended up downloading the world and stuffing it into node_modules. Including these in my Lambda increased the bundle size by orders of magnitude and led to terrible cold starts.

I tried to get shadow-cljs to bundle the S3 Client, but I couldn't get it to work with a 'node-library' configuration, so I decided to resort to Webpack instead. First I installed Webpack:

npm install webpack webpack-cli --save-dev

Then added a Webpack configuration file:

const path = require('path');

module.exports = {
  mode: 'production',
  target: "node",
  entry: './build/lambda/calcs/index.js',
  output: {
    library: {"name": "calcs", "type": "this"},
    path: path.resolve(__dirname, 'dist'),
    filename: 'index.js',
  }
};

Then I modified template.yml:

  # Lambda function that runs the webpack-bundled ClojureScript calculations.
  # It is invoked through the SQS event source declared under Events.
  RunCalculationsCLJS:
    Type: AWS::Serverless::Function
    Properties:
      FunctionName: !Sub "${AWS::StackName}-run-calcs-cljs-2"
      # index.js is the webpack output file and `calcs` is the library name
      # set in webpack.config.js; `handler` is the function exported from it.
      Handler: index.calcs.handler
      Runtime: nodejs14.x
      # The directory webpack writes its bundle to.
      CodeUri: dist
      Timeout: 900
      MemorySize: 512
      Policies:
        - AWSLambdaBasicExecutionRole
        # Read from the input bucket, write to the output bucket.
        - S3ReadPolicy:
            BucketName: !Ref TransactionsBucket
        - S3WritePolicy:
            BucketName: !Ref CalculationsBucket
        # Inline statement that additionally allows listing buckets.
        - Version: '2012-10-17' 
          Statement:
            - Effect: Allow
              Action:
                - s3:ListAllMyBuckets
              Resource: 'arn:aws:s3:::*'
      Environment:
        Variables:
          TRANSACTIONS_BUCKET: !Ref TransactionsBucket
          # Read by the handler code as process.env.CALCULATIONS_BUCKET.
          CALCULATIONS_BUCKET: !Ref CalculationsBucket
      Events:
        SQSEvent:
          Type: SQS
          Properties:
            Queue: !GetAtt RunClojureScriptCalculationsQueue.Arn
            # One message per invocation; the handler only reads the first record.
            BatchSize: 1

And src/cljs/tax/core.cljs:

(ns tax.core
  (:require [cljs.core.async :as async :refer [<!]]
            [cljs.core.async.interop :refer-macros [<p!]]
            [clojure.string :as s]
            ["@aws-sdk/client-s3" :refer [GetObjectCommand S3Client PutObjectCommand]]
            ["fs" :as fs]
            ["readline" :as readline]

            [tax.calcs :refer [calculate]]
            [tax.metrics :as metrics :refer [emit-metric]])
  (:require-macros [cljs.core.async.macros :refer [go]]))

;; Single S3 client created at load time and shared by put-object and get-object.
(def client (new S3Client #js {}))

;; Name of the bucket results are written to, taken from the Lambda environment.
(def output-bucket js/process.env.CALCULATIONS_BUCKET)

(defn put-object
  "Sends a PutObjectCommand that stores `body` under `object-key` in
  `bucket-name`. Returns the result of `.send` on the shared client, which the
  handler awaits with <p!."
  [bucket-name object-key body]
  (let [params #js {"Bucket" bucket-name
                    "Key" object-key
                    "Body" body}
        command (PutObjectCommand. params)]
    (.send client command)))

(defn get-object
  "Sends a GetObjectCommand for `object-key` in `bucket-name`. Returns the
  result of `.send` on the shared client, which callers await with <p!."
  [bucket-name object-key]
  (let [params #js {"Bucket" bucket-name
                    "Key" object-key}
        command (GetObjectCommand. params)]
    (.send client command)))

(defn stream-to-string
  "Returns a channel that receives every line readline reads from `stream`.
  The channel is closed when the readline interface emits \"close\".

  Despite the name, the result is a channel of lines, not a string."
  [^js/ReadableStream stream]
  ;; NOTE(review): the channel is unbuffered and lines are put! without
  ;; back-pressure; core.async limits pending puts per channel (1024) -- confirm
  ;; that a burst of lines cannot exceed that.
  (let [lines-ch (async/chan)
        reader (.createInterface readline #js {"input" stream
                                               "crlfDelay" js/Infinity})]
    (doto reader
      (.on "line" #(async/put! lines-ch %))
      (.on "close" #(async/close! lines-ch)))
    lines-ch))

(defn get-object-as-string
  "Fetches the object and returns a channel yielding its lines joined back
  together with \"\\n\"."
  [bucket-name object-key]
  (go
    (let [response (<p! (get-object bucket-name object-key))
          line-ch (stream-to-string (.-Body response))
          all-lines (<! (async/into [] line-ch))]
      (s/join "\n" all-lines))))

(defn ->items
  "Splits `input` on newlines and JSON-parses each line, returning a vector of
  the parsed values."
  [input]
  ;; built eagerly (into a vector) so the parse time can be measured by the caller
  (into [] (map js/JSON.parse) (s/split input #"\n")))

(defn ->json-output
  "Serializes each item with JSON.stringify and joins the results with
  newlines, one JSON document per line."
  [items]
  (->> items
       (map js/JSON.stringify)
       (s/join "\n")))

(defn handler
  "Lambda entry point for the SQS-triggered calculation run.

  Reads `bucket` and `key` from the JSON body of the first SQS record, fetches
  that object, parses it, runs `calculate`, serializes the result and writes it
  to `output-bucket` under the same key. Emits the get-object, parse-input,
  convert-to-output and put-to-output timing metrics along the way.

  Calls `(callback nil put-result)` on success. An error thrown in this go
  block (for example a rejected put or a malformed message body) is passed to
  `callback` as the first argument instead of escaping the go block."
  [event context callback]
  ;; only grabbing a single message at a time, so we can just get the first.
  (go
    (try
      (let [message-body (get-in (js->clj event) ["Records" 0 "body"])
            props (js/JSON.parse message-body)
            bucket (.-bucket props)
            ;; bound as object-key so the local does not shadow cljs.core/key
            object-key (.-key props)

            start (metrics/now)
            input (<! (get-object-as-string bucket object-key))
            _ (emit-metric "get-object" (- (metrics/now) start))

            start (metrics/now)
            input-lines (->items input)
            _ (emit-metric "parse-input" (- (metrics/now) start))

            calculated-items (calculate input-lines)

            start (metrics/now)
            output-string (->json-output calculated-items)
            _ (emit-metric "convert-to-output" (- (metrics/now) start))

            start (metrics/now)
            put-result (<p! (put-object output-bucket object-key output-string))
            _ (emit-metric "put-to-output" (- (metrics/now) start))]
        (callback nil put-result))
      (catch js/Error err
        ;; <p! reports a rejected promise as an ex-info whose cause is the
        ;; rejection value; surface that underlying error when present.
        (callback (or (ex-cause err) err))))))

I compiled the ClojureScript code to JavaScript with shadow-cljs, then attempted to "transpile" that JavaScript into fully bundled JavaScript:

>> shadow-cljs release :calcs-lambda
>> npx webpack

This gave the following nasty warning:

WARNING in ./node_modules/aws-crt/dist/native/binding.js 55:18-31
Critical dependency: the request of a dependency is an expression
    at CommonJsRequireContextDependency.getWarnings (/Users/larry/Documents/code/tax-engine-experiments/node_modules/webpack/lib/dependencies/ContextDependency.js:91:18)
    at Compilation.reportDependencyErrorsAndWarnings (/Users/larry/Documents/code/tax-engine-experiments/node_modules/webpack/lib/Compilation.js:3127:24)
    at /Users/larry/Documents/code/tax-engine-experiments/node_modules/webpack/lib/Compilation.js:2724:28
    at _next2 (eval at create (/Users/larry/Documents/code/tax-engine-experiments/node_modules/tapable/lib/HookCodeFactory.js:33:10), <anonymous>:16:1)
    at eval (eval at create (/Users/larry/Documents/code/tax-engine-experiments/node_modules/tapable/lib/HookCodeFactory.js:33:10), <anonymous>:42:1)
    at /Users/larry/Documents/code/tax-engine-experiments/node_modules/webpack/lib/FlagDependencyExportsPlugin.js:385:11
    at /Users/larry/Documents/code/tax-engine-experiments/node_modules/neo-async/async.js:2830:7
    at Object.each (/Users/larry/Documents/code/tax-engine-experiments/node_modules/neo-async/async.js:2850:39)
    at /Users/larry/Documents/code/tax-engine-experiments/node_modules/webpack/lib/FlagDependencyExportsPlugin.js:361:18
    at /Users/larry/Documents/code/tax-engine-experiments/node_modules/neo-async/async.js:2830:7
 @ ./node_modules/aws-crt/dist/native/crt.js 25:34-54
 @ ./node_modules/aws-crt/dist/index.js 38:25-48
 @ ./node_modules/@aws-sdk/util-user-agent-node/dist-es/is-crt-available.js 3:96-114
 @ ./node_modules/@aws-sdk/util-user-agent-node/dist-es/index.js 5:0-52 16:23-37
 @ ./node_modules/@aws-sdk/client-s3/dist-es/runtimeConfig.js 15:0-65 23:870-886
 @ ./node_modules/@aws-sdk/client-s3/dist-es/S3Client.js 14:0-73 19:24-42
 @ ./node_modules/@aws-sdk/client-s3/dist-es/index.js 2:0-27 2:0-27
 @ ./build/lambda/calcs/index.js 282:466-495

Running "sam local invoke RunCalculationsCLJS" then gave some cryptic errors that seemed to have to do with failure in dependency resolution. Fortunately "aws-crt" comes built into the Node Lambda runtime, so I fixed this by modifying my webpack.config.js:

const path = require('path');

module.exports = {
  mode: 'production',
  target: "node",
  entry: './build/lambda/calcs/index.js',
  output: {
    library: {"name": "calcs", "type": "this"},
    path: path.resolve(__dirname, 'dist'),
    filename: 'index.js',
  },
  externals: {
    "aws-crt": "commonjs aws-crt"
  }
};

This causes Webpack to skip trying to bundle "aws-crt" and just "transpile" the require to "require('aws-crt')". Running "sam local" again gave the desired results, so I deployed and ran my SQS blaster again to see how it affected performance:

avg(@duration) | avg(@initDuration) | count(@initDuration) | count(@duration)
616.0709 | 327.7015 | 20 | 1015

Interestingly, the init duration dropped significantly while the duration increased significantly. Looking at the metrics:

Screen Shot 2021-12-28 at 12.54.00 PM.png

Everything looks great except get-object and put-to-output. This got me wondering: since I have a bunch of concurrent invocations reading the same object and writing to the same object, maybe they're stepping on each other's toes with the reads and writes. Let's try turning the SQS blaster into an SQS trickler:

(ns tax.profile
  (:import (software.amazon.awssdk.services.sqs SqsClient)
           (software.amazon.awssdk.services.sqs.model SendMessageRequest)))

;; URLs of the queues to send profiling messages to, one per runtime suffix.
(def queues
  (let [url-prefix "https://sqs.us-east-1.amazonaws.com/170594410696/tax-engine-experiments-2-run-calcs-queue-"]
    (map #(str url-prefix %) ["cljs"])))

(defn profile
  "\"Trickler\": for every queue in `queues`, sends the same message 1000
  times, sleeping one second after each send. Returns nil."
  []
  ;; One client for the whole run instead of one per queue, closed on exit so
  ;; its resources are not left open after profiling.
  (with-open [sqs (-> (SqsClient/builder) (.build))]
    (doseq [queue queues]
      (let [req (-> (SendMessageRequest/builder)
                    (.queueUrl queue)
                    (.messageBody "{\"bucket\": \"tax-engine-experiments-2-transactionsbucket-78gg1f219mel\", \"key\": \"test.json\"}")
                    (.build))]
        (dotimes [_ 1000]
          (.sendMessage sqs req)
          (Thread/sleep 1000))))))

That does seem like it might be the case:

Screen Shot 2021-12-28 at 1.25.40 PM.png

Maybe I'll copy the object 1000 times and have the blaster just iterate over those. First I need to add the S3 dependency to deps.edn:

;; Project dependency configuration.
{:paths ["src/clj" "src/cljc"]
 ;; Top-level dependencies: the AWS SDK v2 S3 module and jsonista.
 :deps {software.amazon.awssdk/s3 {:mvn/version "2.17.100"}
        metosin/jsonista {:mvn/version "0.3.5"}}
 :aliases {:build {:extra-deps {io.github.clojure/tools.build {:tag "v0.7.2" :sha "0361dde"}}
                   :ns-default build}
           ;; Alias for the profiling script: adds dev/clj to the paths and
           ;; lists the SQS, S3 and SSO SDK modules.
           ;; NOTE(review): inside an alias, :deps is documented as a synonym
           ;; for :replace-deps rather than :extra-deps -- confirm that
           ;; replacing the top-level :deps is intended here.
           :profile {:extra-paths ["dev/clj"]
                     :deps {software.amazon.awssdk/sqs {:mvn/version "2.17.100"}
                            software.amazon.awssdk/s3 {:mvn/version "2.17.100"}
                            software.amazon.awssdk/sso {:mvn/version "2.17.100"}}}}}

Then update dev/clj/tax/profile.clj:

(ns tax.profile
  (:import (software.amazon.awssdk.services.sqs SqsClient)
           (software.amazon.awssdk.services.sqs.model SendMessageRequest)
           (software.amazon.awssdk.services.s3 S3Client)
           (software.amazon.awssdk.services.s3.model CopyObjectRequest)))

;; URLs of the queues to send profiling messages to, one per runtime suffix.
(def queues
  (let [url-prefix "https://sqs.us-east-1.amazonaws.com/170594410696/tax-engine-experiments-2-run-calcs-queue-"]
    (map #(str url-prefix %) ["cljs"])))

;; Bucket holding test.json and the copies made from it.
(def bucket
  "tax-engine-experiments-2-transactionsbucket-78gg1f219mel")

(defn copy-objects
  "Copies test.json within `bucket` to test-0.json, test-1.json, ... up to
  test-(n-1).json. `n` defaults to 1000. Returns nil."
  ([] (copy-objects 1000))
  ([n]
   ;; The client is closed once all copies are done.
   (with-open [s3 (-> (S3Client/builder) (.build))]
     (dotimes [i n]
       ;; The destination key is computed once and is the key actually written
       ;; (the previous unused binding used a different "input-%s.json" name).
       (let [destination-key (format "test-%s.json" i)
             req (-> (CopyObjectRequest/builder)
                     (.sourceBucket bucket)
                     (.sourceKey "test.json")
                     (.destinationBucket bucket)
                     (.destinationKey destination-key)
                     (.build))]
         (.copyObject s3 req))))))

(defn profile
  "\"Blaster\": for every queue in `queues`, sends `n` messages concurrently,
  one thread per message. Message i points at test-<i>.json in `bucket`.

  `n` defaults to 2, the count that was hard-coded before; pass 1000 to cover
  every object created by `copy-objects`. Blocks until all sends have finished
  and returns nil."
  ([] (profile 2))
  ([n]
   (with-open [sqs (-> (SqsClient/builder) (.build))]
     (let [senders (doall
                    (for [queue queues
                          i (range n)]
                      (let [body (format "{\"bucket\": \"%s\", \"key\": \"test-%s.json\"}"
                                         bucket
                                         i)
                            req (-> (SendMessageRequest/builder)
                                    (.queueUrl queue)
                                    (.messageBody body)
                                    (.build))]
                        (doto (Thread. (fn [] (.sendMessage sqs req)))
                          (.start)))))]
       ;; Wait for every sender so the client is not closed while sends are
       ;; still in flight.
       (run! (fn [^Thread sender] (.join sender)) senders)))))

Screen Shot 2021-12-28 at 1.56.04 PM.png

Well, that's better than the blaster with a single file, but not as good as the trickler with a single file. Removing the durations where there were cold starts does bring the number down a bit, but not all the way. Maybe there's some kind of S3 caching going on? Anyway, I'll probably not get to the bottom of it here, so I'll just move on to trying to optimize the gets and puts. The lowest-hanging fruit seems to be combining the parsing with the line reading:

(ns tax.core
  (:require [cljs.core.async :as async :refer [<!]]
            [cljs.core.async.interop :refer-macros [<p!]]
            [clojure.string :as s]
            ["@aws-sdk/client-s3" :refer [GetObjectCommand S3Client PutObjectCommand]]
            ["fs" :as fs]
            ["readline" :as readline]

            [tax.calcs :refer [calculate]]
            [tax.metrics :as metrics :refer [emit-metric]])
  (:require-macros [cljs.core.async.macros :refer [go]]))

;; Single S3 client created at load time and shared by put-object and get-object.
(def client (new S3Client #js {}))

;; Name of the bucket results are written to, taken from the Lambda environment.
(def output-bucket js/process.env.CALCULATIONS_BUCKET)

(defn put-object
  "Sends a PutObjectCommand that stores `body` under `object-key` in
  `bucket-name`. Returns the result of `.send` on the shared client, which the
  handler awaits with <p!."
  [bucket-name object-key body]
  (let [params #js {"Bucket" bucket-name
                    "Key" object-key
                    "Body" body}
        command (PutObjectCommand. params)]
    (.send client command)))

(defn get-object
  "Sends a GetObjectCommand for `object-key` in `bucket-name`. Returns the
  result of `.send` on the shared client, which callers await with <p!."
  [bucket-name object-key]
  (let [params #js {"Bucket" bucket-name
                    "Key" object-key}
        command (GetObjectCommand. params)]
    (.send client command)))

(defn stream-to-string
  "Returns a channel that receives the JSON-parsed value of every line
  readline reads from `stream`. The channel is closed when the readline
  interface emits \"close\".

  Despite the name, the result is a channel of parsed items, not a string."
  [^js/ReadableStream stream]
  ;; NOTE(review): the channel is unbuffered and items are put! without
  ;; back-pressure; core.async limits pending puts per channel (1024) -- confirm
  ;; that a burst of lines cannot exceed that.
  (let [items-ch (async/chan)
        reader (.createInterface readline #js {"input" stream
                                               "crlfDelay" js/Infinity})]
    (doto reader
      (.on "line" #(async/put! items-ch (js/JSON.parse %)))
      (.on "close" #(async/close! items-ch)))
    items-ch))

(defn parse-object-lines
  "Fetches the object and returns a channel yielding a vector with the parsed
  item from each of its lines."
  [bucket-name object-key]
  (go
    (let [response (<p! (get-object bucket-name object-key))
          items-ch (stream-to-string (.-Body response))]
      (<! (async/into [] items-ch)))))

(defn ->json-output
  "Serializes each item with JSON.stringify and joins the results with
  newlines, one JSON document per line."
  [items]
  (->> items
       (map js/JSON.stringify)
       (s/join "\n")))

(defn handler
  "Lambda entry point for the SQS-triggered calculation run.

  Reads `bucket` and `key` from the JSON body of the first SQS record, fetches
  and parses that object line by line, runs `calculate`, serializes the result
  and writes it to `output-bucket` under the same key. Emits the parse-input,
  convert-to-output and put-to-output timing metrics along the way.

  Calls `(callback nil put-result)` on success. An error thrown in this go
  block (for example a rejected put or a malformed message body) is passed to
  `callback` as the first argument instead of escaping the go block."
  [event context callback]
  ;; only grabbing a single message at a time, so we can just get the first.
  (go
    (try
      (let [message-body (get-in (js->clj event) ["Records" 0 "body"])
            props (js/JSON.parse message-body)
            bucket (.-bucket props)
            ;; bound as object-key so the local does not shadow cljs.core/key
            object-key (.-key props)

            start (metrics/now)
            input-lines (<! (parse-object-lines bucket object-key))
            _ (emit-metric "parse-input" (- (metrics/now) start))

            calculated-items (calculate input-lines)

            start (metrics/now)
            output-string (->json-output calculated-items)
            _ (emit-metric "convert-to-output" (- (metrics/now) start))

            start (metrics/now)
            put-result (<p! (put-object output-bucket object-key output-string))
            _ (emit-metric "put-to-output" (- (metrics/now) start))]
        (callback nil put-result))
      (catch js/Error err
        ;; <p! reports a rejected promise as an ex-info whose cause is the
        ;; rejection value; surface that underlying error when present.
        (callback (or (ex-cause err) err))))))

Hmm, the warm duration is still at 470.3523, so it seems the joining and splitting of lines didn't really make that much of a difference.

Conclusion

Using the pipeline ShadowCLJS->Webpack did seem to help init times significantly but didn't seem to do much for overall run times. Changing to the newer AWS SDK V3 didn't really seem to improve performance that much, but, then again, I'm probably using it pretty naively. I may further investigate optimizations there in later posts, but I'm feeling pretty happy with ClojureScript as a solution to cold starts. The cold start times are way better than the equivalent Clojure and even the overall warm durations were significantly better, so I feel pretty confident that it should work well for my production workloads. I've been itching to try the same workload in Rust and compare the performance, so I might take a detour and do that in my next post.