Repository: thatdot/quine Branch: main Commit: 6a744a272e04 Files: 1062 Total size: 6.6 MB Directory structure: gitextract_uxvox9yj/ ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ └── feature_request.md │ ├── PULL_REQUEST_TEMPLATE.md │ ├── actions/ │ │ └── notify-slack-on-failure/ │ │ └── action.yml │ └── workflows/ │ ├── ci.yml │ ├── copy.bara.sky │ └── copybara.yml ├── .gitignore ├── .scalafix.conf ├── .scalafmt.conf ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── api/ │ └── src/ │ ├── main/ │ │ └── scala/ │ │ └── com/ │ │ └── thatdot/ │ │ └── api/ │ │ ├── codec/ │ │ │ └── SecretCodecs.scala │ │ ├── schema/ │ │ │ └── SecretSchemas.scala │ │ └── v2/ │ │ ├── ApiErrors.scala │ │ ├── AwsCredentials.scala │ │ ├── AwsRegion.scala │ │ ├── RatesSummary.scala │ │ ├── SaslJaasConfig.scala │ │ ├── ShowShort.scala │ │ ├── SuccessEnvelope.scala │ │ ├── V2EndpointDefinitions.scala │ │ ├── YamlCodec.scala │ │ ├── codec/ │ │ │ ├── DisjointEither.scala │ │ │ └── ThirdPartyCodecs.scala │ │ ├── outputs/ │ │ │ ├── DestinationSteps.scala │ │ │ ├── Format.scala │ │ │ └── OutputFormat.scala │ │ └── schema/ │ │ ├── TapirJsonConfig.scala │ │ └── ThirdPartySchemas.scala │ └── test/ │ └── scala/ │ └── com/ │ └── thatdot/ │ ├── api/ │ │ ├── codec/ │ │ │ └── SecretCodecsSpec.scala │ │ └── v2/ │ │ ├── ApiErrorsCodecSpec.scala │ │ ├── AwsCredentialsCodecSpec.scala │ │ ├── AwsGenerators.scala │ │ ├── AwsRegionCodecSpec.scala │ │ ├── ErrorResponseGenerators.scala │ │ ├── ErrorTypeGenerators.scala │ │ ├── SaslJaasConfigCodecSpec.scala │ │ ├── SaslJaasConfigGenerators.scala │ │ ├── SaslJaasConfigLoggableSpec.scala │ │ ├── SuccessEnvelopeCodecSpec.scala │ │ └── SuccessEnvelopeGenerators.scala │ └── quine/ │ ├── JsonGenerators.scala │ ├── ScalaPrimitiveGenerators.scala │ └── TimeGenerators.scala ├── aws/ │ └── src/ │ ├── main/ │ │ └── scala/ │ │ └── com/ │ │ └── thatdot/ │ │ └── aws/ │ │ ├── model/ │ │ │ ├── AwsCredentials.scala │ │ │ └── AwsRegion.scala │ │ 
└── util/ │ │ └── AwsOps.scala │ └── test/ │ └── scala/ │ └── com/ │ └── thatdot/ │ └── aws/ │ └── util/ │ └── AwsOpsSpec.scala ├── build.sbt ├── data/ │ └── src/ │ ├── main/ │ │ └── scala/ │ │ └── com/ │ │ └── thatdot/ │ │ └── data/ │ │ ├── DataFoldableFrom.scala │ │ └── DataFolderTo.scala │ └── test/ │ └── scala/ │ └── com/ │ └── thatdot/ │ └── data/ │ ├── AvroDecoderTest.scala │ ├── DataFoldableFromSpec.scala │ ├── DataFolderToSpec.scala │ └── FoldableTestData.scala ├── model-converters/ │ └── src/ │ └── main/ │ └── scala/ │ └── com/ │ └── thatdot/ │ └── convert/ │ ├── Api2ToAws.scala │ ├── Api2ToModel1.scala │ ├── Api2ToOutputs2.scala │ └── Model1ToApi2.scala ├── outputs2/ │ └── src/ │ ├── main/ │ │ └── scala/ │ │ └── com/ │ │ └── thatdot/ │ │ └── outputs2/ │ │ ├── DestinationSteps.scala │ │ ├── OutputEncoder.scala │ │ ├── OutputsLoggables.scala │ │ ├── ResultDestination.scala │ │ ├── SaslJaasConfig.scala │ │ ├── Sinks.scala │ │ ├── destination/ │ │ │ ├── Drop.scala │ │ │ ├── File.scala │ │ │ ├── HttpEndpoint.scala │ │ │ ├── Kafka.scala │ │ │ ├── Kinesis.scala │ │ │ ├── ReactiveStream.scala │ │ │ ├── SNS.scala │ │ │ └── StandardOut.scala │ │ └── package.scala │ └── test/ │ └── scala/ │ └── com/ │ └── thatdot/ │ └── outputs2/ │ └── destination/ │ └── KafkaSpec.scala ├── project/ │ ├── Dependencies.scala │ ├── Docker.scala │ ├── Ecr.scala │ ├── FlatcPlugin.scala │ ├── GitVersion.scala │ ├── Packaging.scala │ ├── QuineSettings.scala │ ├── ScalaFix.scala │ ├── build.properties │ ├── dependencySchemes.sbt │ └── plugins.sbt ├── quine/ │ ├── recipes/ │ │ ├── apache_log.yaml │ │ ├── apt-detection.yaml │ │ ├── books.yaml │ │ ├── cdn.yaml │ │ ├── certstream-firehose.yaml │ │ ├── conways-gol.yaml │ │ ├── duration.yaml │ │ ├── entity-resolution.yaml │ │ ├── ethereum.yaml │ │ ├── finance.yaml │ │ ├── hpotter.yaml │ │ ├── ingest.yaml │ │ ├── kafka-ingest.yaml │ │ ├── movieData.yaml │ │ ├── pi.yaml │ │ ├── ping.yaml │ │ ├── pipe.yaml │ │ ├── planetside-2.yaml │ │ ├── 
quine-logs-recipe.yaml │ │ ├── sq-test.yaml │ │ ├── template-recipe.yaml │ │ ├── webhook.yaml │ │ ├── wikipedia-non-bot-revisions.yaml │ │ └── wikipedia.yaml │ └── src/ │ ├── main/ │ │ ├── resources/ │ │ │ ├── ionicons.tsv │ │ │ ├── reference.conf │ │ │ └── web/ │ │ │ ├── browserconfig.xml │ │ │ ├── quine-ui-startup.js │ │ │ ├── quine-ui.html │ │ │ └── site.webmanifest │ │ └── scala/ │ │ └── com/ │ │ └── thatdot/ │ │ └── quine/ │ │ └── app/ │ │ ├── BaseApp.scala │ │ ├── CmdArgs.scala │ │ ├── ImproveQuine.scala │ │ ├── Main.scala │ │ ├── MeteredExecutors.scala │ │ ├── Metrics.scala │ │ ├── QuineApp.scala │ │ ├── QuineAppIngestControl.scala │ │ ├── QuinePreservingCodecs.scala │ │ ├── Recipe.scala │ │ ├── RecipeInterpreter.scala │ │ ├── RecipeInterpreterV2.scala │ │ ├── RecipePackage.scala │ │ ├── RecipeV2.scala │ │ ├── SchemaCache.scala │ │ ├── StandingQueryResultOutput.scala │ │ ├── StatusLines.scala │ │ ├── config/ │ │ │ ├── Address.scala │ │ │ ├── BaseConfig.scala │ │ │ ├── EdgeIteration.scala │ │ │ ├── FileAccessPolicy.scala │ │ │ ├── FileIngestConfig.scala │ │ │ ├── IdProviderType.scala │ │ │ ├── MetricsConfig.scala │ │ │ ├── MetricsReporter.scala │ │ │ ├── PersistenceAgentType.scala │ │ │ ├── PersistenceBuilder.scala │ │ │ ├── PureconfigInstances.scala │ │ │ ├── QuineConfig.scala │ │ │ ├── QuinePersistenceBuilder.scala │ │ │ ├── WebServerConfig.scala │ │ │ └── errors/ │ │ │ └── ConfigErrorFormatter.scala │ │ ├── data/ │ │ │ ├── QuineDataFoldablesFrom.scala │ │ │ └── QuineDataFoldersTo.scala │ │ ├── migrations/ │ │ │ ├── Migration.scala │ │ │ ├── QuineMigrations.scala │ │ │ └── instances/ │ │ │ ├── MultipleValuesRewrite.scala │ │ │ └── package.scala │ │ ├── model/ │ │ │ ├── README.md │ │ │ ├── ingest/ │ │ │ │ ├── ContentDelimitedIngestSrcDef.scala │ │ │ │ ├── IngestSrcDef.scala │ │ │ │ ├── KafkaSrcDef.scala │ │ │ │ ├── KinesisKclSrcDef.scala │ │ │ │ ├── KinesisSrcDef.scala │ │ │ │ ├── NamedPipeSource.scala │ │ │ │ ├── ServerSentEventsSrcDef.scala │ │ │ │ ├── 
SqsStreamSrcDef.scala │ │ │ │ ├── WebsocketSimpleStartupSrcDef.scala │ │ │ │ ├── serialization/ │ │ │ │ │ ├── ContentDecoder.scala │ │ │ │ │ ├── CypherParseProtobuf.scala │ │ │ │ │ ├── CypherToProtobuf.scala │ │ │ │ │ ├── ImportFormat.scala │ │ │ │ │ └── ProtobufParser.scala │ │ │ │ └── util/ │ │ │ │ ├── AwsOps.scala │ │ │ │ └── KafkaSettingsValidator.scala │ │ │ ├── ingest2/ │ │ │ │ ├── V1IngestCodecs.scala │ │ │ │ ├── V1IngestSchemas.scala │ │ │ │ ├── V1ToV2.scala │ │ │ │ ├── V2IngestEntities.scala │ │ │ │ ├── V2IngestSources.scala │ │ │ │ ├── V2ToV1.scala │ │ │ │ ├── codec/ │ │ │ │ │ └── FrameDecoder.scala │ │ │ │ ├── source/ │ │ │ │ │ ├── DecodedSource.scala │ │ │ │ │ ├── FramedSource.scala │ │ │ │ │ ├── IngestBounds.scala │ │ │ │ │ └── QuineIngestQuery.scala │ │ │ │ └── sources/ │ │ │ │ ├── CsvFileSource.scala │ │ │ │ ├── FileSource.scala │ │ │ │ ├── FramedSourceProvider.scala │ │ │ │ ├── KafkaSource.scala │ │ │ │ ├── KinesisKclSrc.scala │ │ │ │ ├── KinesisSource.scala │ │ │ │ ├── NumberIteratorSource.scala │ │ │ │ ├── ReactiveSource.scala │ │ │ │ ├── S3Source.scala │ │ │ │ ├── ServerSentEventSource.scala │ │ │ │ ├── SqsSource.scala │ │ │ │ ├── StandardInputSource.scala │ │ │ │ ├── WebSocketClientSource.scala │ │ │ │ ├── WebSocketFileUploadSource.scala │ │ │ │ └── package.scala │ │ │ ├── outputs/ │ │ │ │ ├── ConsoleLoggingOutput.scala │ │ │ │ ├── CypherQueryOutput.scala │ │ │ │ ├── DropOutput.scala │ │ │ │ ├── FileOutput.scala │ │ │ │ ├── KafkaOutput.scala │ │ │ │ ├── KinesisOutput.scala │ │ │ │ ├── OutputRuntime.scala │ │ │ │ ├── PostToEndpointOutput.scala │ │ │ │ ├── QuinePatternOutput.scala │ │ │ │ ├── SlackOutput.scala │ │ │ │ └── SnsOutput.scala │ │ │ ├── outputs2/ │ │ │ │ ├── QuineDestinationSteps.scala │ │ │ │ ├── QuineResultDestination.scala │ │ │ │ ├── destination/ │ │ │ │ │ ├── CypherQueryDestination.scala │ │ │ │ │ └── Slack.scala │ │ │ │ ├── package.scala │ │ │ │ └── query/ │ │ │ │ ├── CypherQuery.scala │ │ │ │ └── standing/ │ │ │ │ ├── 
Predicate.scala │ │ │ │ ├── StandingQuery.scala │ │ │ │ ├── StandingQueryPattern.scala │ │ │ │ ├── StandingQueryResultTransformation.scala │ │ │ │ ├── StandingQueryResultWorkflow.scala │ │ │ │ ├── StandingQueryStats.scala │ │ │ │ └── package.scala │ │ │ └── transformation/ │ │ │ └── polyglot/ │ │ │ ├── Polyglot.scala │ │ │ ├── PolyglotValueDataFoldableFrom.scala │ │ │ ├── PolyglotValueDataFolderTo.scala │ │ │ ├── Transformation.scala │ │ │ └── langauges/ │ │ │ └── QuineJavaScript.scala │ │ ├── routes/ │ │ │ ├── AdministrationRoutesImpl.scala │ │ │ ├── AlgorithmRoutesImpl.scala │ │ │ ├── BaseAppRoutes.scala │ │ │ ├── DebugRoutesImpl.scala │ │ │ ├── HealthAppRoutes.scala │ │ │ ├── IngestApiMethods.scala │ │ │ ├── IngestMeter.scala │ │ │ ├── IngestRoutesImpl.scala │ │ │ ├── IngestStreamState.scala │ │ │ ├── IngestStreamWithControl.scala │ │ │ ├── QueryUiConfigurationRoutesImpl.scala │ │ │ ├── QueryUiConfigurationState.scala │ │ │ ├── QueryUiCypherApiMethods.scala │ │ │ ├── QueryUiRoutesImpl.scala │ │ │ ├── QuineAppOpenApiDocs.scala │ │ │ ├── QuineAppRoutes.scala │ │ │ ├── StandingQueryInterfaceV2.scala │ │ │ ├── StandingQueryRoutesV1Impl.scala │ │ │ ├── StandingQueryStoreV1.scala │ │ │ ├── Util.scala │ │ │ ├── WebSocketQueryProtocolServer.scala │ │ │ ├── exts/ │ │ │ │ ├── PekkoQuineEndpoints.scala │ │ │ │ ├── ServerEntitiesWithExamples.scala │ │ │ │ ├── ServerQuineEndpoints.scala │ │ │ │ ├── ServerRequestTimeoutOps.scala │ │ │ │ └── circe/ │ │ │ │ └── JsonEntitiesFromSchemas.scala │ │ │ └── websocketquinepattern/ │ │ │ ├── LSPActor.scala │ │ │ └── WebSocketQuinePatternServer.scala │ │ ├── util/ │ │ │ ├── AtLeastOnceCypherQuery.scala │ │ │ ├── OpenApiRenderer.scala │ │ │ ├── QuineLoggables.scala │ │ │ └── StringOps.scala │ │ └── v2api/ │ │ ├── OssApiMethods.scala │ │ ├── QuineOssV2OpenApiDocs.scala │ │ ├── V2ApiInfo.scala │ │ ├── V2OssRoutes.scala │ │ ├── converters/ │ │ │ ├── Api2ToOutputs2.scala │ │ │ ├── ApiToIngest.scala │ │ │ ├── ApiToStanding.scala │ │ │ ├── 
ApiToUiStyling.scala │ │ │ ├── IngestToApi.scala │ │ │ ├── UiStylingToApi.scala │ │ │ └── package.scala │ │ ├── definitions/ │ │ │ ├── AlgorithmApiMethods.scala │ │ │ ├── ApiCommand.scala │ │ │ ├── ApiUiStyling.scala │ │ │ ├── CommonParameters.scala │ │ │ ├── CypherApiMethods.scala │ │ │ ├── DebugApiMethods.scala │ │ │ ├── ParallelismParameter.scala │ │ │ ├── QueryEffects.scala │ │ │ ├── QuineApiMethods.scala │ │ │ ├── QuineIdCodec.scala │ │ │ ├── QuineIdSchemas.scala │ │ │ ├── TapirDecodeErrorHandler.scala │ │ │ ├── TapirRoutes.scala │ │ │ ├── V2QueryExecutor.scala │ │ │ ├── V2QueryWebSocketFlow.scala │ │ │ ├── V2QuineEndpointDefinitions.scala │ │ │ ├── ingest2/ │ │ │ │ ├── ApiIngest.scala │ │ │ │ └── DeadLetterQueueOutput.scala │ │ │ ├── outputs/ │ │ │ │ └── QuineDestinationSteps.scala │ │ │ └── query/ │ │ │ └── standing/ │ │ │ ├── Predicate.scala │ │ │ ├── StandingQuery.scala │ │ │ ├── StandingQueryOutputStructure.scala │ │ │ ├── StandingQueryPattern.scala │ │ │ ├── StandingQueryResultTransformation.scala │ │ │ ├── StandingQueryResultWorkflow.scala │ │ │ └── StandingQueryStats.scala │ │ └── endpoints/ │ │ ├── V2AlgorithmEndpoints.scala │ │ ├── V2CypherEndpoints.scala │ │ ├── V2DebugEndpoints.scala │ │ ├── V2IngestEndpoints.scala │ │ ├── V2QueryWebSocketEndpoints.scala │ │ ├── V2QuineAdministrationEndpoints.scala │ │ ├── V2StandingEndpoints.scala │ │ ├── V2UiStylingEndpoints.scala │ │ └── Visibility.scala │ └── test/ │ ├── resources/ │ │ ├── addressbook.desc │ │ ├── addressbook.proto │ │ ├── application.conf │ │ ├── documented_cassandra_config.conf │ │ ├── documented_config.conf │ │ ├── ingest_test_script/ │ │ │ ├── README.md │ │ │ ├── ingest_test.py │ │ │ └── requirements.txt │ │ ├── multi_file_proto_test/ │ │ │ ├── README.md │ │ │ ├── data/ │ │ │ │ ├── encode_examples.sh │ │ │ │ ├── example_anyzone.binpb │ │ │ │ ├── example_anyzone.txtpb │ │ │ │ ├── example_zone_0.binpb │ │ │ │ ├── example_zone_0.txtpb │ │ │ │ ├── example_zone_1.binpb │ │ │ │ ├── 
example_zone_1.txtpb │ │ │ │ ├── example_zone_2.binpb │ │ │ │ ├── example_zone_2.txtpb │ │ │ │ ├── example_zone_3.binpb │ │ │ │ └── example_zone_3.txtpb │ │ │ └── schema/ │ │ │ ├── argus.proto │ │ │ ├── azeroth.proto │ │ │ ├── compile_schema.sh │ │ │ ├── warcraft.desc │ │ │ └── zone_rework.proto │ │ ├── protobuf_test.binpb │ │ ├── recipes/ │ │ │ ├── full.json │ │ │ └── full.yaml │ │ ├── trivial.cypher │ │ └── yaml/ │ │ ├── invalid.yaml │ │ ├── wikipedia-example.json │ │ └── wikipedia-example.yaml │ └── scala/ │ └── com/ │ └── thatdot/ │ └── quine/ │ ├── CirceCodecTestSupport.scala │ ├── app/ │ │ ├── CmdArgsTest.scala │ │ ├── ImproveQuineCodecSpec.scala │ │ ├── ImproveQuineGenerators.scala │ │ ├── QuineAppCodecSpec.scala │ │ ├── QuineAppGenerators.scala │ │ ├── QuineAppTelemetryTest.scala │ │ ├── RecipeTest.scala │ │ ├── RecipeV2Test.scala │ │ ├── config/ │ │ │ ├── ClickHouseSecurityTest.scala │ │ │ ├── ConfigGenerators.scala │ │ │ ├── ConfigRoundTripSpec.scala │ │ │ ├── QuineConfigTest.scala │ │ │ ├── WebServerConfigTest.scala │ │ │ └── errors/ │ │ │ └── ConfigErrorFormatterSpec.scala │ │ ├── data/ │ │ │ ├── QuineDataFoldablesFromSpec.scala │ │ │ └── QuineDataFoldersToSpec.scala │ │ ├── ingest/ │ │ │ ├── DelimitedIngestSrcDefTest.scala │ │ │ ├── KafkaSettingsValidatorTest.scala │ │ │ ├── RawValuesIngestSrcDefTest.scala │ │ │ ├── WritableInputStream.scala │ │ │ └── serialization/ │ │ │ ├── ContentDecoderTest.scala │ │ │ ├── CypherProtobufConversionsTest.scala │ │ │ ├── ImportFormatTest.scala │ │ │ └── ProtobufTest.scala │ │ ├── model/ │ │ │ ├── ingest/ │ │ │ │ └── util/ │ │ │ │ └── AwsOpsSpec.scala │ │ │ └── ingest2/ │ │ │ └── sources/ │ │ │ └── KafkaSourceSpec.scala │ │ ├── routes/ │ │ │ ├── QueryUiCypherApiMethodsQuinePatternEnabledSpec.scala │ │ │ ├── RouteHardeningOpsSpec.scala │ │ │ └── websocketquinepattern/ │ │ │ ├── JsonRpcNotification.scala │ │ │ ├── JsonRpcRequest.scala │ │ │ ├── JsonRpcResponse.scala │ │ │ └── WebSocketQuinePatternServerTest.scala │ │ └── 
v2api/ │ │ └── definitions/ │ │ ├── ingest2/ │ │ │ ├── KafkaDlqSecretParamsSpec.scala │ │ │ └── KafkaIngestSecretParamsSpec.scala │ │ └── outputs/ │ │ └── KafkaDestinationSecretParamsSpec.scala │ ├── convert/ │ │ └── Api2ToOutputs2KafkaSpec.scala │ ├── graph/ │ │ ├── FakeQuineGraph.scala │ │ └── StandingQueryTest.scala │ ├── ingest2/ │ │ ├── IngestCodecSpec.scala │ │ ├── IngestGenerators.scala │ │ ├── IngestSourceTestSupport.scala │ │ ├── V2IngestEntitiesCodecSpec.scala │ │ ├── V2IngestEntitiesGenerators.scala │ │ ├── V2IngestEntitiesPreservingCodecSpec.scala │ │ ├── codec/ │ │ │ └── FrameDecoderSpec.scala │ │ ├── source/ │ │ │ └── DecodedSourceSpec.scala │ │ ├── sources/ │ │ │ ├── DelimitedSourcesSpec.scala │ │ │ ├── FileLikeSourcesSpec.scala │ │ │ ├── FramedSourceSpec.scala │ │ │ └── KafkaFoldableSpec.scala │ │ └── transformation/ │ │ ├── DataFoldableSpec.scala │ │ ├── FoldableArbitraryHelpers.scala │ │ └── QuineJavaScriptSpec.scala │ ├── outputs/ │ │ ├── StandingQueryOutputCodecSpec.scala │ │ └── StandingQueryOutputGenerators.scala │ ├── routes/ │ │ ├── PostToEndpointSecretParamsSpec.scala │ │ └── WriteToKafkaSecretParamsSpec.scala │ └── v2api/ │ ├── ApiUiStylingCodecSpec.scala │ ├── ApiUiStylingGenerators.scala │ ├── EndpointValidationSpec.scala │ ├── V2AlgorithmEndpointCodecSpec.scala │ ├── V2AlgorithmEndpointGenerators.scala │ ├── V2ApiCommonGenerators.scala │ ├── V2CypherCodecSpec.scala │ ├── V2CypherEndpointCodecSpec.scala │ ├── V2CypherEndpointGenerators.scala │ ├── V2DebugEndpointCodecSpec.scala │ ├── V2DebugEndpointGenerators.scala │ ├── V2IngestEndpointCodecSpec.scala │ ├── V2IngestEndpointGenerators.scala │ ├── V2QueryWebSocketFlowSpec.scala │ ├── V2QuineAdministrationEndpointCodecSpec.scala │ ├── V2QuineAdministrationEndpointGenerators.scala │ ├── V2StandingEndpointCodecSpec.scala │ └── V2StandingEndpointGenerators.scala ├── quine-browser/ │ ├── common.webpack.config.js │ ├── dev/ │ │ ├── .gitignore │ │ ├── README.md │ │ ├── index.html │ │ ├── 
package.json │ │ ├── startup.js │ │ ├── tsconfig.json │ │ └── vite.config.ts │ ├── dev.webpack.config.js │ ├── prod.webpack.config.js │ ├── src/ │ │ └── main/ │ │ ├── resources/ │ │ │ └── index.css │ │ └── scala/ │ │ └── com/ │ │ └── thatdot/ │ │ └── quine/ │ │ ├── Util.scala │ │ ├── routes/ │ │ │ ├── ClientRoutes.scala │ │ │ ├── V2WebSocketQueryClient.scala │ │ │ ├── WebSocketQueryClient.scala │ │ │ └── exts/ │ │ │ └── ClientQuineEndpoints.scala │ │ └── webapp/ │ │ ├── History.scala │ │ ├── LaminarRoot.scala │ │ ├── QuineInteractiveTS/ │ │ │ ├── _components/ │ │ │ │ ├── ConfigurationPortal.tsx │ │ │ │ ├── IngestPortal.tsx │ │ │ │ ├── QueryOutputPortal.tsx │ │ │ │ ├── StandingQueryPortal.tsx │ │ │ │ ├── _componentStyles.ts │ │ │ │ └── index.ts │ │ │ ├── _hooks/ │ │ │ │ └── useInterval.ts │ │ │ ├── _services/ │ │ │ │ ├── adminService.ts │ │ │ │ ├── index.ts │ │ │ │ ├── ingestStreamService.ts │ │ │ │ └── standingQueryService.ts │ │ │ ├── _utils/ │ │ │ │ └── api.ts │ │ │ ├── index.tsx │ │ │ └── react.d.tsx │ │ ├── QuineOssNavItems.scala │ │ ├── Styles.scala │ │ ├── Sugar.scala │ │ ├── components/ │ │ │ ├── BoxPlot.scala │ │ │ ├── ContextMenu.scala │ │ │ ├── CypherResultsTable.scala │ │ │ ├── HybridViewsRenderer.scala │ │ │ ├── Loader.scala │ │ │ ├── ManualHistogramPlot.scala │ │ │ ├── PlotOrientation.scala │ │ │ ├── Plotly.scala │ │ │ ├── PlotlyFacade.scala │ │ │ ├── RenderStrategy.scala │ │ │ ├── StoplightElements.scala │ │ │ ├── SunburstPlot.scala │ │ │ ├── ToolbarButton.scala │ │ │ ├── VisNetwork.scala │ │ │ ├── dashboard/ │ │ │ │ ├── Card.scala │ │ │ │ ├── CounterSummaryCard.scala │ │ │ │ ├── MetricsDashboard.scala │ │ │ │ ├── MetricsDashboardRenderer.scala │ │ │ │ ├── ProgressBarMeter.scala │ │ │ │ ├── ShardInfoCard.scala │ │ │ │ └── TimerSummaryCard.scala │ │ │ └── sidebar/ │ │ │ ├── CoreUISidebar.scala │ │ │ ├── NavItem.scala │ │ │ └── NavTitle.scala │ │ ├── package.scala │ │ ├── queryui/ │ │ │ ├── Counters.scala │ │ │ ├── Event.scala │ │ │ ├── 
GraphVisualization.scala │ │ │ ├── HistoryNavigationButtons.scala │ │ │ ├── MessageBar.scala │ │ │ ├── PinTracker.scala │ │ │ ├── QueryTypes.scala │ │ │ ├── QueryUi.scala │ │ │ ├── SvgSnapshot.scala │ │ │ ├── TopBar.scala │ │ │ └── VisNetworkVisualization.scala │ │ ├── router/ │ │ │ ├── AppRouter.scala │ │ │ ├── QuineOssPage.scala │ │ │ ├── QuineOssRouter.scala │ │ │ └── QuineOssRoutes.scala │ │ ├── util/ │ │ │ └── LaminarUtils.scala │ │ └── views/ │ │ ├── DocsV1View.scala │ │ ├── DocsV2View.scala │ │ ├── ExplorationUiView.scala │ │ ├── MetricsView.scala │ │ └── QuineOssViews.scala │ └── tsconfig.json ├── quine-cassandra-persistor/ │ └── src/ │ ├── main/ │ │ ├── boilerplate/ │ │ │ └── com/ │ │ │ └── thatdot/ │ │ │ └── quine/ │ │ │ └── util/ │ │ │ └── TN.scala.template │ │ └── scala/ │ │ └── com/ │ │ └── thatdot/ │ │ └── quine/ │ │ └── persistor/ │ │ └── cassandra/ │ │ ├── CassandraPersistor.scala │ │ ├── DomainGraphNodes.scala │ │ ├── DomainIndexEvents.scala │ │ ├── Journals.scala │ │ ├── MetaData.scala │ │ ├── PrimeCassandraPersistor.scala │ │ ├── QuinePatterns.scala │ │ ├── Snapshots.scala │ │ ├── StandingQueries.scala │ │ ├── StandingQueryStates.scala │ │ ├── aws/ │ │ │ ├── Journals.scala │ │ │ ├── KeyspacesPersistor.scala │ │ │ └── Snapshots.scala │ │ ├── support/ │ │ │ ├── CassandraCodecs.scala │ │ │ ├── CassandraColumn.scala │ │ │ ├── CassandraStatementSettings.scala │ │ │ ├── CassandraTable.scala │ │ │ ├── TableDefinition.scala │ │ │ └── syntax.scala │ │ └── vanilla/ │ │ ├── CassandraPersistor.scala │ │ ├── Journals.scala │ │ └── Snapshots.scala │ └── test/ │ └── scala/ │ └── com/ │ └── thatdot/ │ └── quine/ │ └── persistor/ │ ├── CassandraPersistorSpec.scala │ └── KeyspacesPersistorSpec.scala ├── quine-core/ │ └── src/ │ ├── main/ │ │ ├── fbs/ │ │ │ ├── base.fbs │ │ │ ├── cypher.fbs │ │ │ ├── domainindexevent.fbs │ │ │ ├── journal.fbs │ │ │ ├── snapshot.fbs │ │ │ ├── standingquery.fbs │ │ │ └── standingquerystates.fbs │ │ ├── resources/ │ │ │ ├── 
logback.xml │ │ │ ├── quine-pekko-overrides.conf │ │ │ └── reference.conf │ │ ├── scala/ │ │ │ └── com/ │ │ │ └── thatdot/ │ │ │ └── quine/ │ │ │ ├── exceptions/ │ │ │ │ ├── DuplicateIngestException.scala │ │ │ │ ├── FileIngestSecurityException.scala │ │ │ │ ├── JavaScriptException.scala │ │ │ │ ├── KafkaValidationException.scala │ │ │ │ ├── KinesisConfigurationError.scala │ │ │ │ ├── NamespaceNotFoundException.scala │ │ │ │ └── ShardIterationException.scala │ │ │ ├── graph/ │ │ │ │ ├── AbstractNodeActor.scala │ │ │ │ ├── AlgorithmGraph.scala │ │ │ │ ├── ApiShutdownReason.scala │ │ │ │ ├── BaseGraph.scala │ │ │ │ ├── BaseNodeActor.scala │ │ │ │ ├── BaseNodeActorView.scala │ │ │ │ ├── CypherOpsGraph.scala │ │ │ │ ├── DomainGraphNodeRegistry.scala │ │ │ │ ├── EventTime.scala │ │ │ │ ├── Expiration.scala │ │ │ │ ├── GraphNodeHashCode.scala │ │ │ │ ├── GraphNotReadyException.scala │ │ │ │ ├── GraphQueryPattern.scala │ │ │ │ ├── GraphService.scala │ │ │ │ ├── GraphShardActor.scala │ │ │ │ ├── IllegalHistoricalUpdate.scala │ │ │ │ ├── LiteralOpsGraph.scala │ │ │ │ ├── MasterStream.scala │ │ │ │ ├── NamespaceSqIndex.scala │ │ │ │ ├── NodeActor.scala │ │ │ │ ├── NodeAndShardSupervisorStrategy.scala │ │ │ │ ├── NodeEvent.scala │ │ │ │ ├── NodeSnapshot.scala │ │ │ │ ├── NodeWakeupFailedException.scala │ │ │ │ ├── QuineIdProviders.scala │ │ │ │ ├── QuineRuntimeFutureException.scala │ │ │ │ ├── StandingQueryId.scala │ │ │ │ ├── StandingQueryInfo.scala │ │ │ │ ├── StandingQueryOpsGraph.scala │ │ │ │ ├── StandingQueryResult.scala │ │ │ │ ├── StaticNodeActorSupport.scala │ │ │ │ ├── StaticNodeSupport.scala │ │ │ │ ├── StaticShardGraph.scala │ │ │ │ ├── WatchableEventType.scala │ │ │ │ ├── behavior/ │ │ │ │ │ ├── ActorClock.scala │ │ │ │ │ ├── AlgorithmBehavior.scala │ │ │ │ │ ├── CypherBehavior.scala │ │ │ │ │ ├── DomainNodeIndexBehavior.scala │ │ │ │ │ ├── DomainNodeTests.scala │ │ │ │ │ ├── GoToSleepBehavior.scala │ │ │ │ │ ├── LiteralCommandBehavior.scala │ │ │ │ │ ├── 
MultipleValuesStandingQueryBehavior.scala │ │ │ │ │ ├── PriorityStashingBehavior.scala │ │ │ │ │ ├── QuinePatternQueryBehavior.scala │ │ │ │ │ └── StandingQueryBehavior.scala │ │ │ │ ├── cypher/ │ │ │ │ │ ├── AggregationFunc.scala │ │ │ │ │ ├── CompiledExpr.scala │ │ │ │ │ ├── CompiledQuery.scala │ │ │ │ │ ├── Exception.scala │ │ │ │ │ ├── Expr.scala │ │ │ │ │ ├── Func.scala │ │ │ │ │ ├── Interpreter.scala │ │ │ │ │ ├── MultipleValuesResultsReporter.scala │ │ │ │ │ ├── MultipleValuesStandingQuery.scala │ │ │ │ │ ├── MultipleValuesStandingQueryState.scala │ │ │ │ │ ├── Proc.scala │ │ │ │ │ ├── ProcedureExecutionLocation.scala │ │ │ │ │ ├── Query.scala │ │ │ │ │ ├── QueryContext.scala │ │ │ │ │ ├── RunningCypherQuery.scala │ │ │ │ │ ├── SkipOptimizingActor.scala │ │ │ │ │ ├── Type.scala │ │ │ │ │ ├── UserDefinedFunction.scala │ │ │ │ │ ├── UserDefinedProcedure.scala │ │ │ │ │ ├── VisitedVariableEdgeMatches.scala │ │ │ │ │ └── quinepattern/ │ │ │ │ │ ├── QueryPlan.scala │ │ │ │ │ ├── QueryPlanner.scala │ │ │ │ │ ├── QueryStateBuilder.scala │ │ │ │ │ ├── QueryStateHost.scala │ │ │ │ │ ├── QuinePattern.scala │ │ │ │ │ ├── QuinePatternExpressionInterpreter.scala │ │ │ │ │ ├── QuinePatternFunction.scala │ │ │ │ │ ├── QuinePatternHelpers.scala │ │ │ │ │ └── procedures/ │ │ │ │ │ ├── GetFilteredEdgesProcedure.scala │ │ │ │ │ ├── HelpBuiltinsProcedure.scala │ │ │ │ │ ├── QuinePatternProcedure.scala │ │ │ │ │ └── RecentNodesProcedure.scala │ │ │ │ ├── edges/ │ │ │ │ │ ├── AbstractEdgeCollectionView.scala │ │ │ │ │ ├── EdgeCollection.scala │ │ │ │ │ ├── EdgeCollectionView.scala │ │ │ │ │ ├── EdgeIndex.scala │ │ │ │ │ ├── EdgeProcessor.scala │ │ │ │ │ ├── MemoryFirstEdgeProcessor.scala │ │ │ │ │ ├── PersistorFirstEdgeProcessor.scala │ │ │ │ │ ├── ReverseOrderedEdgeCollection.scala │ │ │ │ │ └── UnorderedEdgeCollection.scala │ │ │ │ ├── messaging/ │ │ │ │ │ ├── AlgorithmMessage.scala │ │ │ │ │ ├── BaseMessage.scala │ │ │ │ │ ├── CypherMessage.scala │ │ │ │ │ ├── 
ExactlyOnceAskActor.scala │ │ │ │ │ ├── ExactlyOnceAskNodeActor.scala │ │ │ │ │ ├── ExactlyOnceTimeoutException.scala │ │ │ │ │ ├── GiveUpWaiting.scala │ │ │ │ │ ├── LiteralMessage.scala │ │ │ │ │ ├── LocalShardRef.scala │ │ │ │ │ ├── NodeActorMailbox.scala │ │ │ │ │ ├── QuineIdOps.scala │ │ │ │ │ ├── QuineMessage.scala │ │ │ │ │ ├── QuineRef.scala │ │ │ │ │ ├── QuineRefOps.scala │ │ │ │ │ ├── QuineResponse.scala │ │ │ │ │ ├── ResultHandler.scala │ │ │ │ │ ├── ShardActorMailbox.scala │ │ │ │ │ ├── ShardMessage.scala │ │ │ │ │ ├── ShardRef.scala │ │ │ │ │ └── StandingQueryMessage.scala │ │ │ │ ├── metrics/ │ │ │ │ │ ├── BinaryHistogramCounter.scala │ │ │ │ │ ├── HostQuineMetrics.scala │ │ │ │ │ └── implicits.scala │ │ │ │ ├── package.scala │ │ │ │ └── quinepattern/ │ │ │ │ ├── NonNodeActor.scala │ │ │ │ ├── QuinePatternLoader.scala │ │ │ │ ├── QuinePatternOpsGraph.scala │ │ │ │ └── QuinePatternRegistry.scala │ │ │ ├── migrations/ │ │ │ │ ├── MigrationError.scala │ │ │ │ └── MigrationVersion.scala │ │ │ ├── model/ │ │ │ │ ├── DGBOps.scala │ │ │ │ ├── DomainGraphBranch.scala │ │ │ │ ├── DomainGraphNode.scala │ │ │ │ ├── DomainNodeEquiv.scala │ │ │ │ ├── EdgeDirection.scala │ │ │ │ ├── HalfEdge.scala │ │ │ │ ├── Milliseconds.scala │ │ │ │ ├── NodeComponents.scala │ │ │ │ ├── PropertyValue.scala │ │ │ │ ├── QuineIdProvider.scala │ │ │ │ ├── QuineValue.scala │ │ │ │ └── package.scala │ │ │ ├── persistor/ │ │ │ │ ├── BinaryFormat.scala │ │ │ │ ├── BloomFilteredPersistor.scala │ │ │ │ ├── EmptyPersistor.scala │ │ │ │ ├── ExceptionWrappingPersistenceAgent.scala │ │ │ │ ├── InMemoryPersistor.scala │ │ │ │ ├── IncompatibleVersion.scala │ │ │ │ ├── PackedFlatBufferBinaryFormat.scala │ │ │ │ ├── PartitionedPersistenceAgent.scala │ │ │ │ ├── PersistenceAgent.scala │ │ │ │ ├── PersistenceConfig.scala │ │ │ │ ├── PrimePersistor.scala │ │ │ │ ├── ShardedPersistor.scala │ │ │ │ ├── StatelessPrimePersistor.scala │ │ │ │ ├── UnifiedPrimePersistor.scala │ │ │ │ ├── Version.scala │ │ │ 
│ ├── WrappedPersistenceAgent.scala │ │ │ │ └── codecs/ │ │ │ │ ├── DomainGraphNodeCodec.scala │ │ │ │ ├── DomainIndexEventCodec.scala │ │ │ │ ├── MultipleValuesStandingQueryStateCodec.scala │ │ │ │ ├── NodeChangeEventCodec.scala │ │ │ │ ├── PersistenceCodec.scala │ │ │ │ ├── QueryPlanCodec.scala │ │ │ │ ├── QuineValueCodec.scala │ │ │ │ ├── SnapshotCodec.scala │ │ │ │ └── StandingQueryCodec.scala │ │ │ └── util/ │ │ │ ├── BaseError.scala │ │ │ ├── Config.scala │ │ │ ├── DeduplicationCache.scala │ │ │ ├── ExpiringLruSet.scala │ │ │ ├── Extractors.scala │ │ │ ├── FromSingleExecutionContext.scala │ │ │ ├── Funnels.scala │ │ │ ├── FutureHelpers.scala │ │ │ ├── FutureResult.scala │ │ │ ├── GraphWithContextExt.scala │ │ │ ├── Hashing.scala │ │ │ ├── InterpM.scala │ │ │ ├── Loggable.scala │ │ │ ├── LoopbackPort.scala │ │ │ ├── MonadHelpers.scala │ │ │ ├── Packing.scala │ │ │ ├── PekkoStreams.scala │ │ │ ├── Pretty.scala │ │ │ ├── ProgressCounter.scala │ │ │ ├── QuineDispatchers.scala │ │ │ ├── Retry.scala │ │ │ ├── ReverseIterator.scala │ │ │ ├── ReversibleLinkedHashSet.scala │ │ │ ├── StringInput.scala │ │ │ ├── StrongUUID.scala │ │ │ ├── Tls.scala │ │ │ ├── Valve.scala │ │ │ └── ValveFlow.scala │ │ └── scala-2.13/ │ │ └── scala/ │ │ └── compat/ │ │ └── CompatBuildFrom.scala │ └── test/ │ ├── resources/ │ │ ├── application.conf │ │ └── logback-test.xml │ └── scala/ │ └── com/ │ └── thatdot/ │ └── quine/ │ ├── graph/ │ │ ├── ArbitraryInstances.scala │ │ ├── DomainGraphNodeRegistryTest.scala │ │ ├── EventTimeTest.scala │ │ ├── GraphNodeHashCodeTest.scala │ │ ├── GraphQueryPatternTest.scala │ │ ├── HalfEdgeGen.scala │ │ ├── HistoricalQueryTests.scala │ │ ├── ScalaTestInstances.scala │ │ ├── SerializationTests.scala │ │ ├── StandingQueryResultTest.scala │ │ ├── TestDataFactory.scala │ │ ├── cypher/ │ │ │ ├── MultipleValuesResultsReporterTest.scala │ │ │ └── quinepattern/ │ │ │ ├── OptionalStateCorrectnessTest.scala │ │ │ ├── PR3981BugRegressionTest.scala │ │ │ ├── 
PropertyAccessTest.scala │ │ │ ├── QueryPlanRuntimeTest.scala │ │ │ ├── QueryPlannerTest.scala │ │ │ ├── StateInstallationTest.scala │ │ │ └── TestPropertyAccess.scala │ │ ├── edges/ │ │ │ ├── EdgeCollectionTests.scala │ │ │ ├── ReverseOrderedEdgeCollectionTests.scala │ │ │ ├── SyncEdgeCollectionTests.scala │ │ │ └── UnorderedEdgeCollectionTests.scala │ │ └── standing/ │ │ ├── AllPropertiesState.scala │ │ ├── CrossStateTests.scala │ │ ├── EdgeSubscriptionReciprocalStateTests.scala │ │ ├── FilterMapStateTests.scala │ │ ├── LabelsStateTests.scala │ │ ├── LocalIdStateTests.scala │ │ ├── LocalPropertyStateTests.scala │ │ ├── StandingQueryStateHarness.scala │ │ ├── SubscribeAcrossEdgeStateTests.scala │ │ └── UnitSqStateTests.scala │ ├── model/ │ │ ├── DomainGraphBranchTest.scala │ │ └── DomainGraphNodeTest.scala │ ├── persistor/ │ │ ├── InMemoryPersistorSpec.scala │ │ ├── InvariantWrapper.scala │ │ └── PersistenceAgentSpec.scala │ ├── test/ │ │ ├── tagobjects/ │ │ │ └── IntegrationTest.scala │ │ └── tags/ │ │ └── IntegrationTest.java │ └── util/ │ ├── HexConversionsTest.scala │ ├── LoggableTest.scala │ ├── PackingTests.scala │ ├── PrettyTests.scala │ ├── SizeAndTimeBoundedCacheTest.scala │ ├── StrongUUIDTest.scala │ └── TestLogging.scala ├── quine-cypher/ │ └── src/ │ ├── main/ │ │ ├── java/ │ │ │ └── com/ │ │ │ └── thatdot/ │ │ │ └── quine/ │ │ │ └── graph/ │ │ │ └── cypher/ │ │ │ ├── CypherUDF.java │ │ │ └── CypherUDP.java │ │ └── scala/ │ │ └── com/ │ │ └── thatdot/ │ │ └── quine/ │ │ ├── bolt/ │ │ │ ├── Protocol.scala │ │ │ ├── Serialization.scala │ │ │ └── Structure.scala │ │ ├── compiler/ │ │ │ └── cypher/ │ │ │ ├── CompM.scala │ │ │ ├── Expression.scala │ │ │ ├── Functions.scala │ │ │ ├── GetFilteredEdges.scala │ │ │ ├── Graph.scala │ │ │ ├── ParametersIndex.scala │ │ │ ├── Plan.scala │ │ │ ├── Procedures.scala │ │ │ ├── QueryPart.scala │ │ │ ├── QueryScopeInfo.scala │ │ │ ├── ReifyTime.scala │ │ │ ├── StandingQueryPatterns.scala │ │ │ ├── 
UncompiledQueryIdentity.scala │ │ │ ├── Variables.scala │ │ │ ├── WithFreeVariables.scala │ │ │ ├── WithQuery.scala │ │ │ └── package.scala │ │ └── utils/ │ │ ├── CypherLoggables.scala │ │ └── MonadVia.scala │ └── test/ │ ├── resources/ │ │ └── application.conf │ └── scala/ │ └── com/ │ └── thatdot/ │ └── quine/ │ ├── Bolt.scala │ ├── BoltSerializations.scala │ ├── CypherAggregations.scala │ ├── CypherEquality.scala │ ├── CypherErrors.scala │ ├── CypherExpressions.scala │ ├── CypherFunctions.scala │ ├── CypherLists.scala │ ├── CypherMatch.scala │ ├── CypherMatchPerformance.scala │ ├── CypherMatrix.scala │ ├── CypherMerge.scala │ ├── CypherRecursiveSubQuery.scala │ ├── CypherReturn.scala │ ├── CypherShortestPath.scala │ ├── CypherStrings.scala │ ├── CypherSubQueries.scala │ ├── CypherTemporal.scala │ ├── QueryStaticTest.scala │ ├── SkipOptimizationsTest.scala │ ├── VariableLengthRelationshipPattern.scala │ └── compiler/ │ └── cypher/ │ ├── CypherComplete.scala │ ├── CypherHarness.scala │ ├── CypherMutate.scala │ ├── HistoricalQueryTests.scala │ ├── OrderedEdgesTest.scala │ ├── SkipUninterestingNodesTest.scala │ └── StandingQueryPatternsTest.scala ├── quine-docs/ │ └── src/ │ ├── main/ │ │ └── scala/ │ │ └── com/ │ │ └── thatdot/ │ │ └── quine/ │ │ └── docs/ │ │ ├── GenerateCypherTables.scala │ │ ├── GenerateOpenApi.scala │ │ ├── GenerateOpenApiV2.scala │ │ └── GenerateRecipeDirectory.scala │ └── test/ │ └── scala/ │ └── com/ │ └── thatdot/ │ └── quine/ │ └── docs/ │ ├── GenerateOpenApiTest.scala │ └── GenerateOpenApiTestV2.scala ├── quine-endpoints/ │ └── src/ │ ├── main/ │ │ └── scala/ │ │ └── com/ │ │ └── thatdot/ │ │ └── quine/ │ │ ├── routes/ │ │ │ ├── AdministrationRoutes.scala │ │ │ ├── AlgorithmRoutes.scala │ │ │ ├── DebugOpsRoutes.scala │ │ │ ├── IngestRoutes.scala │ │ │ ├── QueryProtocol.scala │ │ │ ├── QueryUiConfigurationRoutes.scala │ │ │ ├── QueryUiRoutes.scala │ │ │ ├── QuickQuery.scala │ │ │ ├── StandingQueryRoutes.scala │ │ │ └── exts/ │ │ │ ├── 
AnySchema.scala │ │ │ ├── EndpointsWithCustomErrorText.scala │ │ │ ├── EntitiesWithExamples.scala │ │ │ └── QuineEndpoints.scala │ │ └── v2api/ │ │ └── routes/ │ │ ├── V2MetricsRoutes.scala │ │ ├── V2QueryUiConfigurationRoutes.scala │ │ └── V2QueryUiRoutes.scala │ └── test/ │ └── scala/ │ └── com/ │ └── thatdot/ │ └── quine/ │ └── routes/ │ └── AwsSchemaSpec.scala ├── quine-endpoints2/ │ └── src/ │ └── main/ │ └── scala/ │ └── com/ │ └── thatdot/ │ └── api/ │ └── v2/ │ ├── QueryWebSocketProtocol.scala │ ├── TapirCirceUnifiedConfig.scala │ └── TypeDiscriminatorConfig.scala ├── quine-gremlin/ │ └── src/ │ ├── main/ │ │ └── scala/ │ │ └── com/ │ │ └── thatdot/ │ │ └── quine/ │ │ └── gremlin/ │ │ ├── Exceptions.scala │ │ ├── GremlinLexer.scala │ │ ├── GremlinParser.scala │ │ ├── GremlinQueryRunner.scala │ │ ├── GremlinTypes.scala │ │ ├── GremlinValue.scala │ │ └── package.scala │ └── test/ │ ├── application.conf │ └── scala/ │ └── com/ │ └── thatdot/ │ └── quine/ │ └── gremlin/ │ ├── ErrorMessages.scala │ ├── GremlinHarness.scala │ └── SimpleQueries.scala ├── quine-language/ │ └── src/ │ ├── main/ │ │ ├── antlr4/ │ │ │ └── Cypher.g4 │ │ ├── java/ │ │ │ └── com/ │ │ │ └── thatdot/ │ │ │ └── quine/ │ │ │ └── language/ │ │ │ ├── server/ │ │ │ │ ├── QuineLanguageServer.java │ │ │ │ └── QuineTextDocumentService.java │ │ │ └── testclient/ │ │ │ └── QuineLanguageClient.java │ │ └── scala/ │ │ └── com/ │ │ └── thatdot/ │ │ └── quine/ │ │ ├── cypher/ │ │ │ ├── CollectingErrorListener.scala │ │ │ ├── ast/ │ │ │ │ └── AST.scala │ │ │ ├── phases/ │ │ │ │ ├── LexerPhase.scala │ │ │ │ ├── MaterializationPhase.scala │ │ │ │ ├── ParserPhase.scala │ │ │ │ └── SymbolAnalysis.scala │ │ │ ├── utils/ │ │ │ │ └── Helpers.scala │ │ │ └── visitors/ │ │ │ ├── ast/ │ │ │ │ ├── CreateVisitor.scala │ │ │ │ ├── EffectVisitor.scala │ │ │ │ ├── ForeachVisitor.scala │ │ │ │ ├── InQueryCallVisitor.scala │ │ │ │ ├── MatchClauseVisitor.scala │ │ │ │ ├── MultiPartQueryVisitor.scala │ │ │ │ ├── 
NodeLabelVisitor.scala │ │ │ │ ├── PatternVisitor.scala │ │ │ │ ├── ProjectionBodyVisitor.scala │ │ │ │ ├── ProjectionItemVisitor.scala │ │ │ │ ├── QueryVisitor.scala │ │ │ │ ├── ReadingClauseVisitor.scala │ │ │ │ ├── RegularQueryVisitor.scala │ │ │ │ ├── ReturnVisitor.scala │ │ │ │ ├── SetItemVisitor.scala │ │ │ │ ├── SinglePartQueryVisitor.scala │ │ │ │ ├── SingleQueryVisitor.scala │ │ │ │ ├── UnwindClauseVisitor.scala │ │ │ │ ├── UpdatingClauseVisitor.scala │ │ │ │ ├── WhereClauseVisitor.scala │ │ │ │ ├── WithVisitor.scala │ │ │ │ ├── expressions/ │ │ │ │ │ ├── AddSubtractVisitor.scala │ │ │ │ │ ├── AndVisitor.scala │ │ │ │ │ ├── AtomVisitor.scala │ │ │ │ │ ├── ComparisonVisitor.scala │ │ │ │ │ ├── DoubleVisitor.scala │ │ │ │ │ ├── ExpressionVisitor.scala │ │ │ │ │ ├── FunctionInvocationVisitor.scala │ │ │ │ │ ├── IntegerVisitor.scala │ │ │ │ │ ├── LiteralVisitor.scala │ │ │ │ │ ├── MapLiteralVisitor.scala │ │ │ │ │ ├── MultiplyDivideModuloVisitor.scala │ │ │ │ │ ├── NonArithmeticOperatorVisitor.scala │ │ │ │ │ ├── NumberVisitor.scala │ │ │ │ │ ├── OrExpressionVisitor.scala │ │ │ │ │ ├── ParameterVisitor.scala │ │ │ │ │ ├── PartialComparisonVisitor.scala │ │ │ │ │ ├── PowerOfVisitor.scala │ │ │ │ │ ├── PropertyVisitor.scala │ │ │ │ │ ├── StringListNullVisitor.scala │ │ │ │ │ ├── UnaryAddSubtractVisitor.scala │ │ │ │ │ ├── VariableVisitor.scala │ │ │ │ │ └── XorVisitor.scala │ │ │ │ └── patterns/ │ │ │ │ ├── AnonymousPatternPartVisitor.scala │ │ │ │ ├── MatchPatternVisitor.scala │ │ │ │ ├── NodePatternVisitor.scala │ │ │ │ ├── PatternElementChainVisitor.scala │ │ │ │ ├── PatternElementVisitor.scala │ │ │ │ ├── PatternExpVisitor.scala │ │ │ │ ├── PatternPartVisitor.scala │ │ │ │ └── RelationshipPatternVisitor.scala │ │ │ └── semantic/ │ │ │ ├── AddSubtractVisitor.scala │ │ │ ├── AndVisitor.scala │ │ │ ├── AnonymousPatternPartVisitor.scala │ │ │ ├── AtomVisitor.scala │ │ │ ├── ComparisonVisitor.scala │ │ │ ├── CreateVisitor.scala │ │ │ ├── DoubleVisitor.scala │ │ │ 
├── ExpressionVisitor.scala │ │ │ ├── FunctionInvocationVisitor.scala │ │ │ ├── InQueryCallVisitor.scala │ │ │ ├── IntegerVisitor.scala │ │ │ ├── LiteralVisitor.scala │ │ │ ├── MapLiteralVisitor.scala │ │ │ ├── MatchClauseVisitor.scala │ │ │ ├── MultiPartQueryVisitor.scala │ │ │ ├── MultiplyDivideModuloVisitor.scala │ │ │ ├── NodeLabelVisitor.scala │ │ │ ├── NodePatternVisitor.scala │ │ │ ├── NonArithmeticOperatorVisitor.scala │ │ │ ├── NotVisitor.scala │ │ │ ├── NumberVisitor.scala │ │ │ ├── OrExpressionVisitor.scala │ │ │ ├── ParameterVisitor.scala │ │ │ ├── PartialComparisonVisitor.scala │ │ │ ├── PatternElementChainVisitor.scala │ │ │ ├── PatternElementVisitor.scala │ │ │ ├── PatternPartVisitor.scala │ │ │ ├── PatternVisitor.scala │ │ │ ├── PowerOfVisitor.scala │ │ │ ├── ProjectionBodyVisitor.scala │ │ │ ├── ProjectionItemVisitor.scala │ │ │ ├── PropertyExpressionVisitor.scala │ │ │ ├── QueryVisitor.scala │ │ │ ├── ReadingClauseVisitor.scala │ │ │ ├── RegularQueryVisitor.scala │ │ │ ├── RelationshipPatternVisitor.scala │ │ │ ├── ReturnVisitor.scala │ │ │ ├── SetItemVisitor.scala │ │ │ ├── SinglePartQueryVisitor.scala │ │ │ ├── SingleQueryVisitor.scala │ │ │ ├── StringListNullVisitor.scala │ │ │ ├── UnaryAddOrSubtractVisitor.scala │ │ │ ├── UnwindClauseVisitor.scala │ │ │ ├── UpdatingClauseVisitor.scala │ │ │ ├── VariableVisitor.scala │ │ │ ├── WhereVisitor.scala │ │ │ └── XorVisitor.scala │ │ └── language/ │ │ ├── Cypher.scala │ │ ├── ast/ │ │ │ └── AST.scala │ │ ├── diagnostic/ │ │ │ └── Diagnostic.scala │ │ ├── domain/ │ │ │ └── Graph.scala │ │ ├── phases/ │ │ │ ├── CompilerPhase.scala │ │ │ ├── CompilerState.scala │ │ │ ├── Phase.scala │ │ │ ├── TypeCheckingPhase.scala │ │ │ └── Upgrade.scala │ │ ├── prettyprint/ │ │ │ ├── ASTInstances.scala │ │ │ ├── BaseInstances.scala │ │ │ ├── CypherASTInstances.scala │ │ │ ├── PrettyPrint.scala │ │ │ ├── ResultInstances.scala │ │ │ ├── SymbolAnalysisInstances.scala │ │ │ ├── TypeInstances.scala │ │ │ └── package.scala │ 
│ ├── semantic/ │ │ │ └── Semantics.scala │ │ ├── server/ │ │ │ ├── ContextAwareLanguageService.scala │ │ │ ├── Helpers.scala │ │ │ └── SimpleTrie.scala │ │ ├── testclient/ │ │ │ ├── QuineLanguageClient.scala │ │ │ └── TestProgram.scala │ │ └── types/ │ │ └── Type.scala │ └── test/ │ └── scala/ │ └── com/ │ └── thatdot/ │ └── quine/ │ ├── cypher/ │ │ ├── phases/ │ │ │ ├── LexerPhaseTest.scala │ │ │ ├── ParserPhaseTest.scala │ │ │ └── PhaseCompositionTest.scala │ │ └── visitors/ │ │ └── ast/ │ │ ├── AddSubtractVisitorTests.scala │ │ ├── AndVisitorTests.scala │ │ ├── AtomVisitorTests.scala │ │ ├── ComparisonVisitorTests.scala │ │ ├── DoubleVisitorTests.scala │ │ ├── ExpressionVisitorTests.scala │ │ ├── IntegerVisitorTests.scala │ │ ├── LiteralVisitorTests.scala │ │ ├── MapLiteralVisitorTests.scala │ │ ├── MultiplyDivideModuloVisitorTests.scala │ │ ├── NonArithmeticOperatorVisitorTests.scala │ │ ├── NumberVisitorTests.scala │ │ ├── OrExpressionVisitorTests.scala │ │ ├── ParameterVisitorTests.scala │ │ ├── PartialComparisonVisitorTests.scala │ │ ├── PowerOfVisitorTests.scala │ │ ├── PropertyVisitorTests.scala │ │ ├── UnaryAddSubtractVisitorTests.scala │ │ └── XorVisitorTests.scala │ └── language/ │ ├── diagnostic/ │ │ └── DiagnosticTest.scala │ ├── parser/ │ │ └── ParserTests.scala │ ├── phases/ │ │ ├── AlphaRenamingTests.scala │ │ ├── MaterializationTests.scala │ │ ├── PipelineExplorer.scala │ │ └── SymbolAnalysisTests.scala │ ├── prettyprint/ │ │ └── PrettyPrintTest.scala │ ├── semantics/ │ │ └── SemanticAnalysisTests.scala │ ├── server/ │ │ ├── ContextAwareLanguageServiceTest.scala │ │ ├── LanguageServerHelper.scala │ │ ├── QuineLanguageServerTest.scala │ │ ├── QuineTextDocumentServiceTest.scala │ │ └── TextDocumentServiceHelper.scala │ └── types/ │ ├── GraphElementTypeTests.scala │ ├── TypeCheckerTests.scala │ ├── TypeEntryDuplicateTest.scala │ └── TypeSystemTest.scala ├── quine-mapdb-persistor/ │ └── src/ │ ├── main/ │ │ └── scala/ │ │ └── com/ │ │ └── thatdot/ │ 
│ └── quine/ │ │ └── persistor/ │ │ ├── MapDbGlobalPersistor.scala │ │ └── MapDbPersistor.scala │ └── test/ │ └── scala/ │ └── com/ │ └── thatdot/ │ └── quine/ │ └── persistor/ │ ├── MapDbPersistorSpec.scala │ └── MapDbPersistorTests.scala ├── quine-rocksdb-persistor/ │ └── src/ │ ├── main/ │ │ └── scala/ │ │ └── com/ │ │ └── thatdot/ │ │ └── quine/ │ │ └── persistor/ │ │ ├── RocksDbPersistor.scala │ │ └── RocksDbPrimePersistor.scala │ └── test/ │ └── scala/ │ └── com/ │ └── thatdot/ │ └── quine/ │ └── persistor/ │ ├── RocksDbKeyEncodingTest.scala │ ├── RocksDbPersistorSpec.scala │ └── RocksDbPersistorTests.scala ├── quine-serialization/ │ └── src/ │ └── main/ │ └── scala/ │ └── com/ │ └── thatdot/ │ └── quine/ │ └── serialization/ │ ├── AvroSchemaCache.scala │ ├── EncoderDecoder.scala │ ├── ProtobufSchemaCache.scala │ ├── QuineValueToProtobuf.scala │ ├── SchemaError.scala │ └── data/ │ ├── QuineSerializationFoldablesFrom.scala │ └── QuineSerializationFoldersTo.scala ├── visnetwork-facade/ │ └── src/ │ └── main/ │ └── scala/ │ └── com/ │ └── thatdot/ │ └── visnetwork/ │ ├── DataSet.scala │ ├── Events.scala │ ├── Network.scala │ └── package.scala └── vite-shared/ ├── base.config.ts ├── fixtures/ │ ├── metrics.ts │ ├── query-results.ts │ └── ui-config.ts ├── index.ts ├── package.json ├── plugins/ │ ├── mock-api-factory.ts │ └── serve-scalajs-bundle.ts ├── tsconfig.json └── utils/ └── mime-types.ts ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: '' labels: '' assignees: '' --- **Describe the bug** A clear and concise description of what the bug is. **To Reproduce** Steps to reproduce the behavior: 1. Go to '...' 2. Click on '....' 3. Scroll down to '....' 4. 
See error **Expected behavior** A clear and concise description of what you expected to happen. **Screenshots** If applicable, add screenshots to help explain your problem. **Desktop (please complete the following information):** - OS: [e.g. iOS] - Browser [e.g. chrome, safari] - Version [e.g. 22] **Smartphone (please complete the following information):** - Device: [e.g. iPhone6] - OS: [e.g. iOS8.1] - Browser [e.g. stock browser, safari] - Version [e.g. 22] **Additional context** Add any other context about the problem here. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: '' labels: '' assignees: '' --- **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] **Describe the solution you'd like** A clear and concise description of what you want to happen. **Describe alternatives you've considered** A clear and concise description of any alternative solutions or features you've considered. **Additional context** Add any other context or screenshots about the feature request here. ================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ # Description Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change. Fixes # (issue) ## Type of change Please delete options that are not relevant. - [ ] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] This change requires a documentation update # How Has This Been Tested? 
Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration - [ ] Test A - [ ] Test B **Test Configuration**: * Firmware version: * Hardware: * Toolchain: * SDK: # Checklist: - [ ] My code follows the style guidelines of this project - [ ] I have performed a self-review of my own code - [ ] I have commented my code, particularly in hard-to-understand areas - [ ] I have made corresponding changes to the documentation - [ ] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] New and existing unit tests pass locally with my changes - [ ] Any dependent changes have been merged and published in downstream modules ================================================ FILE: .github/actions/notify-slack-on-failure/action.yml ================================================ name: Notify Slack on CI Failure description: Posts a failure notification to Slack via incoming webhook inputs: job-name: description: Human-readable name of the failed job required: true webhook-url: description: Slack incoming webhook URL required: true runs: using: composite steps: - name: Notify Slack uses: slackapi/slack-github-action@v2.1.0 with: webhook: ${{ inputs.webhook-url }} webhook-type: incoming-webhook payload: | { "text": "CI ${{ inputs.job-name }} failed on ${{ github.repository }}", "blocks": [ { "type": "section", "text": { "type": "mrkdwn", "text": ":red_circle: *CI ${{ inputs.job-name }} failed*\n*Repo:* ${{ github.repository }}\n*Branch:* main\n*Commit:* <${{ github.server_url }}/${{ github.repository }}/commit/${{ github.sha }}|${{ github.sha }}>\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View workflow run>" } } ] } ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI 
concurrency: group: ci-${{ github.head_ref }} cancel-in-progress: true on: push: branches: - main pull_request: workflow_dispatch: env: JAVA_OPTS: >- -Xms4096M -Xmx4096M -Xss6M -Dfile.encoding=UTF-8 --add-opens java.base/java.lang=ALL-UNNAMED jobs: test_scala: name: Test runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions/cache@v3 name: Cache Embedded Cassandra with: path: | ~/.embedded-cassandra key: cassandra-${{ hashFiles('**/*.sbt') }}-${{ hashFiles('project/**') }} - uses: coursier/cache-action@v6 with: extraKey: '2.13' - uses: actions/setup-java@v4 with: distribution: 'temurin' java-version: '21' - name: Set up scala uses: sbt/setup-sbt@159bc2bcdce6cc8f23f9faa80a0efc07632b17b9 - run: sbt -v test quine/assembly quine-docs/generateDocs 'scalafixAll --check' - name: Notify Slack on failure if: failure() && github.ref == 'refs/heads/main' uses: ./.github/actions/notify-slack-on-failure with: job-name: Test webhook-url: ${{ secrets.SLACK_WEBHOOK_URL }} scalafmt: name: Scalafmt runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: coursier/cache-action@v6 with: extraKey: 'fmt' - uses: actions/setup-java@v4 with: distribution: 'temurin' java-version: '21' - name: Set up scala uses: sbt/setup-sbt@159bc2bcdce6cc8f23f9faa80a0efc07632b17b9 - run: sbt -v scalafmtCheckAll scalafmtSbtCheck - name: Notify Slack on failure if: failure() && github.ref == 'refs/heads/main' uses: ./.github/actions/notify-slack-on-failure with: job-name: Scalafmt webhook-url: ${{ secrets.SLACK_WEBHOOK_URL }} ================================================ FILE: .github/workflows/copy.bara.sky ================================================ SOT_REPO = "git@github.com:thatdot/quine-plus.git" SOT_BRANCH = "main" DESTINATION_REPO = "git@github.com:thatdot/quine.git" DESTINATION_BRANCH = "main" COMMITTER = "thatbot-copy[bot] <98922356+thatbot-copy[bot]@users.noreply.github.com>" LOCAL_SOT = "file:///usr/src/app" PROJECT_LEVEL_INCLUDE = [ "build.properties", 
"Dependencies.scala", "dependencySchemes.sbt", "FlatcPlugin.scala", "Packaging.scala", "GitVersion.scala", "Docker.scala", "Ecr.scala", "ParadoxThatdot.scala", "plugins.sbt", "QuineSettings.scala", "ScalaFix.scala" ] PUSH_INCLUDE = [ "public/**", ".github/workflows/copybara.yml", ".github/workflows/copy.bara.sky", ".scalafix.conf", ".scalafmt.conf", ".gitignore", ] + ["project/" + f for f in PROJECT_LEVEL_INCLUDE] PUSH_EXCLUDE = [] PUSH_TRANSFORMATIONS = [ ] PR_INCLUDE = ["**"] PR_EXCLUDE = [] PR_TRANSFORMATIONS = [ core.move("", "public", paths = glob(["**"])), core.move("public/.github/workflows/", ".github/workflows/", paths = glob(["copybara.yml", "copy.bara.sky"])), core.move("public/.scalafix.conf", ".scalafix.conf"), core.move("public/.scalafmt.conf", ".scalafmt.conf"), core.move("public/.gitignore", ".gitignore"), core.move("public/project/", "project/", paths = glob(PROJECT_LEVEL_INCLUDE)), ] SCRUB_MESSAGE = [ # Replace anything beginning "ENTERPRISE:" (until "PUBLIC:" if present, or else to the end of the message with \z) metadata.scrubber("ENTERPRISE:\\s(?:.|\n)*?(?:PUBLIC:\\s|\\z)"), # Best effort to remove references to internal PRs that will be dead links publicly metadata.scrubber(" \\(#\\d+\\)$"), # remove any QU-XXXX numbers on their own lines (case insensitive) metadata.scrubber("^[\\r\\f ]*[qQ][uU]-\\d+[\\r\\f ]*\\n"), ] def cancel_after_frozen(ctx): ctx.console.verbose("TODO add a way to freeze private copies of PRs") if False: return ctx.console.error("Internal copy of PR is write-protected") else: return ctx.success() # Push workflow core.workflow( name = "push", origin = git.origin( url = LOCAL_SOT if LOCAL_SOT else SOT_REPO, ref = SOT_BRANCH, ), destination = git.github_destination( url = DESTINATION_REPO, push = DESTINATION_BRANCH, ), origin_files = glob(PUSH_INCLUDE, exclude = PUSH_EXCLUDE), authoring = authoring.pass_thru(default = COMMITTER), mode = "ITERATIVE", transformations = SCRUB_MESSAGE + [ 
metadata.restore_author("ORIGINAL_AUTHOR", search_all_changes = True), metadata.expose_label("COPYBARA_INTEGRATE_REVIEW"), ] + (PUSH_TRANSFORMATIONS if PUSH_TRANSFORMATIONS else core.reverse(PR_TRANSFORMATIONS)), ) # Init workflow core.workflow( name = "initialize", origin = git.origin( url = LOCAL_SOT if LOCAL_SOT else SOT_REPO, ref = SOT_BRANCH, ), destination = git.github_destination( url = DESTINATION_REPO, push = DESTINATION_BRANCH, ), origin_files = glob(PUSH_INCLUDE, exclude = PUSH_EXCLUDE), authoring = authoring.pass_thru(default = COMMITTER), mode = "SQUASH", transformations = [metadata.use_last_change()] + core.reverse(PR_TRANSFORMATIONS), ) # Pull Request workflow core.workflow( name = "pr", origin = git.github_pr_origin( # NB will not accept PRs with submodules url = DESTINATION_REPO, branch = DESTINATION_BRANCH, ), destination = git.github_pr_destination( url = SOT_REPO, destination_ref = SOT_BRANCH, integrates = [], ), destination_files = glob(PUSH_INCLUDE, exclude = PUSH_EXCLUDE), origin_files = glob(PR_INCLUDE if PR_INCLUDE else ["**"], exclude = PR_EXCLUDE), authoring = authoring.pass_thru(default = COMMITTER), mode = "CHANGE_REQUEST", set_rev_id = False, transformations = [ cancel_after_frozen, metadata.save_author("ORIGINAL_AUTHOR"), metadata.expose_label("GITHUB_PR_NUMBER", new_name = "Closes", separator = DESTINATION_REPO.replace("git@github.com:", " ").replace(".git", "#")), ] + PR_TRANSFORMATIONS, ) ================================================ FILE: .github/workflows/copybara.yml ================================================ name: Copy Commits to thatdot/quine Repo on: push: branches: - main pull_request_target: workflow_dispatch: inputs: copybaraArgs: description: "Arguments to be passed to the copybara agent" required: false default: "" type: string copybaraWorkflow: description: "Which copybara action to run" required: false type: choice options: - "" - initialize - push - pr jobs: clone-code: runs-on: ubuntu-latest steps: - name: 
Checkout uses: actions/checkout@v6 with: fetch-depth: 0 - name: Cache Docker registry uses: actions/cache@v5 with: path: /tmp/docker-registry key: universal - name: Generate token id: generate-token uses: tibdex/github-app-token@3beb63f4bd073e61482598c45c71c1019b59b73a # v2.1.0 (archived) with: app_id: 169359 # thatBot Copy Application on thatBot account private_key: ${{ secrets.THATBOT_COPY_KEY }} - name: Update cached Copybara docker image run: | docker run -d -p 5000:5000 --restart=always --name registry -v /tmp/docker-registry:/var/lib/registry registry:2 && npx wait-on tcp:5000 docker pull localhost:5000/copybara:latest || docker pull public.ecr.aws/p0a2o6c9/copybara:latest docker tag public.ecr.aws/p0a2o6c9/copybara:latest localhost:5000/copybara:latest && docker push localhost:5000/copybara:latest || true - name: Run Copybara uses: thatdot/copybara-action@73152945ea4bb6c57b3d68e74787bdc1f80392ab # main@2024-05-13 with: copybara_image: localhost:5000/copybara # thatdot-managed copy of olivr/copybara (itself a build of https://github.com/google/copybara/blob/master/Dockerfile) copybara_image_tag: latest custom_config: ".github/workflows/copy.bara.sky" copybara_options: ${{ github.event.inputs.copybaraArgs }} workflow: ${{ github.event.inputs.copybaraWorkflow }} ssh_key: ${{ secrets.COPYBARA_SSH_KEY }} access_token: ${{ steps.generate-token.outputs.token }} committer: "thatbot-copy[bot] <98922356+thatbot-copy[bot]@users.noreply.github.com>" # INV: should match COMMITTER in copy.bara.sky -- this one is used for the commits, the one in there is used as a default author sot_branch: main sot_repo: thatdot/quine-plus destination_branch: main destination_repo: thatdot/quine ================================================ FILE: .gitignore ================================================ *.db *.data *.class # likely binaries quine-*.jar novelty-*.jar # local settings .java-version .jvmopts local.sbt local.conf **/secret.conf # bloop and metals .bloop .bsp # metals 
project/metals.sbt .metals # vs code .vscode # sbt project/project/ project/target/ target/ .sbtopts # virtual machine crash logs (http://www.java.com/en/download/help/error_hotspot.xml) hs_err_pid* replay_pid* # eclipse build/ .classpath .project .settings .worksheet bin/ .cache # intellij idea *.iml *.ipr *.iws .idea # mac .DS_Store # python test public/quine/src/test/resources/ingest_test_script/venv/ quine/src/test/resources/ingest_test_script/venv/ # file mirroring management public/project/ # vim *.swp *.swo # Snyk .dccache # run/test outputs metrics-logs # exclusions !/scripts/build/ !/lib/dasho-annotations.jar # SBOM files *.bom.json # Playwright MCP Artifacts .playwright-mcp # npm/node (Vite dev workspaces - development only) node_modules/ /package-lock.json .vite ================================================ FILE: .scalafix.conf ================================================ // .scalafix.conf rules = [ OrganizeImports ExplicitResultTypes LeakingImplicitClassVal DisableSyntax // "github:ohze/scalafix-rules/FinalObject" ] OrganizeImports { groupedImports = AggressiveMerge groups = [ "re:javax?\\.", // a re: prefix denotes a regex, this will group java. and javax. packages together "scala.", "org.apache.pekko.", "*", "com.thatdot." ] } // Prohibit auto-derivation imports that bypass explicit codec configuration. // https://docs.google.com/document/d/1E5MaCuRZ4F1wCx3lI9FmZFIYC8Ov5TFU8DBMj-THAsk/ // explains why we insist on explicit codec configuration. DisableSyntax.regex = [ { id = "noCirceAuto" pattern = "import io\\.circe\\.generic.*\\.auto\\..*" message = "Prohibited: `io.circe.generic.[extras.]auto` is too slow and problematic. Use `io.circe.generic.[extras.]semiauto.derive[Configured](De|En)coder` instead (probably with `V2ApiConfiguration.typeDiscriminatorConfig.asCirce` in scope)." }, { id = "noTapirAuto" pattern = "import sttp\\.tapir\\.generic\\.auto\\..*" message = "Prohibited: `sttp.tapir.generic.auto` is too slow and problematic. 
Use explicit `Schema.derived` or `deriveSchema` with proper configuration." }, { id = "noPureconfigAuto" pattern = "import pureconfig\\.generic\\.auto\\._" message = "Prohibited: `pureconfig.generic.auto._` causes shapeless macro explosion. Use `pureconfig.generic.semiauto.deriveConvert` with explicit ConfigConvert instances for nested types." } ] ================================================ FILE: .scalafmt.conf ================================================ version = 2.7.5 // scala-steward:off // Additional style conventions not enforced by scalafmt: // - mark `case class`es and `case object`s `final` wherever possible // - prefer `sealed abstract class` over `sealed trait` wherever possible // - when in doubt, https://nrinaudo.github.io/scala-best-practices/ has sensible recommendations maxColumn = 120 align.preset = none continuationIndent { callSite = 2 defnSite = 2 ctorSite = 2 } newlines.afterCurlyLambda = preserve literals.float = Upper literals.hexDigits = Upper trailingCommas = always rewrite.rules = [ RedundantBraces, RedundantParens, SortModifiers, PreferCurlyFors, ] unindentTopLevelOperators = true indentOperator.preset = akka project.excludeFilters=[".*/com/thatdot/quine/app/util/OpenApiRenderer\\.scala"] ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Code of Conduct ## Code of Conduct for the Quine Community thatDot is dedicated to providing the best community possible for the Quine community. Our goal is to provide the opportunity for community participants to learn, communicate, contribute and collaborate. The Community Code of Conduct governs how we all participate and behave. As such we are committed to creating a diverse, harassment-free experience for everyone, regardless of gender, sexual orientation, disability, physical appearance, body size, race, or religion. We do not tolerate harassment of community participants in any form. 
Any form of written, social or verbal communication that can be offensive or harassing to any community member, participant or staff is not allowed. Community participants violating these rules may be sanctioned or expelled from the community. ## Expectations for All Community Members ### Be kind. All community participants should feel welcome, regardless of their personal background. Please be polite, courteous, and considerate to fellow participants. No offensive comments regarding gender, sexual orientation, disability, physical appearance, body size, race, or religion will be tolerated. ### Be respectful. We expect all participants to be respectful when communicating with other participants, even when differences of opinion arise. Participants are expected to work together to resolve disagreements constructively and respectfully. Disagreement is no excuse for poor manners. Please be patient. ### Reach out and ask for help. Please inform our community operator or forum moderator if you feel a violation has taken place and our staff will address the situation. Ask questions if you are unsure and be helpful to those who ask. You can also contact [community@quine.io](mailto:community@quine.io). ### Communicate and collaborate. The concept of the community is based on working together and participants will gain the most from the community by actively participating and communicating effectively. As such, we encourage collaboration and communication as long as they are conducted in a positive and constructive way. ### Continue. This list is not exhaustive or complete. Please use your own good judgement on proper behavior and contribute to a productive and pleasant community experience. ## How To Report Inappropriate Behavior If a community participant engages in harassing behavior, community staff may take any action they consider appropriate, including expulsion from the community. 
If you are being harassed or know that someone else is being harassed, please inform our community staff immediately by contacting [community@quine.io](mailto:community@quine.io). We expect participants to abide by these rules at all community-related activities. Thank you for your cooperation. ## Privacy Policy We understand that privacy is important to our community participants and users of these products and services. Our [privacy policy](https://www.thatdot.com/privacy/) explains how we collect, use, share, and protect personal information. ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing The community is the heart of all open-source projects. We welcome contributions from all people and strive to build a welcoming and open community of contributors, users, participants, speakers, lurkers, and anyone else who comes by. ## Code of Conduct All community members must be good citizens; be sure to read the [Code of Conduct](https://github.com/streaminggraph/recipes/blob/main/code-of-conduct.md) page to understand what this means. ## Contributing Code Code contributions can be made through GitHub. We welcome all contributors and any improvements to Quine, the website, recipes, etc. ## Contribution License All contributions to the Quine repository are released under the same license as the Quine project overall. For details, see the license in the GitHub repository. ================================================ FILE: LICENSE ================================================ MIT License with Commons Clause Copyright © 2014 Ryan Wright; © 2019 thatDot, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. "Commons Clause" License Condition v1.0 The Software is provided to you by the Licensor under the License, as defined below, subject to the following condition. Without limiting other conditions in the License, the grant of rights under the License will not include, and the License does not grant to you, right to Sell the Software. For purposes of the foregoing, "Sell" means practicing any or all of the rights granted to you under the License to provide to third parties, for a fee or other consideration (including without limitation fees for hosting or consulting/support services related to the Software), a product or service whose value derives, entirely or substantially, from the functionality of the Software. Any license notice or attribution required by the License must also include this Commons Cause License Condition notice. Software: Quine License: MIT Licensor: thatDot, Inc. 
================================================ FILE: README.md ================================================ ![Build and Test](https://github.com/thatdot/quine/workflows/CI/badge.svg) [![GitHub Release](https://img.shields.io/github/v/release/thatdot/quine)](https://github.com/thatdot/quine/releases) [![Docker Pulls](https://img.shields.io/docker/pulls/thatdot/quine)](https://hub.docker.com/r/thatdot/quine) [![slack](https://img.shields.io/badge/slack-Quine-brightgreen.svg?logo=slack)](https://that.re/quine-slack)

Quine is a streaming graph interpreter: a server-side program that consumes data, builds it into a stateful graph structure, and runs live computation on that graph to answer questions or compute results. Those results stream out in real time.

You interact with Quine by connecting it to a data source (Kafka, Kinesis Data Stream, SQS, files, stdin, etc.) and using regular database queries to stream that data in, build the graph structure, and find important patterns. Three design choices define Quine, setting it apart from all event stream processing systems: 1. A graph-structured data model 2. An asynchronous actor-based graph computational model 3. Standing queries Standing queries live inside the graph and automatically propagate the incremental results computed from both historical data and new streaming data. Once matches are found, standing queries trigger actions using those results (e.g., execute code, transform other data in the graph, publish data to another system like Apache Kafka or Kinesis). ![](https://uploads-ssl.webflow.com/61f0aecf55af2565526f6a95/62d8b7a7a13f0ca333a8b115_R9g-L0bLE2nguGQ3BRektSDq1d4L9Gtzao1fK3wuwgkX_iGkcgtGYlOR2u3p6DsWbrIrZbUPY6VtLULwj2BoIO2-gVUngIcrk-z-9H3u7a6QPIM7sqBRrkatR1YxA7WLR5CuvP3ZCo6JypuAWww23g.png) All together, Quine can: * Consume high-volume streaming event data * Convert it into durable, versioned, connected data * Monitor that connected data for complex structures or values * Trigger arbitrary computation on the event of each match This collection of capabilities is profoundly powerful! It represents a complete system for stateful event-driven arbitrary computation in a platform scalable to any size of data or desired throughput. Read the docs at [quine.io](https://quine.io) to learn more. 
## Building from source In order to build Quine locally, you'll need to have the following installed: * A recent version of the Java Development Kit (17 or newer) * The [`sbt` build tool](https://www.scala-sbt.org/download.html) * Yarn 0.22.0+ (for frontend components of `quine-browser` subproject) Then: ``` sbt compile # compile all projects sbt test # compile and run all projects' tests sbt fixall # reformat and lint all source files sbt quine/run # to build and run Quine sbt quine/assembly # assemble Quine into a jar ``` ## Launch Quine: Run Quine from an executable `.jar` file built from this repo or downloaded from the repo [releases](https://github.com/thatdot/quine/releases) page. ```shell ❯ java -jar quine-x.x.x.jar -h Quine universal program Usage: quine [options] -W, --disable-web-service disable Quine web service -p, --port web service port (default is 8080) -r, --recipe name, file, or URL follow the specified recipe -x, --recipe-value key=value recipe parameter substitution --force-config disable recipe configuration defaults --no-delete disable deleting data file when process exits -h, --help -v, --version print Quine program version ``` For example, to run the [Wikipedia page ingest](https://quine.io/recipes/wikipedia/) getting started recipe: ``` shell ❯ java -jar quine-x.x.x.jar -r wikipedia ``` With Docker installed, run Quine from Docker Hub. ``` shell ❯ docker run -p 8080:8080 thatdot/quine ``` The [quick start](https://quine.io/getting-started/quick-start/) guide will get you up and running the first time, ingesting data, and submitting your first query. ## Quine Recipes Quine recipes are a great way to get started developing with Quine. A recipe is a document that contains all the information necessary for Quine to execute any data processing task. Ingest data from batch sources like `.json` or `.csv` files hosted locally, or connect to streaming data sources from Kafka or Kinesis. 
[Recipes](https://quine.io/components/recipe-ref-manual/) are `yaml` documents containing the configuration for components including:

* [Ingest Streams](https://quine.io/components/ingest-sources/) to read streaming data from sources and update graph data
* [Standing Queries](https://quine.io/components/standing-queries/) to transform graph data, and to produce aggregates and other outputs
* UI configuration to specialize the web user interface for the use-case that is the subject of the Recipe

Please see [Quine's Recipe repository](https://quine.io/recipes/) for a list of available Recipes. Or create your own and contribute it back to the community for others to use.

## Contributing to Quine

The community is the heart of all open-source projects. We welcome contributions from all people and strive to build a welcoming and open community of contributors, users, participants, speakers, and lurkers. Everyone is welcome.

More information is included in our [contribution](https://github.com/thatdot/quine/blob/main/CONTRIBUTING.md) guidelines.

================================================
FILE: api/src/main/scala/com/thatdot/api/codec/SecretCodecs.scala
================================================
package com.thatdot.api.codec

import io.circe.{Decoder, Encoder}

import com.thatdot.common.security.Secret

/** Circe codecs for [[Secret]] values. */
object SecretCodecs {

  /** Encoder that uses `Secret.toString` for redaction.
    * This is the default encoder and should be used for HTTP API responses.
    */
  implicit val secretEncoder: Encoder[Secret] = Encoder.encodeString.contramap(_.toString)

  /** Creates an encoder that preserves the actual value for persistence and cluster communication.
    * Requires a witness (`import Secret.Unsafe._`) to call, making the intent explicit.
    * WARNING: Only use this encoder for internal storage paths, never for external HTTP responses.
    * This method is intentionally NOT implicit to prevent accidental use in API contexts.
    */
  def preservingEncoder(implicit ev: Secret.UnsafeAccess): Encoder[Secret] =
    Encoder.encodeString.contramap(_.unsafeValue)

  /** Decoder that wraps incoming strings in a Secret. */
  implicit val secretDecoder: Decoder[Secret] = Decoder.decodeString.map(Secret(_))
}

================================================
FILE: api/src/main/scala/com/thatdot/api/schema/SecretSchemas.scala
================================================
package com.thatdot.api.schema

import sttp.tapir.Schema

import com.thatdot.common.security.Secret

/** Tapir schemas for [[Secret]] values. */
object SecretSchemas {

  /** Schema that represents Secret as a string in OpenAPI.
    *
    * The schema maps Secret to/from String using Secret.apply for creation
    * and Secret.toString for serialization (which redacts the value).
    */
  implicit val secretSchema: Schema[Secret] =
    // Every string is wrapped unconditionally, so the "decode" direction always yields Some.
    Schema.string.map((s: String) => Some(Secret(s)))(_.toString)
}

================================================
FILE: api/src/main/scala/com/thatdot/api/v2/ApiErrors.scala
================================================
package com.thatdot.api.v2

import java.util.UUID

import io.circe.generic.extras.semiauto.{deriveConfiguredDecoder, deriveConfiguredEncoder}
import io.circe.{Decoder, Encoder}
import sttp.model.StatusCode
import sttp.tapir.{EndpointOutput, Schema, statusCode}

import com.thatdot.api.v2.TypeDiscriminatorConfig.instances._
import com.thatdot.api.v2.schema.TapirJsonConfig.jsonBody
import com.thatdot.common.logging.Log._
import com.thatdot.quine.util.BaseError

/** Errors that api v2 cares to distinguish for reporting */
sealed trait ErrorType {

  /** Text describing this error; every concrete error type below supplies it as a constructor field. */
  val message: String
}

/** The types of errors that the api knows how to distinguish and report
  *
  * Should be extended for all errors we want to be distinguished in an api response.
  * See: [[BaseError]] for future extension.
  */
object ErrorType {

  /** General Api error that we don't have any extra information about */
  case class ApiError(message: String) extends ErrorType
  object ApiError {
    implicit lazy val schema: Schema[ApiError] = Schema.derived
    implicit val encoder: Encoder[ApiError] = deriveConfiguredEncoder
    implicit val decoder: Decoder[ApiError] = deriveConfiguredDecoder
  }

  /** Api error type for any sort of Decode Failure
    *
    * Used currently for a custom decode failure handler passed to Pekko Server Options.
    */
  case class DecodeError(message: String, help: Option[String] = None) extends ErrorType
  object DecodeError {
    implicit lazy val schema: Schema[DecodeError] = Schema.derived
    implicit val encoder: Encoder[DecodeError] = deriveConfiguredEncoder
    implicit val decoder: Decoder[DecodeError] = deriveConfiguredDecoder
  }

  /** Api error type for any Cypher Error
    *
    * This could be further broken down based upon CypherException later.
    */
  case class CypherError(message: String) extends ErrorType
  object CypherError {
    implicit lazy val schema: Schema[CypherError] = Schema.derived
    implicit val encoder: Encoder[CypherError] = deriveConfiguredEncoder
    implicit val decoder: Decoder[CypherError] = deriveConfiguredDecoder
  }

  // Schema and codecs for the whole sealed hierarchy. The `deriveConfigured*` calls need an implicit
  // circe Configuration; presumably it is the one imported from `TypeDiscriminatorConfig.instances`
  // (TODO confirm which discriminator field it configures).
  implicit lazy val schema: Schema[ErrorType] = Schema.derived
  implicit val encoder: Encoder[ErrorType] = deriveConfiguredEncoder
  implicit val decoder: Decoder[ErrorType] = deriveConfiguredDecoder
}

/** Common shape of the error responses defined in [[ErrorResponse]]: each one carries a list of
  * [[ErrorType]] values.
  */
trait HasErrors extends Product with Serializable {

  /** The individual errors reported by this response. */
  def errors: List[ErrorType]
}

/** Provides the types of error codes that the api can give back to a user.
  *
  * Maps directly to http error codes (400s to 500s)
  * They are combined with Coproduct from shapeless where used. This should be updated to Union in scala 3.
*/
object ErrorResponse {
  case class ServerError(errors: List[ErrorType]) extends HasErrors
  case class BadRequest(errors: List[ErrorType]) extends HasErrors
  case class NotFound(errors: List[ErrorType]) extends HasErrors
  case class Unauthorized(errors: List[ErrorType]) extends HasErrors
  case class ServiceUnavailable(errors: List[ErrorType]) extends HasErrors

  // Schema for the `errors` field shared by all of the response types above.
  implicit private val errorListSchema: Schema[List[ErrorType]] = ErrorType.schema.asIterable[List]

  /** Convenience constructors and codecs for [[ServerError]]. */
  object ServerError {

    /** A response holding exactly one, already-typed error. */
    def apply(error: ErrorType): ServerError = new ServerError(error :: Nil)

    /** A response holding one generic [[ErrorType.ApiError]] with the given text. */
    def apply(error: String): ServerError = apply(ErrorType.ApiError(error))

    /** A response holding one generic [[ErrorType.ApiError]] built from the error's message. */
    def apply(error: BaseError): ServerError = apply(error.getMessage)

    /** A response holding one generic [[ErrorType.ApiError]] per supplied error, in order. */
    def ofErrors(errors: List[BaseError]): ServerError =
      new ServerError(errors.map(failure => ErrorType.ApiError(failure.getMessage)))

    implicit lazy val schema: Schema[ServerError] = Schema.derived
    implicit val encoder: Encoder[ServerError] = deriveConfiguredEncoder
    implicit val decoder: Decoder[ServerError] = deriveConfiguredDecoder
  }

  // It would be nice to take away the below methods once we have our errors properly coded.
object BadRequest {
    def apply(error: String): BadRequest = BadRequest(List(ErrorType.ApiError(error)))
    def apply(error: ErrorType): BadRequest = BadRequest(List(error))
    def apply(error: BaseError): BadRequest = BadRequest(List(ErrorType.ApiError(error.getMessage)))

    /** One generic [[ErrorType.ApiError]] per message string, in order. */
    def ofErrorStrings(errors: List[String]): BadRequest = BadRequest(errors.map(err => ErrorType.ApiError(err)))

    /** One generic [[ErrorType.ApiError]] per error, using each error's message. */
    def ofErrors(errors: List[BaseError]): BadRequest = BadRequest(
      errors.map(err => ErrorType.ApiError(err.getMessage)),
    )

    implicit lazy val schema: Schema[BadRequest] = Schema.derived
    implicit val encoder: Encoder[BadRequest] = deriveConfiguredEncoder
    implicit val decoder: Decoder[BadRequest] = deriveConfiguredDecoder
  }

  object NotFound {
    def apply(error: String): NotFound = NotFound(List(ErrorType.ApiError(error)))
    def apply(error: ErrorType): NotFound = NotFound(List(error))
    def apply(error: BaseError): NotFound = NotFound(List(ErrorType.ApiError(error.getMessage)))

    /** One generic [[ErrorType.ApiError]] per error, using each error's message. */
    def ofErrors(errors: List[BaseError]): NotFound = NotFound(errors.map(err => ErrorType.ApiError(err.getMessage)))

    implicit lazy val schema: Schema[NotFound] = Schema.derived
    implicit val encoder: Encoder[NotFound] = deriveConfiguredEncoder
    implicit val decoder: Decoder[NotFound] = deriveConfiguredDecoder
  }

  object Unauthorized {
    def apply(reason: String): Unauthorized = Unauthorized(List(ErrorType.ApiError(reason)))

    // Explicit result type (previously inferred, with `new` used to dodge the overload restriction)
    // so this overload reads and behaves like its siblings in the other companions.
    def apply(reason: ErrorType): Unauthorized = Unauthorized(List(reason))

    implicit lazy val schema: Schema[Unauthorized] = Schema.derived
    implicit val encoder: Encoder[Unauthorized] = deriveConfiguredEncoder
    implicit val decoder: Decoder[Unauthorized] = deriveConfiguredDecoder

    /** Log rendering: the literal prefix followed by the bracketed, comma-separated errors. */
    implicit val loggable: AlwaysSafeLoggable[Unauthorized] = unauthorized =>
      s"Unauthorized: ${unauthorized.errors.mkString("[", ", ", "]")}"
  }

  object ServiceUnavailable {
    def apply(error: String): ServiceUnavailable = ServiceUnavailable(List(ErrorType.ApiError(error)))
    def apply(error: ErrorType): ServiceUnavailable = ServiceUnavailable(List(error))
    def apply(error: BaseError): ServiceUnavailable = ServiceUnavailable(List(ErrorType.ApiError(error.getMessage)))

    /** One generic [[ErrorType.ApiError]] per error, using each error's message. */
    def ofErrors(errors: List[BaseError]): ServiceUnavailable = ServiceUnavailable(
      errors.map(err => ErrorType.ApiError(err.getMessage)),
    )

    implicit lazy val schema: Schema[ServiceUnavailable] = Schema.derived
    implicit val encoder: Encoder[ServiceUnavailable] = deriveConfiguredEncoder
    implicit val decoder: Decoder[ServiceUnavailable] = deriveConfiguredDecoder
  }
}

/** Helpers that turn exceptions into [[ErrorResponse]] values and describe those responses as
  * tapir endpoint outputs (status code plus JSON body).
  */
object ErrorResponseHelpers extends LazySafeLogging {

  /** Default error catching for server logic. Could use a second look once more errors are codified
    *
    * The exception is logged together with a freshly generated correlation id; the returned response
    * contains only that id, not the exception's own message.
    */
  def toServerError(e: Throwable)(implicit logConfig: LogConfig): ErrorResponse.ServerError = {
    val correlationId = UUID.randomUUID().toString
    logger.error(log"Internal server error [correlationId=${Safe(correlationId)}]" withException e)
    ErrorResponse.ServerError(
      s"An internal error occurred. Reference ID: $correlationId",
    )
  }

  /** Convert IllegalArgumentException to BadRequest with the exception's message.
    *
    * `getMessage` is null for exceptions created without a message; a generic text is substituted
    * in that case so the response never carries a null message.
    */
  def toBadRequest(e: IllegalArgumentException): ErrorResponse.BadRequest =
    ErrorResponse.BadRequest(Option(e.getMessage).getOrElse("Invalid argument"))

  /** Endpoint output for a 500 response whose JSON body is a [[ErrorResponse.ServerError]].
    *
    * @param possibleReasons reasons listed in the generated description
    */
  def serverError(possibleReasons: String*)(implicit
    enc: Encoder[ErrorResponse.ServerError],
    dec: Decoder[ErrorResponse.ServerError],
    sch: Schema[ErrorResponse.ServerError],
  ): EndpointOutput[ErrorResponse.ServerError] =
    statusCode(StatusCode.InternalServerError).and {
      jsonBody[ErrorResponse.ServerError]
        .description(ErrorText.serverErrorDescription(possibleReasons: _*))
    }

  /** Endpoint output for a 400 response whose JSON body is a [[ErrorResponse.BadRequest]].
    *
    * @param possibleReasons reasons listed in the generated description
    */
  def badRequestError(possibleReasons: String*)(implicit
    enc: Encoder[ErrorResponse.BadRequest],
    dec: Decoder[ErrorResponse.BadRequest],
    sch: Schema[ErrorResponse.BadRequest],
  ): EndpointOutput[ErrorResponse.BadRequest] =
    statusCode(StatusCode.BadRequest).and {
      jsonBody[ErrorResponse.BadRequest]
        .description(ErrorText.badRequestDescription(possibleReasons: _*))
    }

  def notFoundError(possibleReasons: String*)(implicit
    enc: Encoder[ErrorResponse.NotFound],
    dec:
Decoder[ErrorResponse.NotFound],
    sch: Schema[ErrorResponse.NotFound],
  ): EndpointOutput[ErrorResponse.NotFound] = {
    val body = jsonBody[ErrorResponse.NotFound]
      .description(ErrorText.notFoundDescription(possibleReasons: _*))
    statusCode(StatusCode.NotFound).and(body)
  }

  /** Endpoint output for a 401 response whose JSON body is a [[ErrorResponse.Unauthorized]].
    *
    * @param possibleReasons reasons listed in the generated description
    */
  def unauthorizedError(possibleReasons: String*)(implicit
    enc: Encoder[ErrorResponse.Unauthorized],
    dec: Decoder[ErrorResponse.Unauthorized],
    sch: Schema[ErrorResponse.Unauthorized],
  ): EndpointOutput[ErrorResponse.Unauthorized] = {
    val body = jsonBody[ErrorResponse.Unauthorized]
      .description(ErrorText.unauthorizedErrorDescription(possibleReasons: _*))
    statusCode(StatusCode.Unauthorized).and(body)
  }
}

/** Description texts for the error outputs. Each template has a single `%s` slot that receives the
  * optional "Possible reasons" list.
  */
object ErrorText {

  private val notFoundDoc: String =
    """Not Found
      |
      |The resource referenced was not found.
      |
      |%s
      |
      |""".stripMargin

  private val badRequestDoc: String =
    """Bad Request
      |
      | Something in your request is invalid, and could not be processed.
      | Review your request and attempt to submit it again.
      |
      | %s
      |
      | Contact support if you continue to have issues.
      |
      |""".stripMargin

  private val serverErrorDoc: String =
    """Internal Server Error
      |
      | Encountered an unexpected condition that prevented processing your request.
      |
      | %s
      |
      | Contact support if you continue to have issues.
      |
      |""".stripMargin

  private val unauthorizedDoc: String =
    """Unauthorized
      |
      |Permission to access a protected resource not found
      |
      |%s
      |
      |""".stripMargin

  /** Fill a template's `%s` slot: with nothing when there are no messages, otherwise with a
    * hand-built markdown bullet list introduced by "Possible reasons:".
    */
  private def buildErrorMessage(docs: String, messages: Seq[String]): String = {
    val reasons =
      if (messages.isEmpty) ""
      else messages.mkString("Possible reasons:\n - ", "\n - ", "")
    docs.format(reasons)
  }

  def badRequestDescription(messages: String*): String = buildErrorMessage(badRequestDoc, messages)

  def notFoundDescription(messages: String*): String = buildErrorMessage(notFoundDoc, messages)

  def serverErrorDescription(messages: String*): String = buildErrorMessage(serverErrorDoc, messages)

  def unauthorizedErrorDescription(messages: String*): String = buildErrorMessage(unauthorizedDoc, messages)
}

================================================
FILE: api/src/main/scala/com/thatdot/api/v2/AwsCredentials.scala
================================================
package com.thatdot.api.v2

import io.circe.generic.extras.semiauto.{deriveConfiguredDecoder, deriveConfiguredEncoder}
import io.circe.{Decoder, Encoder}
import sttp.tapir.Schema
import sttp.tapir.Schema.annotations.{description, encodedExample, title}

import com.thatdot.api.codec.SecretCodecs
import com.thatdot.api.v2.TypeDiscriminatorConfig.instances.circeConfig
import com.thatdot.common.security.Secret

@title("AWS Credentials")
@description(
  "Explicit AWS access key and secret to use. If not provided, defaults to environmental credentials according to the default AWS credential chain. See: .",
)
final case class AwsCredentials(
  @encodedExample("ATIAXNKBTSB57V2QF11X")
  accessKeyId: Secret,
  @encodedExample("MDwbQe5XT4uOA3jQB/FhPaZpJdFkW13ryAL29bAk")
  secretAccessKey: Secret,
)

object AwsCredentials {
  import com.thatdot.api.codec.SecretCodecs.{secretEncoder, secretDecoder}

  /** Encoder that redacts credential values for API responses.
*/
  implicit val encoder: Encoder[AwsCredentials] = deriveConfiguredEncoder
  implicit val decoder: Decoder[AwsCredentials] = deriveConfiguredDecoder

  implicit lazy val schema: Schema[AwsCredentials] = {
    // Imported only for this derivation: it supplies the Schema[Secret] for both fields.
    import com.thatdot.api.schema.SecretSchemas.secretSchema
    Schema.derived
  }

  /** Encoder that preserves credential values for persistence.
    * Requires witness (`import Secret.Unsafe._`) to call.
    */
  def preservingEncoder(implicit ev: Secret.UnsafeAccess): Encoder[AwsCredentials] =
    PreservingCodecs.encoder
}

/** Separate object to avoid implicit scope pollution.
  *
  * It sits outside the `AwsCredentials` companion, whose body imports the redacting
  * `SecretCodecs.secretEncoder`; here the local implicit declared in `encoder` is the only
  * `Encoder[Secret]` in lexical scope when the derivation runs.
  */
private object PreservingCodecs {
  def encoder(implicit ev: Secret.UnsafeAccess): Encoder[AwsCredentials] = {
    // Shadow the redacting encoder with the preserving version
    implicit val secretEncoder: Encoder[Secret] = SecretCodecs.preservingEncoder
    deriveConfiguredEncoder
  }
}

================================================
FILE: api/src/main/scala/com/thatdot/api/v2/AwsRegion.scala
================================================
package com.thatdot.api.v2

import io.circe.{Decoder, Encoder}
import sttp.tapir.Schema
import sttp.tapir.Schema.annotations.{description, encodedExample, title}

@title("AWS Region")
@description(
  "AWS region code. e.g. `us-west-2`. If not provided, defaults according to the default AWS region provider chain. See: .",
)
final case class AwsRegion(
  @encodedExample("us-west-2")
  region: String,
)

object AwsRegion {
  // The region is encoded as a bare JSON string (the code itself) rather than as an object with a
  // `region` field; the decoder and the schema mirror that representation.
  implicit val encoder: Encoder[AwsRegion] = Encoder.encodeString.contramap(_.region)
  implicit val decoder: Decoder[AwsRegion] = Decoder.decodeString.map(AwsRegion(_))
  implicit lazy val schema: Schema[AwsRegion] = Schema.string[AwsRegion]
}

================================================
FILE: api/src/main/scala/com/thatdot/api/v2/RatesSummary.scala
================================================
package com.thatdot.api.v2

import io.circe.generic.extras.semiauto.{deriveConfiguredDecoder, deriveConfiguredEncoder}
import io.circe.{Decoder, Encoder}
import sttp.tapir.Schema
import sttp.tapir.Schema.annotations.{description, title}

import com.thatdot.api.v2.TypeDiscriminatorConfig.instances.circeConfig

@title("Rates Summary")
@description("Summary statistics about a metered rate.")
final case class RatesSummary(
  @description("Number of items metered") count: Long,
  @description("Approximate rate per second in the last minute") oneMinute: Double,
  @description("Approximate rate per second in the last five minutes") fiveMinute: Double,
  @description("Approximate rate per second in the last fifteen minutes") fifteenMinute: Double,
  @description("Approximate rate per second since the meter was started") overall: Double,
)

object RatesSummary {
  // Codecs derived with the imported `circeConfig`; schema derived from the annotated fields above.
  implicit val encoder: Encoder[RatesSummary] = deriveConfiguredEncoder
  implicit val decoder: Decoder[RatesSummary] = deriveConfiguredDecoder
  implicit lazy val schema: Schema[RatesSummary] = Schema.derived
}

================================================
FILE: api/src/main/scala/com/thatdot/api/v2/SaslJaasConfig.scala
================================================
package com.thatdot.api.v2

import io.circe.generic.extras.semiauto.{deriveConfiguredDecoder, deriveConfiguredEncoder}
import io.circe.{Decoder, Encoder}
import sttp.tapir.Schema

import com.thatdot.api.codec.SecretCodecs
import com.thatdot.api.codec.SecretCodecs._
import com.thatdot.api.schema.SecretSchemas._
import com.thatdot.api.v2.TypeDiscriminatorConfig.instances.circeConfig
import com.thatdot.common.logging.Log.AlwaysSafeLoggable
import com.thatdot.common.security.Secret

/** SASL/JAAS configuration for Kafka authentication.
  *
  * Represents the structured form of Kafka's `sasl.jaas.config` property. Each subtype
  * corresponds to a specific SASL mechanism supported by Kafka.
  *
  * @see [[https://kafka.apache.org/41/security/authentication-using-sasl Kafka SASL Authentication]]
  */
sealed trait SaslJaasConfig

object SaslJaasConfig {
  // Default (redacting) codecs: the `Secret` encoder in scope here is the one brought in by
  // `import SecretCodecs._` above, which encodes `Secret.toString`.
  implicit val encoder: Encoder[SaslJaasConfig] = deriveConfiguredEncoder
  implicit val decoder: Decoder[SaslJaasConfig] = deriveConfiguredDecoder
  implicit lazy val schema: Schema[SaslJaasConfig] = Schema.derived

  /** Encoder that preserves credential values for persistence.
    * Requires witness (`import Secret.Unsafe._`) to call.
    */
  def preservingEncoder(implicit ev: Secret.UnsafeAccess): Encoder[SaslJaasConfig] = {
    // Shadow the redacting encoder with the preserving version
    implicit val secretEncoder: Encoder[Secret] = SecretCodecs.preservingEncoder
    // Derive encoders for subtypes that contain secrets
    // (declared locally so the trait-level derivation below picks these up instead of the
    // redacting encoders defined in the subtypes' companions)
    implicit val plainLoginEncoder: Encoder[PlainLogin] = deriveConfiguredEncoder
    implicit val scramLoginEncoder: Encoder[ScramLogin] = deriveConfiguredEncoder
    implicit val oauthBearerLoginEncoder: Encoder[OAuthBearerLogin] = deriveConfiguredEncoder
    deriveConfiguredEncoder
  }

  /** Format a SASL/JAAS configuration as a Kafka JAAS config string.
*
    * Every option value is written as a double-quoted string in which `\` and `"` are
    * backslash-escaped, so values containing those characters cannot terminate the quoted value
    * early or be altered when the string is parsed.
    *
    * @param config
    *   the SASL/JAAS configuration to format
    * @param renderSecret
    *   function to render secret values (e.g., redact or expose)
    * @return
    *   a JAAS configuration string
    */
  private def formatJaasString(config: SaslJaasConfig, renderSecret: Secret => String): String = {
    // Backslashes are escaped first so that the backslashes added for quotes are not doubled.
    def quoted(value: String): String =
      "\"" + value.replace("\\", "\\\\").replace("\"", "\\\"") + "\""

    // Renders one ` name="value"` option, including its leading separator space.
    def option(name: String, value: String): String = s" $name=${quoted(value)}"

    config match {
      case PlainLogin(username, password) =>
        "org.apache.kafka.common.security.plain.PlainLoginModule required" +
          option("username", username) +
          option("password", renderSecret(password)) + ";"
      case ScramLogin(username, password) =>
        "org.apache.kafka.common.security.scram.ScramLoginModule required" +
          option("username", username) +
          option("password", renderSecret(password)) + ";"
      case OAuthBearerLogin(clientId, clientSecret, scope, tokenEndpointUrl) =>
        // Optional settings contribute nothing at all when absent.
        val scopePart = scope.fold("")(option("scope", _))
        val tokenUrlPart = tokenEndpointUrl.fold("")(option("sasl.oauthbearer.token.endpoint.url", _))
        "org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule required" +
          option("clientId", clientId) +
          option("clientSecret", renderSecret(clientSecret)) +
          scopePart + tokenUrlPart + ";"
    }
  }

  /** Loggable instance for SaslJaasConfig that outputs JAAS format with redacted secrets.
    *
    * Produces output in Kafka's native JAAS config string format, making logs directly
    * comparable to Kafka documentation and examples. Passwords and client secrets are
    * shown as "****".
    */
  implicit val logSaslJaasConfig: AlwaysSafeLoggable[SaslJaasConfig] =
    formatJaasString(_, _ => "****")

  /** Convert a SASL/JAAS configuration to Kafka's JAAS config string format.
    *
    * Requires an unsafe access witness to extract the secret values.
*
    * @param config
    *   the SASL/JAAS configuration to convert
    * @param ev
    *   witness that the caller has acknowledged unsafe access to secrets
    * @return
    *   a JAAS configuration string suitable for Kafka's `sasl.jaas.config` property
    */
  def toJaasConfigString(config: SaslJaasConfig)(implicit ev: Secret.UnsafeAccess): String =
    // Same formatter as the loggable instance, but rendering the real secret values.
    formatJaasString(config, _.unsafeValue)
}

/** PLAIN authentication mechanism for Kafka SASL.
  *
  * Uses simple username/password authentication. The password is transmitted in cleartext
  * (though typically over TLS), so this mechanism should only be used with SSL/TLS encryption.
  *
  * Corresponds to Kafka's `org.apache.kafka.common.security.plain.PlainLoginModule`.
  *
  * @param username
  *   SASL username for authentication
  * @param password
  *   SASL password (redacted in API responses and logs)
  * @see [[https://kafka.apache.org/41/security/authentication-using-sasl/#authentication-using-saslplain Kafka SASL/PLAIN]]
  */
final case class PlainLogin(
  username: String,
  password: Secret,
) extends SaslJaasConfig

object PlainLogin {
  // Derived with the `Secret` codecs and schema imported at the top of this file
  // (`SecretCodecs._`, `SecretSchemas._`), i.e. the redacting encoder.
  implicit val encoder: Encoder[PlainLogin] = deriveConfiguredEncoder
  implicit val decoder: Decoder[PlainLogin] = deriveConfiguredDecoder
  implicit lazy val schema: Schema[PlainLogin] = Schema.derived
}

/** SCRAM (Salted Challenge Response Authentication Mechanism) for Kafka SASL.
  *
  * A more secure alternative to PLAIN that does not transmit the password in cleartext.
  * Kafka supports SCRAM-SHA-256 and SCRAM-SHA-512 variants.
  *
  * Corresponds to Kafka's `org.apache.kafka.common.security.scram.ScramLoginModule`.
  *
  * @param username
  *   SASL username for authentication
  * @param password
  *   SASL password (redacted in API responses and logs)
  * @see [[https://kafka.apache.org/41/security/authentication-using-sasl/#authentication-using-saslscram Kafka SASL/SCRAM]]
  */
final case class ScramLogin(
  username: String,
  password: Secret,
) extends SaslJaasConfig

object ScramLogin {
  // Derived with the redacting `Secret` codecs imported at the top of this file.
  implicit val encoder: Encoder[ScramLogin] = deriveConfiguredEncoder
  implicit val decoder: Decoder[ScramLogin] = deriveConfiguredDecoder
  implicit lazy val schema: Schema[ScramLogin] = Schema.derived
}

/** OAuth Bearer authentication mechanism for Kafka SASL.
  *
  * Uses OAuth 2.0 client credentials flow to obtain access tokens for Kafka authentication.
  * The client authenticates with the OAuth provider using client ID and secret, then uses
  * the resulting token to authenticate with Kafka.
  *
  * Corresponds to Kafka's `org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule`.
  *
  * @param clientId
  *   OAuth 2.0 client identifier
  * @param clientSecret
  *   OAuth 2.0 client secret (redacted in API responses and logs)
  * @param scope
  *   Optional OAuth scope(s) to request
  * @param tokenEndpointUrl
  *   Optional OAuth token endpoint URL (if not using OIDC discovery)
  * @see [[https://kafka.apache.org/41/security/authentication-using-sasl/#authentication-using-sasloauthbearer Kafka SASL/OAUTHBEARER]]
  */
final case class OAuthBearerLogin(
  clientId: String,
  clientSecret: Secret,
  scope: Option[String] = None,
  tokenEndpointUrl: Option[String] = None,
) extends SaslJaasConfig

object OAuthBearerLogin {
  // Derived with the redacting `Secret` codecs imported at the top of this file.
  implicit val encoder: Encoder[OAuthBearerLogin] = deriveConfiguredEncoder
  implicit val decoder: Decoder[OAuthBearerLogin] = deriveConfiguredDecoder
  implicit lazy val schema: Schema[OAuthBearerLogin] = Schema.derived
}

================================================
FILE: api/src/main/scala/com/thatdot/api/v2/ShowShort.scala
================================================
package com.thatdot.api.v2

/** Type class that renders a value of type `A` as a `String` through `showShort`.
  *
  * Declared contravariant, so an instance for a supertype also serves its subtypes.
  */
trait ShowShort[-A] {
  def
showShort(a: A): String
}

/** Extension syntax: adds a `.showShort` method to any value that has a [[ShowShort]] instance. */
trait ShowShortOps {
  implicit class ShortShower[A](a: A)(implicit shower: ShowShort[A]) {
    def showShort: String = shower.showShort(a)
  }
}

object ShowShort {

  /** Summons the implicit instance for `A`. */
  def apply[A](implicit instance: ShowShort[A]): ShowShort[A] = instance

  /** Renders whichever side of the `Either` is present with that side's own instance. */
  implicit def eitherShowShort[A: ShowShort, B: ShowShort]: ShowShort[Either[A, B]] =
    (either: Either[A, B]) =>
      either match {
        case Left(left) => ShowShort[A].showShort(left)
        case Right(right) => ShowShort[B].showShort(right)
      }

  /** Renders an error response as its error messages, comma-separated inside square brackets. */
  implicit def hasErrorsShowShort[A <: HasErrors]: ShowShort[A] =
    (response: A) => response.errors.map(_.message).mkString("[", ", ", "]")

  object syntax extends ShowShortOps
}

================================================
FILE: api/src/main/scala/com/thatdot/api/v2/SuccessEnvelope.scala
================================================
package com.thatdot.api.v2

import io.circe.generic.extras.Configuration
import io.circe.generic.extras.semiauto.{deriveConfiguredDecoder, deriveConfiguredEncoder}
import io.circe.{Decoder, Encoder}
import sttp.tapir.Schema

/** Root of the success response wrappers defined in the companion object. */
sealed trait SuccessEnvelope[+Content]

/** Envelopes allowed where an endpoint answers with either `Created` or `NoContent`. */
sealed trait CreatedOrNoContent[+Content] extends SuccessEnvelope[Content]

/** Envelopes allowed where an endpoint answers with either `Created` or `Ok`. */
sealed trait CreatedOrOk[+Content] extends SuccessEnvelope[Content]

object SuccessEnvelope {

  // Configuration used by the derivations below; `withDefaults` makes the decoders honour the
  // case classes' default arguments.
  implicit private val defaultConfig: Configuration = Configuration.default.withDefaults

  case class Ok[Content](content: Content, message: Option[String] = None, warnings: List[String] = Nil)
      extends SuccessEnvelope[Content]
      with CreatedOrOk[Content]

  object Ok {
    implicit def schema[A](implicit inner: Schema[A]): Schema[Ok[A]] = Schema.derived
    implicit def encoder[A: Encoder]: Encoder[Ok[A]] = deriveConfiguredEncoder
    implicit def decoder[A: Decoder]: Decoder[Ok[A]] = deriveConfiguredDecoder
  }

  case class Created[Content](content: Content, message: Option[String] = None, warnings: List[String] = Nil)
      extends SuccessEnvelope[Content]
      with CreatedOrNoContent[Content]
      with CreatedOrOk[Content]

  object Created {
    implicit def schema[A](implicit inner: Schema[A]): Schema[Created[A]] = Schema.derived
    implicit def encoder[A: Encoder]: Encoder[Created[A]] = deriveConfiguredEncoder
    implicit def decoder[A: Decoder]: Decoder[Created[A]] = deriveConfiguredDecoder
  }

  case class Accepted(
    message: String = "Request accepted. Starting to process task.",
    monitorUrl: Option[String] = None,
  ) extends SuccessEnvelope[Nothing]

  object Accepted {
    implicit lazy val schema: Schema[Accepted] = Schema.derived
    implicit val encoder: Encoder[Accepted] = deriveConfiguredEncoder
    implicit val decoder: Decoder[Accepted] = deriveConfiguredDecoder
  }

  case object NoContent extends SuccessEnvelope[Nothing] with CreatedOrNoContent[Nothing] {
    implicit lazy val schema: Schema[NoContent.type] = Schema.derived
    // Encoded and decoded through circe's Unit codecs: the object itself carries no data.
    implicit val encoder: Encoder[NoContent.type] = Encoder.encodeUnit.contramap(_ => ())
    implicit val decoder: Decoder[NoContent.type] = Decoder.decodeUnit.map(_ => NoContent)
  }
}

================================================
FILE: api/src/main/scala/com/thatdot/api/v2/V2EndpointDefinitions.scala
================================================
package com.thatdot.api.v2

import java.nio.charset.{Charset, StandardCharsets}
import java.util.concurrent.TimeUnit

import scala.concurrent.duration.FiniteDuration
import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success}

import io.circe.{Decoder, Encoder}
import shapeless.ops.coproduct.{Basis, CoproductToEither, Inject}
import shapeless.{:+:, CNil, Coproduct}
import sttp.tapir.CodecFormat.TextPlain
import sttp.tapir.DecodeResult.Value
import sttp.tapir._

import com.thatdot.api.v2.ErrorResponse.{BadRequest, ServerError}
import com.thatdot.api.v2.ErrorResponseHelpers.{toBadRequest, toServerError}
import com.thatdot.api.v2.schema.TapirJsonConfig
import com.thatdot.common.logging.Log._
import com.thatdot.common.quineid.QuineId
import com.thatdot.quine.model.{Milliseconds, QuineIdProvider}

trait V2EndpointDefinitions extends TapirJsonConfig with LazySafeLogging {

  implicit protected def logConfig: LogConfig

  type AtTime = Milliseconds

  //
------- id ----------------

  /** Parse the id provider's pretty-string form of a node id; any parse failure is reported as a
    * decode error naming the offending input.
    */
  protected def toQuineId(s: String): DecodeResult[QuineId] =
    idProvider.qidFromPrettyString(s) match {
      case Failure(_) =>
        DecodeResult.Error(s, new IllegalArgumentException(s"'$s' is not a valid QuineId"))
      case Success(quineId) =>
        Value(quineId)
    }

  // TODO Use Tapir Validator IdProvider.validate

  val idProvider: QuineIdProvider

  implicit val quineIdCodec: Codec[String, QuineId, TextPlain] =
    Codec.string.mapDecode(toQuineId)(idProvider.qidToPrettyString)

  /** Timestamps travel through the REST API as milliseconds since 1970, so they are read from and
    * written to a long. A value later than the current system time is rejected.
    */
  protected def toAtTime(rawTime: Long): DecodeResult[AtTime] =
    if (rawTime <= System.currentTimeMillis) Value(Milliseconds(rawTime))
    else
      DecodeResult.Error(
        rawTime.toString,
        new IllegalArgumentException("Times in the future are not supported."),
      )

  /** Text codec for an at time, layered on the long codec */
  implicit val atTimeEndpointCodec: Codec[String, AtTime, TextPlain] =
    Codec.long.mapDecode(toAtTime)(atTime => atTime.millis)

  val atTimeParameter: EndpointInput.Query[Option[AtTime]] = {
    val doc =
      "An integer timestamp in milliseconds since the Unix epoch representing the historical moment to query."
    query[Option[AtTime]]("atTime").description(doc)
  }

  // ------ timeout -------------

  /** Text codec that reads and writes a duration as a whole number of milliseconds. */
  implicit val timeoutCodec: Codec[String, FiniteDuration, TextPlain] =
    Codec.long.mapDecode(millis => Value(FiniteDuration(millis, TimeUnit.MILLISECONDS)))(duration => duration.toMillis)

  val timeoutParameter: EndpointInput.Query[FiniteDuration] =
    query[FiniteDuration]("timeout")
      .description("Milliseconds to wait before the HTTP request times out.")
      .default(FiniteDuration(20, TimeUnit.SECONDS))

  type EndpointBase = Endpoint[Unit, Unit, ServerError, Unit, Any]

  /** Base for api/v2 endpoints with common errors
    *
    * @param basePaths Provided base Paths will be appended in order, i.e.
`endpoint("a","b") == /api/v2/a/b`
    */
  def rawEndpoint(
    basePaths: String*,
  ): Endpoint[Unit, Unit, Nothing, Unit, Any] = {
    // Start from the fixed /api/v2 prefix and append each caller-supplied segment in order.
    val fullPath = basePaths.foldLeft("api" / "v2")(_ / _)
    infallibleEndpoint.in(fullPath)
  }

  /** Request/response body carried as YAML text, always UTF-8. */
  def yamlBody[T]()(implicit
    schema: Schema[T],
    encoder: Encoder[T],
    decoder: Decoder[T],
  ): EndpointIO.Body[String, T] = {
    val yamlCodec = YamlCodec.createCodec[T]()
    stringBodyAnyFormat(yamlCodec, StandardCharsets.UTF_8)
  }

  /** Body accepted as either JSON or YAML.
    *
    * @param tOpt when defined, attached as the example of both variants
    */
  def jsonOrYamlBody[T](tOpt: Option[T] = None)(implicit
    schema: Schema[T],
    encoder: Encoder[T],
    decoder: Decoder[T],
  ): EndpointIO.OneOfBody[T, T] =
    tOpt.fold(oneOfBody[T](jsonBody[T], yamlBody[T]())) { example =>
      oneOfBody[T](jsonBody[T].example(example), yamlBody[T]().example(example))
    }

  /** Plain-text body using the supplied codec. */
  def textBody[T](codec: Codec[String, T, TextPlain]): EndpointIO.Body[String, T] =
    // NOTE(review): uses the JVM's default charset, unlike `yamlBody` which pins UTF-8 — confirm
    // whether the platform-dependent choice is intended.
    stringBodyAnyFormat(codec, Charset.defaultCharset())

  /** Used to produce an endpoint that only has ServerErrors that are caught here.
    *
    * - Wraps server logic in tapir endpoints for catching any exception and lifting to ServerError(500 code).
    */
  def recoverServerError[In, Out](
    fa: Future[In],
  )(outToResponse: In => Out): Future[Either[ServerError, Out]] = {
    implicit val ec: ExecutionContext = ExecutionContext.parasitic
    fa
      .map[Either[ServerError, Out]](result => Right(outToResponse(result)))
      .recover { case failure => Left(toServerError(failure)) }
  }

  /** Recover from errors that could cause the provided future to fail. Errors are represented as any shape Coproduct
    *
    * - Wraps server logic in tapir endpoints for catching any exception and lifting to ServerError(500 code).
    * - Used when the input error type, `Err`, is itself a Coproduct that does not contain ServerError.
    * - The Left of the output Either will itself be a nested either with all coproduct elements accounted for.
    * This is used for tapir endpoint definition as the errorOut type
    * - When the Coproduct has size greater than 2 the tapir Either and CoproductToEither is swapped.
* to fix this map the errorOut to be swapped for the endpoint: `_.mapErrorOut(err => err.swap)(err => err.swap)` */ def recoverServerErrorEither[In, Out, Err <: Coproduct]( fa: Future[Either[Err, In]], )(outToResponse: In => Out)(implicit basis: Basis[ServerError :+: Err, Err], c2e: CoproductToEither[ServerError :+: Err], ): Future[Either[c2e.Out, Out]] = { implicit val ec: ExecutionContext = ExecutionContext.parasitic fa.map { case Left(err) => Left(c2e(err.embed[ServerError :+: Err])) case Right(value) => Right(outToResponse(value)) }.recover(t => Left(c2e(Coproduct[ServerError :+: Err](toServerError(t))))) } /** Recover from errors that could cause the provided future to fail. Errors are represented as a Coproduct * with ServerError explicitly the head of the Coproduct `Err` in the provided Future. * * - Wraps server logic in tapir endpoints for catching any exception and lifting to ServerError(500 code). * - Used when the input error type, `Err`, is itself a Coproduct that does contain ServerError * - The Left of the output Either will itself be a nested either with all coproduct elements accounted for. * This is used for tapir endpoint definition as the errorOut type * - When the Coproduct has size greater than 2 the tapir Either and CoproductToEither is swapped. 
* to fix this map the errorOut to be swapped for the endpoint: `_.mapErrorOut(err => err.swap)(err => err.swap)`
    */
  def recoverServerErrorEitherWithServerError[In, Out, Err <: Coproduct](
    fa: Future[Either[ServerError :+: Err, In]],
  )(outToResponse: In => Out)(implicit
    basis: Basis[ServerError :+: Err, ServerError :+: Err],
    c2e: CoproductToEither[ServerError :+: Err],
  ): Future[Either[c2e.Out, Out]] = {
    implicit val ec: ExecutionContext = ExecutionContext.parasitic

    // The input error coproduct already has ServerError at its head, so it is embedded into itself.
    val translated = fa.map {
      case Right(value) => Right(outToResponse(value))
      case Left(err) => Left(c2e(err.embed[ServerError :+: Err]))
    }

    // Any failure of the future (or of the translation above) becomes a ServerError.
    translated.recover { case thrown =>
      Left(c2e(Coproduct[ServerError :+: Err](toServerError(thrown))))
    }
  }

  /** Variant of the recovery helpers for an error type `Err` that is a single, plain type
    * rather than a Coproduct.
    *
    * - A `Left(err)` from the future is injected into `ServerError :+: Err :+: CNil`.
    * - A failed future is converted with `toServerError` and injected into the same coproduct.
    * - The Left of the result is `c2e.Out`, the Either encoding of that coproduct, which is the shape
    *   used as the tapir errorOut type.
    */
  def recoverServerErrorEitherFlat[In, Out, Err](
    fa: Future[Either[Err, In]],
  )(outToResponse: In => Out)(implicit
    c2e: CoproductToEither[ServerError :+: Err :+: CNil],
  ): Future[Either[c2e.Out, Out]] = {
    implicit val ec: ExecutionContext = ExecutionContext.parasitic

    val translated = fa.map {
      case Right(value) => Right(outToResponse(value))
      case Left(err) => Left(c2e(Coproduct[ServerError :+: Err :+: CNil](err)))
    }

    translated.recover { case thrown =>
      Left(c2e(Coproduct[ServerError :+: Err :+: CNil](toServerError(thrown))))
    }
  }

  /** Like recoverServerErrorEither but routes IllegalArgumentException to BadRequest.
    * Use for endpoints where BadRequest is in the error coproduct and user input errors
    * (like require() failures) should return 400 instead of 500.
    *
    * - Wraps server logic in tapir endpoints for catching any exception.
* - IllegalArgumentException is lifted to BadRequest (400 code).
    * - Other exceptions are lifted to ServerError (500 code).
    * - Used when the input error type, `Err`, is itself a Coproduct that contains BadRequest (BadRequest :+: NotFound :+: CNil).
    * - The Left of the output Either will itself be a nested either with all coproduct elements accounted for.
    *   This is used for tapir endpoint definition as the errorOut type.
    */
  def recoverServerErrorEitherWithUserError[In, Out, Err <: Coproduct](
    fa: Future[Either[Err, In]],
  )(outToResponse: In => Out)(implicit
    basisErr: Basis[ServerError :+: Err, Err],
    injectBadRequest: Inject[Err, BadRequest],
    c2e: CoproductToEither[ServerError :+: Err],
  ): Future[Either[c2e.Out, Out]] = {
    implicit val ec: ExecutionContext = ExecutionContext.parasitic
    fa.map {
      case Left(err) => Left(c2e(err.embed[ServerError :+: Err]))
      case Right(value) => Right(outToResponse(value))
    }.recover {
      // Case order matters: the IllegalArgumentException case must precede the catch-all below.
      case iae: IllegalArgumentException =>
        // Inject the BadRequest into `Err` first, then widen to the coproduct that also holds ServerError.
        val badReq = injectBadRequest(toBadRequest(iae))
        Left(c2e(badReq.embed[ServerError :+: Err]))
      case t => Left(c2e(Coproduct[ServerError :+: Err](toServerError(t))))
    }
  }
}
================================================
FILE: api/src/main/scala/com/thatdot/api/v2/YamlCodec.scala
================================================
package com.thatdot.api.v2

import io.circe._
import io.circe.syntax._
import sttp.model.MediaType
import sttp.tapir.{Codec, CodecFormat, DecodeResult, Schema}

/** Tapir codec format whose media type is `application/yaml`. */
case class YamlCodecFormat() extends CodecFormat {
  override val mediaType: MediaType = MediaType("application", "yaml")
}

object YamlCodec {

  /** Builds a tapir codec between a YAML string and `T`, going through circe `Json`.
    *
    * @param tSchema schema exposed unchanged as the codec's schema
    * @param encoder circe encoder used when rendering `T`
    * @param decoder circe decoder used when parsing into `T`
    */
  def createCodec[T]()(implicit
    tSchema: Schema[T],
    encoder: Encoder[T],
    decoder: Decoder[T],
  ): Codec[String, T, YamlCodecFormat] =
    new Codec[String, T, YamlCodecFormat] {
      // Parse the YAML into Json, then decode it as T; a failure of either step is reported as a
      // DecodeResult.Error that carries the original input string.
      override def rawDecode(s: String): DecodeResult[T] =
        yaml.parser.parse(s).flatMap(_.as[T]) match {
          case Left(fail: Error) => DecodeResult.Error(s, fail)
          case Right(t) => DecodeResult.Value[T](t)
        }
      // Encode T to Json and print it as YAML with `dropNullKeys = true`.
      override def encode(h: T): String =
yaml.Printer(dropNullKeys = true).pretty(h.asJson)
      override def schema: Schema[T] = tSchema
      override def format = YamlCodecFormat()
    }
}
================================================
FILE: api/src/main/scala/com/thatdot/api/v2/codec/DisjointEither.scala
================================================
package com.thatdot.api.v2.codec

import io.circe.{Decoder, Encoder}

/** Evidence that A and B are structurally disjoint in JSON (one is primitive,
  * one is object, etc.) enabling unambiguous Either encoding without a wrapper object.
  *
  * When two types serialize to structurally distinguishable JSON (e.g., a string vs
  * an object), we can encode `Either[A, B]` directly as either A's or B's JSON
  * representation, and decode by attempting A first, then B.
  *
  * Usage:
  * {{{
  * import com.thatdot.api.v2.codec.DisjointEither.syntax._
  * import com.thatdot.api.v2.codec.DisjointEvidence.JsonObjLike
  *
  * // Mark your case class as object-like
  * implicit val myTypeObjLike: JsonObjLike[MyType] = new JsonObjLike[MyType] {}
  *
  * // Now Either[String, MyType] has encoder/decoder automatically
  * val codec: Encoder[Either[String, MyType]] = implicitly
  * }}}
  */
sealed trait DisjointEvidence[A, B]

object DisjointEvidence {

  /** Marker for JSON primitive types (String, Int, Boolean, etc.) */
  trait JsonPrim[A]

  /** Marker for JSON array-like types (List, Set, etc.) */
  trait JsonListLike[A]

  /** Marker for JSON object-like types (case classes, Map, etc.)
*/
  trait JsonObjLike[A]

  // Built-in JsonPrim instances
  implicit val jsonPrimInt: JsonPrim[Int] = new JsonPrim[Int] {}
  implicit val jsonPrimString: JsonPrim[String] = new JsonPrim[String] {}
  implicit val jsonPrimBoolean: JsonPrim[Boolean] = new JsonPrim[Boolean] {}

  // Built-in JsonListLike instances
  implicit def jsonListLikeList[A]: JsonListLike[List[A]] = new JsonListLike[List[A]] {}
  implicit def jsonListLikeSet[A]: JsonListLike[Set[A]] = new JsonListLike[Set[A]] {}

  // Built-in JsonObjLike instances
  implicit def jsonObjLikeMap[K, V]: JsonObjLike[Map[K, V]] = new JsonObjLike[Map[K, V]] {}

  // Disjoint evidence derivations (6 combinations of Prim/List/Obj)
  // Each derivation pairs two *different* marker kinds, in both orders; no derivation pairs a
  // marker kind with itself, so two types of the same kind get no evidence from here.
  implicit def primObj[A: JsonPrim, B: JsonObjLike]: DisjointEvidence[A, B] = new DisjointEvidence[A, B] {}
  implicit def objPrim[A: JsonObjLike, B: JsonPrim]: DisjointEvidence[A, B] = new DisjointEvidence[A, B] {}
  implicit def primList[A: JsonPrim, B: JsonListLike]: DisjointEvidence[A, B] = new DisjointEvidence[A, B] {}
  implicit def listPrim[A: JsonListLike, B: JsonPrim]: DisjointEvidence[A, B] = new DisjointEvidence[A, B] {}
  implicit def listObj[A: JsonListLike, B: JsonObjLike]: DisjointEvidence[A, B] = new DisjointEvidence[A, B] {}
  implicit def objList[A: JsonObjLike, B: JsonListLike]: DisjointEvidence[A, B] = new DisjointEvidence[A, B] {}
}

/** Provides Either codecs when disjointness evidence exists.
  *
  * Mix in `DisjointEitherOps` or import `DisjointEither.syntax._` to get
  * implicit `Encoder[Either[A, B]]` and `Decoder[Either[A, B]]` when
  * `DisjointEvidence[A, B]` is available.
*/
object DisjointEither {
  object syntax extends DisjointEitherOps
}

trait DisjointEitherOps {

  /** Encodes an `Either[A, B]` as the bare JSON of whichever side is present (no wrapper object).
    *
    * `ev` is only required as proof that the two sides are distinguishable; it is not used at runtime.
    */
  implicit def disjointEitherEncoder[A, B](implicit
    ev: DisjointEvidence[A, B],
    encodeA: Encoder[A],
    encodeB: Encoder[B],
  ): Encoder[Either[A, B]] = {
    case Left(value) => encodeA(value)
    case Right(value) => encodeB(value)
  }

  /** Decodes an `Either[A, B]` by trying `A` first (yielding a `Left`) and falling back to `B`
    * (yielding a `Right`) when that fails.
    */
  implicit def disjointEitherDecoder[A, B](implicit
    ev: DisjointEvidence[A, B],
    decodeA: Decoder[A],
    decodeB: Decoder[B],
  ): Decoder[Either[A, B]] =
    decodeA.map(Left(_)).or(decodeB.map(Right(_)))
}
================================================
FILE: api/src/main/scala/com/thatdot/api/v2/codec/ThirdPartyCodecs.scala
================================================
package com.thatdot.api.v2.codec

import java.nio.charset.Charset
import java.time.Instant

import scala.util.Try

import io.circe.{Decoder, Encoder}

/** Circe codecs for third-party types that cannot have implicits in their companion objects.
  *
  * Usage:
  * {{{
  * import com.thatdot.api.v2.codec.ThirdPartyCodecs.jdk._
  * }}}
  *
  * @see [[com.thatdot.api.v2.schema.ThirdPartySchemas]] for Tapir schemas (OpenAPI documentation)
  */
object ThirdPartyCodecs {

  /** Circe codecs for JDK types */
  object jdk {

    /** Encodes a charset as its name string. */
    implicit val charsetEncoder: Encoder[Charset] = Encoder.encodeString.contramap(_.name)

    /** Decodes a charset from its name.
      *
      * `Charset.forName` throws for illegal or unsupported names, so the lookup is wrapped in `Try`
      * and surfaced as a decoding failure (mirroring `instantDecoder`) instead of letting the
      * exception escape the decoder.
      */
    implicit val charsetDecoder: Decoder[Charset] = Decoder.decodeString.emapTry(s => Try(Charset.forName(s)))

    /** Encodes an instant using `Instant#toString`. */
    implicit val instantEncoder: Encoder[Instant] = Encoder.encodeString.contramap(_.toString)

    /** Decodes an instant with `Instant.parse`; unparseable text becomes a decoding failure. */
    implicit val instantDecoder: Decoder[Instant] = Decoder.decodeString.emapTry(s => Try(Instant.parse(s)))
  }
}
================================================
FILE: api/src/main/scala/com/thatdot/api/v2/outputs/DestinationSteps.scala
================================================
package com.thatdot.api.v2.outputs

import io.circe.{Decoder, Encoder}

import com.thatdot.api.v2.{AwsCredentials, AwsRegion, SaslJaasConfig}
import com.thatdot.common.security.Secret

/** The ADT for shared result destinations.
These correspond to the API types in each product, but only exist
  * for more convenient lowering to an interpreter. It's easier to automatically derive a conversion between
  * structurally identical case classes than to separately write the lowering function that allocates resources
  * for the interpreter.
  *
  * They also provide a place to define metadata for use in Tapir Schema annotations.
  */
sealed trait DestinationSteps

object DestinationSteps {
  // Each destination's companion object below holds the title / description / example / default
  // strings and values for that destination.
  val title = "Destination Steps"
  val description = "Steps that transform results on their way to a destination."

  final case class Drop() extends DestinationSteps

  object Drop {
    val title = "Drop"
    val description = "Effectively no destination at all, this does nothing but forget the data sent to it."
  }

  final case class File(
    path: String,
  ) extends DestinationSteps // with Format // Return this when prepared to support Protobuf (or more) in File writes

  object File {
    val propertyEncodedExampleForPath = "/temp/results.out"
    val description: String = """Writes each result as a single-line JSON record. |For the format of the result, see "Standing Query Result Output".""".stripMargin
    val title = "Write JSON to File"
  }

  final case class HttpEndpoint(
    url: String,
    parallelism: Int = HttpEndpoint.propertyDefaultValueForParallelism,
    headers: Map[String, Secret] = Map.empty,
  ) extends DestinationSteps

  object HttpEndpoint {
    val propertyEncodedExampleForUrl = "https://results.example.com/result-type"
    val propertyDefaultValueForParallelism = 8
    val propertyDefaultValueForHeaders: Map[String, Secret] = Map.empty
    val propertyDescriptionForHeaders =
      "Additional HTTP headers to include in the request. Header values are redacted in API responses."
    val description =
      "Makes an HTTP[S] POST for each result. For the format of the result, see \"Standing Query Result Output\"."
    val title = "POST to HTTP[S] Webhook"
  }

  /** Wrapper around a single Kafka property value; encoded and decoded as a plain JSON string. */
  case class KafkaPropertyValue(s: String) extends AnyVal

  object KafkaPropertyValue {
    import io.circe.syntax.EncoderOps
    import sttp.tapir.Schema

    implicit val encoder: Encoder[KafkaPropertyValue] = Encoder.encodeString.contramap(_.s)
    implicit val decoder: Decoder[KafkaPropertyValue] = Decoder.decodeString.map(KafkaPropertyValue.apply)
    implicit val schema: Schema[KafkaPropertyValue] = Schema.string[KafkaPropertyValue]

    // Sample properties used only as the encoded example of `mapSchema` below.
    private val exampleKafkaProperties: Map[String, KafkaPropertyValue] = Map(
      "security.protocol" -> KafkaPropertyValue("SSL"),
      "ssl.keystore.type" -> KafkaPropertyValue("PEM"),
      "ssl.keystore.certificate.chain" -> KafkaPropertyValue("/path/to/file/containing/certificate/chain"),
      "ssl.key.password" -> KafkaPropertyValue("private_key_password"),
      "ssl.truststore.type" -> KafkaPropertyValue("PEM"),
      "ssl.truststore.certificates" -> KafkaPropertyValue("/path/to/truststore/certificate"),
    )

    implicit lazy val mapSchema: Schema[Map[String, KafkaPropertyValue]] = Schema
      .schemaForMap[KafkaPropertyValue](schema)
      .encodedExample(exampleKafkaProperties.asJson)
  }

  final case class Kafka(
    topic: String,
    bootstrapServers: String,
    format: OutputFormat = Kafka.propertyDefaultValueForFormat,
    sslKeystorePassword: Option[Secret] = None,
    sslTruststorePassword: Option[Secret] = None,
    sslKeyPassword: Option[Secret] = None,
    saslJaasConfig: Option[SaslJaasConfig] = None,
    kafkaProperties: Map[String, KafkaPropertyValue] = Kafka.propertyDefaultValueForKafkaProperties,
  ) extends DestinationSteps
      with Format

  object Kafka {
    val propertyEncodedExampleForBootstrapServers = "kafka.svc.cluster.local:9092"
    val propertyEncodedExampleForTopic = "example-topic"
    val propertyDefaultValueForFormat: OutputFormat = OutputFormat.JSON
    val propertyDefaultValueForKafkaProperties: Map[String, KafkaPropertyValue] = Map.empty
    val propertyDefaultValueEncodedForKafkaProperties: Some[String] = Some("{}")
    // NOTE(review): the text below stops right after "See " — a link target appears to be missing
    // here; confirm against the upstream source file.
    val propertyDescriptionForKafkaProperties: String = """Map of Kafka
producer properties. |See """.stripMargin
    val description = "Publishes provided data to the specified Apache Kafka topic."
    val title = "Publish to Kafka Topic"
  }

  final case class Kinesis(
    credentials: Option[AwsCredentials],
    region: Option[AwsRegion],
    streamName: String,
    format: OutputFormat = Kinesis.propertyDefaultValueForFormat,
    kinesisParallelism: Option[Int],
    kinesisMaxBatchSize: Option[Int],
    kinesisMaxRecordsPerSecond: Option[Int],
    kinesisMaxBytesPerSecond: Option[Int],
  ) extends DestinationSteps
      with Format

  object Kinesis {
    val propertyEncodedExampleForStreamName = "example-stream"
    val propertyDefaultValueForFormat: OutputFormat = OutputFormat.JSON
    val description = "Publishes provided data to the specified Amazon Kinesis stream."
    val title = "Publish to Kinesis Data Stream"
  }

  final case class ReactiveStream(
    address: String = ReactiveStream.propertyDefaultValueForAddress,
    port: Int,
    format: OutputFormat,
  ) extends DestinationSteps
      with Format

  object ReactiveStream {
    val propertyDescriptionForAddress = "The address to bind the reactive stream server on."
    val propertyDefaultValueForAddress = "localhost"
    val propertyDescriptionForPort = "The port to bind the reactive stream server on."
    val description: String = """Broadcasts data to a created Reactive Stream. Other thatDot products can subscribe to Reactive Streams. |⚠️ Warning: Reactive Stream outputs do not function correctly when running in a cluster.""".stripMargin
    val title = "Broadcast to Reactive Stream"
  }

  final case class SNS(
    credentials: Option[AwsCredentials],
    region: Option[AwsRegion],
    topic: String,
    format: OutputFormat,
  ) extends DestinationSteps
      with Format

  object SNS {
    val propertyEncodedExampleForTopic = "example-topic"
    val propertyDescriptionForTopic = "ARN of the topic to publish to."
    val description: String = """Publishes an AWS SNS record to the provided topic. |⚠️ Double check your credentials and topic ARN! If writing to SNS fails, the write will |be retried indefinitely.
If the error is unfixable (e.g., the topic or credentials |cannot be found), the outputs will never be emitted and the Standing Query this output |is attached to may stop running.""".stripMargin // Use StringOps#asOneLine when that is accessible
    val title = "Publish to SNS Topic"
  }

  final case class StandardOut() extends DestinationSteps

  object StandardOut {
    val description = "Prints each result as a single-line JSON object to stdout on the application server."
    val title = "Log JSON to Console"
  }
}
================================================
FILE: api/src/main/scala/com/thatdot/api/v2/outputs/Format.scala
================================================
package com.thatdot.api.v2.outputs

/** Mixed into destination steps that carry an [[OutputFormat]]. */
trait Format {
  val format: OutputFormat
}
================================================
FILE: api/src/main/scala/com/thatdot/api/v2/outputs/OutputFormat.scala
================================================
package com.thatdot.api.v2.outputs

import io.circe.generic.extras.semiauto.{deriveConfiguredDecoder, deriveConfiguredEncoder}
import io.circe.{Decoder, Encoder}
import sttp.tapir.Schema
import sttp.tapir.Schema.annotations.{description, encodedExample, title}

import com.thatdot.api.v2.TypeDiscriminatorConfig.instances.circeConfig

@title("Result Output Format")
sealed trait OutputFormat

object OutputFormat {
  // Both schemas are lazy vals derived by tapir.
  implicit lazy val protobufSchema: Schema[Protobuf] = Schema.derived
  implicit lazy val schema: Schema[OutputFormat] = Schema.derived

  @title("JSON")
  @encodedExample("JSON")
  case object JSON extends OutputFormat

  @title("Protobuf")
  @encodedExample("""{ | "type": "Protobuf", | "schemaUrl": "conf/protobuf-schemas/example_schema.desc", | "typeName": "ExampleType" |}""".stripMargin)
  final case class Protobuf(
    @description(
      "URL (or local filename) of the Protobuf .desc file to load that contains the desired typeName to serialize to",
    )
    @encodedExample("conf/protobuf-schemas/example_schema.desc")
    schemaUrl: String,
    @description("message type name to use (from the given .desc
file) as the message type")
    @encodedExample("ExampleType")
    typeName: String,
  ) extends OutputFormat

  // Circe codecs derived with the implicit configuration imported as `circeConfig`.
  implicit val encoder: Encoder[OutputFormat] = deriveConfiguredEncoder
  implicit val decoder: Decoder[OutputFormat] = deriveConfiguredDecoder
}
================================================
FILE: api/src/main/scala/com/thatdot/api/v2/schema/TapirJsonConfig.scala
================================================
package com.thatdot.api.v2.schema

import io.circe.Printer
import sttp.tapir.json.circe.TapirJsonCirce

/** Provides `jsonBody[T]` for endpoint definitions, using overridden settings. */
trait TapirJsonConfig extends TapirJsonCirce {
  // Every mixer of this trait shares the single printer held by the companion object.
  override def jsonPrinter: Printer = TapirJsonConfig.printer
}

object TapirJsonConfig extends TapirJsonConfig {

  /** Circe JSON printer that will
    * - Drop null values from output JSON
    * - Use no indentation (compact output)
    */
  private val printer: Printer = Printer(dropNullValues = true, indent = "")
}
================================================
FILE: api/src/main/scala/com/thatdot/api/v2/schema/ThirdPartySchemas.scala
================================================
package com.thatdot.api.v2.schema

import java.nio.charset.Charset
import java.time.Instant

import scala.util.{Failure, Success, Try}

import cats.data.NonEmptyList
import io.circe.Json
import sttp.tapir.CodecFormat.TextPlain
import sttp.tapir.{Codec, DecodeResult, Schema}

/** Tapir schemas for third-party types that cannot have implicits in their companion objects.
*
  * Usage:
  * {{{
  * import com.thatdot.api.v2.schema.ThirdPartySchemas.cats._
  * import com.thatdot.api.v2.schema.ThirdPartySchemas.circe._
  * import com.thatdot.api.v2.schema.ThirdPartySchemas.jdk._
  * }}}
  *
  * @see [[com.thatdot.api.v2.codec.ThirdPartyCodecs]] for Circe codecs (JSON serialization)
  */
object ThirdPartySchemas {

  /** Schemas for `cats` data types */
  object cats {
    // Reuses the List schema; `NonEmptyList.fromList` returns an Option, which is the shape the
    // first argument of `map` is given here.
    implicit def nonEmptyListSchema[A](implicit inner: Schema[A]): Schema[NonEmptyList[A]] =
      Schema.schemaForIterable[A, List].map(list => NonEmptyList.fromList(list))(_.toList)
  }

  /** Schemas for Circe types */
  object circe {
    implicit lazy val jsonSchema: Schema[Json] = Schema.any[Json]
    implicit lazy val mapStringJsonSchema: Schema[Map[String, Json]] = Schema.schemaForMap[String, Json](identity)
    implicit lazy val seqJsonSchema: Schema[Seq[Json]] = jsonSchema.asIterable[Seq]
    implicit lazy val seqSeqJsonSchema: Schema[Seq[Seq[Json]]] = seqJsonSchema.asIterable[Seq]
  }

  /** Schemas for JDK types */
  object jdk {
    // Text codec for charsets: decoding wraps `Charset.forName` in Try so an invalid name yields
    // a DecodeResult.Error; encoding uses `toString`.
    // NOTE(review): the first argument of DecodeResult.Error here is a message, whereas
    // YamlCodec.rawDecode passes the raw input string in that position — confirm which is intended.
    implicit val charsetCodec: Codec[String, Charset, TextPlain] = Codec.string.mapDecode(s =>
      Try(Charset.forName(s)) match {
        case Success(charset) => DecodeResult.Value(charset)
        case Failure(e) => DecodeResult.Error(s"Invalid charset: $s", e)
      },
    )(_.toString)
    implicit lazy val charsetSchema: Schema[Charset] = charsetCodec.schema

    // Text codec for instants: decoding wraps `Instant.parse` in Try; encoding uses `toString`.
    // NOTE(review): same first-argument question as for `charsetCodec` above.
    implicit val instantCodec: Codec[String, Instant, TextPlain] = Codec.string.mapDecode(s =>
      Try(Instant.parse(s)) match {
        case Success(instant) => DecodeResult.Value(instant)
        case Failure(e) => DecodeResult.Error(s"Invalid instant: $s", e)
      },
    )(_.toString)
    implicit lazy val instantSchema: Schema[Instant] = instantCodec.schema
  }
}
================================================
FILE: api/src/test/scala/com/thatdot/api/codec/SecretCodecsSpec.scala
================================================
package com.thatdot.api.codec

import io.circe.Json
import io.circe.syntax.EncoderOps
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

import
com.thatdot.api.codec.SecretCodecs._
import com.thatdot.common.security.Secret

/** Checks that the default Secret encoder redacts, the decoder keeps the real value, and
  * `preservingEncoder` writes the real value.
  */
class SecretCodecsSpec extends AnyWordSpec with Matchers {

  "secretEncoder" should {
    "redact the actual value" in {
      val secret = Secret("AKIAIOSFODNN7EXAMPLE")
      secret.asJson shouldBe Json.fromString("Secret(****)")
    }
  }

  "secretDecoder" should {
    "wrap incoming string and preserve value internally" in {
      import Secret.Unsafe._
      val originalValue = "my-secret-value"
      val json = Json.fromString(originalValue)
      val decoded = json.as[Secret].getOrElse(fail("Failed to decode Secret"))
      // toString stays redacted even though the underlying value is retained.
      decoded.toString shouldBe "Secret(****)"
      decoded.unsafeValue shouldBe originalValue
    }
  }

  "preservingEncoder" should {
    "preserve actual credential value" in {
      import Secret.Unsafe._
      val value = "real-credential-value"
      Secret(value).asJson(preservingEncoder) shouldBe Json.fromString(value)
    }

    "produce different output than standard encoder" in {
      import Secret.Unsafe._
      val secret = Secret("credential")
      secret.asJson(secretEncoder) shouldNot be(secret.asJson(preservingEncoder))
    }

    "preserve value through roundtrip" in {
      import Secret.Unsafe._
      val originalValue = "AKIAIOSFODNN7EXAMPLE"
      val secret = Secret(originalValue)
      val json = secret.asJson(preservingEncoder)
      val decoded = json.as[Secret].getOrElse(fail("Failed to decode Secret"))
      decoded.unsafeValue shouldBe originalValue
    }
  }
}
================================================
FILE: api/src/test/scala/com/thatdot/api/v2/ApiErrorsCodecSpec.scala
================================================
package com.thatdot.api.v2

import io.circe.syntax.EncoderOps
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks

/** Encoding checks for `ErrorType` and the `ErrorResponse` variants. */
class ApiErrorsCodecSpec extends AnyFunSuite with Matchers with ScalaCheckDrivenPropertyChecks {
  import ErrorTypeGenerators.Arbs._
  import ErrorResponseGenerators.Arbs._

  // Despite the test name, the generated value is any ErrorType, not only ApiError.
  test("ErrorType.ApiError encodes message field") {
    forAll { (error: ErrorType) =>
      val json = error.asJson
json.hcursor.get[String]("message") shouldBe Right(error.message)
    }
  }

  test("ErrorType.DecodeError encodes optional help field") {
    forAll { (error: ErrorType.DecodeError) =>
      val json = error.asJson
      error.help match {
        case Some(h) => json.hcursor.get[String]("help") shouldBe Right(h)
        // With no help text, reading "help" as a String must fail.
        case None => json.hcursor.get[String]("help").isLeft shouldBe true
      }
    }
  }

  test("ErrorType encodes with type discriminator") {
    (ErrorType.ApiError("msg"): ErrorType).asJson.hcursor.get[String]("type") shouldBe Right("ApiError")
    (ErrorType.DecodeError("msg"): ErrorType).asJson.hcursor.get[String]("type") shouldBe Right("DecodeError")
    (ErrorType.CypherError("msg"): ErrorType).asJson.hcursor.get[String]("type") shouldBe Right("CypherError")
  }

  test("ErrorResponse.ServerError encodes errors list") {
    forAll { (error: ErrorResponse.ServerError) =>
      val json = error.asJson
      val errorsArray = json.hcursor.downField("errors").focus.flatMap(_.asArray)
      errorsArray.isDefined shouldBe true
      errorsArray.get.size shouldBe error.errors.size
    }
  }

  test("ErrorResponse.BadRequest encodes errors list") {
    forAll { (error: ErrorResponse.BadRequest) =>
      val json = error.asJson
      val errorsArray = json.hcursor.downField("errors").focus.flatMap(_.asArray)
      errorsArray.get.size shouldBe error.errors.size
    }
  }

  test("ErrorResponse.NotFound encodes errors list") {
    forAll { (error: ErrorResponse.NotFound) =>
      val json = error.asJson
      val errorsArray = json.hcursor.downField("errors").focus.flatMap(_.asArray)
      errorsArray.get.size shouldBe error.errors.size
    }
  }

  test("ErrorResponse.Unauthorized encodes errors list") {
    forAll { (error: ErrorResponse.Unauthorized) =>
      val json = error.asJson
      val errorsArray = json.hcursor.downField("errors").focus.flatMap(_.asArray)
      errorsArray.get.size shouldBe error.errors.size
    }
  }

  test("ErrorResponse.ServiceUnavailable encodes errors list") {
    forAll { (error: ErrorResponse.ServiceUnavailable) =>
      val json = error.asJson
      val errorsArray = json.hcursor.downField("errors").focus.flatMap(_.asArray)
errorsArray.get.size shouldBe error.errors.size
    }
  }

  // Fixed two-element example: checks order, message and type discriminator of each entry.
  test("ErrorResponse types preserve error content when encoded") {
    val errorList = List(ErrorType.ApiError("error1"), ErrorType.CypherError("error2"))
    val serverError = ErrorResponse.ServerError(errorList)
    val json = serverError.asJson
    val errorsArray = json.hcursor.downField("errors").focus.flatMap(_.asArray).get
    errorsArray.size shouldBe 2
    errorsArray.head.hcursor.get[String]("message") shouldBe Right("error1")
    errorsArray.head.hcursor.get[String]("type") shouldBe Right("ApiError")
    errorsArray(1).hcursor.get[String]("message") shouldBe Right("error2")
    errorsArray(1).hcursor.get[String]("type") shouldBe Right("CypherError")
  }
}
================================================
FILE: api/src/test/scala/com/thatdot/api/v2/AwsCredentialsCodecSpec.scala
================================================
package com.thatdot.api.v2

import io.circe.Json
import io.circe.syntax.EncoderOps
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec
import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks

import com.thatdot.common.security.Secret

/** Codec checks for `AwsCredentials`: redacting encoder, decoder, and `preservingEncoder`. */
class AwsCredentialsCodecSpec extends AnyWordSpec with Matchers with ScalaCheckDrivenPropertyChecks {
  import AwsGenerators.Arbs._

  "AwsCredentials encoder" should {
    "redact credentials in JSON output" in {
      val creds = AwsCredentials(
        accessKeyId = Secret("AKIAIOSFODNN7EXAMPLE"),
        secretAccessKey = Secret("wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"),
      )
      creds.asJson shouldBe Json.obj(
        "accessKeyId" -> Json.fromString("Secret(****)"),
        "secretAccessKey" -> Json.fromString("Secret(****)"),
      )
    }
  }

  "AwsCredentials decoder" should {
    "decode JSON with plain strings" in {
      import Secret.Unsafe._
      val json = Json.obj(
        "accessKeyId" -> Json.fromString("AKIATEST"),
        "secretAccessKey" -> Json.fromString("secretkey123"),
      )
      val creds = json.as[AwsCredentials].getOrElse(fail("Failed to decode AwsCredentials"))
      creds.accessKeyId.unsafeValue shouldBe "AKIATEST"
creds.secretAccessKey.unsafeValue shouldBe "secretkey123"
    }

    "decode values correctly for any credentials (property-based)" in {
      import Secret.Unsafe._
      forAll { (creds: AwsCredentials) =>
        val originalAccessKey = creds.accessKeyId.unsafeValue
        val originalSecretKey = creds.secretAccessKey.unsafeValue
        // Build the input from the raw values so the decoder is exercised on plain strings.
        val inputJson = Json.obj(
          "accessKeyId" -> Json.fromString(originalAccessKey),
          "secretAccessKey" -> Json.fromString(originalSecretKey),
        )
        val decoded = inputJson.as[AwsCredentials].getOrElse(fail("Failed to decode AwsCredentials"))
        decoded.accessKeyId.unsafeValue shouldBe originalAccessKey
        decoded.secretAccessKey.unsafeValue shouldBe originalSecretKey
      }
    }
  }

  "AwsCredentials.preservingEncoder" should {
    "preserve credential values in JSON output" in {
      import Secret.Unsafe._
      val accessKey = "AKIAIOSFODNN8EXAMPLE"
      val secretKey = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
      val creds = AwsCredentials(
        accessKeyId = Secret(accessKey),
        secretAccessKey = Secret(secretKey),
      )
      val json = creds.asJson(AwsCredentials.preservingEncoder)
      json shouldBe Json.obj(
        "accessKeyId" -> Json.fromString(accessKey),
        "secretAccessKey" -> Json.fromString(secretKey),
      )
    }

    "produce different output than standard encoder" in {
      import Secret.Unsafe._
      val accessKey = "AKIA123"
      val secretKey = "secret456"
      val creds = AwsCredentials(
        accessKeyId = Secret(accessKey),
        secretAccessKey = Secret(secretKey),
      )
      val redacted = creds.asJson
      val preserved = creds.asJson(AwsCredentials.preservingEncoder)
      // Compare the focused JSON values, not the cursor objects: two cursors over different
      // documents are distinct objects, so comparing them would pass even if both encoders
      // produced identical output.
      redacted.hcursor.downField("accessKeyId").focus shouldNot be(
        preserved.hcursor.downField("accessKeyId").focus,
      )
      redacted.hcursor.downField("secretAccessKey").focus shouldNot be(
        preserved.hcursor.downField("secretAccessKey").focus,
      )
    }

    "preserve values through roundtrip (property-based)" in {
      import Secret.Unsafe._
      forAll { (creds: AwsCredentials) =>
        val originalAccessKey = creds.accessKeyId.unsafeValue
        val originalSecretKey = creds.secretAccessKey.unsafeValue
        val json = creds.asJson(AwsCredentials.preservingEncoder)
        val decoded =
json.as[AwsCredentials].getOrElse(fail("Failed to decode AwsCredentials"))
        decoded.accessKeyId.unsafeValue shouldBe originalAccessKey
        decoded.secretAccessKey.unsafeValue shouldBe originalSecretKey
      }
    }
  }
}
================================================
FILE: api/src/test/scala/com/thatdot/api/v2/AwsGenerators.scala
================================================
package com.thatdot.api.v2

import org.scalacheck.{Arbitrary, Gen}

import com.thatdot.common.security.Secret
import com.thatdot.quine.ScalaPrimitiveGenerators

/** ScalaCheck generators (`Gens`) and matching `Arbitrary` instances (`Arbs`) for the AWS API types. */
object AwsGenerators {
  import ScalaPrimitiveGenerators.Gens.nonEmptyAlphaNumStr

  object Gens {
    val awsCredentials: Gen[AwsCredentials] = for {
      accessKey <- nonEmptyAlphaNumStr
      secretKey <- nonEmptyAlphaNumStr
    } yield AwsCredentials(Secret(accessKey), Secret(secretKey))

    val optAwsCredentials: Gen[Option[AwsCredentials]] = Gen.option(awsCredentials)

    // Regions are drawn from a fixed list of four region names.
    val awsRegion: Gen[AwsRegion] =
      Gen.oneOf("us-east-1", "us-west-2", "eu-west-1", "ap-northeast-1").map(AwsRegion.apply)

    val optAwsRegion: Gen[Option[AwsRegion]] = Gen.option(awsRegion)
  }

  object Arbs {
    implicit val arbAwsCredentials: Arbitrary[AwsCredentials] = Arbitrary(Gens.awsCredentials)
    implicit val arbOptAwsCredentials: Arbitrary[Option[AwsCredentials]] = Arbitrary(Gens.optAwsCredentials)
    implicit val arbAwsRegion: Arbitrary[AwsRegion] = Arbitrary(Gens.awsRegion)
    implicit val arbOptAwsRegion: Arbitrary[Option[AwsRegion]] = Arbitrary(Gens.optAwsRegion)
  }
}
================================================
FILE: api/src/test/scala/com/thatdot/api/v2/AwsRegionCodecSpec.scala
================================================
package com.thatdot.api.v2

import io.circe.Json
import io.circe.syntax.EncoderOps
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks

class AwsRegionCodecSpec extends AnyFunSuite with Matchers with ScalaCheckDrivenPropertyChecks {
  import AwsGenerators.Arbs._

  test("AwsRegion encodes as
plain string") {
    val region = AwsRegion("us-west-2")
    region.asJson shouldBe Json.fromString("us-west-2")
  }

  test("AwsRegion decodes from plain string") {
    val json = Json.fromString("us-west-2")
    json.as[AwsRegion] shouldBe Right(AwsRegion("us-west-2"))
  }

  test("AwsRegion roundtrips encode/decode") {
    forAll { (region: AwsRegion) =>
      region.asJson.as[AwsRegion] shouldBe Right(region)
    }
  }

  test("Option[AwsRegion] roundtrips encode/decode") {
    forAll { (region: Option[AwsRegion]) =>
      region.asJson.as[Option[AwsRegion]] shouldBe Right(region)
    }
  }
}
================================================
FILE: api/src/test/scala/com/thatdot/api/v2/ErrorResponseGenerators.scala
================================================
package com.thatdot.api.v2

import org.scalacheck.{Arbitrary, Gen}

import com.thatdot.quine.ScalaPrimitiveGenerators

/** ScalaCheck generators (`Gens`) and `Arbitrary` instances (`Arbs`) for the `ErrorResponse` variants. */
object ErrorResponseGenerators {
  import ScalaPrimitiveGenerators.Gens.smallPosNum
  import ErrorTypeGenerators.Gens.errorType

  object Gens {
    // List length is drawn from `smallPosNum`; every response generator below wraps such a list.
    val errorList: Gen[List[ErrorType]] = smallPosNum.flatMap(Gen.listOfN(_, errorType))

    val serverError: Gen[ErrorResponse.ServerError] = errorList.map(ErrorResponse.ServerError(_))
    val badRequest: Gen[ErrorResponse.BadRequest] = errorList.map(ErrorResponse.BadRequest(_))
    val notFound: Gen[ErrorResponse.NotFound] = errorList.map(ErrorResponse.NotFound(_))
    val unauthorized: Gen[ErrorResponse.Unauthorized] = errorList.map(ErrorResponse.Unauthorized(_))
    val serviceUnavailable: Gen[ErrorResponse.ServiceUnavailable] = errorList.map(ErrorResponse.ServiceUnavailable(_))
  }

  object Arbs {
    implicit val serverError: Arbitrary[ErrorResponse.ServerError] = Arbitrary(Gens.serverError)
    implicit val badRequest: Arbitrary[ErrorResponse.BadRequest] = Arbitrary(Gens.badRequest)
    implicit val notFound: Arbitrary[ErrorResponse.NotFound] = Arbitrary(Gens.notFound)
    implicit val unauthorized: Arbitrary[ErrorResponse.Unauthorized] = Arbitrary(Gens.unauthorized)
    implicit val serviceUnavailable: Arbitrary[ErrorResponse.ServiceUnavailable] =
Arbitrary(Gens.serviceUnavailable)
  }
}
================================================
FILE: api/src/test/scala/com/thatdot/api/v2/ErrorTypeGenerators.scala
================================================
package com.thatdot.api.v2

import org.scalacheck.{Arbitrary, Gen}

import com.thatdot.quine.ScalaPrimitiveGenerators

/** ScalaCheck generators (`Gens`) and `Arbitrary` instances (`Arbs`) for the `ErrorType` variants. */
object ErrorTypeGenerators {
  import ScalaPrimitiveGenerators.Gens.{nonEmptyAlphaNumStr, optNonEmptyAlphaNumStr}

  object Gens {
    val apiError: Gen[ErrorType.ApiError] = nonEmptyAlphaNumStr.map(ErrorType.ApiError(_))

    val decodeError: Gen[ErrorType.DecodeError] = for {
      message <- nonEmptyAlphaNumStr
      help <- optNonEmptyAlphaNumStr
    } yield ErrorType.DecodeError(message, help)

    val cypherError: Gen[ErrorType.CypherError] = nonEmptyAlphaNumStr.map(ErrorType.CypherError(_))

    // Picks one of the three variant generators above.
    val errorType: Gen[ErrorType] = Gen.oneOf(apiError, decodeError, cypherError)
  }

  object Arbs {
    implicit val apiError: Arbitrary[ErrorType.ApiError] = Arbitrary(Gens.apiError)
    implicit val decodeError: Arbitrary[ErrorType.DecodeError] = Arbitrary(Gens.decodeError)
    implicit val cypherError: Arbitrary[ErrorType.CypherError] = Arbitrary(Gens.cypherError)
    implicit val errorType: Arbitrary[ErrorType] = Arbitrary(Gens.errorType)
  }
}
================================================
FILE: api/src/test/scala/com/thatdot/api/v2/SaslJaasConfigCodecSpec.scala
================================================
package com.thatdot.api.v2

import io.circe.syntax.EncoderOps
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

import com.thatdot.common.security.Secret

/** Tests for [[SaslJaasConfig]] Circe codec behavior.
* * Verifies that: * - Secret fields (password, clientSecret) are redacted in JSON output * - Non-sensitive fields (username, clientId) are NOT redacted * - Decoder can reconstruct case classes from JSON */ class SaslJaasConfigCodecSpec extends AnyFunSuite with Matchers { test("PlainLogin encoder redacts password") { val login = PlainLogin(username = "alice", password = Secret("test-pw")) val json = login.asJson json.hcursor.get[String]("password") shouldBe Right("Secret(****)") } test("PlainLogin encoder does NOT redact username") { val login = PlainLogin(username = "alice", password = Secret("test-pw")) val json = login.asJson json.hcursor.get[String]("username") shouldBe Right("alice") } test("PlainLogin decoder reconstructs from JSON") { import Secret.Unsafe._ val json = io.circe.parser .parse("""{"username": "alice", "password": "test-pw"}""") .getOrElse(fail("Failed to parse JSON")) val decoded = json.as[PlainLogin].getOrElse(fail("Failed to decode PlainLogin")) decoded.username shouldBe "alice" decoded.password.unsafeValue shouldBe "test-pw" } test("ScramLogin encoder redacts password") { val login = ScramLogin(username = "bob", password = Secret("secret123")) val json = login.asJson json.hcursor.get[String]("password") shouldBe Right("Secret(****)") } test("ScramLogin encoder does NOT redact username") { val login = ScramLogin(username = "bob", password = Secret("secret123")) val json = login.asJson json.hcursor.get[String]("username") shouldBe Right("bob") } test("ScramLogin decoder reconstructs from JSON") { import Secret.Unsafe._ val json = io.circe.parser .parse("""{"username": "bob", "password": "secret123"}""") .getOrElse(fail("Failed to parse JSON")) val decoded = json.as[ScramLogin].getOrElse(fail("Failed to decode ScramLogin")) decoded.username shouldBe "bob" decoded.password.unsafeValue shouldBe "secret123" } test("OAuthBearerLogin encoder redacts clientSecret") { val login = OAuthBearerLogin( clientId = "my-client", clientSecret = 
Secret("oauth-secret"), scope = Some("read:data"), tokenEndpointUrl = Some("https://auth.example.com/token"), ) val json = login.asJson json.hcursor.get[String]("clientSecret") shouldBe Right("Secret(****)") } test("OAuthBearerLogin encoder does NOT redact clientId") { val login = OAuthBearerLogin( clientId = "my-client", clientSecret = Secret("oauth-secret"), scope = Some("read:data"), tokenEndpointUrl = Some("https://auth.example.com/token"), ) val json = login.asJson json.hcursor.get[String]("clientId") shouldBe Right("my-client") } test("OAuthBearerLogin encoder does NOT redact scope") { val login = OAuthBearerLogin( clientId = "my-client", clientSecret = Secret("oauth-secret"), scope = Some("read:data"), tokenEndpointUrl = None, ) val json = login.asJson json.hcursor.get[Option[String]]("scope") shouldBe Right(Some("read:data")) } test("OAuthBearerLogin encoder does NOT redact tokenEndpointUrl") { val login = OAuthBearerLogin( clientId = "my-client", clientSecret = Secret("oauth-secret"), scope = None, tokenEndpointUrl = Some("https://auth.example.com/token"), ) val json = login.asJson json.hcursor.get[Option[String]]("tokenEndpointUrl") shouldBe Right(Some("https://auth.example.com/token")) } test("OAuthBearerLogin decoder reconstructs from JSON with all fields") { import Secret.Unsafe._ val json = io.circe.parser .parse( """{"clientId": "my-client", "clientSecret": "oauth-secret", "scope": "read:data", "tokenEndpointUrl": "https://auth.example.com/token"}""", ) .getOrElse(fail("Failed to parse JSON")) val decoded = json.as[OAuthBearerLogin].getOrElse(fail("Failed to decode OAuthBearerLogin")) decoded.clientId shouldBe "my-client" decoded.clientSecret.unsafeValue shouldBe "oauth-secret" decoded.scope shouldBe Some("read:data") decoded.tokenEndpointUrl shouldBe Some("https://auth.example.com/token") } test("OAuthBearerLogin decoder applies defaults for optional fields") { import Secret.Unsafe._ val json = io.circe.parser .parse("""{"clientId": "my-client", 
// Decoding through the sealed-trait decoder must select the subtype named by the "type" field.
test("SaslJaasConfig decoder routes to correct subtype via type discriminator") {
  import Secret.Unsafe._
  val plainJson = io.circe.parser
    .parse("""{"type": "PlainLogin", "username": "alice", "password": "pw"}""")
    .getOrElse(fail("Failed to parse JSON"))
  val decoded = plainJson.as[SaslJaasConfig].getOrElse(fail("Failed to decode SaslJaasConfig"))

  // Pattern match instead of `shouldBe a[...]` followed by an unchecked `asInstanceOf` downcast:
  // a wrong subtype now fails with an explicit message rather than relying on a cast.
  decoded match {
    case plain: PlainLogin =>
      plain.username shouldBe "alice"
      plain.password.unsafeValue shouldBe "pw"
    case other =>
      // Report only the class name so no credential material ends up in the failure message.
      fail(s"Expected PlainLogin but decoded ${other.getClass.getSimpleName}")
  }
}
jaasString shouldBe """org.apache.kafka.common.security.scram.ScramLoginModule required username="bob" password="scram-secret";""" } test("toJaasConfigString produces OAuthBearerLoginModule JAAS string for OAuthBearerLogin") { import Secret.Unsafe._ val login = OAuthBearerLogin( clientId = "my-client", clientSecret = Secret("oauth-secret"), ) val jaasString = SaslJaasConfig.toJaasConfigString(login) jaasString should include("org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule required") jaasString should include("""clientId="my-client"""") jaasString should include("""clientSecret="oauth-secret"""") jaasString should endWith(";") } test("toJaasConfigString includes scope in OAuthBearerLogin JAAS string when present") { import Secret.Unsafe._ val login = OAuthBearerLogin( clientId = "my-client", clientSecret = Secret("oauth-secret"), scope = Some("read:data write:data"), ) val jaasString = SaslJaasConfig.toJaasConfigString(login) jaasString should include("""scope="read:data write:data"""") } test("toJaasConfigString includes tokenEndpointUrl in OAuthBearerLogin JAAS string when present") { import Secret.Unsafe._ val login = OAuthBearerLogin( clientId = "my-client", clientSecret = Secret("oauth-secret"), tokenEndpointUrl = Some("https://auth.example.com/token"), ) val jaasString = SaslJaasConfig.toJaasConfigString(login) jaasString should include("""sasl.oauthbearer.token.endpoint.url="https://auth.example.com/token"""") } test("preservingEncoder preserves PlainLogin password") { import Secret.Unsafe._ val login: SaslJaasConfig = PlainLogin(username = "alice", password = Secret("real-password")) val encoder = SaslJaasConfig.preservingEncoder val json = encoder(login) json.hcursor.get[String]("password") shouldBe Right("real-password") json.hcursor.get[String]("username") shouldBe Right("alice") } test("preservingEncoder preserves ScramLogin password") { import Secret.Unsafe._ val login: SaslJaasConfig = ScramLogin(username = "bob", password = 
/** ScalaCheck generators and `Arbitrary` instances for [[SaslJaasConfig]] and its subtypes. */
object SaslJaasConfigGenerators {

  import ScalaPrimitiveGenerators.Gens.nonEmptyAlphaNumStr

  object Gens {

    // The two Secret generators below may be worth moving into a dedicated SecretGenerators,
    // but more likely after we pull quine-common into quine-plus.
    val secret: Gen[Secret] = nonEmptyAlphaNumStr.map(raw => Secret(raw))
    val optSecret: Gen[Option[Secret]] = Gen.option(secret)

    /** A PLAIN login: non-empty alphanumeric username, then a generated secret password. */
    val plainLogin: Gen[PlainLogin] =
      nonEmptyAlphaNumStr.flatMap { username =>
        secret.map(password => PlainLogin(username, password))
      }

    /** A SCRAM login: non-empty alphanumeric username, then a generated secret password. */
    val scramLogin: Gen[ScramLogin] =
      nonEmptyAlphaNumStr.flatMap { username =>
        secret.map(password => ScramLogin(username, password))
      }

    /** An OAuth bearer login; `scope` and `tokenEndpointUrl` are each optionally present. */
    val oauthBearerLogin: Gen[OAuthBearerLogin] = {
      val scopeGen: Gen[Option[String]] = Gen.option(nonEmptyAlphaNumStr)
      val tokenEndpointUrlGen: Gen[Option[String]] =
        Gen.option(nonEmptyAlphaNumStr.map(host => s"https://$host.example.com/oauth/token"))

      // Draw order (clientId, clientSecret, scope, tokenEndpointUrl) is kept as-is.
      nonEmptyAlphaNumStr.flatMap { clientId =>
        secret.flatMap { clientSecret =>
          scopeGen.flatMap { scope =>
            tokenEndpointUrlGen.map { tokenEndpointUrl =>
              OAuthBearerLogin(clientId, clientSecret, scope, tokenEndpointUrl)
            }
          }
        }
      }
    }

    val saslJaasConfig: Gen[SaslJaasConfig] =
      Gen.oneOf(plainLogin, scramLogin, oauthBearerLogin)

    val optSaslJaasConfig: Gen[Option[SaslJaasConfig]] = Gen.option(saslJaasConfig)
  }

  /** Implicit `Arbitrary` wrappers around the generators in [[Gens]]. */
  object Arbs {
    import Gens._

    implicit val arbSecret: Arbitrary[Secret] = Arbitrary(secret)
    implicit val arbOptSecret: Arbitrary[Option[Secret]] = Arbitrary(optSecret)
    implicit val arbPlainLogin: Arbitrary[PlainLogin] = Arbitrary(plainLogin)
    implicit val arbScramLogin: Arbitrary[ScramLogin] = Arbitrary(scramLogin)
    implicit val arbOAuthBearerLogin: Arbitrary[OAuthBearerLogin] = Arbitrary(oauthBearerLogin)
    implicit val arbSaslJaasConfig: Arbitrary[SaslJaasConfig] = Arbitrary(saslJaasConfig)
    implicit val arbOptSaslJaasConfig: Arbitrary[Option[SaslJaasConfig]] = Arbitrary(optSaslJaasConfig)
  }
}
* * Verifies that: * - Sensitive fields (password, clientSecret) are redacted as "****" in logged output * - Non-sensitive fields (username, clientId, scope, tokenEndpointUrl) are visible * - The format matches the expected pattern for each subtype */ class SaslJaasConfigLoggableSpec extends AnyFunSuite with Matchers { import SaslJaasConfig.logSaslJaasConfig test("PlainLogin logs in JAAS format with username visible and password redacted") { val login = PlainLogin(username = "alice", password = Secret("jaas-queen")) val logged = logSaslJaasConfig.safe(login) logged shouldBe """org.apache.kafka.common.security.plain.PlainLoginModule required username="alice" password="****";""" } test("ScramLogin logs in JAAS format with username visible and password redacted") { val login = ScramLogin(username = "bob", password = Secret("scram-secret")) val logged = logSaslJaasConfig.safe(login) logged shouldBe """org.apache.kafka.common.security.scram.ScramLoginModule required username="bob" password="****";""" } test("OAuthBearerLogin logs in JAAS format with clientId visible and clientSecret redacted") { val login = OAuthBearerLogin( clientId = "my-client", clientSecret = Secret("oauth-secret"), scope = Some("read:data"), tokenEndpointUrl = Some("https://auth.example.com/token"), ) val logged = logSaslJaasConfig.safe(login) logged shouldBe """org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule required clientId="my-client" clientSecret="****" scope="read:data" sasl.oauthbearer.token.endpoint.url="https://auth.example.com/token";""" } test("OAuthBearerLogin logs in JAAS format without optional fields when absent") { val login = OAuthBearerLogin( clientId = "my-client", clientSecret = Secret("oauth-secret"), ) val logged = logSaslJaasConfig.safe(login) logged shouldBe """org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule required clientId="my-client" clientSecret="****";""" } test("PlainLogin password is indistinguishable regardless of actual 
value") { val login1 = PlainLogin(username = "alice", password = Secret("password1")) val login2 = PlainLogin(username = "alice", password = Secret("different-password")) val logged1 = logSaslJaasConfig.safe(login1) val logged2 = logSaslJaasConfig.safe(login2) logged1 shouldBe logged2 } test("ScramLogin password is indistinguishable regardless of actual value") { val login1 = ScramLogin(username = "bob", password = Secret("password1")) val login2 = ScramLogin(username = "bob", password = Secret("different-password")) val logged1 = logSaslJaasConfig.safe(login1) val logged2 = logSaslJaasConfig.safe(login2) logged1 shouldBe logged2 } test("OAuthBearerLogin clientSecret is indistinguishable regardless of actual value") { val login1 = OAuthBearerLogin(clientId = "client", clientSecret = Secret("secret1")) val login2 = OAuthBearerLogin(clientId = "client", clientSecret = Secret("different-secret")) val logged1 = logSaslJaasConfig.safe(login1) val logged2 = logSaslJaasConfig.safe(login2) logged1 shouldBe logged2 } } ================================================ FILE: api/src/test/scala/com/thatdot/api/v2/SuccessEnvelopeCodecSpec.scala ================================================ package com.thatdot.api.v2 import io.circe.syntax.EncoderOps import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks class SuccessEnvelopeCodecSpec extends AnyFunSuite with Matchers with ScalaCheckDrivenPropertyChecks { import SuccessEnvelopeGenerators.Arbs._ test("SuccessEnvelope.Ok encodes content field") { forAll { (envelope: SuccessEnvelope.Ok[String]) => val json = envelope.asJson json.hcursor.get[String]("content") shouldBe Right(envelope.content) } } test("SuccessEnvelope.Ok encodes optional message field") { forAll { (envelope: SuccessEnvelope.Ok[String]) => val json = envelope.asJson envelope.message match { case Some(msg) => json.hcursor.get[String]("message") shouldBe Right(msg) 
case None => json.hcursor.get[Option[String]]("message") shouldBe Right(None) } } } test("SuccessEnvelope.Ok encodes warnings list") { forAll { (envelope: SuccessEnvelope.Ok[String]) => val json = envelope.asJson json.hcursor.get[List[String]]("warnings") shouldBe Right(envelope.warnings) } } test("SuccessEnvelope.Ok roundtrips encode/decode") { forAll { (envelope: SuccessEnvelope.Ok[String]) => val json = envelope.asJson val decoded = json.as[SuccessEnvelope.Ok[String]] decoded shouldBe Right(envelope) } } test("SuccessEnvelope.Created encodes content field") { forAll { (envelope: SuccessEnvelope.Created[String]) => val json = envelope.asJson json.hcursor.get[String]("content") shouldBe Right(envelope.content) } } test("SuccessEnvelope.Created encodes optional message field") { forAll { (envelope: SuccessEnvelope.Created[String]) => val json = envelope.asJson envelope.message match { case Some(msg) => json.hcursor.get[String]("message") shouldBe Right(msg) case None => json.hcursor.get[Option[String]]("message") shouldBe Right(None) } } } test("SuccessEnvelope.Created encodes warnings list") { forAll { (envelope: SuccessEnvelope.Created[String]) => val json = envelope.asJson json.hcursor.get[List[String]]("warnings") shouldBe Right(envelope.warnings) } } test("SuccessEnvelope.Created roundtrips encode/decode") { forAll { (envelope: SuccessEnvelope.Created[String]) => val json = envelope.asJson val decoded = json.as[SuccessEnvelope.Created[String]] decoded shouldBe Right(envelope) } } test("SuccessEnvelope.Accepted encodes message field") { forAll { (envelope: SuccessEnvelope.Accepted) => val json = envelope.asJson json.hcursor.get[String]("message") shouldBe Right(envelope.message) } } test("SuccessEnvelope.Accepted encodes optional monitorUrl field") { forAll { (envelope: SuccessEnvelope.Accepted) => val json = envelope.asJson envelope.monitorUrl match { case Some(url) => json.hcursor.get[String]("monitorUrl") shouldBe Right(url) case None => 
json.hcursor.get[Option[String]]("monitorUrl") shouldBe Right(None) } } } test("SuccessEnvelope.Accepted roundtrips encode/decode") { forAll { (envelope: SuccessEnvelope.Accepted) => val json = envelope.asJson val decoded = json.as[SuccessEnvelope.Accepted] decoded shouldBe Right(envelope) } } test("SuccessEnvelope.NoContent encodes to unit-like JSON") { val json = SuccessEnvelope.NoContent.asJson // NoContent is encoded as unit, which is an empty object json shouldBe io.circe.Json.obj() } test("SuccessEnvelope.NoContent roundtrips encode/decode") { val json = SuccessEnvelope.NoContent.asJson val decoded = json.as[SuccessEnvelope.NoContent.type] decoded shouldBe Right(SuccessEnvelope.NoContent) } test("SuccessEnvelope.Ok works with Int content") { forAll { (envelope: SuccessEnvelope.Ok[Int]) => val json = envelope.asJson json.hcursor.get[Int]("content") shouldBe Right(envelope.content) json.as[SuccessEnvelope.Ok[Int]] shouldBe Right(envelope) } } test("SuccessEnvelope.Ok works with List[String] content") { forAll { (envelope: SuccessEnvelope.Ok[List[String]]) => val json = envelope.asJson json.as[SuccessEnvelope.Ok[List[String]]] shouldBe Right(envelope) } } test("SuccessEnvelope.Ok encoder outputs all fields including defaults") { val envelope = SuccessEnvelope.Ok("test", None, Nil) val json = envelope.asJson json.hcursor.get[String]("content") shouldBe Right("test") json.hcursor.get[Option[String]]("message") shouldBe Right(None) json.hcursor.get[List[String]]("warnings") shouldBe Right(Nil) } test("SuccessEnvelope.Created encoder outputs all fields including defaults") { val envelope = SuccessEnvelope.Created("test", None, Nil) val json = envelope.asJson json.hcursor.get[String]("content") shouldBe Right("test") json.hcursor.get[Option[String]]("message") shouldBe Right(None) json.hcursor.get[List[String]]("warnings") shouldBe Right(Nil) } test("SuccessEnvelope.Accepted decodes from minimal JSON with defaults applied") { val minimalJson = io.circe.Json.obj() val 
/** ScalaCheck generators and `Arbitrary` instances for circe [[io.circe.Json]] values. */
object JsonGenerators {
  import ScalaPrimitiveGenerators.Gens.{nonEmptyAlphaStr, smallNonNegNum, smallPosNum}

  object Gens {

    /** A JSON boolean, number, or string; never `Json.Null`.
      *
      * Doubles are restricted to finite values before encoding: `Json.fromDoubleOrNull` returns
      * `Json.Null` for values that cannot be represented as a JSON number (NaN, +/-Infinity), which
      * would otherwise let a null leak out of this "non-null" generator.
      */
    val nonNullPrimitive: Gen[Json] = Gen.oneOf(
      Arbitrary.arbBool.arbitrary.map(Json.fromBoolean),
      Arbitrary.arbLong.arbitrary.map(Json.fromLong),
      Arbitrary.arbDouble.arbitrary
        .retryUntil(d => java.lang.Double.isFinite(d))
        .map(Json.fromDoubleOrNull),
      Arbitrary.arbString.arbitrary.map(Json.fromString),
    )

    /** Either `Json.Null` or a value from [[nonNullPrimitive]]. */
    val primitive: Gen[Json] = Gen.oneOf(Gen.const(Json.Null), nonNullPrimitive)

    /** A map from non-empty alphabetic keys to primitive JSON values, built from `size` generated pairs.
      *
      * NOTE(review): pairs with duplicate keys presumably collapse, so the resulting map may hold fewer
      * than `size` entries; confirm against ScalaCheck's `Gen.mapOfN`.
      *
      * @param size number of key/value pairs to generate
      */
    def dictionaryOfSize(size: Int): Gen[Map[String, Json]] =
      Gen.mapOfN(size, Gen.zip(nonEmptyAlphaStr, primitive))

    /** A dictionary whose requested size is drawn from `smallNonNegNum` (so it may be empty). */
    val dictionary: Gen[Map[String, Json]] = smallNonNegNum.flatMap(dictionaryOfSize)

    /** A dictionary whose requested size is drawn from `smallPosNum` (at least one pair is generated). */
    val nonEmptyDictionary: Gen[Map[String, Json]] = smallPosNum.flatMap(dictionaryOfSize)

    /** A dictionary whose requested size follows ScalaCheck's size parameter. */
    val sizedDictionary: Gen[Map[String, Json]] = Gen.sized(dictionaryOfSize)
  }

  /** Implicit `Arbitrary` wrappers around the generators in [[Gens]]. */
  object Arbs {
    implicit val primitive: Arbitrary[Json] = Arbitrary(Gens.primitive)
    implicit val dictionary: Arbitrary[Map[String, Json]] = Arbitrary(Gens.dictionary)
  }
}
/** ScalaCheck generators and `Arbitrary` instances for `java.time.Instant`. */
object TimeGenerators {

  object Gens {

    /** Generates timestamps from arbitrary `Long` epoch-millisecond values (positive and negative). */
    val instant: Gen[Instant] = Arbitrary.arbLong.arbitrary.map(Instant.ofEpochMilli)

    /** Generates timestamps within a specified range, at millisecond precision.
      *
      * The two endpoints are ordered before the generator is built, so a range given "backwards"
      * (for example only `to` provided and lying in the past, only `from` provided and lying in the
      * future, or `from` later than `to`) still yields a valid generator over the span between the
      * two endpoints instead of handing inverted bounds to `Gen.chooseNum`.
      *
      * @param from
      *   Optional start of range. Uses `Instant.now()` if not provided and `to` is provided.
      * @param to
      *   Optional end of range. Uses `Instant.now()` if not provided and `from` is provided.
      * @return
      *   A generator for Instants within the range. If neither bound is provided, returns the full-range [[instant]]
      *   generator.
      */
    def instantWithinRange(from: Option[Instant] = None, to: Option[Instant] = None): Gen[Instant] =
      if (from.isEmpty && to.isEmpty) instant
      else {
        // Read the clock at most once, and only when one of the endpoints is actually missing.
        lazy val nowMillis: Long = Instant.now().toEpochMilli
        val fromMillis: Long = from.fold(nowMillis)(_.toEpochMilli)
        val toMillis: Long = to.fold(nowMillis)(_.toEpochMilli)

        // Order the endpoints so the lower bound never exceeds the upper bound.
        Gen
          .chooseNum(math.min(fromMillis, toMillis), math.max(fromMillis, toMillis))
          .map(Instant.ofEpochMilli)
      }
  }

  /** Implicit `Arbitrary` wrapper around [[Gens.instant]]. */
  object Arbs {
    implicit val arbInstant: Arbitrary[Instant] = Arbitrary(Gens.instant)
  }
}
staticCredentialsProviderV2(credsOpt: Option[AwsCredentials]): AwsCredentialsProvider = credsOpt.fold[AwsCredentialsProvider](DefaultCredentialsProvider.builder.build) { credentials => import Secret.Unsafe._ StaticCredentialsProvider.create( AwsBasicCredentials.create(credentials.accessKeyId.unsafeValue, credentials.secretAccessKey.unsafeValue), ) } implicit class AwsBuilderOps[Client: ClassTag, Builder <: AwsClientBuilder[Builder, Client]]( builder: AwsClientBuilder[Builder, Client], ) { /** Credentials to use for this AWS client. If provided, these will be used explicitly. * If absent, credentials will be inferred from the environment according to AWS's DefaultCredentialsProvider * This may have security implications! Ensure your environment only contains environment variables, * java system properties, aws credentials files, and instance profile credentials you trust! * * @see https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html#credentials-default * * If you are deploying on EC2 and do NOT wish to use EC2 container metadata/credentials, ensure the java property * `aws.disableEc2Metadata` is set to true, or the environment variable AWS_EC2_METADATA_DISABLED is set to true. * Note that this will also disable region lookup, and thus require all AWS client constructions to explicitly set * credentials. * * @param credsOpt if set, aws credentials to use explicitly * @return */ def credentialsV2(credsOpt: Option[AwsCredentials]): Builder = { val creds = credsOpt.orElse { logger.info( safe"""No AWS credentials provided while building AWS client of type |${Safe(classTag[Client].runtimeClass.getSimpleName)}. Defaulting |to environmental credentials.""".cleanLines, ) None } builder.credentialsProvider(staticCredentialsProviderV2(creds)) } def regionV2(regionOpt: Option[AwsRegion]): Builder = regionOpt.fold { logger.info( safe"""No AWS region provided while building AWS client of type: |${Safe(classTag[Client].runtimeClass.getSimpleName)}. 
/** Checks that [[AwsOps.staticCredentialsProviderV2]] hands the real (unredacted) credential values
  * to the AWS SDK, and falls back to the SDK default provider when no credentials are given.
  */
class AwsOpsSpec extends AnyWordSpec with Matchers {

  "staticCredentialsProviderV2" should {

    "extract actual Secret values for SDK usage" in {
      val accessKeyId = "AKIAIOSFODNN7EXAMPLE"
      val secretAccessKey = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
      val wrapped = AwsCredentials(
        accessKeyId = Secret(accessKeyId),
        secretAccessKey = Secret(secretAccessKey),
      )

      val resolved = AwsOps.staticCredentialsProviderV2(Some(wrapped)).resolveCredentials()

      resolved.accessKeyId() shouldBe accessKeyId
      resolved.secretAccessKey() shouldBe secretAccessKey
    }

    "return DefaultCredentialsProvider when credentials are None" in {
      AwsOps.staticCredentialsProviderV2(None) shouldBe a[DefaultCredentialsProvider]
    }

    "preserve credential values through Secret wrapper" in {
      // Plain, special-character, and repeated-character key pairs.
      val keyPairs = Seq(
        ("AKIA123", "secret123"),
        ("AKIASPECIAL!@#$%", "secret/with+special=chars"),
        ("A" * 20, "B" * 40),
      )

      keyPairs.foreach { case (accessKey, secretKey) =>
        val wrapped = AwsCredentials(Secret(accessKey), Secret(secretKey))
        val resolved = AwsOps.staticCredentialsProviderV2(Some(wrapped)).resolveCredentials()

        withClue(s"For accessKey=$accessKey, secretKey=$secretKey: ") {
          resolved.accessKeyId() shouldBe accessKey
          resolved.secretAccessKey() shouldBe secretKey
        }
      }
    }
  }
}
================================================ import Dependencies.* import scalajsbundler.util.JSON._ import QuineSettings.* ThisBuild / resolvers += "thatDot maven" at "https://s3.us-west-2.amazonaws.com/com.thatdot.dependencies/release/" ThisBuild / scalaVersion := scalaV addCommandAlias("fmtall", "; scalafmtAll; scalafmtSbt") addCommandAlias("fixall", "; scalafixAll; fmtall") ThisBuild / evictionErrorLevel := Level.Info Global / concurrentRestrictions := Seq( Tags.limit(Tags.Test, 1), ) // Core streaming graph interpreter lazy val `quine-core`: Project = project .settings(commonSettings) .dependsOn(`quine-language`) .settings( libraryDependencies ++= Seq( "org.graalvm.js" % "js" % graalV, "com.chuusai" %% "shapeless" % shapelessV, "org.apache.pekko" %% "pekko-actor" % pekkoV, "org.apache.pekko" %% "pekko-stream" % pekkoV, "org.apache.pekko" %% "pekko-slf4j" % pekkoV, "com.typesafe.scala-logging" %% "scala-logging" % scalaLoggingV, "io.dropwizard.metrics" % "metrics-core" % dropwizardMetricsV, "io.circe" %% "circe-parser" % circeV, "org.msgpack" % "msgpack-core" % msgPackV, "org.apache.commons" % "commons-text" % commonsTextV, "com.github.blemale" %% "scaffeine" % scaffeineV, "io.github.hakky54" % "ayza" % ayzaV, "org.typelevel" %% "cats-core" % catsV, "org.typelevel" %% "cats-effect" % catsEffectV, "com.thatdot" %% "quine-id" % quineCommonV, "com.lihaoyi" %% "pprint" % pprintV, "commons-codec" % "commons-codec" % commonsCodecV, // Testing "org.scalatest" %% "scalatest" % scalaTestV % Test, "org.scalacheck" %% "scalacheck" % scalaCheckV % Test, "org.scalatestplus" %% "scalacheck-1-17" % scalaTestScalaCheckV % Test, "org.apache.pekko" %% "pekko-testkit" % pekkoTestkitV % Test, "ch.qos.logback" % "logback-classic" % logbackV % Test, "commons-io" % "commons-io" % commonsIoV % Test, ), // Compile different files depending on scala version Compile / unmanagedSourceDirectories += { (Compile / sourceDirectory).value / "scala-2.13" }, addCompilerPlugin("org.typelevel" 
%% "kind-projector" % kindProjectorV cross CrossVersion.full), // Uncomment the following 2 lines to generate flamegraphs for the project's compilation in target/scala-2.13/classes/META-INF // (look for `.flamegraph` files -- these may be imported into intellij profiler or flamegraph.pl) // ThisBuild / scalacOptions += "-Vstatistics", // addCompilerPlugin("ch.epfl.scala" %% "scalac-profiling" % "1.1.0-RC3" cross CrossVersion.full) ) .enablePlugins(BuildInfoPlugin, FlatcPlugin) .settings( // Allow BuildInfo to be cached on `-DIRTY` versions, to avoid recompilation during development buildInfoOptions := (if (git.gitUncommittedChanges.value) Seq() else Seq(BuildInfoOption.BuildTime)), buildInfoKeys := Seq[BuildInfoKey]( version, git.gitHeadCommit, git.gitUncommittedChanges, git.gitHeadCommitDate, BuildInfoKey.action("javaVmName")(scala.util.Properties.javaVmName), BuildInfoKey.action("javaVendor")(scala.util.Properties.javaVendor), BuildInfoKey.action("javaVersion")(scala.util.Properties.javaVersion), ), buildInfoPackage := "com.thatdot.quine", ) // Quine Language - Cypher parser and language services lazy val `quine-language`: Project = project .settings(commonSettings) .enablePlugins(Antlr4Plugin) .settings( libraryDependencies ++= Seq( "org.antlr" % "antlr4-runtime" % antlr4RuntimeV, "org.typelevel" %% "cats-effect" % catsEffectV, "org.eclipse.lsp4j" % "org.eclipse.lsp4j" % lsp4jV, "com.chuusai" %% "shapeless" % shapelessV, "com.google.guava" % "guava" % guavaV, "com.47deg" %% "memeid4s" % memeid4sV, "com.thatdot" %% "quine-id" % quineCommonV, "com.thatdot" %% "quine-utils" % quineCommonV, // Testing "org.scalameta" %% "munit" % munitV % Test, ), Antlr4 / antlr4PackageName := Some("com.thatdot.quine.cypher.parsing"), Antlr4 / antlr4Version := antlr4RuntimeV, Antlr4 / antlr4GenListener := false, Antlr4 / antlr4GenVisitor := true, testFrameworks += new TestFramework("munit.Framework"), addCompilerPlugin("org.typelevel" %% "kind-projector" % kindProjectorV cross 
CrossVersion.full), ) lazy val `quine-serialization`: Project = project .settings(commonSettings) .dependsOn( `data`, `quine-core` % "compile->compile;test->test", ) .settings( libraryDependencies ++= Seq( "com.google.api.grpc" % "proto-google-common-protos" % protobufCommonV, "com.google.protobuf" % "protobuf-java" % protobufV, "software.amazon.glue" % "schema-registry-serde" % amazonGlueV, // for its protobuf DynamicSchema utility // Glue->AWS Netty Client->Netty, which has some CVEs. Glue 1.1.27 has vulnerable Netty; override to safe AWS SDK. "software.amazon.awssdk" % "netty-nio-client" % awsSdkV, "org.apache.avro" % "avro" % avroV, "org.endpoints4s" %%% "json-schema-generic" % endpoints4sDefaultV, "org.endpoints4s" %%% "json-schema-circe" % endpoints4sCirceV, ), ) // MapDB implementation of a Quine persistor lazy val `quine-mapdb-persistor`: Project = project .settings(commonSettings) .dependsOn(`quine-core` % "compile->compile;test->test") .settings( /* `net.jpountz.lz4:lz4` was moved to `org.lz4:lz4-java`, then to * `at.yawk.lz4:lz4-java` (the maintained fork). MapDB still depends on the * old coordinates, so we exclude the old JAR and pull in the current one. 
*/ libraryDependencies ++= Seq( ("org.mapdb" % "mapdb" % mapDbV).exclude("net.jpountz.lz4", "lz4"), "at.yawk.lz4" % "lz4-java" % lz4JavaV, ), ) // RocksDB implementation of a Quine persistor lazy val `quine-rocksdb-persistor`: Project = project .settings(commonSettings) .dependsOn(`quine-core` % "compile->compile;test->test") .settings( libraryDependencies ++= Seq( "org.rocksdb" % "rocksdbjni" % rocksdbV, ), ) // Cassandra implementation of a Quine persistor lazy val `quine-cassandra-persistor`: Project = project .configs(Integration) .settings(commonSettings, integrationSettings) .dependsOn(`quine-core` % "compile->compile;test->test") .enablePlugins(spray.boilerplate.BoilerplatePlugin) .settings( libraryDependencies ++= Seq( "org.typelevel" %% "cats-core" % catsV, "org.apache.cassandra" % "java-driver-query-builder" % cassandraClientV, // The org name for the Cassandra java-driver was changed from com.datastax.oss to org.apache.cassandra // The sigv4-auth plugin specifies a dep on com.datastax.oss, SBT doesn't know that our org.apache.cassandra // dep is supposed to be the replacement for that, and includes both on the classpath, which then conflict // at the sbt-assembly step (because they both have the same package names internally). 
"software.aws.mcs" % "aws-sigv4-auth-cassandra-java-driver-plugin" % sigv4AuthCassandraPluginV exclude ("com.datastax.oss", "java-driver-core"), "software.amazon.awssdk" % "sts" % awsSdkV, "com.github.nosan" % "embedded-cassandra" % embeddedCassandraV % Test, ), ) // Parser and interpreter for a subset of [Gremlin](https://tinkerpop.apache.org/gremlin.html) lazy val `quine-gremlin`: Project = project .settings(commonSettings) .dependsOn(`quine-core` % "compile->compile;test->test") .settings( libraryDependencies ++= Seq( "org.scala-lang.modules" %% "scala-parser-combinators" % scalaParserCombinatorsV, "org.apache.commons" % "commons-text" % commonsTextV, "org.scalatest" %% "scalatest" % scalaTestV % Test, ), ) // Compiler for compiling [Cypher](https://neo4j.com/docs/cypher-manual/current/) into Quine queries lazy val `quine-cypher`: Project = project .settings(commonSettings) .dependsOn(`quine-core` % "compile->compile;test->test") .settings( scalacOptions ++= Seq( "-language:reflectiveCalls", "-Xlog-implicits", ), libraryDependencies ++= Seq( "com.thatdot.opencypher" %% "expressions" % openCypherV, "com.thatdot.opencypher" %% "front-end" % openCypherV, "com.thatdot.opencypher" %% "opencypher-cypher-ast-factory" % openCypherV, "com.thatdot.opencypher" %% "util" % openCypherV, "org.typelevel" %% "cats-core" % catsV, "org.scalatest" %% "scalatest" % scalaTestV % Test, "org.apache.pekko" %% "pekko-stream-testkit" % pekkoV % Test, ), addCompilerPlugin("org.typelevel" % "kind-projector" % kindProjectorV cross CrossVersion.full), addCompilerPlugin("com.olegpy" %% "better-monadic-for" % betterMonadicForV), ) /* * Version 7.5.1. It is expected that `Network` and `DataSet` are available under * A globally available `vis` object, as with * * ```html * * ``` * * Thanks to [`scala-js-ts-importer`][ts-importer] which made it possible to generate * A first pass of the facade directly from the Typescipt bindings provided with * `vis-network` (see `Network.d.ts`). 
* * [ts-importer]: https://github.com/sjrd/scala-js-ts-importer * [visjs]: https://github.com/visjs/vis-network */ lazy val `visnetwork-facade`: Project = project .settings(commonSettings) .enablePlugins(ScalaJSPlugin) .settings( libraryDependencies ++= Seq( "org.scala-js" %%% "scalajs-dom" % scalajsDomV, ), ) lazy val `aws`: Project = project .settings(commonSettings) .settings( libraryDependencies ++= Seq( "com.thatdot" %% "quine-logging" % quineCommonV, "com.thatdot" %% "quine-security" % quineCommonV, "software.amazon.awssdk" % "aws-core" % awsSdkV, "org.scalatest" %% "scalatest" % scalaTestV % Test, ), ) lazy val `data`: Project = project .settings(commonSettings) .settings( libraryDependencies ++= Seq( "com.thatdot" %% "quine-logging" % quineCommonV, "com.thatdot" %% "quine-utils" % quineCommonV, "com.google.protobuf" % "protobuf-java" % protobufV, "io.circe" %% "circe-core" % circeV, "org.apache.avro" % "avro" % avroV, "org.scalatest" %% "scalatest" % scalaTestV % Test, ), ) /** V2 API type definitions shared between server (JVM) and browser (ScalaJS). 
*/
lazy val `quine-endpoints2` = crossProject(JSPlatform, JVMPlatform)
  .crossType(CrossType.Pure)
  .in(file("quine-endpoints2"))
  .settings(commonSettings)
  .settings(
    libraryDependencies ++= Seq(
      "com.softwaremill.sttp.tapir" %%% "tapir-core" % tapirV,
      "io.circe" %%% "circe-core" % circeV,
      "io.circe" %%% "circe-generic-extras" % circeGenericExtrasV,
    ),
  )

// Builds on `quine-serialization` and the JVM flavour of `quine-endpoints2`.
// Each library coordinate is declared exactly once.
lazy val `api`: Project = project
  .in(file("api"))
  .settings(commonSettings)
  .dependsOn(`quine-serialization`, `quine-endpoints2`.jvm)
  .settings(
    libraryDependencies ++= Seq(
      "com.thatdot" %% "quine-security" % quineCommonV,
      "com.softwaremill.sttp.tapir" %% "tapir-core" % tapirV,
      "com.softwaremill.sttp.tapir" %% "tapir-json-circe" % tapirV,
      "io.circe" %% "circe-core" % circeV,
      "io.circe" %% "circe-generic-extras" % circeGenericExtrasV,
      "io.circe" %% "circe-yaml" % circeYamlV,
      "org.scalatest" %% "scalatest" % scalaTestV % Test,
      "org.scalatestplus" %% "scalacheck-1-17" % scalaTestScalaCheckV % Test,
    ),
  )

// Builds on `aws`, `data`, `quine-core` and `quine-serialization`; pulls in the pekko
// connectors for Kafka, Kinesis and SNS plus pekko-http.
lazy val `outputs2`: Project = project
  .settings(commonSettings)
  .dependsOn(`aws`, `data`, `quine-core`, `quine-serialization`)
  .settings(
    libraryDependencies ++= Seq(
      "com.thatdot" %% "quine-logging" % quineCommonV,
      "org.apache.pekko" %% "pekko-actor" % pekkoV,
      "org.apache.pekko" %% "pekko-stream" % pekkoV,
      "org.apache.pekko" %% "pekko-http" % pekkoHttpV,
      "org.apache.pekko" %% "pekko-connectors-kafka" % pekkoKafkaV,
      "org.apache.pekko" %% "pekko-connectors-kinesis" % pekkoConnectorsV,
      "org.apache.pekko" %% "pekko-connectors-sns" % pekkoConnectorsV,
      "software.amazon.awssdk" % "netty-nio-client" % awsSdkV,
      "com.google.protobuf" % "protobuf-java" % protobufV,
      "org.scalatest" %% "scalatest" % scalaTestV % Test,
      "org.scalacheck" %%% "scalacheck" % scalaCheckV % Test,
      "org.apache.pekko" %% "pekko-http-testkit" % pekkoHttpV % Test,
    ),
  )

/** V1 API definitions (that may be used for internal modeling at times) for `quine`-based applications */
lazy val `quine-endpoints` =
crossProject(JSPlatform, JVMPlatform) .crossType(CrossType.Pure) .in(file("quine-endpoints")) .settings(commonSettings) .settings( libraryDependencies ++= Seq( "com.thatdot" %%% "quine-security" % quineCommonV, "org.endpoints4s" %%% "json-schema-generic" % endpoints4sDefaultV, "org.endpoints4s" %%% "json-schema-circe" % endpoints4sCirceV, "io.circe" %% "circe-core" % circeV, "org.endpoints4s" %%% "openapi" % endpoints4sOpenapiV, "com.lihaoyi" %% "ujson-circe" % ujsonCirceV, // For the OpenAPI rendering "org.scalacheck" %%% "scalacheck" % scalaCheckV % Test, "org.scalatest" %%% "scalatest" % scalaTestV % Test, "com.softwaremill.sttp.tapir" %% "tapir-core" % tapirV, // For tapir annotations ), ) .jsSettings( // Provides an implementation that allows us to use java.time.Instant in Scala.js libraryDependencies += "io.github.cquiroz" %%% "scala-java-time" % scalaJavaTimeV, ) /** Contains the common (among product needs) converters/conversions between * the independent definitions of API models and internal models. Notably * not versioned because versioning of API and internal models are independent. 
*/ lazy val `model-converters`: Project = project .settings(commonSettings) .dependsOn( `api`, `outputs2`, `quine-endpoints`.jvm, ) // Quine web application lazy val `quine-browser`: Project = project .settings(commonSettings, visNetworkSettings) .dependsOn(`quine-endpoints`.js, `visnetwork-facade`, `quine-endpoints2`.js) .enablePlugins(ScalaJSBundlerPlugin) .settings( libraryDependencies ++= Seq( "org.scala-js" %%% "scalajs-dom" % scalajsDomV, "org.scala-js" %%% "scala-js-macrotask-executor" % scalajsMacroTaskExecutorV, "org.endpoints4s" %%% "xhr-client" % endpoints4sXhrClientV, "io.circe" %%% "circe-generic" % circeV, "io.circe" %%% "circe-parser" % circeV, "com.raquo" %%% "laminar" % laminarV, "com.raquo" %%% "waypoint" % waypointV, ), Compile / npmDevDependencies ++= Seq( // When updating, check whether the minimatch yarn resolution below is still needed "ts-loader" -> "8.0.0", "typescript" -> "4.9.5", "@types/node" -> "16.7.13", // Webpack 5 loaders and polyfills (required by common.webpack.config.js) "style-loader" -> "3.3.4", "css-loader" -> "6.11.0", "buffer" -> "6.0.3", "stream-browserify" -> "3.0.0", "path-browserify" -> "1.0.1", "process" -> "0.11.10", ), Compile / npmDependencies ++= Seq( "es6-shim" -> "0.35.7", "plotly.js" -> s"npm:plotly.js-strict-dist-min@${plotlyV}", // CSP-compliant strict bundle "@stoplight/elements" -> stoplightElementsV, "react" -> reactV, // Peer dependency of @stoplight/elements "react-dom" -> reactV, "mkdirp" -> "1.0.0", "@coreui/coreui" -> coreuiV, "@coreui/icons" -> coreuiIconsV, "@popperjs/core" -> "2.11.8", ), // Force patched dependency versions via yarn resolutions (see NPM Override Versions in Dependencies.scala) Compile / additionalNpmConfig := Map( "resolutions" -> obj( "lodash" -> str(lodashV), "react-router" -> str(reactRouterV), "react-router-dom" -> str(reactRouterV), "@remix-run/router" -> str(remixRunRouterV), "minimatch" -> str(minimatchV), "yaml" -> str(yamlV), "brace-expansion" -> str(braceExpansionV), ), ), 
webpackNodeArgs := nodeLegacySslIfAvailable, // Scalajs-bundler 0.21.1 updates to webpack 5 but doesn't inform webpack that the scalajs-based file it emits is // an entrypoint -- therefore webpack emits an error saying effectively, "no entrypoint" that we must ignore. // This aggressively ignores all warnings from webpack, which is more than necessary, but trivially works webpackExtraArgs := Seq("--ignore-warnings-message", "/.*/"), fastOptJS / webpackConfigFile := Some(baseDirectory.value / "dev.webpack.config.js"), fastOptJS / webpackDevServerExtraArgs := Seq("--inline", "--hot"), fullOptJS / webpackConfigFile := Some(baseDirectory.value / "prod.webpack.config.js"), Test / webpackConfigFile := Some(baseDirectory.value / "common.webpack.config.js"), test := {}, useYarn := true, yarnExtraArgs := Seq("--frozen-lockfile"), ) // Streaming graph application built on top of the Quine library lazy val `quine`: Project = project .settings(commonSettings) .dependsOn( `quine-core` % "compile->compile;test->test", `quine-cypher` % "compile->compile;test->test", `quine-endpoints`.jvm % "compile->compile;test->test", `data` % "compile->compile;test->test", `api` % "compile->compile;test->test", `model-converters`, `outputs2` % "compile->compile;test->test", `quine-gremlin`, `quine-cassandra-persistor`, `quine-mapdb-persistor`, `quine-rocksdb-persistor`, ) .settings( libraryDependencies ++= Seq( "ch.qos.logback" % "logback-classic" % logbackV, "com.github.davidb" % "metrics-influxdb" % metricsInfluxdbV, "com.github.jnr" % "jnr-posix" % jnrPosixV, "com.github.pjfanning" %% "pekko-http-circe" % pekkoHttpCirceV, "com.github.pureconfig" %% "pureconfig" % pureconfigV, "com.github.scopt" %% "scopt" % scoptV, "com.google.api.grpc" % "proto-google-common-protos" % protobufCommonV, "com.github.ben-manes.caffeine" % "caffeine" % caffeineV, "com.github.blemale" %% "scaffeine" % scaffeineV, "com.google.protobuf" % "protobuf-java" % protobufV, "com.softwaremill.sttp.tapir" %% 
"tapir-pekko-http-server" % tapirV, "com.softwaremill.sttp.tapir" %% "tapir-openapi-docs" % tapirV, "com.softwaremill.sttp.tapir" %% "tapir-json-circe" % tapirV, "com.softwaremill.sttp.apispec" %% "openapi-circe-yaml" % openApiCirceYamlV exclude ("io.circe", "circe-yaml"), "org.apache.pekko" %% "pekko-http-testkit" % pekkoHttpV % Test, "io.circe" %% "circe-yaml" % circeYamlV, "com.typesafe.scala-logging" %% "scala-logging" % scalaLoggingV, "ch.qos.logback" % "logback-classic" % logbackV, "com.softwaremill.sttp.tapir" %% "tapir-sttp-stub-server" % tapirV % Test, "org.scalatest" %% "scalatest" % scalaTestV % Test, //"commons-io" % "commons-io" % commonsIoV % Test, "io.circe" %% "circe-config" % "0.10.2", "io.circe" %% "circe-generic-extras" % circeGenericExtrasV, "io.circe" %% "circe-yaml-v12" % "0.16.1", "io.circe" %% "circe-core" % circeV, "io.dropwizard.metrics" % "metrics-core" % dropwizardMetricsV, "io.dropwizard.metrics" % "metrics-jmx" % dropwizardMetricsV, "io.dropwizard.metrics" % "metrics-jvm" % dropwizardMetricsV, "org.apache.commons" % "commons-csv" % apacheCommonsCsvV, "org.apache.kafka" % "kafka-clients" % kafkaClientsV, "org.apache.pekko" %% "pekko-connectors-csv" % pekkoConnectorsV, "org.apache.pekko" %% "pekko-connectors-kafka" % pekkoKafkaV, "org.apache.pekko" %% "pekko-connectors-kinesis" % pekkoConnectorsV exclude ("org.rocksdb", "rocksdbjni"), "software.amazon.kinesis" % "amazon-kinesis-client" % amazonKinesisClientV, "org.apache.pekko" %% "pekko-connectors-s3" % pekkoConnectorsV, "org.apache.pekko" %% "pekko-connectors-sns" % pekkoConnectorsV, "org.apache.pekko" %% "pekko-connectors-sqs" % pekkoConnectorsV, "org.apache.pekko" %% "pekko-connectors-sse" % pekkoConnectorsV, "org.apache.pekko" %% "pekko-connectors-text" % pekkoConnectorsV, // pekko-http-xml is not a direct dep, but an older version is pulled in transitively by // pekko-connectors-s3 above. 
All pekko-http module version numbers need to match exactly, or else it throws // at startup: "java.lang.IllegalStateException: Detected possible incompatible versions on the classpath." "org.apache.pekko" %% "pekko-http-xml" % pekkoHttpV, "org.apache.pekko" %% "pekko-stream-testkit" % pekkoV % Test, "org.endpoints4s" %% "pekko-http-server" % endpoints4sHttpServerV, "org.scalatest" %% "scalatest" % scalaTestV % Test, "org.scalatestplus" %% "scalacheck-1-17" % scalaTestScalaCheckV % Test, // WebJars (javascript dependencies masquerading as JARs) "org.webjars" % "ionicons" % ioniconsV, "org.webjars" % "jquery" % jqueryV, "org.webjars" % "webjars-locator" % webjarsLocatorV, "org.webjars.npm" % "sugar-date" % sugarV, "org.apache.avro" % "avro" % avroV, // AWS SDK deps (next 4) effectively bundle sibling JARs needed for certain features, despite no code references "software.amazon.awssdk" % "sso" % awsSdkV, "software.amazon.awssdk" % "ssooidc" % awsSdkV, "software.amazon.awssdk" % "sts" % awsSdkV, "software.amazon.awssdk" % "aws-query-protocol" % awsSdkV, ), // Add JVM options for tests to allow reflection access to java.util (needed for env var manipulation in tests) Test / javaOptions += "--add-opens=java.base/java.util=ALL-UNNAMED", Test / fork := true, ) .enablePlugins(WebScalaJSBundlerPlugin) .settings( scalaJSProjects := Seq(`quine-browser`), Assets / pipelineStages := Seq(scalaJSPipeline), ) .enablePlugins(BuildInfoPlugin, Packaging, Docker, Ecr) .settings( startupMessage := "", buildInfoKeys := Seq[BuildInfoKey](version, startupMessage), buildInfoPackage := "com.thatdot.quine.app", ) lazy val `quine-docs`: Project = { val docJsonV1 = Def.setting((Compile / sourceManaged).value / "reference" / "openapi-v1.json") val docJsonV2 = Def.setting((Compile / sourceManaged).value / "reference" / "openapi-v2.json") val cypherTable1 = Def.setting((Compile / sourceManaged).value / "reference" / "cypher-builtin-functions.md") val cypherTable2 = Def.setting((Compile / 
sourceManaged).value / "reference" / "cypher-user-defined-functions.md") val cypherTable3 = Def.setting((Compile / sourceManaged).value / "reference" / "cypher-user-defined-procedures.md") val generateDocs = TaskKey[Unit]("generateDocs", "Generate documentation tables for the Quine (Mkdocs) project") Project("quine-docs", file("quine-docs")) .dependsOn(`quine`) .settings(commonSettings) .settings( generateDocs := Def .sequential( Def.taskDyn { (Compile / runMain) .toTask( List( " com.thatdot.quine.docs.GenerateCypherTables", cypherTable1.value.getAbsolutePath, cypherTable2.value.getAbsolutePath, cypherTable3.value.getAbsolutePath, ).mkString(" "), ) }, Def.taskDyn { (Compile / runMain) .toTask(s" com.thatdot.quine.docs.GenerateOpenApi ${docJsonV1.value.getAbsolutePath}") }, Def.taskDyn { (Compile / runMain) .toTask(s" com.thatdot.quine.docs.GenerateOpenApiV2 ${docJsonV2.value.getAbsolutePath}") }, ) .value, ) .settings( libraryDependencies ++= Seq( "org.pegdown" % "pegdown" % pegdownV, "org.parboiled" % "parboiled-java" % parboiledV, "org.scalatest" %% "scalatest" % scalaTestV % Test, ), ) } // Spurious warnings Global / excludeLintKeys += `quine-browser` / webpackNodeArgs Global / excludeLintKeys += `quine-browser` / webpackExtraArgs ================================================ FILE: data/src/main/scala/com/thatdot/data/DataFoldableFrom.scala ================================================ package com.thatdot.data import scala.collection.{SeqView, View, mutable} import scala.jdk.CollectionConverters._ import scala.reflect.ClassTag import scala.util.Try import org.apache.pekko.util import com.google.protobuf.Descriptors.EnumValueDescriptor import com.google.protobuf.Descriptors.FieldDescriptor.JavaType import com.google.protobuf.{ByteString, Descriptors, DynamicMessage} import io.circe.{Json, JsonNumber, JsonObject} import org.apache.avro.generic.{GenericArray, GenericEnumSymbol, GenericFixed, GenericRecord} import com.thatdot.common.logging.Log._ trait 
DataFoldableFrom[A] extends LazySafeLogging {

  /** Convert `value` into a `B` by driving the constructors exposed by `folder`. */
  def fold[B](value: A, folder: DataFolderTo[B]): B

  /** Run the thunk in the first tuple slot, fold a successful result with `folder`,
    * and hand the second tuple slot back unchanged.
    */
  def fold[B, Frame](t: (() => Try[A], Frame), folder: DataFolderTo[B]): (Try[B], Frame) = {
    val (produce, frame) = t
    (produce().map(decoded => fold(decoded, folder)), frame)
  }

  /** Build a function `A => B`: inputs that already are a `B` (checked through the
    * `ClassTag`) pass through untouched, everything else is folded with the implicit
    * `DataFolderTo[B]`.
    */
  def to[B: DataFolderTo: ClassTag]: A => B = {
    case alreadyB: B => alreadyB
    case other => fold(other, implicitly[DataFolderTo[B]])
  }
}

object DataFoldableFrom {

  /** Summon the implicit instance for `A`. */
  def apply[A](implicit df: DataFoldableFrom[A]): DataFoldableFrom[A] = df

  /** Derive an instance for `B` by first mapping each `B` to an `A` with `f`. */
  def contramap[A: DataFoldableFrom, B](f: B => A): DataFoldableFrom[B] = new DataFoldableFrom[B] {
    override def fold[C](value: B, folder: DataFolderTo[C]): C = {
      val converted: A = f(value)
      implicitly[DataFoldableFrom[A]].fold(converted, folder)
    }
  }

  /** Method syntax for [[contramap]] on an existing instance. */
  implicit final class Ops[A](private val self: DataFoldableFrom[A]) extends AnyVal {
    def contramap[B](f: B => A): DataFoldableFrom[B] = DataFoldableFrom.contramap(f)(self)
  }

  /** Folds circe `Json`; numbers that fit a `Long` become integers, all others floating. */
  implicit val jsonDataFoldable: DataFoldableFrom[Json] = new DataFoldableFrom[Json] {
    def fold[B](value: Json, folder: DataFolderTo[B]): B = {
      val visitor = new Json.Folder[B] {
        def onNull: B = folder.nullValue

        def onBoolean(value: Boolean): B =
          if (value) folder.trueValue
          else folder.falseValue

        def onNumber(value: JsonNumber): B = value.toLong match {
          case Some(whole) => folder.integer(whole)
          case None => folder.floating(value.toDouble)
        }

        def onString(value: String): B = folder.string(value)

        def onArray(value: Vector[Json]): B = {
          val elements = folder.vectorBuilder()
          value.foreach(element => elements.add(fold[B](element, folder)))
          elements.finish()
        }

        def onObject(value: JsonObject): B = {
          val entries = folder.mapBuilder()
          value.toIterable.foreach { case (fieldName, fieldJson) =>
            entries.add(fieldName, fold[B](fieldJson, folder))
          }
          entries.finish()
        }
      }
      value.foldWith(visitor)
    }
  }

  /** Folds a pekko `ByteString` as bytes, using the array returned by `toArrayUnsafe()`. */
  implicit val byteStringDataFoldable: DataFoldableFrom[util.ByteString] = new DataFoldableFrom[util.ByteString] {
    def fold[B](value: util.ByteString, folder: DataFolderTo[B]): B = {
      val raw = value.toArrayUnsafe()
      folder.bytes(raw)
    }
  }

  /** Folds a byte array directly as bytes. */
  implicit val bytesDataFoldable: DataFoldableFrom[Array[Byte]] = new DataFoldableFrom[Array[Byte]] {
    def fold[B](value: Array[Byte], folder: DataFolderTo[B]): B = folder.bytes(value)
  }

  implicit
val stringDataFoldable: DataFoldableFrom[String] = new DataFoldableFrom[String] {
    def fold[B](value: String, folder: DataFolderTo[B]): B = folder.string(value)
  }

  /** Folds each string of the iterable into a vector of string values. */
  implicit val stringIterableDataFoldable: DataFoldableFrom[Iterable[String]] =
    new DataFoldableFrom[Iterable[String]] {
      override def fold[B](value: Iterable[String], folder: DataFolderTo[B]): B = {
        val elements = folder.vectorBuilder()
        value.foreach { text =>
          elements.add(folder.string(text))
        }
        elements.finish()
      }
    }

  /** Same as the iterable instance, for `Vector[String]`. */
  implicit val stringVectorDataFoldable: DataFoldableFrom[Vector[String]] =
    new DataFoldableFrom[Vector[String]] {
      override def fold[B](value: Vector[String], folder: DataFolderTo[B]): B = {
        val elements = folder.vectorBuilder()
        value.foreach { text =>
          elements.add(folder.string(text))
        }
        elements.finish()
      }
    }

  /** Folds a string-to-string map into a map of string values. */
  implicit val stringMapDataFoldable: DataFoldableFrom[Map[String, String]] =
    new DataFoldableFrom[Map[String, String]] {
      override def fold[B](value: Map[String, String], folder: DataFolderTo[B]): B = {
        val entries = folder.mapBuilder()
        value.foreach { case (entryKey, entryText) =>
          entries.add(entryKey, folder.string(entryText))
        }
        entries.finish()
      }
    }

  implicit val protobufDataFoldable: DataFoldableFrom[DynamicMessage] = new DataFoldableFrom[DynamicMessage] {
    import com.google.protobuf.Descriptors.FieldDescriptor.JavaType._

    /** Fold one protobuf field value, casting it according to the field's `JavaType`. */
    private def fieldToValue[B](javaType: JavaType, value: AnyRef, folder: DataFolderTo[B]): B =
      javaType match {
        case STRING =>
          folder.string(value.asInstanceOf[String])
        case INT | LONG =>
          folder.integer(value.asInstanceOf[java.lang.Number].longValue)
        case FLOAT | DOUBLE =>
          folder.floating(value.asInstanceOf[java.lang.Number].doubleValue)
        case BOOLEAN =>
          if (value.asInstanceOf[java.lang.Boolean].booleanValue) folder.trueValue
          else folder.falseValue
        case BYTE_STRING =>
          folder.bytes(value.asInstanceOf[ByteString].toByteArray)
        case ENUM =>
          folder.string(value.asInstanceOf[EnumValueDescriptor].getName)
        case MESSAGE =>
          // Nested messages recurse through the message-level fold.
          fold(value.asInstanceOf[DynamicMessage], folder)
      }

    override def fold[B](message: DynamicMessage,
folder: DataFolderTo[B]): B = { val descriptor: Descriptors.Descriptor = message.getDescriptorForType val oneOfs: SeqView[Descriptors.OneofDescriptor] = descriptor.getOneofs.asScala.view // optionals are modeled as (synthetic) oneOfs of a single field. // Kind of annoying finding a replacement for isSynthetic: https://github.com/googleapis/sdk-platform-java/pull/2764 val (optionals, realOneOfs) = oneOfs.partition { oneof => // `getRealContainingOneof` call ends up being `null` if the `oneof` is synthetic, // with a use of `isSynthetic` in its implementation. // There might be a case where a user really has a `oneof` with a single optional // field, so I did not use isOptional here. oneof.getField(0).getRealContainingOneof == null } // synthetic oneOfs (optionals) just have the one field val setOptionals: View[Descriptors.FieldDescriptor] = optionals.map(_.getField(0)).filter(message.hasField) // Find which field in each oneOf is set val oneOfFields: View[Descriptors.FieldDescriptor] = realOneOfs.flatMap(_.getFields.asScala.find(message.hasField)) val regularFields = descriptor.getFields.asScala.view diff oneOfs.flatMap(_.getFields.asScala).toVector val mapBuilder: DataFolderTo.MapBuilder[B] = folder.mapBuilder() (setOptionals ++ oneOfFields ++ regularFields).foreach { field => val b: B = { if (field.isRepeated) { if (field.isMapField) { val localMapBuilder = folder.mapBuilder() message .getField(field) .asInstanceOf[java.util.List[DynamicMessage]] .asScala .foreach { mapEntry => /* mapEntry.getDescriptorForType is a type described as: message MapFieldEntry { key_type key = 1; value_type value = 2; } We already know what fields it contains. 
*/
                  val buffer: mutable.Buffer[Descriptors.FieldDescriptor] =
                    mapEntry.getDescriptorForType.getFields.asScala
                  assert(buffer.length == 2)
                  val k = buffer.head
                  val v = buffer.tail.head
                  assert(k.getName == "key")
                  assert(v.getName == "value")
                  val maybeKey = k.getJavaType match {
                    // According to Protobuf docs, "the key_type can be any integral or string type"
                    // https://developers.google.com/protocol-buffers/docs/proto3#maps
                    case STRING => Some(mapEntry.getField(k).asInstanceOf[String])
                    case INT | LONG | BOOLEAN => Some(mapEntry.getField(k).toString)
                    case other =>
                      logger.warn(
                        safe"Cannot process the key ${Safe(other.toString)}. Protobuf can only accept keys of type String, Boolean, Integer. This map key will be ignored.",
                      )
                      None
                  }
                  // Adding to the builder is a side effect, so traverse the Option with
                  // `foreach`; entries whose key could not be rendered are skipped.
                  maybeKey.foreach(key =>
                    localMapBuilder.add(key, fieldToValue(v.getJavaType, mapEntry.getField(v), folder)),
                  )
                }
              localMapBuilder.finish()
            } else {
              val vecBuilder = folder.vectorBuilder()
              // Convert and append each element in one pass, without an intermediate collection.
              message
                .getField(field)
                .asInstanceOf[java.util.List[AnyRef]]
                .asScala
                .foreach(f => vecBuilder.add(fieldToValue(field.getJavaType, f, folder)))
              vecBuilder.finish()
            }
          } else {
            fieldToValue(field.getJavaType, message.getField(field), folder)
          }
        }
        mapBuilder.add(field.getName, b)
      }
      mapBuilder.finish()
    }
  }

  implicit val avroDataFoldable: DataFoldableFrom[GenericRecord] = new DataFoldableFrom[GenericRecord] {

    /** Fold string-keyed pairs into a map, folding each value with `foldField`. */
    private def foldMapLike[B](kv: Iterable[(String, Any)], folder: DataFolderTo[B]): B = {
      val mapBuilder = folder.mapBuilder()
      kv.foreach { case (k, v) => mapBuilder.add(k, foldField(v, folder)) }
      mapBuilder.finish()
    }

    // All of the underlying types for avro were taken from here: https://stackoverflow.com/questions/34070028/get-a-typed-value-from-an-avro-genericrecord/34234039#34234039
    private def foldField[B](field: Any, folder: DataFolderTo[B]): B = field match {
      case b: java.lang.Boolean if b => folder.trueValue
      case b: java.lang.Boolean if !b => folder.falseValue
      case i: java.lang.Integer => folder.integer(i.longValue)
      case i: java.lang.Long =>
folder.integer(i)
      case f: java.lang.Float => folder.floating(f.doubleValue)
      case d: java.lang.Double => folder.floating(d)
      case bytes: java.nio.ByteBuffer =>
        // `ByteBuffer.array` returns the whole backing array regardless of position, limit
        // and array offset, and throws for read-only or direct buffers. Copy exactly the
        // readable window instead; working on a duplicate leaves the caller's position alone.
        val readable = bytes.duplicate()
        val copied = new Array[Byte](readable.remaining())
        readable.get(copied)
        folder.bytes(copied)
      case str: CharSequence => folder.string(str.toString)
      case record: GenericRecord =>
        // Only fields for which `hasField` is true are included in the folded map.
        foldMapLike(
          record.getSchema.getFields.asScala.collect {
            case k if record.hasField(k.name) => (k.name, record.get(k.name))
          },
          folder,
        )
      case map: java.util.Map[_, _] =>
        // Keys are rendered with `toString`.
        foldMapLike(map.asScala.map { case (k, v) => (k.toString, v) }, folder)
      case symbol: GenericEnumSymbol[_] => folder.string(symbol.toString)
      case array: GenericArray[_] =>
        val vector = folder.vectorBuilder()
        array.forEach(elem => vector.add(foldField(elem, folder)))
        vector.finish()
      case fixed: GenericFixed => folder.bytes(fixed.bytes)
      case null => folder.nullValue
      case other =>
        throw new IllegalArgumentException(
          s"Got an unexpected value: ${other} of type: ${other.getClass.getName} from avro. This shouldn't happen...",
        )
    }

    /** Fold a whole record by treating it as a field value. */
    override def fold[B](record: GenericRecord, folder: DataFolderTo[B]): B = foldField(record, folder)
  }
}

================================================
FILE: data/src/main/scala/com/thatdot/data/DataFolderTo.scala
================================================
package com.thatdot.data

import java.time._
import java.time.format.DateTimeFormatter

import scala.collection.immutable.SortedMap

import io.circe.Json

import com.thatdot.common.util.ByteConversions

/** Constructors for building a value of type `A`: one method per kind of scalar,
  * plus builders for vectors and string-keyed maps.
  */
trait DataFolderTo[A] {
  def nullValue: A

  def trueValue: A

  def falseValue: A

  def integer(l: Long): A

  def string(s: String): A

  def bytes(b: Array[Byte]): A

  def floating(d: Double): A

  def date(d: LocalDate): A

  def time(t: OffsetTime): A

  def localTime(t: LocalTime): A

  def localDateTime(ldt: LocalDateTime): A

  def zonedDateTime(zdt: ZonedDateTime): A

  def duration(d: Duration): A

  /** A fresh builder that collects elements and produces an `A` on `finish()`. */
  def vectorBuilder(): DataFolderTo.CollectionBuilder[A]

  /** A fresh builder that collects key/value entries and produces an `A` on `finish()`. */
  def mapBuilder(): DataFolderTo.MapBuilder[A]
}

object DataFolderTo {
  trait CollectionBuilder[A] {
    def add(a: A): Unit
    def
/** Folds values into circe [[Json]].
  *
  * Integers and floating-point numbers go through `Json.fromLong` / `Json.fromDoubleOrString`;
  * bytes are passed through `ByteConversions.formatHexBinary`; temporal values are rendered with
  * the matching ISO `DateTimeFormatter`; durations use `toString`. All of those non-numeric
  * renderings end up as JSON strings.
  */
implicit val jsonFolder: DataFolderTo[Json] = new DataFolderTo[Json] {
  // Single place where an already-rendered value becomes a JSON string.
  private def text(rendered: String): Json = Json.fromString(rendered)

  def nullValue: Json = Json.Null
  def trueValue: Json = Json.True
  def falseValue: Json = Json.False

  def integer(i: Long): Json = Json.fromLong(i)
  def floating(f: Double): Json = Json.fromDoubleOrString(f)

  def string(s: String): Json = text(s)
  def bytes(b: Array[Byte]): Json = text(ByteConversions.formatHexBinary(b))

  def date(d: LocalDate): Json = text(DateTimeFormatter.ISO_LOCAL_DATE.format(d))
  def time(t: OffsetTime): Json = text(DateTimeFormatter.ISO_OFFSET_TIME.format(t))
  def localTime(t: LocalTime): Json = text(DateTimeFormatter.ISO_LOCAL_TIME.format(t))
  def localDateTime(ldt: LocalDateTime): Json = text(DateTimeFormatter.ISO_LOCAL_DATE_TIME.format(ldt))
  def zonedDateTime(zdt: ZonedDateTime): Json = text(DateTimeFormatter.ISO_ZONED_DATE_TIME.format(zdt))
  def duration(d: Duration): Json = text(d.toString)

  /** Accumulates elements in insertion order and emits them as a JSON array. */
  def vectorBuilder(): CollectionBuilder[Json] = new CollectionBuilder[Json] {
    private val items = Vector.newBuilder[Json]
    def add(a: Json): Unit = items += a
    def finish(): Json = Json.fromValues(items.result())
  }

  /** Accumulates key/value pairs in insertion order and emits them via `Json.fromFields`. */
  def mapBuilder(): MapBuilder[Json] = new MapBuilder[Json] {
    private val entries = Seq.newBuilder[(String, Json)]
    def add(key: String, value: Json): Unit = entries += ((key, value))
    def finish(): Json = Json.fromFields(entries.result())
  }
}
/** Normalizes a folded value so results can be compared with `==`.
  *
  * Arrays and `ByteBuffer`s become `Vector`s (via the buffer's backing array), and the values of
  * Scala and Java maps are normalized recursively. Anything else is returned unchanged.
  */
def canonicalize(v: Any): Any = {
  // Rebuilds a plain immutable Map, normalizing values and leaving keys as they are.
  def normalizeValues(entries: Iterator[(Any, Any)]): Map[Any, Any] =
    entries.map { case (key, value) => key -> canonicalize(value) }.toMap

  v match {
    case javaMap: java.util.Map[_, _] => normalizeValues(javaMap.asScala.iterator)
    case scalaMap: Map[_, _] => normalizeValues(scalaMap.iterator)
    case buffer: ByteBuffer => buffer.array().toVector
    case array: Array[_] => array.toVector
    case other => other
  }
}
record1: GenericData.Record = new GenericData.Record(schema1) val fieldVals = SortedMap[String, Any]( ("astring" -> "string1"), ("anull" -> null), ("abool" -> true), ("aint" -> 100), ("along" -> Long.MaxValue), ("afloat" -> 101F), ("adouble" -> Double.MaxValue), ("abytes" -> ByteBuffer.wrap("some bytes".getBytes(StandardCharsets.UTF_8))), ) fieldVals.foreach { case (s, v) => record1.put(s, v) } val result = DataFoldableFrom.avroDataFoldable.fold(record1, DataFolderTo.anyFolder).asInstanceOf[TreeMap[Any, Any]] assert(canonicalize(result) == canonicalize(fieldVals)) } it("Avro - record of records") { val schema1 = new Schema.Parser().parse(""" |{ | "name": "multi", | "type": "record", | "fields": [ | { | "name": "left", | "type": { | "name": "leftT", | "type": "record", | "fields": [ {"name": "leftA", "type": "string"}, {"name": "leftB", "type": "int"} ] | } | }, | { | "name": "right", | "type": { | "name": "rightT", | "type": "record", | "fields": [ {"name": "rightA", "type": "boolean"}, {"name": "rightB", "type": "string"} ] | } | } | ] |} |""".stripMargin) val left: GenericData.Record = new GenericData.Record(schema1.getField("left").schema()) left.put("leftA", "a string") left.put("leftB", 101) val right: GenericData.Record = new GenericData.Record(schema1.getField("right").schema) right.put("rightA", false) right.put("rightB", "another string") val record: GenericData.Record = new GenericData.Record(schema1) record.put("left", left) record.put("right", right) val result = DataFoldableFrom.avroDataFoldable.fold(record, DataFolderTo.anyFolder) assert( result == TreeMap[String, TreeMap[String, Any]]( ("left" -> TreeMap[String, Any](("leftA" -> "a string"), ("leftB" -> 101))), ("right" -> TreeMap[String, Any](("rightA" -> false), ("rightB" -> "another string"))), ), ) } it("Avro - array of maps") { val schema1 = new Schema.Parser().parse(""" | { | "name": "ArrayOfMaps", | "type": "record", | "fields": [{ | "name": "alist", | "type": { | "type": "array", | "items": { 
| "type": "map", | "values": "long" | } | } | }] | } |""".stripMargin) val record: GenericData.Record = new GenericData.Record(schema1) val maps: List[java.util.Map[String, Long]] = List( Map(("k1a" -> 101L), ("k1b" -> 102L)).asJava, Map(("k2a" -> 102L), ("k2b" -> 103L)).asJava, ) record.put( "alist", new GenericData.Array[java.util.Map[String, Long]](schema1.getField("alist").schema(), maps.asJava), ) val result = DataFoldableFrom.avroDataFoldable.fold(record, DataFolderTo.anyFolder) assert( canonicalize(result) == Map( ("alist" -> List( Map(("k1a" -> 101), ("k1b" -> 102)), Map(("k2a" -> 102), ("k2b" -> 103)), )), ), ) } } ================================================ FILE: data/src/test/scala/com/thatdot/data/DataFoldableFromSpec.scala ================================================ package com.thatdot.data import io.circe.Json import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers class DataFoldableFromSpec extends AnyFunSpec with Matchers { describe("Chained foldables of the same type work") { it("works even if types are repeated") { val jsonValue = DataFoldableFrom.stringDataFoldable.fold("ABC", DataFolderTo.jsonFolder) jsonValue shouldBe Json.fromString("ABC") val jsonValue2 = DataFoldableFrom.jsonDataFoldable.fold(jsonValue, DataFolderTo.jsonFolder) jsonValue2 shouldEqual jsonValue } } } ================================================ FILE: data/src/test/scala/com/thatdot/data/DataFolderToSpec.scala ================================================ package com.thatdot.data import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers import com.thatdot.data.DataFoldableFrom._ class DataFolderToSpec extends AnyFunSpec with Matchers { private def nullSafeToString(x: Any) = s"$x" describe("DataFolderTo") { it("preserves map values across a fold") { val testDataStringified: Map[String, String] = FoldableTestData().asMap.view.mapValues(nullSafeToString).to(Map) val v = 
/** Test fixture with one field for every kind of value that [[DataFoldableFrom]] covers.
  *
  * The defaults are randomized or taken from the current clock, so two instances created with
  * default arguments generally differ.
  */
case class FoldableTestData(
  nullValue: Null = null,
  trueValue: Boolean = true,
  falseValue: Boolean = false,
  integerValue: Integer = Random.nextInt(),
  stringValue: String = Random.nextString(Random.nextInt(10)),
  bytesValue: Array[Byte] = Random.nextBytes(10),
  floatingValue: Double = Random.nextDouble(),
  dateValue: LocalDate = LocalDate.now(),
  timeValue: OffsetTime = OffsetTime.now(),
  localTimeValue: LocalTime = LocalTime.now(),
  localDateTimeValue: LocalDateTime = LocalDateTime.now(),
  zonedDateTimeValue: ZonedDateTime = ZonedDateTime.now(),
  durationValue: JavaDuration = JavaDuration.ofNanos(Random.between(0L, Long.MaxValue)),
  mapValue: Map[String, Any] = Map.empty[String, Any],
  vectorValue: Vector[Any] = Vector.empty[Any],
) {

  /** Every field keyed by its field name. */
  def asMap: Map[String, Any] = productElementNames.zip(productIterator).toMap

  /** Every field value, in declaration order. */
  def asVector: Vector[Any] = productIterator.toVector

  /** Folds this fixture into `B` as a map from field name to folded field value. */
  def foldTo[B](implicit dataFolder: DataFolderTo[B]): B = {
    val builder = dataFolder.mapBuilder()
    for ((fieldName, fieldValue) <- asMap)
      builder.add(fieldName, FoldableTestData.fromAnyDataFoldable.fold(fieldValue, dataFolder))
    builder.finish()
  }
}
/** Folds an untyped value by inspecting its runtime class.
  *
  * Case order matters: `Int` and `Long` must precede the general `Number` case (which folds to a
  * floating value), and `Map` must precede `Iterable` because every `Map` is also an `Iterable`.
  *
  * @throws UnsupportedOperationException for any value whose type is not handled below
  */
override def fold[B](value: Any, folder: DataFolderTo[B]): B = value match {
  case null => folder.nullValue
  case flag: Boolean => if (flag) folder.trueValue else folder.falseValue
  case text: String => folder.string(text)
  case raw: Array[Byte] => folder.bytes(raw)
  case int: Int => folder.integer(int.toLong)
  case long: Long => folder.integer(long)
  case number: Number => folder.floating(number.doubleValue())
  case localDate: LocalDate => folder.date(localDate)
  case localDateTime: LocalDateTime => folder.localDateTime(localDateTime)
  case offsetTime: OffsetTime => folder.time(offsetTime)
  case localTime: LocalTime => folder.localTime(localTime)
  case zoned: ZonedDateTime => folder.zonedDateTime(zoned)
  case span: JavaDuration => folder.duration(span)
  case entries: Map[_, _] =>
    val builder = folder.mapBuilder()
    for ((key, element) <- entries) builder.add(key.toString, fold(element, folder))
    builder.finish()
  case elements: Iterable[_] =>
    val builder = folder.vectorBuilder()
    for (element <- elements) builder.add(fold(element, folder))
    builder.finish()
  case other =>
    throw new UnsupportedOperationException(s" Value $other of type ${other.getClass} is not handled")
}
/** Conversions from values in the API2 model to the corresponding values in the V1 routes model. */
object Api2ToModel1 {

  /** Copies every rate field of an API2 summary into a V1 summary. */
  def apply(rates: api.v2.RatesSummary): V1.RatesSummary = {
    val v1Rates = V1.RatesSummary(
      count = rates.count,
      oneMinute = rates.oneMinute,
      fiveMinute = rates.fiveMinute,
      fifteenMinute = rates.fifteenMinute,
      overall = rates.overall,
    )
    v1Rates
  }

  /** Carries the access key id and secret access key over unchanged. */
  def apply(c: api.v2.AwsCredentials): V1.AwsCredentials = {
    val keyId = c.accessKeyId
    val secretKey = c.secretAccessKey
    V1.AwsCredentials(accessKeyId = keyId, secretAccessKey = secretKey)
  }

  /** Wraps the same region value in the V1 type. */
  def apply(r: api.v2.AwsRegion): V1.AwsRegion = {
    val regionValue = r.region
    V1.AwsRegion(regionValue)
  }
}
/** Resolves an API output format into the internal encoder.
  *
  * JSON needs no lookup and completes immediately. Protobuf first obtains the message descriptor
  * for `typeName` from the schema cache (with `flushOnFail = true`) and completes once that
  * lookup does.
  */
def apply(
  format: api.v2.outputs.OutputFormat,
)(implicit protobufSchemaCache: ProtobufSchemaCache, ec: ExecutionContext): Future[outputs2.OutputEncoder] = {
  import api.v2.outputs.OutputFormat

  format match {
    case OutputFormat.Protobuf(schemaUrl, typeName) =>
      val schemaLocation = StringInput.filenameOrUrl(schemaUrl)
      for {
        descriptor <- protobufSchemaCache.getMessageDescriptor(schemaLocation, typeName, flushOnFail = true)
      } yield outputs2.OutputEncoder.Protobuf(schemaUrl, typeName, descriptor)
    case OutputFormat.JSON =>
      Future.successful(outputs2.OutputEncoder.JSON())
  }
}
bootstrapServers, format, sslKeystorePassword, sslTruststorePassword, sslKeyPassword, saslJaasConfig, kafkaProperties, ) => apply(format).map(enc => outputs2.FoldableDestinationSteps.WithByteEncoding( formatAndEncode = enc, destination = outputs2.destination.Kafka( topic = topic, bootstrapServers = bootstrapServers, sslKeystorePassword = sslKeystorePassword, sslTruststorePassword = sslTruststorePassword, sslKeyPassword = sslKeyPassword, saslJaasConfig = saslJaasConfig.map(apply), kafkaProperties = kafkaProperties.view.mapValues(_.s).toMap, ), ), ) case api.v2.outputs.DestinationSteps.Kinesis( credentials, region, streamName, format, kinesisParallelism, kinesisMaxBatchSize, kinesisMaxRecordsPerSecond, kinesisMaxBytesPerSecond, ) => apply(format).map(enc => outputs2.FoldableDestinationSteps.WithByteEncoding( formatAndEncode = enc, destination = outputs2.destination.Kinesis( credentials = credentials.map(Api2ToAws.apply), region = region.map(Api2ToAws.apply), streamName = streamName, kinesisParallelism = kinesisParallelism, kinesisMaxBatchSize = kinesisMaxBatchSize, kinesisMaxRecordsPerSecond = kinesisMaxRecordsPerSecond, kinesisMaxBytesPerSecond = kinesisMaxBytesPerSecond, ), ), ) case api.v2.outputs.DestinationSteps.ReactiveStream(address, port, format) => apply(format).map(enc => outputs2.FoldableDestinationSteps.WithByteEncoding( formatAndEncode = enc, destination = outputs2.destination.ReactiveStream( address = address, port = port, ), ), ) case api.v2.outputs.DestinationSteps.SNS(credentials, region, topic, format) => apply(format).map(enc => outputs2.FoldableDestinationSteps.WithByteEncoding( formatAndEncode = enc, destination = outputs2.destination.SNS( credentials = credentials.map(Api2ToAws.apply), region = region.map(Api2ToAws.apply), topic = topic, ), ), ) case api.v2.outputs.DestinationSteps.StandardOut() => Future.successful( outputs2.FoldableDestinationSteps.WithByteEncoding( // Update this when non-JSON outputs are supported for StandardOut 
/** Conversions from values in the V1 routes model to the corresponding values in the API2 model. */
object Model1ToApi2 {

  /** Copies every rate field of a V1 summary into an API2 summary. */
  def apply(rates: V1.RatesSummary): api.v2.RatesSummary = {
    val v2Rates = api.v2.RatesSummary(
      count = rates.count,
      oneMinute = rates.oneMinute,
      fiveMinute = rates.fiveMinute,
      fifteenMinute = rates.fifteenMinute,
      overall = rates.overall,
    )
    v2Rates
  }

  /** Carries the access key id and secret access key over unchanged. */
  def apply(c: V1.AwsCredentials): api.v2.AwsCredentials = {
    val keyId = c.accessKeyId
    val secretKey = c.secretAccessKey
    api.v2.AwsCredentials(accessKeyId = keyId, secretAccessKey = secretKey)
  }

  /** Wraps the same region value in the API2 type. */
  def apply(r: V1.AwsRegion): api.v2.AwsRegion = {
    val regionValue = r.region
    api.v2.AwsRegion(regionValue)
  }
}
/** The concrete [[FoldableDestinationSteps]], one per family of [[ResultDestination]]. */
object FoldableDestinationSteps {

  /** Folds each input into the encoder's representation, encodes that to bytes, and sends the
    * bytes to a byte-oriented destination.
    */
  case class WithByteEncoding(
    formatAndEncode: OutputEncoder,
    destination: ResultDestination.Bytes,
  ) extends FoldableDestinationSteps {
    override def sink[In: DataFoldableFrom](outputName: String, namespaceId: NamespaceId)(implicit
      logConfig: LogConfig,
    ): Sink[In, NotUsed] = {
      // The folder and class tag are passed explicitly because they come from the encoder value.
      val foldToRepr = DataFoldableFrom[In].to(formatAndEncode.folderTo, formatAndEncode.reprTag)
      val encode: In => Array[Byte] = in => formatAndEncode.bytes(foldToRepr(in))
      val byteSink = destination.sink(outputName, namespaceId)
      Flow.fromFunction(encode).to(byteSink)
    }
  }

  /** Hands inputs straight to a destination that does its own folding. */
  case class WithDataFoldable(destination: ResultDestination.FoldableData) extends FoldableDestinationSteps {
    override def sink[In: DataFoldableFrom](outputName: String, namespaceId: NamespaceId)(implicit
      logConfig: LogConfig,
    ): Sink[In, NotUsed] =
      destination.sink[In](outputName, namespaceId)
  }

  /** Hands inputs to a destination that accepts any value. */
  case class WithAny(destination: ResultDestination.AnyData) extends FoldableDestinationSteps {
    override def sink[In: DataFoldableFrom](outputName: String, namespaceId: NamespaceId)(implicit
      logConfig: LogConfig,
    ): Sink[In, NotUsed] = {
      val anySink: Sink[Any, NotUsed] = destination.sink(outputName, namespaceId)
      anySink
    }
  }
}
/** Encodes a JSON value as bytes in [[charset]], followed by one newline character.
  *
  * *NOTE* The [[charset]] cannot currently be set via the API. When that changes,
  * [[com.thatdot.model.v2.outputs.ResultDestination.Bytes.File]] must be adapted to accommodate
  * the `charset` as well (right now it assumes UTF_8, since that is the default here)!
  *
  * @param charset the character set used to turn the [[io.circe.Json]] value into {{{Array[Byte]}}}
  */
case class JSON(charset: Charset = StandardCharsets.UTF_8) extends OutputEncoder {
  import io.circe.{Json, Printer}

  type Repr = Json
  val reprTag: ClassTag[Repr] = implicitly[ClassTag[Repr]]

  override def folderTo: DataFolderTo[Repr] = DataFolderTo.jsonFolder

  private val printer = Printer.noSpaces

  // "\n" encoded once in the configured charset, so the terminator matches the payload encoding.
  private val newline: Array[Byte] = {
    val encoded = charset.encode("\n")
    val terminator = new Array[Byte](encoded.remaining())
    encoded.get(terminator)
    terminator
  }

  /** Compact (no-spaces) rendering of `value` with the newline terminator appended. */
  override def bytes(value: Repr): Array[Byte] = {
    val rendered = printer.printToByteBuffer(value, charset)
    val jsonLength = rendered.remaining()
    val out = new Array[Byte](jsonLength + newline.length)
    // JSON bytes first, then the terminator immediately after them.
    rendered.get(out, 0, jsonLength)
    System.arraycopy(newline, 0, out, jsonLength, newline.length)
    out
  }
}
/** Derives the name given to a destination's sink. */
trait SinkName {

  /** Short identifier for the kind of destination. */
  def slug: String

  /** The sink name: the fixed prefix, the slug and the output name, joined by `--`. */
  def sinkName(outputName: String): String =
    List("result-destination", slug, outputName).mkString("--")
}
/** The interface (despite the API needing an ADT) for result destinations,
  * which are adapters for sending/writing to a location.
  */
sealed trait ResultDestination extends SinkName

/** The destination families, grouped by the kind of sink each one mixes in. */
object ResultDestination {

  /** Destinations that mix in [[AnySink]]. */
  sealed trait AnyData extends ResultDestination with AnySink
  object AnyData {
    trait Drop extends AnyData
  }

  /** Destinations that mix in [[DataFoldableSink]]. */
  sealed trait FoldableData extends ResultDestination with DataFoldableSink
  object FoldableData {
    trait HttpEndpoint extends FoldableData {
      def url: String
      def parallelism: Int
    }
  }

  /** Destinations that mix in [[ByteArraySink]]. */
  sealed trait Bytes extends ResultDestination with ByteArraySink
  object Bytes {
    trait StandardOut extends Bytes

    trait File extends Bytes {
      def path: String
    }

    trait ReactiveStream extends Bytes {
      def address: String
      def port: Int
    }

    trait Kafka extends Bytes {
      def topic: String
      def bootstrapServers: String
      def kafkaProperties: Map[String, String]
    }

    trait SNS extends Bytes {
      def credentials: Option[AwsCredentials]
      def region: Option[AwsRegion]
      def topic: String
    }

    trait Kinesis extends Bytes {
      def credentials: Option[AwsCredentials]
      def region: Option[AwsRegion]
      def streamName: String
      def kinesisParallelism: Option[Int]
      def kinesisMaxBatchSize: Option[Int]
      def kinesisMaxRecordsPerSecond: Option[Int]
      def kinesisMaxBytesPerSecond: Option[Int]
    }
  }
}
* * @param config * the SASL/JAAS configuration to format * @param renderSecret * function to render secret values (e.g., redact or expose) * @return * a JAAS configuration string */ private def formatJaasString(config: SaslJaasConfig, renderSecret: Secret => String): String = config match { case PlainLogin(username, password) => s"""org.apache.kafka.common.security.plain.PlainLoginModule required username="$username" password=" ${renderSecret(password)}";""" case ScramLogin(username, password) => s"""org.apache.kafka.common.security.scram.ScramLoginModule required username="$username" password=" ${renderSecret(password)}";""" case OAuthBearerLogin(clientId, clientSecret, scope, tokenEndpointUrl) => val scopePart = scope.map(s => s""" scope="$s"""").getOrElse("") val tokenUrlPart = tokenEndpointUrl.map(u => s""" sasl.oauthbearer.token.endpoint.url="$u"""").getOrElse("") s"""org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule required clientId="$clientId" clientSecret="${renderSecret( clientSecret, )}"$scopePart$tokenUrlPart;""" } /** Loggable instance that outputs JAAS format with redacted secrets. */ implicit val loggable: AlwaysSafeLoggable[SaslJaasConfig] = formatJaasString(_, _ => "****") /** Convert to Kafka's JAAS config string format. * * Requires an unsafe access witness to extract the secret values. */ def toJaasConfigString(config: SaslJaasConfig)(implicit ev: Secret.UnsafeAccess): String = formatJaasString(config, _.unsafeValue) } /** PLAIN authentication mechanism. */ final case class PlainLogin( username: String, password: Secret, ) extends SaslJaasConfig /** SCRAM authentication mechanism. */ final case class ScramLogin( username: String, password: Secret, ) extends SaslJaasConfig /** OAuth Bearer authentication mechanism. 
/** Destination that accepts any value and discards it. */
case object Drop extends ResultDestination.AnyData.Drop {
  override def slug: String = "drop"

  /** An ignoring sink whose materialized value is replaced by `NotUsed`, named via [[sinkName]]. */
  override def sink(name: String, inNamespace: NamespaceId)(implicit logConfig: LogConfig): Sink[Any, NotUsed] = {
    val discardAll: Sink[Any, NotUsed] = Sink.ignore.mapMaterializedValue(_ => NotUsed)
    discardAll.named(sinkName(name))
  }
}
/** Destination that appends each byte array to the file at `path`.
  *
  * @param path location of the output file; opened with WRITE, CREATE and APPEND
  */
final case class File(
  path: String,
) extends ResultDestination.Bytes.File {
  override def slug: String = "file"

  /** Sink that wraps each array in a `ByteString` and writes it with `FileIO.toPath`.
    *
    * The IO result materialized by `FileIO` is replaced with `NotUsed`.
    */
  override def sink(name: String, inNamespace: NamespaceId)(implicit logConfig: LogConfig): Sink[Array[Byte], NotUsed] = {
    val target = Paths.get(path)
    val appendToFile = FileIO
      .toPath(target, Set(StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.APPEND))
      .named(sinkName(name))

    appendToFile
      .contramap[Array[Byte]](bytes => ByteString.fromArray(bytes))
      .mapMaterializedValue(_ => NotUsed)
  }
}
/** Destination that POSTs each result to an HTTP endpoint as a compact JSON document.
  *
  * @param url         endpoint that receives the POST requests
  * @param parallelism passed to `mapAsync`: how many requests may be in flight at once
  * @param headers     extra request headers; the secret values are unwrapped when the sink is built
  */
final case class HttpEndpoint(
  url: String,
  parallelism: Int = 8,
  headers: Map[String, Secret] = Map.empty,
)(implicit system: ActorSystem)
    extends ResultDestination.FoldableData.HttpEndpoint
    with LazySafeLogging {
  override def slug: String = "http"

  /** Builds the sink: fold each element to JSON, POST it, and log (rather than propagate) failures.
    *
    * A non-success response has its body read and logged together with the status. Any failed
    * request future is logged and recovered, so one bad request does not fail the stream.
    */
  override def sink[A: DataFoldableFrom](name: String, inNamespace: NamespaceId)(implicit
    logConfig: LogConfig,
  ): Sink[A, NotUsed] = {
    import java.nio.charset.StandardCharsets

    val http = Http()
    val toJson = DataFoldableFrom[A].to[Json]
    import Secret.Unsafe._
    val customHeaders: List[RawHeader] =
      headers.map { case (k, v) => RawHeader(k, v.unsafeValue) }.toList

    Flow[A]
      .mapAsync(parallelism) { (a: A) =>
        val json = toJson(a)
        val request = HttpRequest(
          method = HttpMethods.POST,
          uri = url,
          headers = customHeaders,
          entity = HttpEntity(
            contentType = `application/json`,
            // Encode explicitly as UTF-8: the no-arg getBytes uses the JVM default charset,
            // which would corrupt non-ASCII text on hosts whose default is not UTF-8.
            json.noSpaces.getBytes(StandardCharsets.UTF_8),
          ),
        )

        val posted: Future[Unit] =
          http
            .singleRequest(request)
            .flatMap(response =>
              if (response.status.isSuccess()) {
                // The response entity must be consumed even though its content is not needed.
                response.entity
                  .discardBytes()
                  .future()
                  .map(_ => ())(ExecutionContext.parasitic)
              } else {
                Unmarshal(response)
                  .to[String]
                  .andThen {
                    case Failure(err) =>
                      logger.error(
                        log"""Failed to deserialize error response from POST ${Safe(json.toString)} to ${Safe(url)}.
                             |Response status was ${response.status}""".cleanLines
                        withException err,
                      )
                    case Success(responseBody) =>
                      logger.error(
                        log"""Failed to POST ${Safe(json.toString)} to ${Safe(url)}.
                             |Response was ${response.status}
                             |""".cleanLines + log": ${Safe(responseBody)}",
                      )
                  }(system.dispatcher)
                  .map(_ => ())(ExecutionContext.parasitic)
              },
            )(system.dispatcher)

        posted.recover { case err =>
          logger.error(log"Failed to POST result" withException err)
        }(system.dispatcher)
      }
      .to(Sink.ignore)
      .named(sinkName(name))
  }
}
*/ private def warnOnOverriddenProperties()(implicit @unused logConfig: Log.LogConfig): Unit = { val typedSecretKeys: Set[String] = Set.empty ++ sslKeystorePassword.map(_ => "ssl.keystore.password") ++ sslTruststorePassword.map(_ => "ssl.truststore.password") ++ sslKeyPassword.map(_ => "ssl.key.password") ++ saslJaasConfig.map(_ => "sasl.jaas.config") val overriddenKeys = kafkaProperties.keySet.intersect(typedSecretKeys) overriddenKeys.foreach { key => logger.warn( safe"Kafka property '${Safe(key)}' in kafkaProperties will be overridden by typed Secret parameter. " + safe"Remove '${Safe(key)}' from kafkaProperties to suppress this warning.", ) } } /** Merge typed secret params into Kafka properties. Typed params take precedence. */ private[destination] def effectiveProperties: Map[String, String] = { val secretProps: Map[String, String] = Map.empty ++ sslKeystorePassword.map("ssl.keystore.password" -> _.unsafeValue) ++ sslTruststorePassword.map("ssl.truststore.password" -> _.unsafeValue) ++ sslKeyPassword.map("ssl.key.password" -> _.unsafeValue) ++ saslJaasConfig.map("sasl.jaas.config" -> SaslJaasConfig.toJaasConfigString(_)) kafkaProperties ++ secretProps } override def sink(name: String, inNamespace: NamespaceId)(implicit logConfig: Log.LogConfig, ): Sink[Array[Byte], NotUsed] = { warnOnOverriddenProperties() val settings = ProducerSettings( system, new ByteArraySerializer, new ByteArraySerializer, ).withBootstrapServers(bootstrapServers) .withProperties(effectiveProperties) saslJaasConfig.foreach(config => logger.info(safe"Kafka SASL config: $config")) logger.info(safe"Writing to kafka with properties ${Safe(kafkaProperties)}") Flow[Array[Byte]] .map { bytes => ProducerMessage .single(new ProducerRecord[Array[Byte], Array[Byte]](topic, bytes)) } .via(KafkaProducer.flexiFlow(settings).named(sinkName(name))) .to(Sink.ignore) } } ================================================ FILE: outputs2/src/main/scala/com/thatdot/outputs2/destination/Kinesis.scala 
================================================
package com.thatdot.outputs2.destination

import scala.util.{Failure, Random, Success}

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.connectors.kinesis.KinesisFlowSettings
import org.apache.pekko.stream.connectors.kinesis.scaladsl.KinesisFlow
import org.apache.pekko.stream.scaladsl.Sink

import software.amazon.awssdk.core.SdkBytes
import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient
import software.amazon.awssdk.services.kinesis.model.PutRecordsRequestEntry

import com.thatdot.aws.model.{AwsCredentials, AwsRegion}
import com.thatdot.aws.util.AwsOps
import com.thatdot.aws.util.AwsOps.AwsBuilderOps
import com.thatdot.common.logging.Log.LogConfig
import com.thatdot.outputs2.ResultDestination
import com.thatdot.quine.graph.NamespaceId

/** Result destination that puts every byte payload onto a Kinesis stream.
  *
  * Each `Option[Int]` tuning parameter overrides the corresponding `KinesisFlowSettings` value
  * when defined and leaves the library default in place otherwise.
  */
final case class Kinesis(
  credentials: Option[AwsCredentials],
  region: Option[AwsRegion],
  streamName: String,
  kinesisParallelism: Option[Int],
  kinesisMaxBatchSize: Option[Int],
  kinesisMaxRecordsPerSecond: Option[Int],
  kinesisMaxBytesPerSecond: Option[Int],
) extends ResultDestination.Bytes.Kinesis {
  override def slug: String = "kinesis"

  /** Build a sink that sends each byte array to [[streamName]] and closes the Kinesis client once
    * the stream terminates, whether it succeeded or failed.
    */
  override def sink(name: String, inNamespace: NamespaceId)(implicit
    logConfig: LogConfig,
  ): Sink[Array[Byte], NotUsed] = {
    val kinesisAsyncClient: KinesisAsyncClient =
      KinesisAsyncClient
        .builder()
        .credentialsV2(credentials)
        .regionV2(region)
        .httpClient(NettyNioAsyncHttpClient.builder.maxConcurrency(AwsOps.httpConcurrencyPerClient).build())
        .build()

    def closeClient(): Unit = kinesisAsyncClient.close()

    // Release the client on either outcome of the stream.
    val lifecycleSink = Sink.onComplete {
      case Success(_) | Failure(_) => closeClient()
    }

    // Collect only the overrides that were supplied, then apply them to the default settings.
    val overrides: List[KinesisFlowSettings => KinesisFlowSettings] = List(
      kinesisParallelism.map(p => (cfg: KinesisFlowSettings) => cfg.withParallelism(p)),
      kinesisMaxBatchSize.map(b => (cfg: KinesisFlowSettings) => cfg.withMaxBatchSize(b)),
      kinesisMaxRecordsPerSecond.map(r => (cfg: KinesisFlowSettings) => cfg.withMaxRecordsPerSecond(r)),
      kinesisMaxBytesPerSecond.map(b => (cfg: KinesisFlowSettings) => cfg.withMaxBytesPerSecond(b)),
    ).flatten
    val settings: KinesisFlowSettings =
      overrides.foldLeft(KinesisFlowSettings.create())((cfg, tune) => tune(cfg))

    KinesisFlow(
      streamName,
      settings,
    )(kinesisAsyncClient)
      .named(sinkName(name))
      .contramap[Array[Byte]] { bytes =>
        // The partition key is a fixed placeholder; a random 128-bit explicit hash key is set
        // for every record alongside it.
        PutRecordsRequestEntry
          .builder()
          .data(SdkBytes.fromByteArray(bytes))
          .partitionKey("undefined")
          .explicitHashKey(BigInt(128, Random).toString)
          .build()
      }
      .to(lifecycleSink)
  }
}

================================================
FILE: outputs2/src/main/scala/com/thatdot/outputs2/destination/ReactiveStream.scala
================================================
package com.thatdot.outputs2.destination

import org.apache.pekko.NotUsed
import org.apache.pekko.actor.ActorSystem
import org.apache.pekko.stream.scaladsl.{BroadcastHub, Flow, Keep, Sink, Tcp}
import org.apache.pekko.util.ByteString

import com.thatdot.common.logging.Log.LogConfig
import com.thatdot.outputs2.ResultDestination
import com.thatdot.quine.graph.NamespaceId

/** Result destination that serves results to TCP clients as length-prefixed frames.
  *
  * @param address interface the TCP server binds to
  * @param port    port the TCP server binds to
  */
final case class ReactiveStream(
  address: String = "localhost",
  port: Int,
)(implicit system: ActorSystem)
    extends ResultDestination.Bytes.ReactiveStream {
  override def slug: String = "reactive-stream"

  /** Build a sink that frames each payload with a 4-byte length prefix and fans it out, via a
    * `BroadcastHub`, to every TCP client connected to `address:port`.
    *
    * The TCP server is bound as a side effect of materializing the sink.
    */
  override def sink(name: String, inNamespace: NamespaceId)(implicit
    logConfig: LogConfig,
  ): Sink[Array[Byte], NotUsed] = {
    // Convert Array[Byte] to length-prefixed ByteString for framing
    val prependLength = Flow[Array[Byte]].map { payload =>
      val body = ByteString(payload)
      val header = ByteString.fromArray(java.nio.ByteBuffer.allocate(4).putInt(body.length).array())
      header ++ body
    }

    // BroadcastHub with a dummy sink attached to prevent blocking when no consumers
    // When TCP consumers connect, BroadcastHub backpressures to the slowest one
    Flow[Array[Byte]]
      .via(prependLength)
      .toMat(
        Sink.fromGraph(
          BroadcastHub.sink[ByteString](bufferSize = 256),
        ),
      )(Keep.right)
      .mapMaterializedValue { hubSource =>
        // Attach a dummy sink that drops all messages - prevents backpressure when no TCP clients
        hubSource.runWith(Sink.ignore)

        // Bind TCP server that connects each client to the broadcast source
        // NOTE(review): the binding future returned by run() is discarded, so a failed bind is
        // not reported here and the server is never explicitly unbound — confirm this is intended.
        Tcp()
          .bind(address, port)
          .to(Sink.foreach { client: Tcp.IncomingConnection =>
            // Each client gets data from BroadcastHub
            // Silences the non-Unit value of type org.apache.pekko.NotUsed
            val _ = hubSource
              .via(client.flow)
              .to(Sink.ignore)
              .run()
          })
          .run()

        NotUsed
      }
      .named(sinkName(name))
  }
}

================================================
FILE: outputs2/src/main/scala/com/thatdot/outputs2/destination/SNS.scala
================================================
package com.thatdot.outputs2.destination

import scala.util.{Failure, Success}

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.connectors.sns.scaladsl.SnsPublisher
import org.apache.pekko.stream.scaladsl.Sink
import org.apache.pekko.util.ByteString

import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient
import software.amazon.awssdk.services.sns.SnsAsyncClient

import com.thatdot.aws.model.{AwsCredentials, AwsRegion}
import com.thatdot.aws.util.AwsOps
import com.thatdot.aws.util.AwsOps.AwsBuilderOps
import com.thatdot.common.logging.Log.LogConfig
import com.thatdot.outputs2.ResultDestination
import com.thatdot.quine.graph.NamespaceId

/** Result destination that publishes every payload, decoded as a UTF-8 string, to an SNS topic.
  *
  * @param topic value passed to `SnsPublisher.flow` as the topic to publish to
  */
final case class SNS(
  credentials: Option[AwsCredentials],
  region: Option[AwsRegion],
  topic: String,
) extends ResultDestination.Bytes.SNS {
  override def slug: String = "sns"

  /** Build a sink that publishes each byte array to [[topic]] and closes the SNS client once the
    * stream terminates, whether it succeeded or failed.
    */
  override def sink(name: String, inNamespace: NamespaceId)(implicit
    logConfig: LogConfig,
  ): Sink[Array[Byte], NotUsed] = {
    val awsSnsClient =
      SnsAsyncClient
        .builder()
        .credentialsV2(credentials)
        .regionV2(region)
        .httpClient(
          NettyNioAsyncHttpClient.builder.maxConcurrency(AwsOps.httpConcurrencyPerClient).build(),
        )
        .build()

    def closeClient(): Unit = awsSnsClient.close()

    // NOTE pekko-connectors requires we close the SNS client
    val lifecycleSink = Sink.onComplete {
      case Success(_) | Failure(_) => closeClient()
    }

    // NB: by default, this will make 10 parallel requests [configurable via parameter to SnsPublisher.flow]
    // TODO if any request to SNS errors, that thread (of the aforementioned 10) will retry its request
    // indefinitely. If all worker threads block, the SnsPublisher.flow will backpressure indefinitely.
    SnsPublisher
      .flow(topic)(awsSnsClient)
      .named(sinkName(name))
      .contramap[Array[Byte]](ByteString(_).utf8String)
      .mapMaterializedValue(_ => NotUsed)
      .to(lifecycleSink)
  }
}

================================================
FILE: outputs2/src/main/scala/com/thatdot/outputs2/destination/StandardOut.scala
================================================
package com.thatdot.outputs2.destination

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.scaladsl.Sink

import com.thatdot.common.logging.Log.LogConfig
import com.thatdot.outputs2.ResultDestination
import com.thatdot.quine.graph.NamespaceId

/** Result destination that writes every byte payload, as-is, to the process's standard output. */
case object StandardOut extends ResultDestination.Bytes.StandardOut {
  override def slug: String = "standard-out"

  /** Build a sink that passes each incoming byte array to `System.out.write`. */
  override def sink(name: String, inNamespace: NamespaceId)(implicit logConfig: LogConfig): Sink[Array[Byte], NotUsed] =
    Sink
      .foreach[Array[Byte]](bytes => System.out.write(bytes))
      .mapMaterializedValue(_ => NotUsed)
      .named(sinkName(name))
}

================================================
FILE: outputs2/src/main/scala/com/thatdot/outputs2/package.scala
================================================
package com.thatdot

/** The Outputs V2 definitions. These must be and remain available to all products.
  */
package object outputs2

================================================
FILE: outputs2/src/test/scala/com/thatdot/outputs2/destination/KafkaSpec.scala
================================================
package com.thatdot.outputs2.destination

import org.apache.pekko.actor.ActorSystem

import org.scalatest.BeforeAndAfterAll
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

import com.thatdot.common.security.Secret
import com.thatdot.outputs2.{PlainLogin, ScramLogin}

/** Checks how [[Kafka.effectiveProperties]] merges typed secret parameters with `kafkaProperties`. */
class KafkaSpec extends AnyFunSuite with Matchers with BeforeAndAfterAll {

  // Kafka requires an implicit ActorSystem to be constructed.
  implicit val system: ActorSystem = ActorSystem("KafkaSpec")

  override def afterAll(): Unit = {
    system.terminate()
    super.afterAll()
  }

  test("effectiveProperties includes sslKeystorePassword when set") {
    val props = Kafka(
      topic = "test",
      bootstrapServers = "localhost:9092",
      sslKeystorePassword = Some(Secret("keystore-secret")),
    ).effectiveProperties

    props should contain("ssl.keystore.password" -> "keystore-secret")
  }

  test("effectiveProperties includes sslTruststorePassword when set") {
    val props = Kafka(
      topic = "test",
      bootstrapServers = "localhost:9092",
      sslTruststorePassword = Some(Secret("truststore-secret")),
    ).effectiveProperties

    props should contain("ssl.truststore.password" -> "truststore-secret")
  }

  test("effectiveProperties includes sslKeyPassword when set") {
    val props = Kafka(
      topic = "test",
      bootstrapServers = "localhost:9092",
      sslKeyPassword = Some(Secret("key-secret")),
    ).effectiveProperties

    props should contain("ssl.key.password" -> "key-secret")
  }

  test("effectiveProperties includes saslJaasConfig as JAAS string when set") {
    val props = Kafka(
      topic = "test",
      bootstrapServers = "localhost:9092",
      saslJaasConfig = Some(PlainLogin("alice", Secret("password123"))),
    ).effectiveProperties

    val jaasConfig = props.get("sasl.jaas.config")
    jaasConfig shouldBe defined
    jaasConfig.get should include("PlainLoginModule")
    jaasConfig.get should include("alice")
    jaasConfig.get should include("password123")
  }

  test("effectiveProperties preserves non-conflicting kafkaProperties") {
    val props = Kafka(
      topic = "test",
      bootstrapServers = "localhost:9092",
      kafkaProperties = Map(
        "acks" -> "all",
        "batch.size" -> "16384",
      ),
    ).effectiveProperties

    props should contain("acks" -> "all")
    props should contain("batch.size" -> "16384")
  }

  test("typed Secret params override conflicting kafkaProperties") {
    val props = Kafka(
      topic = "test",
      bootstrapServers = "localhost:9092",
      sslKeystorePassword = Some(Secret("typed-keystore-secret")),
      kafkaProperties = Map(
        "ssl.keystore.password" -> "should-be-overridden",
        "acks" -> "all",
      ),
    ).effectiveProperties

    props should contain("ssl.keystore.password" -> "typed-keystore-secret")
    props should contain("acks" -> "all")
    props should not contain ("ssl.keystore.password" -> "should-be-overridden")
  }

  test("all typed Secret params override their corresponding kafkaProperties") {
    val props = Kafka(
      topic = "test",
      bootstrapServers = "localhost:9092",
      sslKeystorePassword = Some(Secret("typed-ks")),
      sslTruststorePassword = Some(Secret("typed-ts")),
      sslKeyPassword = Some(Secret("typed-key")),
      saslJaasConfig = Some(ScramLogin("bob", Secret("typed-sasl"))),
      kafkaProperties = Map(
        "ssl.keystore.password" -> "old-ks",
        "ssl.truststore.password" -> "old-ts",
        "ssl.key.password" -> "old-key",
        "sasl.jaas.config" -> "old-jaas-config",
      ),
    ).effectiveProperties

    props("ssl.keystore.password") shouldBe "typed-ks"
    props("ssl.truststore.password") shouldBe "typed-ts"
    props("ssl.key.password") shouldBe "typed-key"
    props("sasl.jaas.config") should include("ScramLoginModule")
    props("sasl.jaas.config") should include("typed-sasl")
  }

  test("effectiveProperties is empty when no params are set") {
    val props = Kafka(
      topic = "test",
      bootstrapServers = "localhost:9092",
    ).effectiveProperties

    props shouldBe empty
  }
}
================================================
FILE: project/Dependencies.scala
================================================
import sbt._

/** Central list of dependency versions, plus version overrides for transitive dependencies. */
object Dependencies {
  val amazonKinesisClientV = "3.4.2"
  val apacheCommonsCsvV = "1.14.1"
  val avroV = "1.12.1"
  // On update, check whether nettyOverrideV override is removable
  val awsSdkV = "2.42.24"
  // On update, check whether netty-nio-client override in quine-serialization is removable
  val amazonGlueV = "1.1.27"
  val betterMonadicForV = "0.3.1"
  val boopickleV = "1.5.0"
  val bootstrapV = "5.3.6"
  val coreuiV = "5.4.3"
  val coreuiIconsV = "3.0.1"
  val caffeineV = "3.2.3"
  val cassandraClientV = "4.19.2"
  val catsV = "2.13.0"
  val catsEffectV = "3.7.0"
  val circeYamlV = "0.16.1"
  val commonsCodecV = "1.21.0"
  val commonsTextV = "1.15.0"
  val commonsIoV = "2.21.0"
  val dropwizardMetricsV = "4.2.38"
  val embeddedCassandraV = "5.0.3"
  val endpoints4sDefaultV = "1.12.1"
  val endpoints4sCirceV = "2.6.1"
  val endpoints4sHttpServerV = "2.0.1"
  val endpoints4sOpenapiV = "5.0.1"
  val endpoints4sXhrClientV = "5.3.0"
  val flatbuffersV = "25.2.10"
  val graalV = "25.0.2"
  val ioniconsV = "2.0.1"
  val jnrPosixV = "3.1.22"
  val jqueryV = "3.6.3"
  val jwtV = "0.13.0"
  val jwtScalaV = "11.0.4"
  // On update, keep lz4JavaV in sync
  val kafkaClientsV = "3.9.2"
  val kindProjectorV = "0.13.4"
  val logbackV = "1.5.32"
  val laminarV = "17.2.1"
  val waypointV = "10.0.0-M7"
  // Keep in sync with the version kafka-clients (kafkaClientsV) depends on
  val lz4JavaV = "1.10.4"
  // On update, check whether net.jpountz.lz4:lz4 exclusion in quine-mapdb-persistor is removable
  val mapDbV = "3.1.0"
  val metricsInfluxdbV = "1.1.0"
  val msgPackV = "0.9.11"
  val openApiCirceYamlV = "0.11.10"
  val openCypherV = "9.2.3"
  val parboiledV = "1.4.1"
  val pegdownV = "1.6.0"
  val pekkoV = "1.5.0"
  val pekkoTestkitV = "1.5.0"
  val pekkoHttpV = "1.3.0"
  val pekkoHttpCirceV = "3.9.1"
  val pekkoManagementV = "1.2.1"
  val pekkoKafkaV = "1.1.0"
  val pekkoConnectorsV = "1.3.0"
  val plotlyV = "2.25.2"
  val pprintV = "0.9.6"
  val protobufV = "4.34.1"
  val protobufCommonV = "2.14.2"
  val pureconfigV = "0.17.10"
  val antlr4RuntimeV = "4.13.2"
  val lsp4jV = "0.24.0"
  val guavaV = "33.3.0-jre"
  val memeid4sV = "0.8.0"
  val munitV = "1.3.0"
  val quineCommonV = "0.0.4"
  val reactV = "17.0.2"
  val rocksdbV = "10.10.1.1"
  val scaffeineV = "5.3.0"
  val scalaCheckV = "1.19.0"
  val scalaJavaTimeV = "2.6.0"
  val scalaLoggingV = "3.9.6"
  val scalaParserCombinatorsV = "2.4.0"
  val scalaTestScalaCheckV = "3.2.18.0"
  val scalajsDomV = "2.8.1"
  val scalaTestV = "3.2.20"
  val scalajsMacroTaskExecutorV = "1.1.1"
  val scoptV = "4.1.0"
  val shapelessV = "2.3.13"
  val ayzaV = "10.0.4"
  // On update, check whether com.datastax.oss exclusion in quine-cassandra-persistor is removable
  val sigv4AuthCassandraPluginV = "4.0.9"
  // On update, check whether any NPM Override Versions (below) are removable
  val stoplightElementsV = "9.0.1"
  val sugarV = "2.0.6"
  val tapirV = "1.13.15"
  val ujsonCirceV = "3.3.1"
  val circeV = "0.14.15"
  val circeGenericExtrasV = "0.14.4"
  val circeOpticsV = "0.15.1"
  val webjarsLocatorV = "0.52"

  // === Vis-Network and Peer Dependencies
  val visNetworkV = "10.0.2"
  val visDataV = "8.0.3"
  val visUtilV = "6.0.0"
  val egjsHammerjsV = "2.0.17"
  val componentEmitterV = "2.0.0"
  val keycharmV = "0.4.0"
  val uuidV = "11.1.0"

  // === JVM Override Versions ===
  // == Remove overrides when parents require fixed versions of the transitive dependency. ==

  // Parent: AWS SDK (awsSdkV) via transitive Netty dependency
  val nettyOverrideV = "4.1.132.Final" // CVE-2026-33871

  val jvmDependencyOverrides: Seq[ModuleID] = Seq(
    "io.netty" % "netty-handler" % nettyOverrideV,
    "io.netty" % "netty-codec-http" % nettyOverrideV,
    "io.netty" % "netty-codec-http2" % nettyOverrideV,
    "io.netty" % "netty-transport-classes-epoll" % nettyOverrideV,
  )

  // === NPM Override Versions ===
  // == Remove overrides when parents require fixed versions of the transitive dependency. ==

  // Parents: @stoplight/elements (stoplightElementsV), webpack (scalajs-bundler)
  val lodashV = "4.18.0" // CVE-2025-13465 (GHSA-xxjr-mmjv-4gpg), CVE-2026-4800
  // Parent: @stoplight/elements (stoplightElementsV) via react-router-dom
  val reactRouterV = "6.30.3" // CVE-2025-68470 & CVE-2026-22029 (GHSA-2w69-qvjg-hvjx)
  val remixRunRouterV = "1.23.2" // CVE-2026-22029 (GHSA-2w69-qvjg-hvjx)
  // Parents: @stoplight/elements (stoplightElementsV), glob.
  val minimatchV = "3.1.5" // CVE-2026-27903 & CVE-2026-27904
  // Parent: @stoplight/elements (stoplightElementsV) via @stoplight/yaml and openapi3-ts
  val yamlV = "1.10.3" // CVE-2026-33532 (GHSA-48c2-rrv3-qjmp)
  // Parent: @stoplight/elements (stoplightElementsV) via minimatch
  val braceExpansionV = "1.1.13" // CVE-2026-33750 (GHSA-f886-m6hf-6m8v)
}

================================================
FILE: project/Docker.scala
================================================
import scala.concurrent.duration.*
import scala.sys.process.*

import sbt.*
import sbt.Keys.{baseDirectory, name, streams, target, version}
import sbt.io.IO
import sbtassembly.AssemblyKeys.assembly
import sbtassembly.AssemblyPlugin
import sbtdocker.DockerKeys.{docker, dockerBuildArguments, dockerfile, imageNames}
import sbtdocker.staging.DefaultDockerfileProcessor
import sbtdocker.{DockerPlugin, Dockerfile, DockerfileLike, ImageName}

/** Plugin that defines the Docker image built from a project's JAR. */
object Docker extends AutoPlugin {

  override def requires = AssemblyPlugin && DockerPlugin
  override def trigger = allRequirements

  object autoImport {
    // See https://github.com/marcuslonnberg/sbt-docker#pushing-an-image
    val dockerTags = SettingKey[Seq[String]]("docker-tags", "The tag names to push the docker image under")
    val dockerVolume = SettingKey[File]("docker-volume", "Path to where the app should save its data")
    val includeNginx = docker / settingKey[Boolean]("Whether to install and use nginx in app container")
    val dockerJarTask = docker / taskKey[File]("The JAR file to include in the Docker image")
    val dockerStage = docker / taskKey[File]("Stage docker context without building the image")
  }
  import autoImport.*

  override lazy val projectSettings = Seq(
    dockerVolume := file("/var/quine"),
    // A `docker.tag` system property replaces the default pair of tags (version and "latest").
    dockerTags := sys.props.get("docker.tag").fold(Seq(version.value, "latest"))(Seq(_)),
    docker / imageNames := dockerTags.value.map(t =>
      ImageName(namespace = Some("thatdot"), repository = name.value, tag = Some(t)),
    ),
    docker / includeNginx := true,
    // Enforce Docker image format rather than OCI format (the Podman default), enabling HEALTHCHECK
    docker / dockerBuildArguments := Map("format" -> "docker"),
    // Default docker jar task - projects can override this to use packageObfuscatedJar
    docker / dockerJarTask := assembly.value,
    docker / dockerfile := {
      val jar: sbt.File = dockerJarTask.value
      val jarPath = "/" + jar.name
      val jmxPrometheusJarName = "jmx_prometheus_javaagent.jar"
      val temp = IO.createTemporaryDirectory
      val jmxPrometheusFile: sbt.File = temp / "jmx_prometheus_javaagent.jar"
      // Download the JMX exporter agent into the temporary directory.
      // NOTE(review): the exit code of this download is not checked.
      url(
        "https://github.com/prometheus/jmx_exporter/releases/download/1.1.0/jmx_prometheus_javaagent-1.1.0.jar",
      ) #> jmxPrometheusFile !

      val exporterYamlName = "exporter.yaml"
      val exporterYamlFile = temp / exporterYamlName
      IO.append(exporterYamlFile, "rules:\n- pattern: \".*\"")
      val exporterYamlPath = "/" + exporterYamlName

      // Instructions shared by both image variants.
      val base = new Dockerfile {
        from(
          ImageName(
            repository = "eclipse-temurin",
            tag = Some("21.0.10_7-jre-noble"),
          ),
        )
        healthCheckShell(
          "curl --silent --fail http://localhost:8080/api/v1/admin/liveness || exit 1".split(' '),
          interval = Some(10.seconds),
          timeout = Some(2.seconds),
          startPeriod = Some(5.seconds),
        )
        expose(7626, 8080)
        env("QUINE_DATA", dockerVolume.value.getPath)
        volume("$QUINE_DATA")
        copy(jar, jarPath)
        copy(jmxPrometheusFile, jmxPrometheusJarName)
        copy(exporterYamlFile, exporterYamlPath)
      }
      // Do not include NGINX for Quine OSS
      if (includeNginx.value && name.value != "quine") {
        val quinePlusRootDir = baseDirectory.value.getParentFile
        val initScriptName = "init-quine.sh"
        val initScript = quinePlusRootDir / s"docker/$initScriptName"
        val initScriptDest = s"/$initScriptName"
        val nginxConfName = "nginx.conf.template"
        val nginxConf = quinePlusRootDir / s"docker/$nginxConfName"
        val nginxDest = s"/etc/nginx/$nginxConfName"
        val uid = 777
        // Give the chosen uid ownership of, and group write access to, the nginx directories.
        val permissionsFix = s""" chown -R $uid:0 /var/log/nginx \\
                                | && chmod -R g+w /var/log/nginx \\
                                | && chown -R $uid:0 /var/lib/nginx \\
                                | && chmod -R g+w /var/lib/nginx \\
                                | && chown -R $uid:0 /etc/nginx \\
                                | && chmod -R g+w /etc/nginx""".stripMargin
        base
          .runRaw("apt-get update; apt-get install -y nginx")
          .runRaw("rm /etc/nginx/sites-enabled/default")
          .runRaw(permissionsFix)
          .copy(initScript, initScriptDest)
          .copy(nginxConf, nginxDest)
          .entryPoint(initScriptDest)
          .env("QUINE_JAR", jarPath)
      } else {
        base
          .entryPoint(
            "java",
            "-XX:+AlwaysPreTouch",
            "-XX:+UseParallelGC",
            "-XX:InitialRAMPercentage=40.0",
            "-XX:MaxRAMPercentage=80.0",
            "-jar",
            jarPath,
          )
      }
    },
    dockerStage := {
      val log = streams.value.log
      val stageDir = target.value / "docker"
      val df = (docker / dockerfile).value.asInstanceOf[DockerfileLike]

      // Use sbt-docker's internal staging processor
      val staged = DefaultDockerfileProcessor(df, stageDir)

      // Clean and create stage directory
      IO.delete(stageDir)
      IO.createDirectory(stageDir)

      // Write Dockerfile
      IO.write(stageDir / "Dockerfile", staged.instructionsString)

      // Copy all staged files
      staged.stageFiles.foreach { case (source, dest) =>
        source.stage(dest)
      }

      log.info(s"Docker context staged to: $stageDir")
      stageDir
    },
  )
}

================================================
FILE: project/Ecr.scala
================================================
import java.nio.charset.StandardCharsets.UTF_8

import sbt._
import sbt.Keys.streams
import sbtdocker.DockerKeys.{docker, imageNames}
import software.amazon.awssdk.core.exception.SdkClientException
import software.amazon.awssdk.services.ecr.EcrClient

import java.util.Base64
import scala.sys.process._

/** Plugin that logs Docker in to ECR and points the image names at the ECR registry. */
object Ecr extends AutoPlugin {
  object autoImport {
    val publishToEcr = SettingKey[Boolean]("publish-to-ecr", "Flag to enable publishing docker images to ECR")
    // Returns an Option in case e.g. the user doesn't have AWS creds
    val ecrLogin = TaskKey[Option[URL]]("ecr-login", "Login to ECR, returning the URL to the docker registry")
  }
  import autoImport._

  override def requires = Docker

  override lazy val projectSettings = Seq(
    publishToEcr := true,
    ecrLogin := (try {
      val ecrClient = EcrClient.create
      // The client is only needed to fetch the token, so release it as soon as that is done.
      val authData =
        try ecrClient.getAuthorizationToken.authorizationData.get(0)
        finally ecrClient.close()
      val authTokenString = new String(Base64.getDecoder.decode(authData.authorizationToken), UTF_8)
      // Split on the first ':' only, so a password that itself contains ':' stays intact.
      val Array(user, pass) = authTokenString.split(":", 2)
      val domain = authData.proxyEndpoint
      // The password is piped over stdin rather than passed as a command-line argument.
      Seq("docker", "login", "--username", user, "--password-stdin", domain).run(stringToStdIn(pass))
      Some(url(domain))
    } catch {
      case e: SdkClientException =>
        // E.g. no AWS creds in environment
        streams.value.log.warn("Unable to get ECR token: " + e.getMessage)
        None
    }),
    docker / imageNames := {
      val images = (docker / imageNames).value
      ecrLogin.value match {
        // Retarget the images at the ECR registry only when login worked and publishing is enabled.
        case Some(ecrRegistry) if publishToEcr.value =>
          images.map(_.copy(registry = Some(ecrRegistry.getHost)))
        case _ => images
      }
    },
  )

  // Used to pipe the password to the `docker login` process
  private def stringToStdIn(s: String): ProcessIO = BasicIO.standard { os =>
    os.write(s.getBytes(UTF_8))
    os.close()
  }
}

================================================
FILE: project/FlatcPlugin.scala
================================================
import sbt._
import sbt.Keys._
import sbt.util.CacheImplicits._
import scala.util.Properties

/** Plugin that downloads `flatc` and uses it to generate Java sources from `.fbs` schemas. */
object FlatcPlugin extends AutoPlugin {

  import Dependencies.flatbuffersV

  object autoImport {
    val flatcOptions = SettingKey[Seq[String]]("flatc-options", "Additional options to be passed to flatc")
    val flatcSources = SettingKey[Seq[File]]("flatc-sources", "Directories to look for source files")
    val flatcOutput = SettingKey[File]("flatc-output", "Directory into which outputs will be written")

    val flatcDependency = SettingKey[Option[URL]]("flatc-dependency", "URL for zipped binary artifact for flatc")
    val flatcExecutable = TaskKey[File](
      "flatc-executable",
      "Path to a flatc executable. Default downloads flatcDependency from Github.",
    )
  }
  import autoImport._

  // Use `buildSettings` to download the `flatc` executable only once (not once per project)
  override def buildSettings: Seq[Def.Setting[_]] = Seq(
    flatcDependency := {
      val prefix = s"https://github.com/google/flatbuffers/releases/download/v$flatbuffersV/"
      // Pick the release artifact for the current OS; None when the OS is not recognised.
      val suffixOpt =
        if (Properties.isMac) Some("Mac.flatc.binary.zip")
        else if (Properties.isWin) Some("Windows.flatc.binary.zip")
        else if (Properties.isLinux) Some("Linux.flatc.binary.clang++-18.zip")
        else None
      suffixOpt.map(suffix => url(prefix + suffix))
    },
    // This must match the version of the jar we download from Maven
    flatcExecutable := {
      val outputDirectory = (ThisBuild / baseDirectory).value / BuildPaths.DefaultTargetName / "flatc"
      val url: URL = flatcDependency.value.getOrElse {
        val os = Properties.osName
        val suggestion = "set flatcExecutable := file(path-to-flatc)"
        throw new sbt.internal.util.MessageOnlyException(
          s"Could not identify flatc binary for $os (try manually setting `$suggestion`)",
        )
      }
      val flatcStore = streams.value.cacheStoreFactory.make("flatcStore")

      /* Fetch the right `flatc` binary
       *
       * @param file directory into which to place the `flatc` binary
       * @param url URL from which to download a ZIP of the `flatc` binary
       * @return path to the downloaded flatc
       */
      val getFlatc: ((File, URL)) => File = Cache.cached[(File, URL), File](flatcStore) {
        case (outputDirectory, url) =>
          val logger = streams.value.log
          logger.info(s"Downloading flatc from $url...")
          val files = IO.unzipURL(url, outputDirectory)
          assert(files.size == 1, "Only expected a single file in the zip file when downloading flatc")
          val flatcPath = files.head
          // The extracted binary has to be made executable before it can be run.
          if (IO.isPosix) IO.chmod("rwxr--r--", flatcPath)
          logger.info(s"Saved flatc to $flatcPath")
          flatcPath
      }

      getFlatc(outputDirectory, url)
    },
  )

  override def projectSettings: Seq[Def.Setting[_]] = Seq(
    flatcOptions := Seq("--java"),
    flatcSources := Seq((Compile / sourceDirectory).value / "fbs"),
    flatcOutput := (Compile / sourceManaged).value / "fbs",
    Compile / sourceGenerators += Def.task {
      val logger = streams.value.log
      val flatcBin = flatcExecutable.value.getAbsolutePath
      val options: List[String] = flatcOptions.value.toList
      val outFolder = flatcOutput.value

      // Resolve the schema files up front so the cache can be keyed on them. Keying on the
      // source directories would miss edits to an existing .fbs file, because editing a file
      // does not change its parent directory's modification time.
      val fbsFiles: Seq[File] =
        flatcSources.value.flatMap(srcFolder => (srcFolder ** "*.fbs").get)

      val cachedGen = FileFunction.cached(streams.value.cacheDirectory / "fbs") { (_: Set[File]) =>
        val inFiles: List[String] = fbsFiles.map(_.getAbsolutePath).toList

        logger.info(s"Generating flatbuffers code")
        // Start from an empty output directory so stale generated classes do not linger.
        IO.delete(outFolder)
        val args: List[String] = options ++ ("-o" :: outFolder.getAbsolutePath :: inFiles)
        logger.debug(s"Running '$flatcBin ${args.mkString(" ")}'")
        val exitCode = sys.process.Process(flatcBin, args) ! logger
        if (exitCode != 0) throw new sbt.internal.util.MessageOnlyException("Could not generate FlatBuffers classes")
        (outFolder ** "*.java").get.toSet
      }
      cachedGen(fbsFiles.toSet).toSeq
    },
    Compile / managedSourceDirectories += flatcOutput.value,
    libraryDependencies += "com.google.flatbuffers" % "flatbuffers-java" % flatbuffersV,
  )
}

================================================
FILE: project/GitVersion.scala
================================================
import sbt.{AutoPlugin, SettingKey}
import sbt.Keys.version
import com.github.sbt.git.SbtGit.GitKeys.gitReader
import com.github.sbt.git.GitReadonlyInterface

/** Plugin that derives the project version from git tags carrying a configurable prefix. */
object GitVersion extends AutoPlugin {
  override def trigger = allRequirements

  object autoImport {
    val tagPrefix = SettingKey[String]("tag-prefix", "The prefix of the git tag to use as the version number")
  }
  import autoImport._

  /** Describe the current commit against tags starting with `prefix`, with the prefix removed.
    *
    * @return None when `describedVersion` finds nothing for that tag pattern
    */
  private def tagWithPrefix(git: GitReadonlyInterface, prefix: String): Option[String] =
    git.describedVersion(Seq(prefix + '*')).map(_.stripPrefix(prefix))

  override lazy val projectSettings = Seq(
    tagPrefix := "quine/",
    version := gitReader.value.withGit(git =>
      // Try "v" as a fallback option to support just "v" as the tag prefix in the OSS repo
      tagWithPrefix(git, tagPrefix.value) orElse tagWithPrefix(git, "v") getOrElse "UNKNOWN",
    ),
  )
}
================================================
FILE: project/Packaging.scala
================================================
import sbtassembly.{Assembly, AssemblyPlugin, CustomMergeStrategy, MergeStrategy, PathList}
import sbtassembly.AssemblyKeys.{assembly, assemblyMergeStrategy}
import sbt._
import sbt.Keys.packageOptions

/* Plugin for building a fat JAR */
object Packaging extends AutoPlugin {

  override def requires = AssemblyPlugin

  // Assembly merge strategy: concatenate every conflicting copy of a file, placing the copies
  // that come from library JARs first and the copies that come from this build's projects last.
  private val appendProjectsLast: MergeStrategy = CustomMergeStrategy("appendProjectsLast") { conflicts =>
    val (projects, libraries) = conflicts.partition(_.isProjectDependency)
    // Make sure our reference.confs are appended _after_ reference.confs in libraries
    MergeStrategy.concat(libraries ++ projects)
  }

  /* This decides how to aggregate files from different JARs into one JAR.
   *
   *   - resolves conflicts between duplicate files in different JARs
   *   - allows for removing entirely unnecessary resources from output JAR
   *
   * The cases are tried top to bottom and the first match wins, so keep specific paths above
   * more general patterns. Any path not listed here falls through to sbt-assembly's
   * `MergeStrategy.defaultMergeStrategy`.
   */
  val customMergeStrategy: String => MergeStrategy = {
    case x if Assembly.isConfigFile(x) => appendProjectsLast
    case "version.conf" => MergeStrategy.concat
    case PathList("META-INF", "LICENSES.txt") | "AUTHORS" => MergeStrategy.concat
    case PathList("META-INF", "io.netty.versions.properties") => MergeStrategy.discard
    // Discard Kotlin Native metadata files that cause deduplication conflicts.
    // These "nativeMain/default/manifest" and similar files from okio and wire
    // libraries are only relevant for Kotlin Native targets, not JVM.
    case PathList("commonMain", "default", "manifest") => MergeStrategy.discard
    case PathList("nativeMain", "default", "manifest") => MergeStrategy.discard
    case PathList("commonMain", "default", "linkdata", "module") => MergeStrategy.discard
    case PathList("nativeMain", "default", "linkdata", "module") => MergeStrategy.discard
    case PathList("META-INF", "kotlin-project-structure-metadata.json") => MergeStrategy.discard
    case PathList("META-INF", "kotlinx-serialization-core.kotlin_module") => MergeStrategy.first
    case PathList("META-INF", "okio-fakefilesystem.kotlin_module") => MergeStrategy.first
    case PathList("META-INF", "okio.kotlin_module") => MergeStrategy.first
    case PathList("META-INF", "wire-runtime.kotlin_module") => MergeStrategy.first
    case PathList("META-INF", "wire-schema.kotlin_module") => MergeStrategy.first
    case PathList("META-INF", "versions", "9", "OSGI-INF", "MANIFEST.MF") => MergeStrategy.first // from bouncycastle
    case PathList("META-INF", "FastDoubleParser-NOTICE") =>
      MergeStrategy.first // from fasterxml jackson core (and its awssdk shadow)
    case PathList("META-INF", "native-image", "org.mongodb", "bson", "native-image.properties") =>
      MergeStrategy.discard
    case PathList("codegen-resources", _) => MergeStrategy.discard
    // Matches a `module-info.class` at any depth (only the last path segment is inspected)
    case PathList(ps @ _*) if ps.last == "module-info.class" => MergeStrategy.discard
    case PathList("META-INF", "native-image", "io.netty", "netty-common", "native-image.properties") =>
      MergeStrategy.discard
    case PathList("META-INF", "native-image", "io.netty", "codec-http", "native-image.properties") =>
      MergeStrategy.discard
    case "findbugsExclude.xml" => MergeStrategy.discard
    case "JS_DEPENDENCIES" => MergeStrategy.discard
    // See https://github.com/akka/akka/issues/29456
    // The first pattern only covers `google/protobuf/<name>.proto` (exactly three segments);
    // the four-segment `compiler/plugin.proto` path therefore needs its own case.
    case PathList("google", "protobuf", file) if file.split('.').last == "proto" => MergeStrategy.first
    case PathList("google", "protobuf", "compiler", "plugin.proto") => MergeStrategy.first
    case PathList("org", "apache", "avro", "reflect", _) => MergeStrategy.first
    case other => MergeStrategy.defaultMergeStrategy(other)
  }

  // Installs the merge strategy above for the `assembly` task and stamps the fat JAR's manifest.
  override lazy val projectSettings = Seq(
    assembly / assemblyMergeStrategy := customMergeStrategy,
    // GraalVM 25+ uses Multi-Release JARs (MRJAR). This manifest attribute must be preserved
    // in the assembled JAR for Truffle/GraalJS to initialize correctly.
    // See: https://www.graalvm.org/latest/reference-manual/embed-languages/#uber-jar-file-creation
    assembly / packageOptions += Package.ManifestAttributes("Multi-Release" -> "true"),
  )
}

================================================
FILE: project/QuineSettings.scala
================================================
import sbt._
import sbt.Keys._
import org.portablescala.sbtplatformdeps.PlatformDepsPlugin.autoImport._
import scalajsbundler.sbtplugin.ScalaJSBundlerPlugin.autoImport._

import scala.collection.compat.toOptionCompanionExtension
import scala.sys.process._
import scala.util.Try

/* Shared constants, test configurations and setting groups for the build's projects */
object QuineSettings {

  val scalaV = "2.13.18"

  val nodeLegacySslArg = "--openssl-legacy-provider"
  // See if node accepts this arg. Give it an expression to evaluate {} so it returns instead of entering the repl
  // Returns Seq(nodeLegacySslArg) only when that probe exits with code 0. The probe's output is
  // discarded, and an exception thrown while running it (caught by Try) yields an empty Seq.
  def nodeLegacySslIfAvailable: Seq[String] =
    if (Try(Seq("node", nodeLegacySslArg, "-e", "{}") ! ProcessLogger(_ => ())).toOption.contains(0))
      Seq(nodeLegacySslArg)
    else Seq()

  // ScalaTest tag names used to exclude/include tests via the -l / -n runner arguments below
  val integrationTestTag = "com.thatdot.quine.test.tags.IntegrationTest"
  val licenseRequiredTestTag = "com.thatdot.quine.test.tags.LicenseRequiredTest"

  // Extra configurations layered on Test; wired up by integrationSettings / licenseTestSettings
  lazy val Integration = config("integration").extend(Test)
  lazy val LicenseTest = config("licenseTest").extend(Test)

  /* Settings applied to projects across the build: organization metadata, compiler flags,
   * default test filtering and dependency hygiene.
   */
  val commonSettings: Seq[Setting[_]] = Seq(
    organization := "com.thatdot",
    organizationName := "thatDot Inc.",
    organizationHomepage := Some(url("https://www.thatdot.com")),
    autoAPIMappings := true,
    scalacOptions ++= Seq(
      "-language:postfixOps",
      "-encoding",
      "utf8",
      "-feature",
      "-unchecked",
      "-deprecation",
      "-release",
      "11",
      "-Xlint:_,-byname-implicit",
      "-Wdead-code",
      "-Wnumeric-widen",
      "-Wvalue-discard",
      "-Wunused:imports",
      "-Wunused:privates,locals,patvars",
      // -Werror is appended only when sbt reports it is running inside CI
    ) ++ Option.when(insideCI.value)("-Werror"),
    javacOptions ++= Seq("--release", "11"),
    // Circe is binary compatible between 0.13 and 0.14
    // Circe projects from other orgs sometimes pull in older versions of circe (0.13):
    // As of Mar 8 2023, ujson-circe
    // This prevents sbt from erroring with:
    // "found version conflict(s) in library dependencies; some are suspected to be binary incompatible"
    libraryDependencySchemes ++= Seq(
      "io.circe" %% "circe-core" % VersionScheme.Always,
      "io.circe" %% "circe-parser" % VersionScheme.Always,
    ),
    Test / testOptions ++= Seq(
      //Include a report at the end of a test run with details on any failed tests:
      //  use oG for full stack traces, oT for short ones
      Tests.Argument(TestFrameworks.ScalaTest, "-oT"),
      // "-l <tag>" excludes tests carrying that tag from the default Test configuration
      Tests.Argument(TestFrameworks.ScalaTest, "-l", integrationTestTag),
      Tests.Argument(TestFrameworks.ScalaTest, "-l", licenseRequiredTestTag),
    ),
    dependencyOverrides ++= Dependencies.jvmDependencyOverrides,
    excludeDependencies ++= Seq(
      // commons-logging is excluded here; jcl-over-slf4j is added to libraryDependencies below
      ExclusionRule("commons-logging", "commons-logging"),
      // Exclude old lz4-java; we use at.yawk.lz4:lz4-java instead (CVE-2025-66566, CVE-2025-12183)
      ExclusionRule("org.lz4", "lz4-java"),
    ),
    libraryDependencies ++= Seq(
      "org.slf4j" % "jcl-over-slf4j" % "2.0.17",
    ),
  )

  /* Settings for projects with integrationTests
   *
   * Drops the "-l" exclusion of the integration tag and adds "-n" for the same tag,
   * with parallel execution turned off for this configuration.
   */
  val integrationSettings: Seq[Setting[_]] = Seq(
    Integration / testOptions -= Tests.Argument(TestFrameworks.ScalaTest, "-l", integrationTestTag),
    Integration / testOptions += Tests.Argument(TestFrameworks.ScalaTest, "-n", integrationTestTag),
    Integration / parallelExecution := false,
  ) ++ inConfig(Integration)(Defaults.testTasks)

  /* Settings for projects with license-required tests
   *
   * Same shape as integrationSettings, for the license-required tag, and additionally
   * enables `fork` for this configuration.
   */
  val licenseTestSettings: Seq[Setting[_]] = Seq(
    LicenseTest / testOptions -= Tests.Argument(TestFrameworks.ScalaTest, "-l", licenseRequiredTestTag),
    LicenseTest / testOptions += Tests.Argument(TestFrameworks.ScalaTest, "-n", licenseRequiredTestTag),
    LicenseTest / parallelExecution := false,
    LicenseTest / fork := true,
  ) ++ inConfig(LicenseTest)(Defaults.testTasks)

  // Declared with KeyRanks.Invisible
  val startupMessage = settingKey[String]("If non-empty, print this message on startup")
    .withRank(KeyRanks.Invisible)

  /* Settings for projects using vis-network (CSP-compliant peer build)
   *
   * The peer build avoids dynamic code evaluation (eval), allowing stricter
   * Content Security Policy without 'unsafe-eval' in script-src.
   *
   * Every npm package version is taken from `Dependencies`.
   */
  val visNetworkSettings: Seq[Setting[_]] = Seq(
    Compile / npmDependencies ++= Seq(
      "vis-network" -> Dependencies.visNetworkV,
      "vis-data" -> Dependencies.visDataV,
      "vis-util" -> Dependencies.visUtilV,
      "@egjs/hammerjs" -> Dependencies.egjsHammerjsV,
      "component-emitter" -> Dependencies.componentEmitterV,
      "keycharm" -> Dependencies.keycharmV,
      "uuid" -> Dependencies.uuidV,
    ),
  )
}

================================================
FILE: project/ScalaFix.scala
================================================
import sbt._
import sbt.Keys.{semanticdbEnabled, semanticdbVersion}
import scalafix.sbt.ScalafixPlugin

// Extra scalafix configuration and dependencies
// Auto-triggered on every project that has ScalafixPlugin (requires + allRequirements).
object ScalaFix extends AutoPlugin {

  override def requires = ScalafixPlugin
  override def trigger = allRequirements

  import ScalafixPlugin.autoImport._

  override lazy val projectSettings = Seq(
    semanticdbEnabled := true, // enable SemanticDB
    semanticdbVersion := scalafixSemanticdb.revision, // use Scalafix compatible version
    // NOTE(review): this is a ThisBuild-scoped key appended from per-project settings — confirm that is intended
    ThisBuild / scalafixDependencies ++= Seq(
      "org.scala-lang" %% "scala-rewrites" % "0.1.5",
    ),
  )
}

================================================
FILE: project/build.properties
================================================
sbt.version=1.12.9

================================================
FILE: project/dependencySchemes.sbt
================================================
// scala-xml should be compatible across 1.x and 2.x. Dependencies of the meta-build itself require
// conflicting major versions. Tell SBT they are always compatible to prevent it from failing to compile
// (just running "sbt" in this project could fail).
ThisBuild / libraryDependencySchemes += "org.scala-lang.modules" %% "scala-xml" % VersionScheme.Always

================================================
FILE: project/plugins.sbt
================================================
// sbt plugins and meta-build library dependencies used by this build's definition.

// resolvers += "Typesafe repository" at "http://repo.typesafe.com/typesafe/releases/"

// Shared by sbt-scalajs-bundler and sbt-web-scalajs-bundler so both stay on the same version
val scalajsBundlerVersion = "0.21.1"

addDependencyTreePlugin
addSbtPlugin("org.scala-js" % "sbt-scalajs" % "1.21.0")
addSbtPlugin("ch.epfl.scala" % "sbt-scalajs-bundler" % scalajsBundlerVersion)
addSbtPlugin("ch.epfl.scala" % "sbt-web-scalajs-bundler" % scalajsBundlerVersion)
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.14.6")
addSbtPlugin("org.portable-scala" % "sbt-scalajs-crossproject" % "1.3.2")
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.3.1")
addSbtPlugin("se.marcuslonnberg" % "sbt-docker" % "1.11.0")
addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.13.1")
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.6.0")
addSbtPlugin("com.github.sbt" % "sbt-git" % "2.1.0")
addSbtPlugin("io.spray" % "sbt-revolver" % "0.10.0")
addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.4.8")
// Plain libraries (not plugins) added to the build definition's own dependencies.
// NOTE(review): presumably consumed by the sources under project/ (e.g. Ecr.scala) — confirm.
libraryDependencies += "software.amazon.awssdk" % "ecr" % "2.17.231"
libraryDependencies += "org.eclipse.jgit" % "org.eclipse.jgit" % "7.6.0.202603022253-r"
addSbtPlugin("com.github.sbt" % "sbt-boilerplate" % "0.8.0")
addSbtPlugin("com.github.sbt" %% "sbt-sbom" % "0.5.0")
addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.8.3")
// 👇 IMPORTANT: When updating this version, also update the NVD cache key in
//    .github/workflows/dependency-check.yml (search for "nvd-db-")
addSbtPlugin("net.nmoncho" % "sbt-dependency-check" % "1.9.0")
// ☝️ If updating sbt-dependency-check version, update NVD cache key, too!
================================================ FILE: quine/recipes/apache_log.yaml ================================================ version: 1 title: Apache Log Analytics contributor: https://github.com/joshcody summary: '' description: '' ingestStreams: - type: FileIngest path: $in_file format: type: CypherLine query: |- WITH text.regexFirstMatch($that, '(\\S+)\\s+\\S+\\s+(\\S+)\\s+\\[(.+)\\]\\s+\"(.*)\\s+(.*)\\s+(.*)\"\\s+([0-9]+)\\s+(\\S+)\\s+\"(.*)\"\\s+\"(.*)\"') AS r CREATE ({ sourceIp: r[1], user: r[2], time: datetime(r[3], 'dd/MMM/yyyy:HH:mm:ss Z'), verb: r[4], path: r[5], httpVersion: r[6], status: r[7], size: r[8], referrer: r[9], agent: r[10], type: 'log' }) standingQueries: - pattern: type: Cypher query: MATCH (l) WHERE l.type = 'log' RETURN DISTINCT id(l) AS id mode: DistinctId outputs: verb: type: CypherQuery query: |- MATCH (l) WHERE id(l) = $that.data.id MATCH (v) WHERE id(v) = idFrom('verb', l.verb) SET v.type = 'verb', v.verb = l.verb CREATE (l)-[:verb]->(v) nodeAppearances: [ ] quickQueries: [ ] sampleQueries: - name: Count HTTP GET Requests query: >- MATCH (l)-[rel:verb]->(v) WHERE l.type = 'log' AND v.type = 'verb' AND v.verb = 'GET' RETURN count(rel) AS get_count statusQuery: cypherQuery: >- MATCH (l)-[rel:verb]->(v) WHERE l.type = 'log' AND v.type = 'verb' AND v.verb = 'GET' RETURN count(rel) AS get_count ================================================ FILE: quine/recipes/apt-detection.yaml ================================================ title: APT Detection summary: Endpoint logs and network traffic data merge to auto-detect exfiltration contributor: https://github.com/rrwright version: 1 description: |- This APT (Advanced Persistent Threat) detection recipe ingests EDR (Endpoint Detection and Response) and network traffic logs, while monitoring for an IoB (Indicator of Behavior) that matches malicious data exfiltration patterns. 
SCENARIO: Using a standing query, the recipe monitors for covert interprocess communication using a file to pass data. When that pattern is matched, with a network SEND event, we have our smoking gun and a URL is logged linking to the Quine Exploration UI with the full activity and context for investigation. In this scenario, a malicious Excel macro collects personal data and stores it in a temporary file. The APT process "ntclean" infiltrated the system previously through an SSH exploit, and now reads from that temporary file and exfiltrates data from the network--hiding it as an HTTP GET request-- before deleting the temporary file to cover its tracks. The source of the SSH exploit that planted the APT and the destination for exfiltrated data utilize the same IP address. SAMPLE DATA: endpoint.json - https://recipes.quine.io/apt-detection/endpoint-json network.json - https://recipes.quine.io/apt-detection/network-json Download the sample data to the same directory where Quine will be run. RESULTS: When the standing query detects the WRITE->READ->SEND->DELETE pattern, it will output a link to the console that can be copied and pasted into a browser to explore the event in the Quine Exploration UI. 
ingestStreams: - type: FileIngest path: endpoint.json format: type: CypherJson query: >- MATCH (proc), (event), (object) WHERE id(proc) = idFrom($that.pid) AND id(event) = idFrom($that) AND id(object) = idFrom($that.object) SET proc.id = $that.pid, proc: Process, event.type = $that.event_type, event: EndpointEvent, event.time = $that.time, object.data = $that.object CREATE (proc)-[:EVENT]->(event)-[:EVENT]->(object) - type: FileIngest path: network.json format: type: CypherJson query: >- MATCH (src), (dst), (event) WHERE id(src) = idFrom($that.src_ip+":"+$that.src_port) AND id(dst) = idFrom($that.dst_ip+":"+$that.dst_port) AND id(event) = idFrom('network_event', $that) SET src.ip = $that.src_ip+":"+$that.src_port, src: IP, dst.ip = $that.dst_ip+":"+$that.dst_port, dst: IP, event.proto = $that.proto, event.time = $that.time, event.detail = $that.detail, event: NetTraffic CREATE (src)-[:NET_TRAFFIC]->(event)-[:NET_TRAFFIC]->(dst) standingQueries: - pattern: type: Cypher query: >- MATCH (e1)-[:EVENT]->(f)<-[:EVENT]-(e2), (f)<-[:EVENT]-(e3)<-[:EVENT]-(p2)-[:EVENT]->(e4) WHERE e1.type = "WRITE" AND e2.type = "READ" AND e3.type = "DELETE" AND e4.type = "SEND" RETURN DISTINCT id(f) as fileId outputs: stolen-data: type: CypherQuery query: >- MATCH (p1)-[:EVENT]->(e1)-[:EVENT]->(f)<-[:EVENT]-(e2)<-[:EVENT]-(p2), (f)<-[:EVENT]-(e3)<-[:EVENT]-(p2)-[:EVENT]->(e4)-[:EVENT]->(ip) WHERE id(f) = $that.data.fileId AND e1.type = "WRITE" AND e2.type = "READ" AND e3.type = "DELETE" AND e4.type = "SEND" AND e1.time < e2.time AND e2.time < e3.time AND e2.time < e4.time CREATE (e1)-[:NEXT]->(e2)-[:NEXT]->(e4)-[:NEXT]->(e3) WITH e1, e2, e3, e4, p1, p2, f, ip, "http://localhost:8080/#MATCH" + text.urlencode(" (e1),(e2),(e3),(e4),(p1),(p2),(f),(ip) WHERE id(p1)='"+strId(p1)+"' AND id(e1)='"+strId(e1)+"' AND id(f)='"+strId(f)+"' AND id(e2)='"+strId(e2)+"' AND id(p2)='"+strId(p2)+"' AND id(e3)='"+strId(e3)+"' AND id(e4)='"+strId(e4)+"' AND id(ip)='"+strId(ip)+"' RETURN e1, e2, e3, e4, p1, p2, 
f, ip") as URL RETURN URL andThen: type: PrintToStandardOut nodeAppearances: - predicate: propertyKeys: [] knownValues: {} dbLabel: Process icon: ion-load-a label: type: Property key: id prefix: "Process: " - predicate: propertyKeys: [] knownValues: {} dbLabel: IP icon: ion-ios-world label: type: Property key: ip prefix: "" - predicate: propertyKeys: [] knownValues: {} dbLabel: EndpointEvent icon: ion-android-checkmark-circle label: type: Property key: type prefix: "" - predicate: propertyKeys: [] knownValues: {} dbLabel: NetTraffic icon: ion-network label: type: Property key: proto prefix: "" - predicate: propertyKeys: [] knownValues: {} icon: ion-ios-copy label: type: Property key: data prefix: "" quickQueries: - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Adjacent Nodes querySuffix: MATCH (n)--(m) RETURN DISTINCT m queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Refresh querySuffix: RETURN n queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Local Properties querySuffix: RETURN id(n), properties(n) queryLanguage: Cypher sort: Text - predicate: propertyKeys: [] knownValues: {} dbLabel: Process quickQuery: name: Files Read querySuffix: MATCH (n)-[:EVENT]->(e)-[:EVENT]->(f) WHERE e.type = "READ" RETURN f queryLanguage: Cypher sort: Node edgeLabel: read - predicate: propertyKeys: [] knownValues: {} dbLabel: Process quickQuery: name: Files Written querySuffix: MATCH (n)-[:EVENT]->(e)-[:EVENT]->(f) WHERE e.type = "WRITE" RETURN f queryLanguage: Cypher sort: Node edgeLabel: wrote - predicate: propertyKeys: - data knownValues: {} quickQuery: name: Read By querySuffix: MATCH (n)<-[:EVENT]-(e)<-[:EVENT]-(p) WHERE e.type = "READ" RETURN p queryLanguage: Cypher sort: Node edgeLabel: read by - predicate: propertyKeys: - data knownValues: {} quickQuery: name: Written By querySuffix: MATCH (n)<-[:EVENT]-(e)<-[:EVENT]-(p) WHERE e.type = "WRITE" RETURN p
queryLanguage: Cypher sort: Node edgeLabel: written by - predicate: propertyKeys: [] knownValues: {} dbLabel: Process quickQuery: name: Received Data querySuffix: MATCH (n)-[:EVENT]->(e)-[:EVENT]->(i) WHERE e.type = "RECEIVE" RETURN i queryLanguage: Cypher sort: Node edgeLabel: received - predicate: propertyKeys: [] knownValues: {} dbLabel: Process quickQuery: name: Sent Data querySuffix: MATCH (n)-[:EVENT]->(e)-[:EVENT]->(i) WHERE e.type = "SEND" RETURN i queryLanguage: Cypher sort: Node edgeLabel: sent - predicate: propertyKeys: [] knownValues: {} dbLabel: Process quickQuery: name: Started By querySuffix: MATCH (n)<-[:EVENT]-(e)<-[:EVENT]-(p) WHERE e.type = "SPAWN" RETURN p queryLanguage: Cypher sort: Node edgeLabel: parent process - predicate: propertyKeys: [] knownValues: {} dbLabel: Process quickQuery: name: Started Other Process querySuffix: MATCH (n)-[:EVENT]->(e)-[:EVENT]->(p) WHERE e.type = "SPAWN" RETURN p queryLanguage: Cypher sort: Node edgeLabel: child process - predicate: propertyKeys: [] knownValues: {} dbLabel: IP quickQuery: name: Network Send querySuffix: MATCH (n)-[:NET_TRAFFIC]->(net) RETURN net queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} dbLabel: IP quickQuery: name: Network Receive querySuffix: MATCH (n)<-[:NET_TRAFFIC]-(net) RETURN net queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} dbLabel: IP quickQuery: name: Network Communication querySuffix: MATCH (n)-[:NET_TRAFFIC]-(net)-[:NET_TRAFFIC]-(ip) RETURN ip queryLanguage: Cypher sort: Node edgeLabel: Communication sampleQueries: [] ================================================ FILE: quine/recipes/books.yaml ================================================ version: 1 title: Book ratings demo contributor: https://github.com/stevenbenjamin summary: Demonstration of building a graph of reviewers, their reviews and products description: |- Raw review data is imported from an example data set. 
For each review we create nodes for the review, the reviewer (user) and the reviewed product. Input review fields: ------------------ reviewerID - ID of the reviewer, e.g. A2SUAM1J3GNN3B asin - ID of the product, e.g. 0000013714 reviewerName - name of the reviewer vote - helpful votes of the review style - a dictionary of the product metadata, e.g., "Format" is "Hardcover" reviewText - text of the review overall - rating of the product summary - summary of the review unixReviewTime - time of the review (unix time) reviewTime - time of the review (raw) image - images that users post after they have received the product Example data can be found at ------------------- http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/Books_5.json.gz ingestStreams: - type: FileIngest path: $in_file format: type: CypherJson query: |- MATCH (review), (product), (user) WHERE id(review) = idFrom("Review", $that.reviewerID, $that.asin, $that.reviewTime) AND id(product) = idFrom("Product", $that.asin) AND id(user) = idFrom("User", $that.reviewerID) SET review = $that, review:Review, product.id = $that.asin, product:Product, user.name = $that.reviewerName, user:User, review.ok = $that.overall > 4 WITH review, product, user CALL incrementCounter(user, "reviewsPosted") YIELD count AS reviewsPosted CALL incrementCounter(product, "reviews") YIELD count AS reviews CALL incrementCounter(product, "total_score", $that.overall) YIELD count AS total_score CREATE (product)<-[:REVIEWED]-(review)<-[:POSTED]-(user) WITH $that.asin AS asin MATCH (p) WHERE id(p) = idFrom("Product", asin) SET p.avg = tofloat(p.total_score) / tofloat(p.reviews) standingQueries: - pattern: type: Cypher query: |- MATCH (r:Review) RETURN DISTINCT id(r) AS id outputs: trending-products: type: CypherQuery query: |- MATCH (p:Product)<-[:REVIEWED]-(r:Review) WHERE strId(r) = $that.data.id MATCH (p2:Product)<-[:REVIEWED]-(r2:Review) WHERE id(p2) = id(p) WITH p2, count(r2) as ct, abs(r2.unixReviewTime - r.unixReviewTime) as 
diff, r.unixReviewTime as timestamp, r.reviewTime as date WHERE ct > 5 AND diff <= 86400000 RETURN p2.id as product_id, ct, timestamp, date andThen: type: PrintToStandardOut sampleQueries: - name: Most Active User query: |- MATCH (u:User) WITH u ORDER BY u.reviewsPosted DESC LIMIT 1 MATCH (u)--(m) RETURN u, m queryLanguage: Cypher sort: Node - name: Most Reviewed Book query: MATCH (p:Product) WITH p ORDER BY p.reviews DESC LIMIT 1 MATCH (p)--(r) RETURN p, r queryLanguage: Cypher sort: Node - name: Good Reviews query: MATCH (r:Review) WHERE r.ok RETURN r sort: Node - name: Highest Rated Book query: MATCH (p:Product) RETURN p ORDER BY p.avg DESC LIMIT 1 sort: Node - name: Lowest Rated Book query: MATCH (p:Product) RETURN p ORDER BY p.avg ASC LIMIT 1 sort: Node quickQueries: [] nodeAppearances: - predicate: propertyKeys: [] knownValues: {} dbLabel: User label: type: Property key: name color: "#490e55" icon: ion-person - predicate: propertyKeys: [] knownValues: { ok: false } dbLabel: Review color: "#c70039" size: 40.00 icon: ion-thumbsdown label: type: Property key: overall prefix: "" - predicate: propertyKeys: [] knownValues: { ok: true } dbLabel: Review color: "#32a139" size: 40.00 icon: ion-thumbsup label: type: Property key: overall prefix: "" - predicate: propertyKeys: [] knownValues: {} dbLabel: Product color: "#66490c" icon: ion-document size: 40.00 label: type: Property key: id prefix: "product: " ================================================ FILE: quine/recipes/cdn.yaml ================================================ version: 1 title: CDN Cache Efficiency By Segment contributor: https://www.linkedin.com/in/alokaggarwal2 summary: Real-time computation of CDN cache node efficiency from pseudonymized Fastly CDN logs, with graph association of each log entry to serving PoP, cache server, client, client ASN, asset and origin to identify potential root cause of issues. 
description: Raw CDN Log data is imported from a .json file via a file ingest, and a node is manifested for the elements of each line. Each of the manifested nodes increments a counter to track the number of cache hits and misses and calculates hit/miss ratios as data is ingested. Selecting any node allows you to query for the associated ASNs and CDN cache servers to identify potential root cause of poor performance. Thresholds are set to create qualitative 'state' properties on each node indicating the health of the component as 'good,' 'warn,' or 'alarm.' Node appearance properties are set to add icons and colors to represent the type of node and its state, respectively, in the exploration UI. Lastly, a standing query is defined to match consecutive cache misses within a configurable fixed period of time for the purpose of alerting. ------------------------------------------------------------------------------ Note 1 Sample data file for this recipe is in the file 'cdn_data_50k.json' which can be accessed at https://that.re/cdn-data Note 2 This recipe includes numerical thresholds for the hit/miss ratios in each node creation ingest query. Change the thresholds as needed to provide the right color indicators for your data!
ingestStreams: - type: FileIngest path: $in_file format: type: CypherJson query: |- //////////////////////////////////////////////////////// // Manifest nodes from each log entry //////////////////////////////////////////////////////// // Quickly match nodes with specific IDs using `idFrom(...)` for the purpose of defining // deterministic derived IDs for referencing nodes in future queries // A more detailed description is provided in this blog post: // https://www.thatdot.com/blog/kafka-data-deduping-made-easy-using-quines-idfrom-function MATCH (event), (client), (asset), (asn), (server), (pop), (origin), (clientGeo) WHERE $that.cache_status IS NOT NULL AND id(event) = idFrom('event', $that.timestamp, $that.request_id) AND id(client) = idFrom('client', $that.client_ip, $that.business_unit) AND id(asset) = idFrom('asset', $that.path) AND id(asn) = idFrom('asn', toString($that.client_asn)) AND id(server) = idFrom('server', $that.pop, $that.server_id) AND id(pop) = idFrom('pop', $that.pop) AND id(origin) = idFrom('origin', $that.backend_ip) AND id(clientGeo) = idFrom('clientGeo', $that.client_geo_country) //////////////////////////////////////// //Bucketing for HITs and MISSes counters //////////////////////////////////////// // RegEx deets here: https://regex101.com/r/uP0KMm/1 WITH *, text.regexFirstMatch($that.cache_status, '(HIT|MISS(?!.*HIT)).*') AS hmp WHERE hmp[1] IS NOT NULL //////////////////////////////////////// // Bucketing for node type counters //////////////////////////////////////// CALL incrementCounter(client, "count",1) YIELD count AS clientCount CALL incrementCounter(client, toLower(hmp[1]),1) YIELD count AS clientHitMissCount CALL incrementCounter(asset, "count",1) YIELD count AS assetCount CALL incrementCounter(asset, toLower(hmp[1]),1) YIELD count AS assetHitMissCount CALL incrementCounter(asn, "count",1) YIELD count AS asnCount CALL incrementCounter(asn, toLower(hmp[1]),1) YIELD count AS asnHitMissCount CALL incrementCounter(server, "count",1) 
YIELD count AS serverCount CALL incrementCounter(server, toLower(hmp[1]),1) YIELD count AS serverHitMissCount CALL incrementCounter(pop, "count",1) YIELD count AS popCount CALL incrementCounter(pop, toLower(hmp[1]),1) YIELD count AS popHitMissCount CALL incrementCounter(clientGeo, "count",1) YIELD count AS clientGeoCount CALL incrementCounter(clientGeo, toLower(hmp[1]),1) YIELD count AS clientGeoHitMissCount CALL incrementCounter(origin, "count",1) YIELD count AS originGeoCount CALL incrementCounter(origin, toLower(hmp[1]),1) YIELD count AS originGeoHitMissCount //////////////////////////////////////////////////////// // Event //////////////////////////////////////////////////////// SET event = $that, event.cache_class = hmp[1], event: event //////////////////////////////////////////////////////// // Origin //////////////////////////////////////////////////////// SET origin.backend_ip = $that.backend_ip, origin: origin, origin.MISS_Percent = coalesce((tofloat(origin.miss))/(tofloat(origin.count))*100.0, 0.0), origin.HIT_Percent = coalesce((tofloat(origin.hit))/(tofloat(origin.count))*100.0, 0.0), origin.state = CASE // Set threshold ratios below for each of three cases WHEN origin.HIT_Percent >= 80 THEN 'good' WHEN origin.HIT_Percent >= 25 AND origin.HIT_Percent < 80 THEN 'warn' WHEN origin.HIT_Percent < 25 THEN 'alarm' ELSE 'alarm' END //////////////////////////////////////////////////////// // Client //////////////////////////////////////////////////////// SET client.client_geo_country = $that.client_geo_country, client.client_ip = $that.client_ip, client.user_agent = $that.user_agent, client: client, client.MISS_Percent = coalesce((tofloat(client.miss))/(tofloat(client.count))*100.0, 0.0), client.HIT_Percent = coalesce((tofloat(client.hit))/(tofloat(client.count))*100.0, 0.0), client.state = CASE // Set threshold ratios below for each of three cases WHEN client.HIT_Percent >= 80 THEN 'good' WHEN client.HIT_Percent >= 25 AND client.HIT_Percent < 80 THEN 'warn' 
WHEN client.HIT_Percent < 25 THEN 'alarm' ELSE 'alarm' END // Extract Browser and Version // RegEx here: https://regex101.com/r/T0MThZ/2 WITH *, text.regexFirstMatch($that.user_agent, '\\((.*?)\\)(\\s|$)|(.*?)\\/(.*?)(\\s|$)') AS cb SET client.browser = cb[3], client.browserVer = cb[4], client.first_seen = coll.min([$that.timestamp, coalesce(client.first_seen, $that.timestamp)]), client.last_seen = coll.max([$that.timestamp, coalesce(client.last_seen, $that.timestamp)]) //////////////////////////////////////////////////////// // Client Geo //////////////////////////////////////////////////////// SET clientGeo.client_geo_country = $that.client_geo_country, clientGeo: clientGeo, clientGeo.MISS_Percent = coalesce((tofloat(clientGeo.miss))/(tofloat(clientGeo.count))*100.0, 0.0), clientGeo.HIT_Percent = coalesce((tofloat(clientGeo.hit))/(tofloat(clientGeo.count))*100.0, 0.0), clientGeo.state = CASE // Set threshold ratios below for each of three cases WHEN clientGeo.HIT_Percent >= 80 THEN 'good' WHEN clientGeo.HIT_Percent >= 25 AND clientGeo.HIT_Percent < 80 THEN 'warn' WHEN clientGeo.HIT_Percent < 25 THEN 'alarm' ELSE 'alarm' END //////////////////////////////////////////////////////// // Asset //////////////////////////////////////////////////////// // RegEx here: https://regex101.com/r/tB8cd4/1 WITH *, text.regexFirstMatch($that.path, '^(.+\\/)([^\\/]+)$') AS ap SET asset.path = ap[1], asset.name = ap[2], asset.full_path = $that.path, asset.if_modified_since = coll.max([$that.timestamp, coalesce(asset.if_modified_since, $that.timestamp)]), asset: asset, asset.MISS_Percent = coalesce((tofloat(asset.miss))/(tofloat(asset.count))*100.0, 0.0), asset.HIT_Percent = coalesce((tofloat(asset.hit))/(tofloat(asset.count))*100.0, 0.0), asset.state = CASE // Set threshold ratios below for each of three cases WHEN asset.HIT_Percent >= 80 THEN 'good' WHEN asset.HIT_Percent >= 25 AND asset.HIT_Percent < 80 THEN 'warn' WHEN asset.HIT_Percent < 25 THEN 'alarm' ELSE 'alarm' END 
//////////////////////////////////////////////////////// // ASN //////////////////////////////////////////////////////// SET asn.asn_id = toString($that.client_asn), asn: asn, asn.MISS_Percent = coalesce((tofloat(asn.miss))/(tofloat(asn.count))*100.0, 0.0), asn.HIT_Percent = coalesce((tofloat(asn.hit))/(tofloat(asn.count))*100.0, 0.0), asn.state = CASE // Set threshold ratios below for each of three cases WHEN asn.HIT_Percent >= 80 THEN 'good' WHEN asn.HIT_Percent >= 25 AND asn.HIT_Percent < 80 THEN 'warn' WHEN asn.HIT_Percent < 25 THEN 'alarm' ELSE 'alarm' END //////////////////////////////////////////////////////// // Server //////////////////////////////////////////////////////// SET server.server_id = $that.server_id, server.server_ip = $that.server_ip, server.cache_shield = $that.cache_shield, server.environment = $that.environment, server.host = $that.host, server.role = $that.role, server.pop = $that.pop, server: server, server.MISS_Percent = coalesce((tofloat(server.miss))/(tofloat(server.count))*100.0, 0.0), server.HIT_Percent = coalesce((tofloat(server.hit))/(tofloat(server.count))*100.0, 0.0), server.state = CASE // Set threshold ratios below for each of three cases WHEN server.HIT_Percent >= 80 THEN 'good' WHEN server.HIT_Percent >= 25 AND server.HIT_Percent < 80 THEN 'warn' WHEN server.HIT_Percent < 25 THEN 'alarm' ELSE 'alarm' END //////////////////////////////////////////////////////// // PoP //////////////////////////////////////////////////////// SET pop.source = $that.pop, pop.environment = $that.environment, pop: pop, pop.MISS_Percent = coalesce((tofloat(pop.miss))/(tofloat(pop.count))*100.0, 0.0), pop.HIT_Percent = coalesce((tofloat(pop.hit))/(tofloat(pop.count))*100.0, 0.0), pop.state = CASE // Set threshold ratios for each of three cases WHEN pop.HIT_Percent >= 80 THEN 'good' WHEN pop.HIT_Percent >= 25 AND pop.HIT_Percent < 80 THEN 'warn' WHEN pop.HIT_Percent < 25 THEN 'alarm' ELSE 'alarm' END 
//////////////////////////////////////////////////////// // Create relationship between nodes //////////////////////////////////////////////////////// CREATE (asset)<-[:REQUESTED]-(event)-[:REQUESTED_OVER]->(asn)-[:IN_CLIENT_GEO]->(clientGeo), (origin)<-[:FROM]-(pop)<-[:WITHIN]-(server)<-[:TARGETED]-(event)<-[:ORIGINATED]-(client) standingQueries: - pattern: type: Cypher query: |- /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Subquery to look for 10 consecutive cache MISS events involving the same server and asset pair within a defined duration /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Look for consecutive cache MISS events involving the same server and asset pair MATCH (server1:server)<-[:TARGETED]-(event1 {cache_class:"MISS"})-[:REQUESTED]->(asset)<-[:REQUESTED]-(event2 {cache_class:"MISS"})-[:TARGETED]->(server2:server) RETURN DISTINCT id(event1) AS event1 outputs: cacheMissAlert: type: CypherQuery query: |- // Add constraints to the cache MISS events match involving the same server and asset pair. 
MATCH (server1:server)<-[:TARGETED]-(event1 {cache_class:"MISS"})-[:REQUESTED]->(asset)<-[:REQUESTED]-(event2 {cache_class:"MISS"})-[:TARGETED]->(server2:server) WHERE id(event1) = $that.data.event1 // Time between consecutive cache MISSes between 5-45 minutes expressed in ISO 8601 duration format (https://en.wikipedia.org/wiki/ISO_8601#Durations) // Feel free to alter the range to meet your requirements AND duration("PT45M") > duration.between(localdatetime(event1.timestamp, "yyyy-MM-dd HH:mm:ss.SSSSSS"), localdatetime(event2.timestamp, "yyyy-MM-dd HH:mm:ss.SSSSSS")) > duration("PT5M") AND event1.client_asn = event2.client_asn AND id(server1) = id(server2) AND id(event1) <> id(event2) //////////////////////////////////////////////////////// // missEvents //////////////////////////////////////////////////////// // Manifest missEvents node to track metadata relative to consecutive cache MISSes that match the previous constraints MATCH (missEvents) WHERE id(missEvents) = idFrom('missEvents', server1.server_id, asset.full_path) SET missEvents.asset = event1.path, missEvents.server = event1.server_id, missEvents.pop = event1.pop, missEvents.firstMiss = coll.min([event1.timestamp, coalesce(missEvents.firstMiss, event1.timestamp)]), missEvents.latestMiss = coll.max([event1.timestamp, coalesce(missEvents.latestMiss, event1.timestamp)]), missEvents: missEvents // Create subgraph from consecutive cache MISS events to provide a visualization in the Quine Exploration UI CREATE (asset)-[:HAD]->(missEvents)-[:FROM]->(server1)<-[:TARGETED]-(event1), (server1)<-[:TARGETED]-(event2) // Increment the missEvents counter for the purpose of triggering an alert at a specified threshold WITH missEvents CALL incrementCounter(missEvents, "cumulativeCount", 1) YIELD count AS cumulativeCount // Trigger alert (RETURN clause) that prints URL to local running Quine instance MATCH (missEvents) // Threshold at which to emit alert // Feel free to alter it to meet your requirements WHERE 
missEvents.cumulativeCount = 10 RETURN 'http://localhost:8080/#' + text.urlencode('MATCH(missEvents:missEvents) WHERE id(missEvents)="' + toString(strId(missEvents)) + '" MATCH (event {cache_class:"MISS"})-[:TARGETED]->(server)<-[:FROM]-(missEvents)<-[:HAD]-(asset)<-[:REQUESTED]-(event {cache_class:"MISS"}) RETURN DISTINCT missEvents, event, server, asset LIMIT 10') AS Alert andThen: type: PrintToStandardOut nodeAppearances: # ASN Icon/color ********************* - predicate: propertyKeys: - state knownValues: state: "good" dbLabel: asn icon: radio-waves color: "#32a852" size: 40.00 label: type: Property key: asn_id prefix: "asn: " - predicate: propertyKeys: - state knownValues: state: "warn" dbLabel: asn icon: radio-waves color: "#d68400" size: 40.00 label: type: Property key: asn_id prefix: "asn: " - predicate: propertyKeys: - state knownValues: state: "alarm" dbLabel: asn icon: radio-waves color: "#cf151e" size: 40.00 label: type: Property key: asn_id prefix: "asn: " # Asset Icon/color ********************* - predicate: propertyKeys: - state knownValues: state: "good" dbLabel: asset icon: ion-android-film color: "#32a852" size: 40.00 label: type: Property key: name prefix: "asset: " - predicate: propertyKeys: - state knownValues: state: "warn" dbLabel: asset icon: ion-android-film color: "#d68400" size: 40.00 label: type: Property key: name prefix: "asset: " - predicate: propertyKeys: - state knownValues: state: "alarm" dbLabel: asset icon: ion-android-film color: "#cf151e" size: 40.00 label: type: Property key: name prefix: "asset: " # Client Icon/color ********************* - predicate: propertyKeys: - state knownValues: state: "good" dbLabel: client icon: ion-ios-contact-outline color: "#32a852" size: 30.00 label: type: Property key: client_ip prefix: "client: " - predicate: propertyKeys: - state knownValues: state: "warn" dbLabel: client icon: ion-ios-contact-outline color: "#d68400" size: 30.00 label: type: Property key: client_ip prefix: "client: " - 
predicate: propertyKeys: - state knownValues: state: "alarm" dbLabel: client icon: ion-ios-contact-outline color: "#cf151e" size: 30.00 label: type: Property key: client_ip prefix: "client: " # Date/Time Icon/color ********************* - predicate: propertyKeys: - period knownValues: period: "year" dbLabel: icon: ion-android-calendar color: size: 30 - predicate: propertyKeys: - period knownValues: period: "month" dbLabel: icon: ion-android-calendar color: size: 25 - predicate: propertyKeys: - period knownValues: period: "day" dbLabel: icon: ion-android-calendar color: size: 20 - predicate: propertyKeys: - period knownValues: period: "hour" dbLabel: icon: ion-clock color: size: 30 - predicate: propertyKeys: - period knownValues: period: "minute" dbLabel: icon: ion-clock color: size: 25 - predicate: propertyKeys: - period knownValues: period: "second" dbLabel: icon: ion-clock color: size: 20 # Event Icon/color ********************* - predicate: propertyKeys: - cache_class knownValues: { cache_class: "HIT" } dbLabel: event icon: checkmark-circled color: "#32a852" size: 30.00 label: type: Property key: timestamp prefix: "event: " - predicate: propertyKeys: - cache_class knownValues: { cache_class: "MISS" } dbLabel: event icon: close-circled color: "#cf151e" size: 30.00 label: type: Property key: timestamp prefix: "event: " # Pop Icon/color ******************* - predicate: propertyKeys: - state knownValues: state: "good" dbLabel: pop icon: arrow-shrink color: "#32a852" size: 40.00 label: type: Property key: source prefix: "PoP: " - predicate: propertyKeys: - state knownValues: state: "warn" dbLabel: pop icon: arrow-shrink color: "#d68400" size: 40.00 label: type: Property key: source prefix: "PoP: " - predicate: propertyKeys: - state knownValues: state: "alarm" dbLabel: pop icon: arrow-shrink color: "#cf151e" size: 40.00 label: type: Property key: source prefix: "PoP: " # missEvent Icon/color ********************* - predicate: propertyKeys: [] knownValues: {} dbLabel: 
missEvents icon: ion-ios-bolt color: "#cf151e" size: 50.00 label: type: Property key: latestMiss prefix: "Miss Events: " # Server Icon/color ********************* - predicate: propertyKeys: - state knownValues: state: "good" dbLabel: server icon: navicon-round color: "#32a852" size: 40.00 label: type: Property key: server_id prefix: - predicate: propertyKeys: - state knownValues: state: "warn" dbLabel: server icon: navicon-round color: "#d68400" size: 40.00 label: type: Property key: server_id prefix: - predicate: propertyKeys: - state knownValues: state: "alarm" dbLabel: server icon: navicon-round color: "#cf151e" size: 40.00 label: type: Property key: server_id prefix: # Client/Geo Icon/color ********************* - predicate: propertyKeys: - state knownValues: state: "good" dbLabel: clientGeo icon: ion-android-globe color: "#32a852" size: 40.00 label: type: Property key: client_geo_country prefix: "Country: " - predicate: propertyKeys: - state knownValues: state: "warn" dbLabel: clientGeo icon: ion-android-globe color: "#d68400" size: 40.00 label: type: Property key: client_geo_country prefix: "Country: " - predicate: propertyKeys: - state knownValues: state: "alarm" dbLabel: clientGeo icon: ion-android-globe color: "#cf151e" size: 40.00 label: type: Property key: client_geo_country prefix: "Country: " # Origin Icon/color ********************* - predicate: propertyKeys: - state knownValues: state: "good" dbLabel: origin icon: ion-ios-home color: "#32a852" size: 40.00 label: type: Property key: backend_ip prefix: "Origin: " - predicate: propertyKeys: - state knownValues: state: "warn" dbLabel: origin icon: ion-ios-home color: "#d68400" size: 40.00 label: type: Property key: backend_ip prefix: "Origin: " - predicate: propertyKeys: - state knownValues: state: "alarm" dbLabel: origin icon: ion-ios-home-outline color: "#cf151e" size: 40.00 label: type: Property key: backend_ip prefix: "Origin: " quickQueries: - predicate: propertyKeys: [] knownValues: {} quickQuery: 
name: Adjacent Nodes querySuffix: MATCH (n)--(m) RETURN DISTINCT m queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Refresh querySuffix: RETURN n queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Local Properties querySuffix: RETURN id(n), properties(n) queryLanguage: Cypher sort: Text - predicate: propertyKeys: [] knownValues: {} dbLabel: server quickQuery: name: Server PoP querySuffix: MATCH (n:server)-[:WITHIN]->(m:pop) RETURN DISTINCT m queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} dbLabel: asn quickQuery: name: Client Geo querySuffix: MATCH (n:asn)-[:IN_CLIENT_GEO]->(m:clientGeo) RETURN DISTINCT m queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} dbLabel: server quickQuery: name: Cache Hit/Miss Percentage querySuffix: MATCH (m:event)-[r:TARGETED]->(n:server) RETURN DISTINCT n.server_id AS CACHE, n.state AS State, coalesce(n.miss, 0) AS MISSES, coalesce(n.hit, 0) AS HITS, coalesce(tofloat(coalesce(n.hit, 0.0))/tofloat(coalesce(n.count, 0.0))*100.0, 0.0) AS HIT_Percentage, coalesce(tofloat(coalesce(n.miss, 0.0))/tofloat(coalesce(n.count, 0.0))*100.0, 0.0) AS MISS_Percentage queryLanguage: Cypher sort: Text - predicate: propertyKeys: [] knownValues: {} dbLabel: client quickQuery: name: Client Hit/Miss Percentage querySuffix: MATCH (n:client) RETURN DISTINCT n.client_id AS CLIENT, n.state AS State, coalesce(n.miss, 0) AS MISSES, coalesce(n.hit, 0) AS HITS, coalesce(tofloat(coalesce(n.hit, 0.0))/tofloat(coalesce(n.count, 0.0))*100.0, 0.0) AS HIT_Percentage, coalesce(tofloat(coalesce(n.miss, 0.0))/tofloat(coalesce(n.count, 0.0))*100.0, 0.0) AS MISS_Percentage queryLanguage: Cypher sort: Text - predicate: propertyKeys: [] knownValues: {} dbLabel: origin quickQuery: name: Origin Hit/Miss Percentage querySuffix: MATCH (n:origin) RETURN DISTINCT n.backend_ip AS ORIGIN, n.state AS State, coalesce(n.miss, 0) AS 
MISSES, coalesce(n.hit, 0) AS HITS, coalesce(tofloat(coalesce(n.hit, 0.0))/tofloat(coalesce(n.count, 0.0))*100.0, 0.0) AS HIT_Percentage, coalesce(tofloat(coalesce(n.miss, 0.0))/tofloat(coalesce(n.count, 0.0))*100.0, 0.0) AS MISS_Percentage queryLanguage: Cypher sort: Text - predicate: propertyKeys: [] knownValues: {} dbLabel: pop quickQuery: name: PoP Hit/Miss Percentage querySuffix: MATCH (m:event)-[r:TARGETED]->(p:server)-[s:WITHIN]->(n:pop) RETURN DISTINCT n.source AS POP, n.state AS State, n.count AS COUNT, coalesce(n.miss, 0) AS MISSES, coalesce(n.hit, 0) AS HITS, coalesce(tofloat(coalesce(n.hit, 0.0))/tofloat(coalesce(n.count, 0.0))*100.0, 0.0) AS HIT_Percentage, coalesce(tofloat(coalesce(n.miss, 0.0))/tofloat(coalesce(n.count, 0.0))*100.0, 0.0) AS MISS_Percentage queryLanguage: Cypher sort: Text - predicate: propertyKeys: [] knownValues: {} dbLabel: pop quickQuery: name: PoP Origins querySuffix: MATCH (n)-[:FROM]->(origin) RETURN DISTINCT origin queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} dbLabel: asset quickQuery: name: Asset Hit/Miss Percentage querySuffix: MATCH (p:pop)<-[:WITHIN]-(o:server)<-[:TARGETED]-(m:event)-[r:REQUESTED]->(n:asset) RETURN DISTINCT n.name AS ASSET, n.state AS State, coalesce(n.miss, 0) AS MISSES, coalesce(n.hit, 0) AS HITS, coalesce(tofloat(coalesce(n.hit, 0.0))/tofloat(coalesce(n.count, 0.0))*100.0, 0.0) AS HIT_Percentage, coalesce(tofloat(coalesce(n.miss, 0.0))/tofloat(coalesce(n.count, 0.0))*100.0, 0.0) AS MISS_Percentage queryLanguage: Cypher sort: Text - predicate: propertyKeys: [] knownValues: {} dbLabel: asn quickQuery: name: ASN Hit/Miss Percentage querySuffix: MATCH (m:event)-[r:REQUESTED_OVER]->(n:asn) RETURN DISTINCT n.asn_id AS ASN, n.state AS State, coalesce(n.miss, 0) AS MISSES, coalesce(n.hit, 0) AS HITS, coalesce(tofloat(coalesce(n.hit, 0.0))/tofloat(coalesce(n.count, 0.0))*100.0, 0.0) AS HIT_Percentage, coalesce(tofloat(coalesce(n.miss, 0.0))/tofloat(coalesce(n.count, 0.0))*100.0, 
0.0) AS MISS_Percentage queryLanguage: Cypher sort: Text - predicate: propertyKeys: [] knownValues: {} dbLabel: clientGeo quickQuery: name: clientGeo Hit/Miss Percentage querySuffix: MATCH (m:asn)-[r:IN_CLIENT_GEO]->(n:clientGeo) RETURN DISTINCT n.client_geo_country AS Geo, n.state AS State, coalesce(n.miss, 0) AS MISSES, coalesce(n.hit, 0) AS HITS, coalesce(tofloat(coalesce(n.hit, 0.0))/tofloat(coalesce(n.count, 0.0))*100.0, 0.0) AS HIT_Percentage, coalesce(tofloat(coalesce(n.miss, 0.0))/tofloat(coalesce(n.count, 0.0))*100.0, 0.0) AS MISS_Percentage queryLanguage: Cypher sort: Text - predicate: propertyKeys: [] knownValues: {} dbLabel: missEvents quickQuery: name: Reset Counter querySuffix: DETACH DELETE n queryLanguage: Cypher sort: Text - predicate: propertyKeys: [] knownValues: {} dbLabel: client quickQuery: name: Create Timeline of Events querySuffix: > MATCH (n)-[:ORIGINATED]->(event) WITH event ORDER BY event.timestamp ASC WITH collect(event) as events FOREACH (i in range(0, size(events) - 2) | FOREACH (node1 in [events[i]] | FOREACH (node2 in [events[i+1]] | CREATE (node1)-[:NEXT]->(node2)))) queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} dbLabel: client quickQuery: name: Show Timeline of Events querySuffix: MATCH (n)-[:ORIGINATED]->(event1:event)-[:NEXT*0..]->(event2:event) RETURN event1,event2 queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} dbLabel: event quickQuery: name: Show Client querySuffix: MATCH (n)<-[:ORIGINATED]-(client) RETURN DISTINCT client queryLanguage: Cypher sort: Node - predicate: propertyKeys: - period knownValues: {} dbLabel: quickQuery: name: Period Hit/Miss Percentage querySuffix: MATCH (n) RETURN DISTINCT n.start AS Time, coalesce(n.miss, 0) AS MISSES, coalesce(n.hit, 0) AS HITS, coalesce(tofloat(coalesce(n.hit, 0.0))/tofloat(coalesce(n.count, 0.0))*100.0, 0.0) AS HIT_Percentage, coalesce(tofloat(coalesce(n.miss, 0.0))/tofloat(coalesce(n.count, 0.0))*100.0, 0.0) AS 
MISS_Percentage queryLanguage: Cypher sort: Text - predicate: propertyKeys: - period knownValues: period: "second" dbLabel: quickQuery: name: Time Linked List querySuffix: MATCH (n)<-[:second]-(m)<-[:minute]-(l)<-[:hour]-(k)<-[:day]-(j)<-[:month]-(i) RETURN distinct i,j,k,l,m queryLanguage: Cypher sort: Node - predicate: propertyKeys: - period knownValues: period: "second" dbLabel: quickQuery: name: Previous TimeNode querySuffix: MATCH (n)<-[:second]-(m) RETURN distinct m queryLanguage: Cypher sort: Node - predicate: propertyKeys: - period knownValues: period: "minute" dbLabel: quickQuery: name: Previous TimeNode querySuffix: MATCH (n)<-[:minute]-(m) RETURN distinct m queryLanguage: Cypher sort: Node - predicate: propertyKeys: - period knownValues: period: "hour" dbLabel: quickQuery: name: Previous TimeNode querySuffix: MATCH (n)<-[:hour]-(m) RETURN distinct m queryLanguage: Cypher sort: Node - predicate: propertyKeys: - period knownValues: period: "day" dbLabel: quickQuery: name: Previous TimeNode querySuffix: MATCH (n)<-[:day]-(m) RETURN distinct m queryLanguage: Cypher sort: Node - predicate: propertyKeys: - period knownValues: period: "month" dbLabel: quickQuery: name: Previous TimeNode querySuffix: MATCH (n)<-[:month]-(m) RETURN distinct m queryLanguage: Cypher sort: Node sampleQueries: # Provide easy access to node types in the Exploration UI - name: Last 10 Nodes query: CALL recentNodes(10) - name: Legend query: MATCH (n) WHERE labels(n) IS NOT NULL WITH labels(n) AS kind, collect(n) AS legend RETURN legend[0] - name: One Client Node query: MATCH (client:client) RETURN client LIMIT 1 - name: One Client Node with more than Ten Events query: MATCH (client:client) WHERE client.count > 10 RETURN client LIMIT 1 - name: One Source ASN Node query: MATCH (asn:asn) RETURN asn LIMIT 1 - name: One Server Node query: MATCH (server:server) RETURN server LIMIT 1 - name: One PoP Node query: MATCH (pop:pop) RETURN pop LIMIT 1 - name: One Asset Node query: MATCH 
(asset:asset) RETURN asset LIMIT 1 - name: One Origin Node query: MATCH (origin:origin) RETURN origin LIMIT 1 ================================================ FILE: quine/recipes/certstream-firehose.yaml ================================================ version: 1 title: Certstream Firehose contributor: https://github.com/emanb29 summary: Log new SSL certificate registrations description: |- Reproduces the behavior of the certstream website (https://certstream.calidog.io/) by connecting to the certstream firehose via SSL-encrypted websocket and printing to standard out each time a new certificate is detected ingestStreams: - type: WebsocketSimpleStartupIngest url: wss://certstream.calidog.io/ format: type: CypherJson query: |- CREATE ($that) standingQueries: - pattern: type: Cypher query: MATCH (n) RETURN DISTINCT id(n) AS id outputs: log-new-certs: type: CypherQuery query: |- MATCH (n) WHERE id(n) = $that.data.id RETURN n.data andThen: type: PrintToStandardOut logMode: FastSampling nodeAppearances: [] quickQueries: [] sampleQueries: [] ================================================ FILE: quine/recipes/conways-gol.yaml ================================================ version: 1 title: Conway's Game of Life contributor: Matthew Cullum https://github.com/brackishman summary: Conway's Game of Life in Quine description: |- This recipe implements a generic Conway's Game of Life using standing queries for real-time cellular automaton evolution. The grid size, initial patterns, and configuration are loaded from a JSON file specified at runtime. Each cell evaluates its neighbors and changes state only when Conway's rules dictate a change, triggering cascading updates throughout the grid. Conway's Rules: 1. Live cell with 2-3 live neighbors survives 2. Dead cell with exactly 3 live neighbors becomes alive 3. 
All other cells die or stay dead Usage: Specify JSON config file with --recipe-value config_file=path/to/config.json The config file schema is as follows: { "name": "My Game of Life", "description": "A description of this setup", "gridWidth": 10, "gridHeight": 10, "initialPattern": [ {"x": 1, "y": 0, "alive": true}, {"x": 2, "y": 1, "alive": true}, {"x": 0, "y": 2, "alive": true}, {"x": 1, "y": 2, "alive": true}, {"x": 2, "y": 2, "alive": true} ] } In Quine you can view all cell nodes with the following query: MATCH (c:Cell) RETURN c Once Quine is running with this recipe, load the layout JSON from the UI to see the grid. You can create a new layout by running the generate-conways-layout.js script while Quine is running. Once the cell nodes are laid out, make sure to enable the bookmarklet. The JavaScript for the bookmarklet is in conways-gol-bookmarklet.js Start the game with the "▶️ START Game" quick query on any cell node, and pause it with the "⏸️ STOP Game" quick query. # Set up grid dynamically from JSON configuration file ingestStreams: - type: FileIngest path: $config_file format: type: CypherJson query: |- // Extract configuration from JSON and calculate totalCells WITH $that.gridWidth AS gridWidth, $that.gridHeight AS gridHeight, $that.gridWidth * $that.gridHeight AS totalCells, $that.name AS name, $that.description AS description, $that.initialPattern AS initialPattern // Create all grid cells (totalCells = gridWidth * gridHeight) UNWIND range(0, totalCells - 1) AS cellIndex WITH gridWidth, gridHeight, totalCells, name, description, initialPattern, cellIndex % gridWidth AS x, cellIndex / gridWidth AS y // Determine if this cell should be alive based on initialPattern WITH x, y, gridWidth, gridHeight, totalCells, name, description, CASE WHEN any(pattern IN initialPattern WHERE pattern.x = x AND pattern.y = y AND pattern.alive = true) THEN true ELSE false END AS alive // Create/update the specific cell MATCH (cell) WHERE id(cell) = idFrom("cell", x, y)
SET cell.x = x, cell.y = y, cell.alive = alive, cell.generation = 0, cell.state = "applied", cell: Cell // Create neighbor relationships within grid bounds WITH cell, x, y, gridWidth, gridHeight, totalCells, name, description UNWIND [ [x-1, y-1], [x, y-1], [x+1, y-1], [x-1, y], [x+1, y], [x-1, y+1], [x, y+1], [x+1, y+1] ] AS neighbor WITH cell, neighbor[0] AS nx, neighbor[1] AS ny, gridWidth, gridHeight, totalCells, name, description WHERE nx >= 0 AND nx < gridWidth AND ny >= 0 AND ny < gridHeight MATCH (neighborCell) WHERE id(neighborCell) = idFrom("cell", nx, ny) CREATE (cell)-[:NEIGHBOR]->(neighborCell) // Create/update ready node with configuration and connect to this cell WITH cell, gridWidth, gridHeight, totalCells, name, description MATCH (ready) WHERE id(ready) = idFrom("ready") SET ready.computingCells = 0, ready.applyingCells = 0, ready.generation = 0, ready.state = "stopped", ready.totalCells = totalCells, ready.gridWidth = gridWidth, ready.gridHeight = gridHeight, ready.name = name, ready.description = description CREATE (ready)-[:ACTIVATES]->(cell) # Standing queries for two-wave Conway's Game of Life evolution (fully dynamic) standingQueries: # Wave 1: Compute next state for all cells - pattern: type: Cypher mode: MultipleValues query: >- MATCH (ready)-[:ACTIVATES]->(cell) WHERE ready.computingCells = ready.totalCells AND ready.state = "computing" RETURN id(cell) AS cellId outputs: compute-next-state: type: CypherQuery query: |- MATCH (cell)-[:NEIGHBOR]->(neighbor) WHERE id(cell) = $that.data.cellId WITH cell, count(CASE WHEN neighbor.alive = true THEN 1 END) AS liveNeighbors WITH cell, liveNeighbors, CASE WHEN cell.alive = false AND liveNeighbors = 3 THEN true WHEN cell.alive = true AND (liveNeighbors = 2 OR liveNeighbors = 3) THEN true ELSE false END AS nextAlive SET cell.nextAlive = nextAlive, cell.state = "calculated" WITH cell MATCH (ready)-[:ACTIVATES]->(cell) WHERE id(cell) = $that.data.cellId CALL int.add(ready, "computingCells", -1) YIELD 
result RETURN cell.x AS x, cell.y AS y, cell.nextAlive AS nextAlive, "calculated" AS cellState, result AS remainingCells andThen: type: PrintToStandardOut # Wave 2: Apply computed state changes - pattern: type: Cypher mode: MultipleValues query: >- MATCH (ready)-[:ACTIVATES]->(cell) WHERE ready.applyingCells = ready.totalCells AND ready.state = "applying" RETURN id(cell) AS cellId outputs: apply-state-change: type: CypherQuery query: |- MATCH (cell) WHERE id(cell) = $that.data.cellId WITH cell, cell.alive AS oldAlive, cell.nextAlive AS newAlive SET cell.alive = newAlive, cell.updated = (oldAlive <> newAlive), cell.state = "applied" WITH cell MATCH (ready)-[:ACTIVATES]->(cell) WHERE id(cell) = $that.data.cellId CALL int.add(ready, "applyingCells", -1) YIELD result RETURN cell.x AS x, cell.y AS y, cell.alive AS alive, "applied" AS cellState, result AS remainingCells andThen: type: PrintToStandardOut # Wave coordination: Wave 1 complete -> Start Wave 2 (two-phase lock) - pattern: type: Cypher mode: MultipleValues query: >- MATCH (ready) WHERE ready.computingCells = 0 AND ready.applyingCells = 0 AND ready.state = "computing" RETURN id(ready) AS readyId outputs: start-wave-2: type: CypherQuery query: |- MATCH (ready)-[:ACTIVATES]->(cell) WHERE id(ready) = $that.data.readyId WITH ready, ready.totalCells AS TOTAL_CELLS, count(CASE WHEN cell.state = "calculated" THEN 1 END) AS calculatedCells WHERE calculatedCells = TOTAL_CELLS SET ready.applyingCells = TOTAL_CELLS, ready.state = "applying" RETURN "Starting Wave 2" AS message, TOTAL_CELLS AS cellCount, calculatedCells AS verifiedCells andThen: type: PrintToStandardOut # Wave coordination: Wave 2 complete -> Start next generation Wave 1 (two-phase lock) - pattern: type: Cypher mode: MultipleValues query: >- MATCH (ready) WHERE ready.applyingCells = 0 AND ready.computingCells = 0 AND ready.state = "applying" RETURN id(ready) AS readyId outputs: start-next-generation: type: CypherQuery query: |- MATCH 
(ready)-[:ACTIVATES]->(cell) WHERE id(ready) = $that.data.readyId WITH ready, ready.totalCells AS TOTAL_CELLS, count(CASE WHEN cell.state = "applied" THEN 1 END) AS appliedCells WHERE appliedCells = TOTAL_CELLS CALL int.add(ready, "generation", 1) YIELD result SET ready.computingCells = TOTAL_CELLS, ready.state = "computing" RETURN "Starting Generation" AS message, result AS generation, TOTAL_CELLS AS cellCount, appliedCells AS verifiedCells andThen: type: PrintToStandardOut # UI Configuration - works with any grid size nodeAppearances: - predicate: propertyKeys: ["alive", "x", "y"] knownValues: alive: true dbLabel: Cell icon: ion-record color: "#FF4500" size: 50.0 label: type: Property key: "x" prefix: "● (" suffix: ",{y})" - predicate: propertyKeys: ["alive", "x", "y"] knownValues: alive: false dbLabel: Cell icon: ion-record color: "#CCCCCC" size: 15.0 label: type: Property key: "x" prefix: "○ (" suffix: ",{y})" quickQueries: - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Refresh querySuffix: RETURN n queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Local Properties querySuffix: RETURN id(n), properties(n) queryLanguage: Cypher sort: Text - predicate: propertyKeys: ["x", "y"] knownValues: {} dbLabel: Cell quickQuery: name: "▶️ START Game" querySuffix: |- MATCH (ready) WHERE id(ready) = idFrom("ready") SET ready.computingCells = ready.totalCells, ready.state = "computing" RETURN n queryLanguage: Cypher sort: Node - predicate: propertyKeys: ["x", "y"] knownValues: {} dbLabel: Cell quickQuery: name: "⏸️ STOP Game" querySuffix: |- MATCH (ready) WHERE id(ready) = idFrom("ready") SET ready.computingCells = 0, ready.applyingCells = 0, ready.state = "stopped" RETURN n queryLanguage: Cypher sort: Node sampleQueries: - name: "● Show All Cells" query: |- MATCH (c:Cell) RETURN c - name: "📊 Show Game Configuration" query: |- MATCH (ready) WHERE id(ready) = idFrom("ready") MATCH (c:Cell) RETURN ready.name AS 
setup, ready.description AS description, ready.gridWidth AS width, ready.gridHeight AS height, ready.totalCells AS totalCells, count(CASE WHEN c.alive = true THEN 1 END) AS liveCells, ready.generation AS currentGeneration statusQuery: cypherQuery: |- MATCH (c:Cell) RETURN c ================================================ FILE: quine/recipes/duration.yaml ================================================ version: 1 title: Temporal Locality Example contributor: https://github.com/maglietti summary: Relate email messages sent or received by a specific user within a 4-6 minute window. description: |- This recipe looks for emails sent or received by cto@company.com within a 4-6 minute window as a means of highlighting a technique for matching on temporal locality of nodes. ingestStreams: - type: FileIngest path: email.json format: type: CypherJson query: |- MATCH (sender), (message) WHERE id(sender) = idFrom('email', $that.from) AND id(message) = idFrom('message', $that) SET sender.email = $that.from, sender: Email, message.from = $that.from, message.to = $that.to, message.subject = $that.subject, message.time = datetime({ epochMillis: $that.time}), message: Message CREATE (sender)-[:SENT_MSG]->(message) WITH $that as t, message UNWIND t.to AS rcv MATCH (receiver) WHERE id(receiver) = idFrom('email', rcv) SET receiver.email = rcv, receiver: Email CREATE (message)-[:RECEIVED_MSG]->(receiver) standingQueries: - pattern: type: Cypher mode: MultipleValues query: |- MATCH (n)-[:SENT_MSG]->(m)-[:RECEIVED_MSG]->(r) WHERE n.email="cto@company.com" OR r.email="cto@company.com" RETURN id(n) as ctoId, id(m) as ctoMsgId, m.time as mTime, id(r) as recId outputs: withinFourToSixMinuteWindow: type: CypherQuery query: |- MATCH (n)-[:SENT_MSG]->(m)-[:RECEIVED_MSG]->(r), (thisMsg) WHERE id(n) = $that.data.ctoId AND id(r) = $that.data.recId AND id(thisMsg) = $that.data.ctoMsgId AND id(m) <> id(thisMsg) AND duration("PT6M") > duration.between(m.time,thisMsg.time) > duration("PT4M") CREATE 
(m)-[:IN_WINDOW]->(thisMsg) CREATE (m)<-[:IN_WINDOW]-(thisMsg) WITH n, m, r, "http://localhost:8080/#MATCH" + text.urlencode(' (n)-[:SENT_MSG]->(m)-[:RECEIVED_MSG]->(r) WHERE strId(n)="' + strId(n) + '"AND strId(r)="' + strId(r) + '" AND strId(m)="' + strId(m) + '" RETURN n, r, m') as URL RETURN URL andThen: type: PrintToStandardOut nodeAppearances: - predicate: propertyKeys: - email knownValues: email: "cto@company.com" dbLabel: Email icon: ion-android-person color: "#F44336" size: label: type: Property key: email - predicate: propertyKeys: [] knownValues: {} dbLabel: Email icon: ion-android-person color: "#2ECC71" size: label: type: Property key: email - predicate: propertyKeys: [] knownValues: {} dbLabel: Message icon: ion-ios-email-outline color: "#2ECC71" size: label: type: Property key: subject quickQueries: - predicate: propertyKeys: [ ] knownValues: {} quickQuery: name: "[Node] Adjacent Nodes" querySuffix: MATCH (n)--(m) RETURN DISTINCT m queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: "[Node] Refresh" querySuffix: RETURN n queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: "[Text] Local Properties" querySuffix: RETURN id(n), properties(n) queryLanguage: Cypher sort: Text - predicate: propertyKeys: [] knownValues: {} dbLabel: Message quickQuery: name: "[Node] Messages in Window" querySuffix: MATCH (n)-[:IN_WINDOW]-(m) RETURN n,m queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} dbLabel: Message quickQuery: name: "[Text] Table of Messages in Window" querySuffix: MATCH (n)-[r:IN_WINDOW]-(m) RETURN DISTINCT n.time AS MSG1_TIME, n.subject AS MSG1_SUBJECT, m.time AS MSG2_TIME, m.subject AS MSG2_SUBJECT, toString(abs(duration.between(n.time,m.time).seconds/60)) + " Minutes " + toString(abs(duration.between(n.time,m.time).seconds)-abs(duration.between(n.time,m.time).seconds/60)*60) + " Seconds" AS DELTA_TIME queryLanguage: Cypher sort: Text 
sampleQueries: [] ================================================ FILE: quine/recipes/entity-resolution.yaml ================================================ version: 1 title: Entity Resolution Example contributor: https://github.com/rrwright summary: Entity Resolution description: Ingest address records from a public dataset and form each record into a subgraph built from its properties. The records are initially resolved according to the same use of the "addressee". Records are further resolved as their subgraphs overlap and a rule is applied with a standing query to resolve all entities with the same `poBox` and `postcode`. iconImage: 🤷 ingestStreams: - type: FileIngest path: public-record-addresses-2021.ndjson format: type: CypherJson query: >- WITH $that.parts AS parts MATCH (record), (entity), (cityDistrict), (unit), (country), (state), (level), (suburb), (city), (road), (house), (houseNumber), (poBox), (category), (near), (stateDistrict), (staircase), (postcode) WHERE id(record) = idFrom($that) AND id(entity) = idFrom($that.addressee, parts) AND id(cityDistrict) = idFrom("cityDistrict", CASE WHEN parts.cityDistrict IS NULL THEN -1 ELSE parts.cityDistrict END) AND id(unit) = idFrom("unit", CASE WHEN parts.unit IS NULL THEN -1 ELSE parts.unit END) AND id(country) = idFrom("country", CASE WHEN parts.country IS NULL THEN -1 ELSE parts.country END) AND id(state) = idFrom("state", CASE WHEN parts.state IS NULL THEN -1 ELSE parts.state END) AND id(level) = idFrom("level", CASE WHEN parts.level IS NULL THEN -1 ELSE parts.level END) AND id(suburb) = idFrom("suburb", CASE WHEN parts.suburb IS NULL THEN -1 ELSE parts.suburb END) AND id(city) = idFrom("city", CASE WHEN parts.city IS NULL THEN -1 ELSE parts.city END) AND id(road) = idFrom("road", CASE WHEN parts.road IS NULL THEN -1 ELSE parts.road END) AND id(house) = idFrom("house", CASE WHEN parts.house IS NULL THEN -1 ELSE parts.house END) AND id(houseNumber) = idFrom("houseNumber", CASE WHEN parts.houseNumber IS 
NULL THEN -1 ELSE parts.houseNumber END) AND id(poBox) = idFrom("poBox", CASE WHEN parts.poBox IS NULL THEN -1 ELSE parts.poBox END) AND id(category) = idFrom("category", CASE WHEN parts.category IS NULL THEN -1 ELSE parts.category END) AND id(near) = idFrom("near", CASE WHEN parts.near IS NULL THEN -1 ELSE parts.near END) AND id(stateDistrict) = idFrom("stateDistrict", CASE WHEN parts.stateDistrict IS NULL THEN -1 ELSE parts.stateDistrict END) AND id(staircase) = idFrom("staircase", CASE WHEN parts.staircase IS NULL THEN -1 ELSE parts.staircase END) AND id(postcode) = idFrom("postcode", CASE WHEN parts.postcode IS NULL THEN -1 ELSE parts.postcode END) FOREACH (p IN CASE WHEN parts.cityDistrict IS NULL THEN [] ELSE [parts.cityDistrict] END | SET cityDistrict.cityDistrict = p CREATE (entity)-[:cityDistrict]->(cityDistrict) ) FOREACH (p IN CASE WHEN parts.unit IS NULL THEN [] ELSE [parts.unit] END | SET unit.unit = p CREATE (entity)-[:unit]->(unit) ) FOREACH (p IN CASE WHEN parts.country IS NULL THEN [] ELSE [parts.country] END | SET country.country = p CREATE (entity)-[:country]->(country) ) FOREACH (p IN CASE WHEN parts.state IS NULL THEN [] ELSE [parts.state] END | SET state.state = p CREATE (entity)-[:state]->(state) ) FOREACH (p IN CASE WHEN parts.level IS NULL THEN [] ELSE [parts.level] END | SET level.level = p CREATE (entity)-[:level]->(level) ) FOREACH (p IN CASE WHEN parts.suburb IS NULL THEN [] ELSE [parts.suburb] END | SET suburb.suburb = p CREATE (entity)-[:suburb]->(suburb) ) FOREACH (p IN CASE WHEN parts.city IS NULL THEN [] ELSE [parts.city] END | SET city.city = p CREATE (entity)-[:city]->(city) ) FOREACH (p IN CASE WHEN parts.road IS NULL THEN [] ELSE [parts.road] END | SET road.road = p CREATE (entity)-[:road]->(road) ) FOREACH (p IN CASE WHEN parts.house IS NULL THEN [] ELSE [parts.house] END | SET house.house = p CREATE (entity)-[:house]->(house) ) FOREACH (p IN CASE WHEN parts.houseNumber IS NULL THEN [] ELSE [parts.houseNumber] END | SET 
houseNumber.houseNumber = p CREATE (entity)-[:houseNumber]->(houseNumber) ) FOREACH (p IN CASE WHEN parts.poBox IS NULL THEN [] ELSE [parts.poBox] END | SET poBox.poBox = p CREATE (entity)-[:poBox]->(poBox) ) FOREACH (p IN CASE WHEN parts.category IS NULL THEN [] ELSE [parts.category] END | SET category.category = p CREATE (entity)-[:category]->(category) ) FOREACH (p IN CASE WHEN parts.near IS NULL THEN [] ELSE [parts.near] END | SET near.near = p CREATE (entity)-[:near]->(near) ) FOREACH (p IN CASE WHEN parts.stateDistrict IS NULL THEN [] ELSE [parts.stateDistrict] END | SET stateDistrict.stateDistrict = p CREATE (entity)-[:stateDistrict]->(stateDistrict) ) FOREACH (p IN CASE WHEN parts.staircase IS NULL THEN [] ELSE [parts.staircase] END | SET staircase.staircase = p CREATE (entity)-[:staircase]->(staircase) ) FOREACH (p IN CASE WHEN parts.postcode IS NULL THEN [] ELSE [parts.postcode] END | SET postcode.postcode = p CREATE (entity)-[:postcode]->(postcode) ) SET entity = parts, entity.addressee = $that.addressee, entity: Entity, record = $that, record: Record CREATE (record)-[:record_for_entity]->(entity) standingQueries: - pattern: # This creates the `canonical` record based on postcode and poBox and connects it. type: Cypher mode: MultipleValues query: >- MATCH (pb)<-[:poBox]-(e)-[:postcode]->(pc) RETURN id(e) AS entity, pb.poBox AS poBox, pc.postcode AS postcode outputs: resolved: type: CypherQuery query: >- MATCH (e), (canonical) WHERE id(e) = $that.data.entity AND id(canonical) = idFrom($that.data.poBox, $that.data.postcode) SET canonical.canonical = {poBox: $that.data.poBox, postcode: $that.data.postcode}, canonical: Canonical CREATE (e)-[:resolved]->(canonical) - pattern: # This re-emits the original record with a field showing its resolution.
type: Cypher mode: MultipleValues query: >- MATCH (record)-[:record_for_entity]->(entity)-[:resolved]->(resolved) WHERE resolved.canonical IS NOT NULL RETURN id(record) AS record, id(resolved) AS resolved outputs: resolved-record: type: CypherQuery query: >- MATCH (record) WHERE id(record) = $that.data.record WITH properties(record) as props RETURN props {.*, resolved: $that.data.resolved} AS resolved_entity andThen: type: WriteToFile path: "entities-resolved.ndjson" nodeAppearances: - predicate: propertyKeys: - parts knownValues: {} label: prefix: "" key: id type: Property icon: "📝" - predicate: propertyKeys: - addressee knownValues: {} label: prefix: "" key: addressee type: Property icon: "🤷" - predicate: propertyKeys: - cityDistrict knownValues: {} label: prefix: "cityDistrict: " key: cityDistrict type: Property icon: "🏙️" - predicate: propertyKeys: - unit knownValues: {} label: prefix: "unit: " key: unit type: Property icon: "#" - predicate: propertyKeys: - country knownValues: {} label: prefix: "country: " key: country type: Property icon: "🇺🇳" - predicate: propertyKeys: - state knownValues: {} label: prefix: "state: " key: state type: Property icon: "🇺🇸" - predicate: propertyKeys: - level knownValues: {} label: prefix: "level: " key: level type: Property icon: "🎚️" - predicate: propertyKeys: - suburb knownValues: {} label: prefix: "suburb: " key: suburb type: Property icon: "🏘️" - predicate: propertyKeys: - city knownValues: {} label: prefix: "city: " key: city type: Property icon: "🌃" - predicate: propertyKeys: - road knownValues: {} label: prefix: "road: " key: road type: Property icon: "🛣️" - predicate: propertyKeys: - house knownValues: {} label: prefix: "house: " key: house type: Property icon: "🏡" - predicate: propertyKeys: - houseNumber knownValues: {} label: prefix: "houseNumber: " key: houseNumber type: Property icon: "💯" - predicate: propertyKeys: - poBox knownValues: {} label: prefix: "poBox: " key: poBox type: Property icon: "🔢" - predicate: 
propertyKeys: - category knownValues: {} label: prefix: "category: " key: category type: Property icon: "🐈" - predicate: propertyKeys: - near knownValues: {} label: prefix: "near: " key: near type: Property icon: "⤵️" - predicate: propertyKeys: - stateDistrict knownValues: {} label: prefix: "stateDistrict: " key: stateDistrict type: Property icon: "🌁" - predicate: propertyKeys: - staircase knownValues: {} label: prefix: "staircase: " key: staircase type: Property icon: "🪜" - predicate: propertyKeys: - postcode knownValues: {} label: prefix: "postcode: " key: postcode type: Property icon: "✉️" - predicate: propertyKeys: - canonical knownValues: {} label: value: "Canonical Entity" type: Constant icon: "🧑‍⚖️" quickQueries: - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Adjacent Nodes querySuffix: MATCH (n)--(m) RETURN DISTINCT m queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Refresh querySuffix: RETURN n queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Local Properties querySuffix: RETURN id(n), properties(n) queryLanguage: Cypher sort: Text - predicate: propertyKeys: [addressee] knownValues: {} quickQuery: name: Property Subgraph queryLanguage: Cypher sort: Node querySuffix: MATCH (n)-->(m) WHERE m.parsed IS NULL AND m.canonical IS NULL RETURN m - predicate: propertyKeys: [addressee] knownValues: {} quickQuery: name: Records querySuffix: MATCH (n)<-[:record_for_entity]-(r) RETURN r queryLanguage: Cypher sort: Node - predicate: propertyKeys: [addressee] knownValues: {} quickQuery: name: Resolved Entities querySuffix: MATCH (n)-[:resolved]->(r)<-[:resolved]-(e) RETURN e queryLanguage: Cypher sort: Node edgeLabel: Resolved - predicate: propertyKeys: [addressee] knownValues: {} quickQuery: name: Canonical Entity querySuffix: MATCH (n)-[:resolved]->(r) RETURN r queryLanguage: Cypher sort: Node - predicate: propertyKeys: [addressee] knownValues: {} quickQuery: 
name: A.K.A. querySuffix: MATCH (n)-[:resolved]->(c)<-[:resolved]-(o) RETURN DISTINCT replace(o.addressee, "\n", " ") AS AKA queryLanguage: Cypher sort: Text - predicate: propertyKeys: [canonical] knownValues: {} quickQuery: name: A.K.A. querySuffix: MATCH (n)<-[:resolved]-(o) RETURN replace(o.addressee, "\n", " ") AS AKA queryLanguage: Cypher sort: Text sampleQueries: - name: Recent node query: CALL recentNodes(1) - name: Show one record query: MATCH (a) WHERE id(a) = "00145c03-428c-3051-9d9c-c09c5f4eace4" RETURN a - name: Missing PO Box query: MATCH (n) WHERE strId(n) = "c2e78a44-05de-3fbf-98d1-c5bdad2790a0" RETURN n - name: Create missing PO BOX query: WITH "hand-created box 12345" as box MATCH (entity), (poBox) WHERE strId(entity) = "c2e78a44-05de-3fbf-98d1-c5bdad2790a0" AND id(poBox) = idFrom("poBox", box) SET poBox.poBox = box CREATE (entity)-[:poBox]->(poBox) RETURN poBox ================================================ FILE: quine/recipes/ethereum.yaml ================================================ version: 1 title: Ethereum Tag Propagation contributor: https://github.com/emanb29 summary: Ethereum Blockchain model with tag propagation description: |- Models data on the thoroughgoing Ethereum blockchain using tag propagation to track the flow of transactions from flagged accounts. Newly-mined Ethereum transaction metadata is imported via a Server-Sent Events data source. Transactions are grouped by the block in which they were mined then imported into the graph. Each wallet address is represented by a node, linked by an edge to each transaction sent or received by that account, and linked by an edge to any blocks mined by that account. Quick queries allow marking an account as "tainted". The tainted flag is propagated along outgoing transaction paths via Standing Queries to record the least degree of separation between a tainted source and an account receiving a transaction. 
Canonical (eth-node-provided) capitalization is maintained where possible, with `toLower` being used for idFrom-based ID resolution to reflect the case-insensitive nature of bytestrings (eg addresses, hashes) used by Ethereum. The Ethereum diamond logo is property of the Ethereum Foundation, used under the terms of the Creative Commons Attribution 3.0 License. iconImage: https://i.imgur.com/sSl6BQd.png ingestStreams: - format: query: |- MATCH (BA), (minerAcc), (blk), (parentBlk) WHERE id(blk) = idFrom('block', toLower($that.hash)) AND id(parentBlk) = idFrom('block', toLower($that.parentHash)) AND id(BA) = idFrom('block_assoc', toLower($that.hash)) AND id(minerAcc) = idFrom('account', toLower($that.miner)) CREATE (minerAcc)<-[:mined_by]-(blk)-[:header_for]->(BA), (blk)-[:preceded_by]->(parentBlk) SET BA:block_assoc, BA.number = $that.number, BA.hash = $that.hash, blk:block, blk = $that, minerAcc:account, minerAcc.address = $that.miner type: CypherJson url: https://ethereum.demo.thatdot.com/blocks_head type: ServerSentEventsIngest - format: query: |- WITH true AS validTransactionRecord WHERE $that.to IS NOT NULL AND $that.from IS NOT NULL MATCH (BA), (toAcc), (fromAcc), (tx) WHERE id(BA) = idFrom('block_assoc', toLower($that.blockHash)) AND id(toAcc) = idFrom('account', toLower($that.to)) AND id(fromAcc) = idFrom('account', toLower($that.from)) AND id(tx) = idFrom('transaction', toLower($that.hash)) CREATE (tx)-[:defined_in]->(BA), (tx)-[:from]->(fromAcc), (tx)-[:to]->(toAcc) SET tx:transaction, BA:block_assoc, toAcc:account, fromAcc:account, tx = $that, fromAcc.address = $that.from, toAcc.address = $that.to type: CypherJson url: https://ethereum.demo.thatdot.com/mined_transactions type: ServerSentEventsIngest standingQueries: - pattern: query: |- MATCH (tainted:account)<-[:from]-(tx:transaction)-[:to]->(otherAccount:account), (tx)-[:defined_in]->(ba:block_assoc) WHERE tainted.tainted IS NOT NULL RETURN id(tainted) AS accountId, tainted.tainted AS oldTaintedLevel, 
id(otherAccount) AS otherAccountId type: Cypher mode: MultipleValues outputs: propagate-tainted: query: |- MATCH (tainted), (otherAccount) WHERE tainted <> otherAccount AND id(tainted) = $that.data.accountId AND id(otherAccount) = $that.data.otherAccountId WITH *, coll.min([($that.data.oldTaintedLevel + 1), otherAccount.tainted]) AS newTaintedLevel SET otherAccount.tainted = newTaintedLevel RETURN strId(tainted) AS taintedSource, strId(otherAccount) AS newlyTainted, newTaintedLevel type: CypherQuery andThen: type: PrintToStandardOut nodeAppearances: - predicate: dbLabel: block propertyKeys: [ ] knownValues: { } icon: cube label: prefix: 'Block ' key: number type: Property - predicate: dbLabel: transaction propertyKeys: [ ] knownValues: { } icon: cash label: prefix: 'Wei Transfer: ' key: value type: Property - predicate: dbLabel: account propertyKeys: [ ] knownValues: tainted: 0 icon: social-bitcoin label: prefix: 'Account ' key: address type: Property color: '#fb00ff' - predicate: dbLabel: account propertyKeys: - tainted knownValues: { } icon: social-bitcoin label: prefix: 'Account ' key: address type: Property color: '#c94d44' - predicate: dbLabel: account propertyKeys: [ ] knownValues: { } icon: social-bitcoin label: prefix: 'Account ' key: address type: Property - predicate: dbLabel: block_assoc propertyKeys: [ ] knownValues: { } icon: ios-folder label: prefix: 'Transactions in block ' key: number type: Property quickQueries: - predicate: propertyKeys: [ ] knownValues: { } quickQuery: name: Adjacent Nodes querySuffix: MATCH (n)--(m) RETURN DISTINCT m queryLanguage: Cypher sort: Node - predicate: propertyKeys: [ ] knownValues: { } dbLabel: account quickQuery: name: Outgoing transactions querySuffix: MATCH (n)<-[:from]-(tx)-[:to]->(m:account) RETURN m edgeLabel: Sent Tx To queryLanguage: Cypher sort: Node - predicate: propertyKeys: [ ] knownValues: { } dbLabel: account quickQuery: name: Incoming transactions querySuffix: MATCH (n)<-[:to]-(tx)-[:from]->(m:account) 
RETURN m edgeLabel: Got Tx From queryLanguage: Cypher sort: Node - predicate: propertyKeys: [ ] knownValues: { } quickQuery: name: Refresh querySuffix: RETURN n queryLanguage: Cypher sort: Node - predicate: propertyKeys: [ ] knownValues: { } dbLabel: account quickQuery: name: Mark as tainted and refresh querySuffix: SET n.tainted = 0 WITH id(n) AS nId CALL { WITH nId MATCH (n) WHERE id(n) = nId RETURN n } RETURN n queryLanguage: Cypher sort: Node - predicate: propertyKeys: [ ] knownValues: { } dbLabel: account quickQuery: name: Incoming tainted transactions querySuffix: MATCH (n)<-[:to]-(tx)-[:from]->(m:account) WHERE m.tainted IS NOT NULL AND m<>n RETURN m edgeLabel: Got Tainted From queryLanguage: Cypher sort: Node - predicate: propertyKeys: [ ] knownValues: { } quickQuery: name: Local Properties querySuffix: RETURN id(n), properties(n) queryLanguage: Cypher sort: Text sampleQueries: - name: Get a few recently-accessed blocks query: CALL recentNodes(1000) YIELD node AS nId MATCH (n:block) WHERE id(n) = nId RETURN n - name: Find accounts that have both sent and received ETH query: MATCH (downstream:account)<-[:to]-(tx1)-[:from]->(a:account)<-[:to]-(tx2)-[:from]->(upstream:account) WHERE tx1<>tx2 AND upstream <> downstream AND upstream <> a AND downstream <> a RETURN downstream, tx1, a, tx2, upstream LIMIT 1 ================================================ FILE: quine/recipes/finance.yaml ================================================ version: 1 title: Financial Risk Recipe description: |- The financial industry’s current approach to managing mandated operational risk capital requirements, batch processing, often leads to over- or under-allocation of certain classes of funds, operating with tight time constraints, and slow reactions to changing market conditions. By responding to market changes in real time, organizations can provide adequate coverage for risk exposure while ensuring their compliance minimally affects their asset allocation. 
The intent of this recipe is to show an example of conditionally adjusting data (investment value) based on a property (investment class) of the manifested nodes prior to aggregating the value at multiple levels. Further, the adjusted aggregates are used to alert on threshold crossing (percentage of value of specific classes). This is accomplished via three technical strategies: 1. Use of `NumberIteratorIngest` to generate sample transactions 2. Conditional handling of data 3. Real-time graph-based data (from #2) aggregated across multiple levels ingestStreams: - type: NumberIteratorIngest ingestLimit: 1 format: type: CypherLine query: |- WITH 0 AS institutionId // Generate 10 desks - change the range bound to alter the number of generated desks UNWIND range(1, 10) AS deskId MATCH (institution), (desk) WHERE id(institution) = idFrom('institution', institutionId) AND id(desk) = idFrom('desk', institutionId, deskId) SET institution:institution SET desk:desk, desk.deskNumber = deskId CREATE (institution)-[:HAS]->(desk) WITH * // Generate 1000 investments per desk- change the range bound to alter the number of investments generated per desk UNWIND range(1, 1000) AS investmentId MATCH (investment) WHERE id(investment) = idFrom('investment', institutionId, deskId, investmentId) SET investment:investment, investment.investmentId = toInteger(toString(deskId) + toString(investmentId)), investment.type = toInteger(rand() * 10) + 1, investment.code = gen.string.from(strId(investment), 25), investment.value = gen.float.from(strId(investment)) * 100 WITH id(investment) AS invId, desk, investment CALL { WITH invId MATCH (investment:investment) WHERE id(investment) = invId SET investment.class = CASE WHEN investment.type <= 5 THEN '1' WHEN investment.type >= 6 AND investment.type <= 8 THEN '2a' WHEN investment.type >= 9 THEN '2b' END RETURN investment.type AS type } CREATE (desk)-[:HOLDS]->(investment) standingQueries: - pattern: type: Cypher query: |- MATCH 
(investment:investment)<-[:HOLDS]-(desk:desk)<-[:HAS]-(institution:institution) RETURN DISTINCT id(investment) AS id mode: DistinctId outputs: adjustValues: type: CypherQuery query: |- MATCH (investment:investment)<-[:HOLDS]-(desk:desk)<-[:HAS]-(institution:institution) WHERE id(investment) = $that.data.id SET investment.adjustedValue = CASE WHEN investment.class = '1' THEN investment.value WHEN investment.class = '2a' THEN investment.value * .85 WHEN investment.class = '2b' AND investment.type = 9 THEN investment.value * .75 WHEN investment.class = '2b' AND investment.type = 10 THEN investment.value * .5 END - pattern: type: Cypher query: |- MATCH (investment:investment)<-[:HOLDS]-(desk:desk)<-[:HAS]-(institution:institution) WHERE investment.adjustedValue IS NOT NULL RETURN DISTINCT id(investment) AS id mode: DistinctId outputs: rollUps: type: CypherQuery query: |- MATCH (investment)<-[:HOLDS]-(desk:desk)<-[:HAS]-(institution:institution) WHERE id(investment) = $that.data.id AND investment.adjustedValue IS NOT NULL UNWIND [["1","adjustedValue1"], ["2a","adjustedValue2a"], ["2b","adjustedValue2b"]] AS stuff WITH institution,investment,desk,stuff WHERE investment.class = stuff[0] CALL float.add(institution,stuff[1],investment.adjustedValue) YIELD result AS institutionAdjustedValueRollupByClass CALL float.add(institution,"totalAdjustedValue",investment.adjustedValue) YIELD result AS institutionAdjustedValueRollup CALL float.add(desk,stuff[1],investment.adjustedValue) YIELD result AS deskAdjustedValueRollupByClass CALL float.add(desk,"totalAdjustedValue",investment.adjustedValue) YIELD result AS deskAdjustedValueRollup SET institution.percentAdjustedValue2 = ((institution.adjustedValue2a + institution.adjustedValue2b)/institution.totalAdjustedValue) * 100, institution.percentAdjustedValue2b = (institution.adjustedValue2b/institution.totalAdjustedValue) * 100 - pattern: type: Cypher query: |- MATCH 
(investment:investment)<-[:HOLDS]-(desk:desk)<-[:HAS]-(institution:institution) RETURN DISTINCT id(investment) AS id mode: DistinctId outputs: class2CompositionAlert: type: CypherQuery query: |- MATCH (investment:investment)<-[:HOLDS]-(desk:desk)<-[:HAS]-(institution:institution) WHERE id(investment) = $that.data.id AND (institution.investments = 2500 OR institution.investments = 5000 OR institution.investments = 10000) AND institution.percentAdjustedValue2 > 40 RETURN institution.percentAdjustedValue2 AS Class_2_Composition andThen: type: PrintToStandardOut class2bCompositionAlert: type: CypherQuery query: |- MATCH (investment:investment)<-[:HOLDS]-(desk:desk)<-[:HAS]-(institution:institution) WHERE id(investment) = $that.data.id AND (institution.investments = 2500 OR institution.investments = 5000 OR institution.investments = 10000) AND institution.percentAdjustedValue2b > 15 RETURN institution.percentAdjustedValue2b AS Class_2b_Composition andThen: type: PrintToStandardOut quickQueries: - predicate: propertyKeys: [] knownValues: {} quickQuery: name: "[Node] Adjacent Nodes" querySuffix: MATCH (n)--(m) RETURN DISTINCT m queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: "[Node] Parent Node" querySuffix: MATCH (n)<-[]-(m) RETURN DISTINCT m queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: "[Node] Refresh" querySuffix: RETURN n queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: "[Text] Local Properties" querySuffix: RETURN id(n) AS NODE_ID, labels(n) AS NODE_LABELS, properties(n) AS NODE_PROPERTIES queryLanguage: Cypher sort: Text - predicate: propertyKeys: [] knownValues: {} quickQuery: name: "[Text] Node Label" querySuffix: RETURN labels(n) queryLanguage: Cypher sort: Text sampleQueries: - name: Last 10 Nodes query: CALL recentNodes(10) - name: Legend (show one of each node type) query: MATCH (n) WHERE labels(n) IS NOT NULL 
WITH labels(n) AS kind, collect(n) AS legend RETURN legend[0] - name: Show distribution of investment node classes (grouped by desk) query: MATCH (investment:investment)<-[]-(desk:desk) RETURN desk.deskNumber AS DESK, investment.investmentId AS INVESTMENT, investment.class AS CLASS ORDER BY desk.deskNumber - name: Wiretap Standing Query 1 query: 'CALL standing.wiretap({ name: "STANDING-1"}) YIELD meta, data WHERE meta.isPositiveMatch MATCH (n) WHERE id(n) = data.id RETURN properties(n)' - name: Wiretap Standing Query 2 query: 'CALL standing.wiretap({ name: "STANDING-2"}) YIELD meta, data WHERE meta.isPositiveMatch MATCH (n) WHERE id(n) = data.id RETURN properties(n)' - name: Wiretap Standing Query 3 query: 'CALL standing.wiretap({ name: "STANDING-3"}) YIELD meta, data WHERE meta.isPositiveMatch MATCH (n) WHERE id(n) = data.id RETURN properties(n)' nodeAppearances: - predicate: propertyKeys: - type knownValues: type: 1 dbLabel: investment icon: ion-cash color: "#85BB65" size: label: type: Property key: investmentId prefix: "Investment ID: " - predicate: propertyKeys: - type knownValues: type: 2 dbLabel: investment icon: ion-cash color: "#85BB65" size: label: type: Property key: investmentId prefix: "Investment ID: " - predicate: propertyKeys: - type knownValues: type: 3 dbLabel: investment icon: ion-cash color: "#85BB65" size: label: type: Property key: investmentId prefix: "Investment ID: " - predicate: propertyKeys: - type knownValues: type: 4 dbLabel: investment icon: ion-cash color: "#85BB65" size: label: type: Property key: investmentId prefix: "Investment ID: " - predicate: propertyKeys: - type knownValues: type: 5 dbLabel: investment icon: ion-cash color: "#85BB65" size: label: type: Property key: investmentId prefix: "Investment ID: " - predicate: propertyKeys: - type knownValues: type: 6 dbLabel: investment icon: ion-android-warning color: "#FFAA33" size: label: type: Property key: investmentId prefix: "Investment ID: " - predicate: propertyKeys: - type 
knownValues: type: 7 dbLabel: investment icon: ion-android-warning color: "#FFAA33" size: label: type: Property key: investmentId prefix: "Investment ID: " - predicate: propertyKeys: - type knownValues: type: 8 dbLabel: investment icon: ion-android-warning color: "#FFAA33" size: label: type: Property key: investmentId prefix: "Investment ID: " - predicate: propertyKeys: - type knownValues: type: 9 dbLabel: investment icon: ion-android-alert color: "#880808" size: label: type: Property key: investmentId prefix: "Investment ID: " - predicate: propertyKeys: - type knownValues: type: 10 dbLabel: investment icon: ion-android-alert color: "#880808" size: label: type: Property key: investmentId prefix: "Investment ID: " - predicate: propertyKeys: [] knownValues: {} dbLabel: desk icon: ion-archive color: "#aaa9ad" size: label: type: Property key: deskNumber prefix: "Desk: " - predicate: propertyKeys: [] knownValues: {} dbLabel: institution icon: ion-android-home color: "#AA4A44" size: ================================================ FILE: quine/recipes/hpotter.yaml ================================================ version: 1 title: Harry Potter contributor: https://github.com/harpocrates summary: Small graph of connected nodes description: |- This Recipe loads a small graph of connected nodes. 
Before running this Recipe, download the dataset using curl https://quine.io/recipes/images/harry_potter_data.json -o harry_potter_data.json ingestStreams: - type: FileIngest path: harry_potter_data.json format: type: CypherJson query: |- MATCH (p) WHERE id(p) = idFrom('name', $that.name) SET p = { name: $that.name, gender: $that.gender, birth_year: $that.birth_year }, p: Person WITH $that.children AS childrenNames, p UNWIND childrenNames AS childName MATCH (c) WHERE id(c) = idFrom('name', childName) CREATE (c)-[:has_parent]->(p) standingQueries: [ ] nodeAppearances: [ ] quickQueries: - quickQuery: name: Adjacent Nodes querySuffix: MATCH (n)--(m) RETURN DISTINCT m queryLanguage: Cypher sort: Node predicate: propertyKeys: [ ] knownValues: { } - quickQuery: name: Siblings querySuffix: >- MATCH (n)-[:has_parent]->(p)<-[:has_parent]-(s) RETURN DISTINCT s queryLanguage: Cypher sort: Node edgeLabel: has sibling predicate: propertyKeys: [ ] knownValues: { } sampleQueries: [ ] ================================================ FILE: quine/recipes/ingest.yaml ================================================ version: 1 title: Ingest contributor: https://github.com/landon9720 summary: Ingest input file lines as graph nodes description: Ingests each line in "$in-file" as graph node with property "line". 
ingestStreams: - type: FileIngest path: $in-file format: type: CypherLine query: |- MATCH (n) WHERE id(n) = idFrom($that) SET n.line = $that standingQueries: [ ] nodeAppearances: [ ] quickQueries: [ ] sampleQueries: [ ] ================================================ FILE: quine/recipes/kafka-ingest.yaml ================================================ version: 1 title: Kafka Ingest contributor: https://github.com/landon9720 summary: Ingest Kafka topic messages as graph nodes description: Ingests each message in the Kafka topic "test-topic" as a graph node ingestStreams: - type: KafkaIngest topics: - test-topic bootstrapServers: localhost:9092 format: type: CypherJson query: |- MATCH (n) WHERE id(n) = idFrom($that) SET n = $that standingQueries: [ ] nodeAppearances: [ ] quickQueries: [ ] sampleQueries: [ ] ================================================ FILE: quine/recipes/movieData.yaml ================================================ # Recipe schema version (currently the only supported value is 1) version: 1 # Identifies the Recipe but is not necessarily unique or immutable title: Ingesting CSV Files # URL to social profile of the person or organization responsible for this Recipe contributor: https://github.com/maglietti # Brief copy about this Recipe summary: |- This recipe takes sample movie and rating data, builds a graph, and alerts when an actor is also the director of a movie. # Longer form copy about this Recipe description: |- This recipe is part of the Quine Ingest Stream blog series.
This time, we'll work with `CSV` data exported from IMDb to answer the question; *"Which actors have acted in and directed the same movie?"* # Ingest Streams connect to data sources and establish how data is processed and transformed ingestStreams: # INGEST-1 - type: FileIngest path: $movie_file format: type: CypherCsv headers: true query: |- WITH $that AS row MATCH (m) WHERE row.Entity = 'Movie' AND id(m) = idFrom("Movie", row.movieId) SET m:Movie, m.tmdbId = row.tmdbId, m.imdbId = row.imdbId, m.imdbRating = toFloat(row.imdbRating), m.released = row.released, m.title = row.title, m.year = toInteger(row.year), m.poster = row.poster, m.runtime = toInteger(row.runtime), m.countries = split(coalesce(row.countries,""), "|"), m.imdbVotes = toInteger(row.imdbVotes), m.revenue = toInteger(row.revenue), m.plot = row.plot, m.url = row.url, m.budget = toInteger(row.budget), m.languages = split(coalesce(row.languages,""), "|"), m.movieId = row.movieId WITH m,split(coalesce(row.genres,""), "|") AS genres UNWIND genres AS genre WITH m, genre MATCH (g) WHERE id(g) = idFrom("Genre", genre) SET g.genre = genre, g:Genre CREATE (m:Movie)-[:IN_GENRE]->(g:Genre) # INGEST-2 - type: FileIngest path: $movie_file format: type: CypherCsv headers: true query: |- WITH $that AS row MATCH (p) WHERE row.Entity = "Person" AND id(p) = idFrom("Person", row.tmdbId) SET p:Person, p.imdbId = row.imdbId, p.bornIn = row.bornIn, p.name = row.name, p.bio = row.bio, p.poster = row.poster, p.url = row.url, p.born = row.born, p.died = row.died, p.tmdbId = row.tmdbId, p.born = CASE row.born WHEN "" THEN null ELSE datetime(row.born + "T00:00:00Z") END, p.died = CASE row.died WHEN "" THEN null ELSE datetime(row.died + "T00:00:00Z") END # INGEST-3 - type: FileIngest path: $movie_file format: type: CypherCsv headers: true query: |- WITH $that AS row WITH row WHERE row.Entity = "Join" AND row.Work = "Acting" MATCH (p), (m), (r) WHERE id(p) = idFrom("Person", row.tmdbId) AND id(m) = idFrom("Movie", row.movieId) 
AND id(r) = idFrom("Role", row.tmdbId, row.movieId, row.role) SET r.role = row.role, r.movie = row.movieId, r.tmdbId = row.tmdbId, r:Role CREATE (p:Person)-[:PLAYED]->(r:Role)<-[:HAS_ROLE]-(m:Movie) CREATE (p:Person)-[:ACTED_IN]->(m:Movie) # INGEST-4 - type: FileIngest path: $movie_file format: type: CypherCsv headers: true query: |- WITH $that AS row WITH row WHERE row.Entity = "Join" AND row.Work = "Directing" MATCH (p), (m) WHERE id(p) = idFrom("Person", row.tmdbId) AND id(m) = idFrom("Movie", row.movieId) CREATE (p:Person)-[:DIRECTED]->(m:Movie) # INGEST-5 - type: FileIngest path: $rating_file format: type: CypherCsv headers: true query: |- WITH $that AS row MATCH (m), (u), (rtg) WHERE id(m) = idFrom("Movie", row.movieId) AND id(u) = idFrom("User", row.userId) AND id(rtg) = idFrom("Rating", row.movieId, row.userId, row.rating) SET u.name = row.name, u:User SET rtg.rating = row.rating, rtg.timestamp = toInteger(row.timestamp), rtg:Rating CREATE (u:User)-[:SUBMITTED]->(rtg:Rating)<-[:HAS_RATING]-(m:Movie) CREATE (u:User)-[:RATED]->(m:Movie) # Standing Queries define how data is transformed and output. standingQueries: - pattern: type: Cypher mode: MultipleValues query: |- MATCH (a:Movie)<-[:ACTED_IN]-(p:Person)-[:DIRECTED]->(m:Movie) WHERE id(a) = id(m) RETURN id(m) as movieId, id(p) as personId outputs: set-ActedDirected: type: CypherQuery query: |- MATCH (m),(p) WHERE id(m) = $that.data.movieId AND id(p) = $that.data.personId WITH * CREATE (p:Person)-[:ActedDirected]->(m:Movie) RETURN id(m) as movieId, m.title as Movie, id(p) as personId, p.name as Actor andThen: type: WriteToFile path: "ActorDirector.jsonl" # Customize node appearance in web UI. 
nodeAppearances: - predicate: dbLabel: Movie propertyKeys: [] knownValues: {} icon: ion-android-film label: key: title type: Property - predicate: dbLabel: Person propertyKeys: [] knownValues: {} icon: ion-android-person color: "#ffd700" label: key: name type: Property - predicate: dbLabel: Role propertyKeys: [] knownValues: {} icon: ion-android-microphone color: "#7CFC00" label: key: role type: Property - predicate: dbLabel: User propertyKeys: [] knownValues: {} icon: ion-android-chat color: "#7e7e7e" label: key: name type: Property - predicate: dbLabel: Genre propertyKeys: [] knownValues: {} icon: ion-android-menu color: "#00FFFF" label: key: genre type: Property - predicate: dbLabel: Rating propertyKeys: [] knownValues: {} icon: ion-android-star color: "#9932CC" label: key: rating type: Property # Add queries to node context menus in web UI quickQueries: [] # Customize sample queries listed in web UI sampleQueries: - name: Sample of Nodes query: MATCH (n) RETURN n LIMIT 10 - name: Count Nodes query: MATCH (n) RETURN DISTINCT labels(n), count(*) - name: Count Relationships query: MATCH (n)-[r]->() RETURN type(r), count(*) - name: Movie Genres query: MATCH (g:Genre) RETURN g - name: Person Acted In a movie query: MATCH (p:Person)-[:ACTED_IN]->(m:Movie) RETURN * - name: Person Directed a movie query: MATCH (p:Person)-[:DIRECTED]-(m:Movie) RETURN * - name: Person Acted In and Directed a movie query: MATCH (p:Person)-[:ActedDirected]->(m:Movie) RETURN * - name: User Rated a movie query: MATCH (u:User)-[:RATED]-(m:Movie) RETURN * ================================================ FILE: quine/recipes/pi.yaml ================================================ version: 1 title: Pi contributor: https://github.com/emanb29 summary: Incrementally approximates pi using Leibniz' formula description: |- Incrementally approximates pi using Leibniz' formula -- the arctangent function is incrementally (corecursively) computed along :improved_by edges, and each arctangent approximation 
is quadrupled to yield an approximation of pi. ingestStreams: [] standingQueries: - pattern: type: Cypher query: MATCH (n:arctan) WHERE n.approximation IS NOT NULL AND n.denominator IS NOT NULL RETURN DISTINCT id(n) AS id outputs: # iterate over arctan iterate: type: CypherQuery query: |- MATCH (n) WHERE id(n) = $that.data.id WITH n, -sign(n.denominator)*(abs(n.denominator)+2) as nextDenom WITH n, nextDenom, n.approximation+(1/nextDenom) as nextApprox MATCH (next) WHERE id(next) = idFrom(nextDenom) SET next:arctan, next.denominator = nextDenom, next.approximation=nextApprox CREATE (n)-[:improved_by]->(next) # map arctan to piApprox piApprox: type: CypherQuery query: |- MATCH (arctan) WHERE id(arctan) = $that.data.id WITH arctan, arctan.denominator AS denominator, arctan.approximation*4 AS approximatedPi MATCH (approximation) WHERE id(approximation) = idFrom('approximation', denominator) SET approximation:piApproximation, approximation.approximatedPi = approximatedPi CREATE (arctan)-[:approximates]->(approximation) RETURN approximatedPi andThen: type: WriteToFile path: $out_file nodeAppearances: - predicate: propertyKeys: [] knownValues: {} dbLabel: piApproximation icon: π size: 40 color: "#f1c232" label: type: Property key: approximatedPi prefix: - predicate: propertyKeys: [] knownValues: {} dbLabel: arctan icon: ⦛ size: 20 color: "#000000" label: type: Constant value: 𝚊𝚛𝚌𝚝𝚊𝚗 sampleQueries: - name: "[No Output] Run this query to begin processing." 
query: WITH 1 AS initialDenominator MATCH (n) WHERE id(n) = idFrom(1) SET n.denominator = toFloat(1), n.approximation = toFloat(1), n:arctan - name: "[Node] Get Best Approximation (so far)" query: CALL recentNodes(15) YIELD node AS nId MATCH (n) WHERE id(n) = nId AND n.approximatedPi IS NOT NULL RETURN n LIMIT 1 - name: "[Text] Get Best Approximation (so far)" query: CALL recentNodes(15) YIELD node AS nId MATCH (n) WHERE id(n) = nId AND n.approximatedPi IS NOT NULL RETURN n.approximatedPi LIMIT 1 - name: "[Text] Repeatedly Get Best Approximation (so far)" query: UNWIND range(0, 1000) AS x UNWIND range(0, 1000) AS y CALL util.sleep(1000) CALL cypher.doIt(" CALL recentNodes(15) YIELD node AS nId MATCH (n) WHERE id(n) = nId AND n.approximatedPi IS NOT NULL RETURN n.approximatedPi AS approximatedPi LIMIT 1 ") YIELD value RETURN value.approximatedPi AS approximatedPi, abs(pi() - value.approximatedPi) AS error quickQueries: [ ] ================================================ FILE: quine/recipes/ping.yaml ================================================ version: 1 title: Ping contributor: https://github.com/landon9720 summary: Ingest input file lines and echo to output file description: |- Ingests each line in "$in_file" as a graph node with property "line". Writes all graph nodes to "$out_file".
ingestStreams: - type: FileIngest path: $in_file format: type: CypherLine query: |- MATCH (n) WHERE id(n) = idFrom($that) SET n.line = $that standingQueries: - pattern: type: Cypher query: MATCH (n) RETURN DISTINCT id(n) AS id outputs: output-1: type: CypherQuery query: MATCH (n) WHERE id(n) = $that.data.id RETURN properties(n) AS record andThen: type: WriteToFile path: $out_file nodeAppearances: [ ] quickQueries: [ ] sampleQueries: [ ] statusQuery: cypherQuery: MATCH (n) RETURN count(n) ================================================ FILE: quine/recipes/pipe.yaml ================================================ version: 1 title: Pipe contributor: https://github.com/landon9720 summary: Ingest from Standard Input and write to Standard Output description: |- Ingests each line from Standard Input as a graph node with property "line". All graph nodes are written to Standard Output. ingestStreams: - type: StandardInputIngest format: type: CypherLine query: |- MATCH (n) WHERE id(n) = idFrom($that) SET n.line = $that standingQueries: - pattern: type: Cypher query: |- MATCH (n) RETURN DISTINCT id(n) AS id outputs: output-1: type: CypherQuery query: |- MATCH (n) WHERE id(n) = $that.data.id RETURN n.line AS line andThen: type: PrintToStandardOut nodeAppearances: [ ] quickQueries: [ ] sampleQueries: [ ] ================================================ FILE: quine/recipes/planetside-2.yaml ================================================ version: 1 title: Planetside 2 contributor: https://github.com/emanb29 summary: Models real-time player kill data from Planetside 2 and supplements the killfeed graph with detailed information about the player characters and the weapons used. description: |- Ingests the websockets killfeed from Daybreak Games' MMOFPS "PlanetSide 2", invoking the loadJsonLines procedure to lazily fill out unknown static data.
Replace all instances of `s:example` with a service-id acquired from http://census.daybreakgames.com/#service-id ingestStreams: - type: WebsocketSimpleStartupIngest url: wss://push.planetside2.com/streaming?environment=ps2&service-id=s:example initMessages: # A couple notes: character names are not reused across servers, so we can subscribe to all servers ("worlds") and not worry about namespacing character names # Characters can be *renamed*, but this is rare because it costs the player $25 - |- { "service":"event", "action":"subscribe", "worlds": ["all"], "characters":["all"], "eventNames":["Death"] } format: type: CypherJson query: |- WITH * WHERE $that.type = 'serviceMessage' CREATE (m:murder) // these are never replayed, so no reason to idFrom SET m = COALESCE($that.payload, {}) WITH id(m) as mId MATCH (murder) WHERE id(murder) = mId MATCH (victim) WHERE id(victim) = idFrom('character', murder.character_id) MATCH (attacker) WHERE id(attacker) = idFrom('character', murder.attacker_character_id) MATCH (weapon) WHERE id(weapon) = idFrom('weapon', murder.attacker_weapon_id) SET weapon.uninitialized = weapon.weapon_id IS NULL // flag the weapon for initialization if applicable SET victim:character, attacker:character, weapon:weapon, victim.character_id = murder.character_id, attacker.character_id = murder.attacker_character_id, weapon.weapon_id = murder.attacker_weapon_id CREATE (victim)<-[:victim]-(murder)-[:attacker]->(attacker), (murder)-[:weapon]->(weapon) // characters contain mutable data, eg certs. 
We'll add the timestamp to give us something to hook for refreshing data WITH murder, victim, attacker UNWIND [victim, attacker] AS character SET character.last_update = murder.timestamp standingQueries: # Populate character data - pattern: type: Cypher # match each new character-label node query: MATCH (newCharacter:character) WHERE newCharacter.character_id IS NOT NULL RETURN DISTINCT id(newCharacter) AS id outputs: populate-fresh-character: type: CypherQuery query: |- MATCH (c) WHERE id(c) = $that.data.id CALL loadJsonLines("https://census.daybreakgames.com/s:example/get/ps2:v2/character/?character_id="+c.character_id) YIELD value SET c += COALESCE(value.character_list[0], {}) // there should always be a "character_list" with exactly 1 value: the character we queried # Populate weapon data - pattern: type: Cypher query: MATCH (weapon:weapon) WHERE weapon.uninitialized = true AND weapon.weapon_id IS NOT NULL RETURN DISTINCT id(weapon) AS id outputs: populate-weapon: type: CypherQuery query: |- MATCH (weapon) WHERE id(weapon) = $that.data.id CALL loadJsonLines("https://census.daybreakgames.com/s:example/get/ps2:v2/item?item_id="+weapon.weapon_id+"&c:join=weapon_datasheet") YIELD value SET weapon += COALESCE(value.item_list[0], {}) // there should always be a "item_list" with exactly 1 value: the weapon we queried REMOVE weapon.uninitialized # Future Standing Query idea: monitor for "trades" (ie, when two players kill each other simultaneously) nodeAppearances: - predicate: propertyKeys: [] dbLabel: character knownValues: {} icon: ion-android-person - predicate: propertyKeys: [] dbLabel: murder knownValues: {} icon: "\u2694\uFE0F" - predicate: propertyKeys: [] dbLabel: weapon knownValues: {} icon: "\uD83D\uDD2B" - predicate: propertyKeys: [] dbLabels: [] knownValues: {} quickQueries: [] sampleQueries: [] ================================================ FILE: quine/recipes/quine-logs-recipe.yaml ================================================ version: 1 title: 
Quine Log Reader contributor: https://github.com/maglietti summary: "Ingest Quine Log Lines" description: |- "This recipe processes Quine log lines using a regular expression. To produce DEBUG-level log lines, pass `-Dthatdot.loglevel=DEBUG` to java at runtime." ingestStreams: - type: FileIngest path: $in_file format: type: CypherLine query: |- // Quine log pattern "%date %level [%mdc{pekkoSource:-NotFromActor}] [%thread] %logger - %msg%n%ex" WITH text.regexFirstMatch($that, "(^\\d{4}-\\d{2}-\\d{2} \\d{1,2}:\\d{2}:\\d{2},\\d{3}) (FATAL|ERROR|WARN|INFO|DEBUG) \\[(\\S*)\\] \\[(\\S*)\\] (\\S*) - (.*)") AS r WHERE r IS NOT NULL // 0: whole matched line // 1: date time string // 2: log level // 3: actor address. Might be inside of `org.apache.pekko.stream.Log(...)` // 4: thread name // 5: logging class // 6: Message WITH r, split(r[3], "/") as path, split(r[6], "(") as msgPts WITH r, path, msgPts, replace(COALESCE(split(path[2], "@")[-1], 'No host'),")","") as qh MATCH (actor), (msg), (class), (host) WHERE id(host) = idFrom("host", qh) AND id(actor) = idFrom("actor", r[3]) AND id(msg) = idFrom("msg", r[0]) AND id(class) = idFrom("class", r[5]) SET host.address = split(qh, ":")[0], host.port = split(qh, ":")[-1], host.host = qh, host: Host SET actor.address = r[3], actor.id = replace(path[-1],")",""), actor.shard = path[-2], actor.type = path[-3], actor: Actor SET msg.msg = r[6], msg.path = path[0], msg.type = split(msgPts[0], " ")[0], msg.level = r[2], msg: Message SET class.class = r[5], class: Class WITH * CALL reify.time(datetime({date: localdatetime(r[1], "yyyy-MM-dd HH:mm:ss,SSS")})) YIELD node AS time CREATE (host)<-[:ON_HOST]-(actor)-[:SENT]->(msg), (actor)-[:OF_CLASS]->(class), (msg)-[:AT_TIME]->(time) standingQueries: [] nodeAppearances: - predicate: propertyKeys: [] knownValues: {} dbLabel: Actor label: type: Property key: id prefix: "Actor: " - predicate: propertyKeys: [] knownValues: {} dbLabel: Message label: type: Property key: type prefix: "Message: " - predicate: propertyKeys: []
knownValues: {} dbLabel: Host label: type: Property key: host prefix: "Host: " - predicate: propertyKeys: [] knownValues: {} dbLabel: Class label: type: Property key: class prefix: "Class: " quickQueries: - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Adjacent Nodes querySuffix: MATCH (n)--(m) RETURN DISTINCT m queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Refresh querySuffix: RETURN n queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Local Properties querySuffix: RETURN id(n), properties(n) queryLanguage: Cypher sort: Text - predicate: propertyKeys: [] knownValues: {} dbLabel: Actor quickQuery: name: Associated Host querySuffix: MATCH (n)-[:ON_HOST]->(host) RETURN DISTINCT host queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} dbLabel: Actor quickQuery: name: One Associated Message querySuffix: MATCH (n)-[:SENT]->(msg) RETURN DISTINCT msg LIMIT 1 queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} dbLabel: Actor quickQuery: name: Associated Class querySuffix: MATCH (n)-[:OF_CLASS]->(class) RETURN DISTINCT class LIMIT 1 queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} dbLabel: Message quickQuery: name: Associated Actor querySuffix: MATCH (actor)-[:SENT]->(n) RETURN DISTINCT actor LIMIT 1 queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} dbLabel: Message quickQuery: name: Associated Host querySuffix: MATCH (host)<-[:ON_HOST]-(actor)-[:SENT]->(n) RETURN DISTINCT host LIMIT 1 queryLanguage: Cypher sort: Node edgeLabel: ON_HOST sampleQueries: - name: Last 10 Nodes query: CALL recentNodes(10) - name: Get Actors query: "MATCH (a: Actor) RETURN a" statusQuery: ================================================ FILE: quine/recipes/sq-test.yaml ================================================ version: 1 title: Standing Query Test Recipe contributor: 
https://github.com/rrwright summary: Create a mathematically defined graph and count the number of Standing Query results. description: iconImage: ingestStreams: - format: query: |- WITH gen.node.from(toInteger($that)) AS n, toInteger($that) AS i MATCH (thisNode), (nextNode), (divNode) WHERE id(thisNode) = id(n) AND id(nextNode) = idFrom(i + 1) AND id(divNode) = idFrom(i / 10) SET thisNode.id = i, thisNode.prop = gen.string.from(i) CREATE (thisNode)-[:next]->(nextNode), (thisNode)-[:div_by_ten]->(divNode) type: CypherLine type: NumberIteratorIngest ingestLimit: 100000 standingQueries: - pattern: query: |- MATCH (a)-[:div_by_ten]->(b)-[:div_by_ten]->(c) WHERE c.prop IS NOT NULL RETURN DISTINCT id(c) as id type: Cypher outputs: count-1000-results: type: Drop #inspect-results: # type: CypherQuery # query: |- # MATCH (a)-[:div_by_ten]->(b)-[:div_by_ten]->(c) # WHERE id(c) = $that.data.id # RETURN a.id, a.prop, b.id, b.prop, c.id, c.prop # andThen: # type: PrintToStandardOut nodeAppearances: - predicate: propertyKeys: - id knownValues: {} label: prefix: '' key: id type: Property quickQueries: - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Adjacent Nodes querySuffix: MATCH (n)--(m) RETURN DISTINCT m queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Refresh querySuffix: RETURN n queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Local Properties querySuffix: RETURN id(n), properties(n) queryLanguage: Cypher sort: Text - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Multiply by 10 querySuffix: MATCH (n)<-[:div_by_ten]-(m) RETURN m queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Divide by 10 querySuffix: MATCH (n)-[:div_by_ten]->(m) RETURN m queryLanguage: Cypher sort: Node - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Subscriber Results querySuffix: CALL subscribers(n) yield queryId, 
queryDepth, receiverId, lastResult RETURN queryId, queryDepth, receiverId, lastResult queryLanguage: Cypher sort: Text - predicate: propertyKeys: [] knownValues: {} quickQuery: name: Subscription Results querySuffix: CALL subscriptions(n) yield queryId, queryDepth, receiverId, lastResult RETURN queryId, queryDepth, receiverId, lastResult queryLanguage: Cypher sort: Text sampleQueries: [] ================================================ FILE: quine/recipes/template-recipe.yaml ================================================ # Recipe schema version (currently only supported value is 1; 🎉) version: 1 # Identifies the Recipe but is not necessarily unique or immutable title: Template Recipe # URL to social profile of the person or organization responsible for this Recipe contributor: https://github.com/example-user # Brief copy about this Recipe summary: This is a valid Recipe that ingests and writes back a file # Longer form copy about this Recipe description: |- This Recipe description has multiple lines. # Ingest Streams define how data is processed and transformed ingestStreams: - type: FileIngest path: $in_file format: type: CypherLine query: |- MATCH (n) WHERE id(n) = idFrom($that) SET n.line = $that # Standing Queries define how data is transformed and output. standingQueries: - pattern: type: Cypher query: MATCH (n) RETURN DISTINCT id(n) AS id outputs: output-1: type: CypherQuery query: |- MATCH (n) WHERE id(n) = $that.data.id RETURN n.line andThen: type: WriteToFile path: $out_file # Customize node appearance in web UI. 
nodeAppearances: [ ] # Add queries to node context menus in web UI quickQueries: [ ] # Customize sample queries listed in web UI sampleQueries: - name: Nodes query: MATCH(n) RETURN n LIMIT 10 - name: Count Nodes query: MATCH (n) RETURN count(n) # Optional Cypher query to be executed and reported to the Recipe user statusQuery: cypherQuery: MATCH (n) RETURN n LIMIT 10 ================================================ FILE: quine/recipes/webhook.yaml ================================================ version: 1 title: Data Enrichment with Webhooks contributor: https://github.com/mastapegs summary: Stream numbers into graph and notify HTTP endpoint to enrich graph description: |- This recipe will stream numbers into the graph and stream them out to an HTTP endpoint, which will then calculate the factors of those numbers, and create relationships between the numbers and their factors. ingestStreams: - type: NumberIteratorIngest startAtOffset: 1 ingestLimit: 13 format: type: CypherLine query: |- WITH toInteger($that) AS number MATCH (n) WHERE id(n) = idFrom("Number", number) SET n:Number, n.number = number standingQueries: - pattern: type: Cypher mode: DistinctId query: |- MATCH (n:Number) WHERE n.number IS NOT NULL RETURN DISTINCT id(n) AS id outputs: log-to-console: type: CypherQuery query: |- MATCH (n:Number) WHERE id(n) = $that.data.id RETURN n.number AS number, $that.data.id AS id andThen: type: PrintToStandardOut post-to-webhook: type: CypherQuery query: |- MATCH (n:Number) WHERE id(n) = $that.data.id RETURN n.number AS number, $that.data.id AS id andThen: type: PostToEndpoint url: http://127.0.0.1:3000/webhook nodeAppearances: - predicate: propertyKeys: [] knownValues: {} dbLabel: Number label: type: Property key: number prefix: "Number: " quickQueries: [] sampleQueries: - name: Return all Number nodes query: MATCH (n:Number) RETURN n statusQuery: null ================================================ FILE: quine/recipes/wikipedia-non-bot-revisions.yaml 
================================================ version: 1 title: Wikipedia non-bot page update event stream contributor: https://github.com/thatdot summary: Stream page-update events that were not created by bots description: |- This recipe will separate human-generated events from bot-generated events in the English Wikipedia database page-update event stream and store them for additional processing. API Reference: https://stream.wikimedia.org/?doc#/streams/get_v2_stream_mediawiki_revision_create ingestStreams: - type: ServerSentEventsIngest url: https://stream.wikimedia.org/v2/stream/mediawiki.revision-create format: type: CypherJson parameter: that query: |- MATCH (revNode),(pageNode),(dbNode),(userNode),(parentNode) WHERE id(revNode) = idFrom('revision', $that.rev_id) AND id(pageNode) = idFrom('page', $that.page_id) AND id(dbNode) = idFrom('db', $that.database) AND id(userNode) = idFrom('id', $that.performer.user_id) AND id(parentNode) = idFrom('revision', $that.rev_parent_id) SET revNode = $that, revNode.bot = $that.performer.user_is_bot, revNode:revision SET parentNode.rev_id = $that.rev_parent_id SET pageNode.id = $that.page_id, pageNode.namespace = $that.page_namespace, pageNode.title = $that.page_title, pageNode.comment = $that.comment, pageNode.is_redirect = $that.page_is_redirect, pageNode:page SET dbNode.database = $that.database, dbNode:db SET userNode = $that.performer, userNode.name = $that.performer.user_text, userNode:user CREATE (revNode)-[:TO]->(pageNode), (pageNode)-[:IN]->(dbNode), (userNode)-[:RESPONSIBLE_FOR]->(revNode), (parentNode)-[:NEXT]->(revNode) standingQueries: - pattern: query: |- MATCH (userNode:user {user_is_bot: false})-[:RESPONSIBLE_FOR]->(revNode:revision {database: 'enwiki'}) RETURN DISTINCT id(revNode) as id type: Cypher outputs: print-output: type: CypherQuery query: |- MATCH (n) WHERE id(n) = $that.data.id RETURN properties(n) andThen: type: PrintToStandardOut nodeAppearances: [ ] quickQueries: [ ] sampleQueries: [ ]
statusQuery: null ================================================ FILE: quine/recipes/wikipedia.yaml ================================================ version: 1 title: Ingest Wikipedia Page Create stream contributor: https://github.com/landon9720 summary: Consume events about new Wikipedia pages to build a time series reified graph description: |- Wikipedia page creation events are instantiated in the graph with relationships to a reified time model. Additionally, page creation event comments are echoed to standard output. Data source documentation: https://stream.wikimedia.org/?doc#/streams/get_v2_stream_page_create ingestStreams: - type: ServerSentEventsIngest url: https://stream.wikimedia.org/v2/stream/page-create format: type: CypherJson query: |- MATCH (revNode), (dbNode), (userNode) WHERE id(revNode) = idFrom("revision", $that.rev_id) AND id(dbNode) = idFrom("db", $that.database) AND id(userNode) = idFrom("id", $that.performer.user_id) // Set labels for nodes // CALL create.setLabels(revNode, ["rev:" + $that.page_title]) CALL create.setLabels(dbNode, ["db:" + $that.database]) CALL create.setLabels(userNode, ["user:" + $that.performer.user_text]) // Create timeNode node to provide day/hour/minute bucketing and counting of revNodes // CALL reify.time(datetime($that.rev_timestamp), ["year", "month", "day", "hour", "minute", "second"]) YIELD node AS timeNode CALL incrementCounter(timeNode, "count", 1) YIELD count AS timeNodeCount // Set properties for nodes // SET revNode = $that, revNode.type = "rev" SET dbNode.database = $that.database, dbNode.type = "db" SET userNode = $that.performer, userNode.type = "user" // Create edges between nodes // CREATE (revNode)-[:DB]->(dbNode), (revNode)-[:BY]->(userNode), (revNode)-[:AT]->(timeNode) standingQueries: - pattern: type: Cypher query: |- MATCH (n) WHERE n.comment IS NOT NULL RETURN DISTINCT id(n) AS id outputs: output-1: type: CypherQuery query: |- MATCH (n) WHERE id(n) = $that.data.id RETURN n.comment AS line 
andThen: type: PrintToStandardOut nodeAppearances: [] quickQueries: [] sampleQueries: - name: Show time nodes query: > MATCH (n) WHERE n.period IS NOT NULL RETURN n - name: Show revision nodes query: > MATCH (n) WHERE n.type = "rev" RETURN n - name: Show database nodes query: > MATCH (n) WHERE n.type = "db" RETURN n - name: Show user nodes query: > MATCH (n) WHERE n.type = "user" RETURN n ================================================ FILE: quine/src/main/resources/ionicons.tsv ================================================ ion-alert  ion-alert-circled  ion-android-add  ion-android-add-circle  ion-android-alarm-clock  ion-android-alert  ion-android-apps  ion-android-archive  ion-android-arrow-back  ion-android-arrow-down  ion-android-arrow-dropdown  ion-android-arrow-dropdown-circle  ion-android-arrow-dropleft  ion-android-arrow-dropleft-circle  ion-android-arrow-dropright  ion-android-arrow-dropright-circle  ion-android-arrow-dropup  ion-android-arrow-dropup-circle  ion-android-arrow-forward  ion-android-arrow-up  ion-android-attach  ion-android-bar  ion-android-bicycle  ion-android-boat  ion-android-bookmark  ion-android-bulb  ion-android-bus  ion-android-calendar  ion-android-call  ion-android-camera  ion-android-cancel  ion-android-car  ion-android-cart  ion-android-chat  ion-android-checkbox  ion-android-checkbox-blank  ion-android-checkbox-outline  ion-android-checkbox-outline-blank  ion-android-checkmark-circle  ion-android-clipboard  ion-android-close  ion-android-cloud  ion-android-cloud-circle  ion-android-cloud-done  ion-android-cloud-outline  ion-android-color-palette  ion-android-compass  ion-android-contact  ion-android-contacts  ion-android-contract  ion-android-create  ion-android-delete  ion-android-desktop  ion-android-document  ion-android-done  ion-android-done-all  ion-android-download  ion-android-drafts  ion-android-exit  ion-android-expand  ion-android-favorite  
ion-android-favorite-outline  ion-android-film  ion-android-folder  ion-android-folder-open  ion-android-funnel  ion-android-globe  ion-android-hand  ion-android-hangout  ion-android-happy  ion-android-home  ion-android-image  ion-android-laptop  ion-android-list  ion-android-locate  ion-android-lock  ion-android-mail  ion-android-map  ion-android-menu  ion-android-microphone  ion-android-microphone-off  ion-android-more-horizontal  ion-android-more-vertical  ion-android-navigate  ion-android-notifications  ion-android-notifications-none  ion-android-notifications-off  ion-android-open  ion-android-options  ion-android-people  ion-android-person  ion-android-person-add  ion-android-phone-landscape  ion-android-phone-portrait  ion-android-pin  ion-android-plane  ion-android-playstore  ion-android-print  ion-android-radio-button-off  ion-android-radio-button-on  ion-android-refresh  ion-android-remove  ion-android-remove-circle  ion-android-restaurant  ion-android-sad  ion-android-search  ion-android-send  ion-android-settings  ion-android-share  ion-android-share-alt  ion-android-star  ion-android-star-half  ion-android-star-outline  ion-android-stopwatch  ion-android-subway  ion-android-sunny  ion-android-sync  ion-android-textsms  ion-android-time  ion-android-train  ion-android-unlock  ion-android-upload  ion-android-volume-down  ion-android-volume-mute  ion-android-volume-off  ion-android-volume-up  ion-android-walk  ion-android-warning  ion-android-watch  ion-android-wifi  ion-aperture  ion-archive  ion-arrow-down-a  ion-arrow-down-b  ion-arrow-down-c  ion-arrow-expand  ion-arrow-graph-down-left  ion-arrow-graph-down-right  ion-arrow-graph-up-left  ion-arrow-graph-up-right  ion-arrow-left-a  ion-arrow-left-b  ion-arrow-left-c  ion-arrow-move  ion-arrow-resize  ion-arrow-return-left  ion-arrow-return-right  ion-arrow-right-a  ion-arrow-right-b  ion-arrow-right-c  
ion-arrow-shrink  ion-arrow-swap  ion-arrow-up-a  ion-arrow-up-b  ion-arrow-up-c  ion-asterisk  ion-at  ion-backspace  ion-backspace-outline  ion-bag  ion-battery-charging  ion-battery-empty  ion-battery-full  ion-battery-half  ion-battery-low  ion-beaker  ion-beer  ion-bluetooth  ion-bonfire  ion-bookmark  ion-bowtie  ion-briefcase  ion-bug  ion-calculator  ion-calendar  ion-camera  ion-card  ion-cash  ion-chatbox  ion-chatbox-working  ion-chatboxes  ion-chatbubble  ion-chatbubble-working  ion-chatbubbles  ion-checkmark  ion-checkmark-circled  ion-checkmark-round  ion-chevron-down  ion-chevron-left  ion-chevron-right  ion-chevron-up  ion-clipboard  ion-clock  ion-close  ion-close-circled  ion-close-round  ion-closed-captioning  ion-cloud  ion-code  ion-code-download  ion-code-working  ion-coffee  ion-compass  ion-compose  ion-connection-bars  ion-contrast  ion-crop  ion-cube  ion-disc  ion-document  ion-document-text  ion-drag  ion-earth  ion-easel  ion-edit  ion-egg  ion-eject  ion-email  ion-email-unread  ion-erlenmeyer-flask  ion-erlenmeyer-flask-bubbles  ion-eye  ion-eye-disabled  ion-female  ion-filing  ion-film-marker  ion-fireball  ion-flag  ion-flame  ion-flash  ion-flash-off  ion-folder  ion-fork  ion-fork-repo  ion-forward  ion-funnel  ion-gear-a  ion-gear-b  ion-grid  ion-hammer  ion-happy  ion-happy-outline  ion-headphone  ion-heart  ion-heart-broken  ion-help  ion-help-buoy  ion-help-circled  ion-home  ion-icecream  ion-image  ion-images  ion-information  ion-information-circled  ion-ionic  ion-ios-alarm  ion-ios-alarm-outline  ion-ios-albums  ion-ios-albums-outline  ion-ios-americanfootball  ion-ios-americanfootball-outline  ion-ios-analytics  ion-ios-analytics-outline  ion-ios-arrow-back  ion-ios-arrow-down  ion-ios-arrow-forward  ion-ios-arrow-left  ion-ios-arrow-right  ion-ios-arrow-thin-down  ion-ios-arrow-thin-left  
ion-ios-arrow-thin-right  ion-ios-arrow-thin-up  ion-ios-arrow-up  ion-ios-at  ion-ios-at-outline  ion-ios-barcode  ion-ios-barcode-outline  ion-ios-baseball  ion-ios-baseball-outline  ion-ios-basketball  ion-ios-basketball-outline  ion-ios-bell  ion-ios-bell-outline  ion-ios-body  ion-ios-body-outline  ion-ios-bolt  ion-ios-bolt-outline  ion-ios-book  ion-ios-book-outline  ion-ios-bookmarks  ion-ios-bookmarks-outline  ion-ios-box  ion-ios-box-outline  ion-ios-briefcase  ion-ios-briefcase-outline  ion-ios-browsers  ion-ios-browsers-outline  ion-ios-calculator  ion-ios-calculator-outline  ion-ios-calendar  ion-ios-calendar-outline  ion-ios-camera  ion-ios-camera-outline  ion-ios-cart  ion-ios-cart-outline  ion-ios-chatboxes  ion-ios-chatboxes-outline  ion-ios-chatbubble  ion-ios-chatbubble-outline  ion-ios-checkmark  ion-ios-checkmark-empty  ion-ios-checkmark-outline  ion-ios-circle-filled  ion-ios-circle-outline  ion-ios-clock  ion-ios-clock-outline  ion-ios-close  ion-ios-close-empty  ion-ios-close-outline  ion-ios-cloud  ion-ios-cloud-download  ion-ios-cloud-download-outline  ion-ios-cloud-outline  ion-ios-cloud-upload  ion-ios-cloud-upload-outline  ion-ios-cloudy  ion-ios-cloudy-night  ion-ios-cloudy-night-outline  ion-ios-cloudy-outline  ion-ios-cog  ion-ios-cog-outline  ion-ios-color-filter  ion-ios-color-filter-outline  ion-ios-color-wand  ion-ios-color-wand-outline  ion-ios-compose  ion-ios-compose-outline  ion-ios-contact  ion-ios-contact-outline  ion-ios-copy  ion-ios-copy-outline  ion-ios-crop  ion-ios-crop-strong  ion-ios-download  ion-ios-download-outline  ion-ios-drag  ion-ios-email  ion-ios-email-outline  ion-ios-eye  ion-ios-eye-outline  ion-ios-fastforward  ion-ios-fastforward-outline  ion-ios-filing  ion-ios-filing-outline  ion-ios-film  ion-ios-film-outline  ion-ios-flag  ion-ios-flag-outline  ion-ios-flame  ion-ios-flame-outline  ion-ios-flask  
ion-ios-flask-outline  ion-ios-flower  ion-ios-flower-outline  ion-ios-folder  ion-ios-folder-outline  ion-ios-football  ion-ios-football-outline  ion-ios-game-controller-a  ion-ios-game-controller-a-outline  ion-ios-game-controller-b  ion-ios-game-controller-b-outline  ion-ios-gear  ion-ios-gear-outline  ion-ios-glasses  ion-ios-glasses-outline  ion-ios-grid-view  ion-ios-grid-view-outline  ion-ios-heart  ion-ios-heart-outline  ion-ios-help  ion-ios-help-empty  ion-ios-help-outline  ion-ios-home  ion-ios-home-outline  ion-ios-infinite  ion-ios-infinite-outline  ion-ios-information  ion-ios-information-empty  ion-ios-information-outline  ion-ios-ionic-outline  ion-ios-keypad  ion-ios-keypad-outline  ion-ios-lightbulb  ion-ios-lightbulb-outline  ion-ios-list  ion-ios-list-outline  ion-ios-location  ion-ios-location-outline  ion-ios-locked  ion-ios-locked-outline  ion-ios-loop  ion-ios-loop-strong  ion-ios-medical  ion-ios-medical-outline  ion-ios-medkit  ion-ios-medkit-outline  ion-ios-mic  ion-ios-mic-off  ion-ios-mic-outline  ion-ios-minus  ion-ios-minus-empty  ion-ios-minus-outline  ion-ios-monitor  ion-ios-monitor-outline  ion-ios-moon  ion-ios-moon-outline  ion-ios-more  ion-ios-more-outline  ion-ios-musical-note  ion-ios-musical-notes  ion-ios-navigate  ion-ios-navigate-outline  ion-ios-nutrition  ion-ios-nutrition-outline  ion-ios-paper  ion-ios-paper-outline  ion-ios-paperplane  ion-ios-paperplane-outline  ion-ios-partlysunny  ion-ios-partlysunny-outline  ion-ios-pause  ion-ios-pause-outline  ion-ios-paw  ion-ios-paw-outline  ion-ios-people  ion-ios-people-outline  ion-ios-person  ion-ios-person-outline  ion-ios-personadd  ion-ios-personadd-outline  ion-ios-photos  ion-ios-photos-outline  ion-ios-pie  ion-ios-pie-outline  ion-ios-pint  ion-ios-pint-outline  ion-ios-play  ion-ios-play-outline  ion-ios-plus  ion-ios-plus-empty  ion-ios-plus-outline  ion-ios-pricetag  
ion-ios-pricetag-outline  ion-ios-pricetags  ion-ios-pricetags-outline  ion-ios-printer  ion-ios-printer-outline  ion-ios-pulse  ion-ios-pulse-strong  ion-ios-rainy  ion-ios-rainy-outline  ion-ios-recording  ion-ios-recording-outline  ion-ios-redo  ion-ios-redo-outline  ion-ios-refresh  ion-ios-refresh-empty  ion-ios-refresh-outline  ion-ios-reload  ion-ios-reverse-camera  ion-ios-reverse-camera-outline  ion-ios-rewind  ion-ios-rewind-outline  ion-ios-rose  ion-ios-rose-outline  ion-ios-search  ion-ios-search-strong  ion-ios-settings  ion-ios-settings-strong  ion-ios-shuffle  ion-ios-shuffle-strong  ion-ios-skipbackward  ion-ios-skipbackward-outline  ion-ios-skipforward  ion-ios-skipforward-outline  ion-ios-snowy  ion-ios-speedometer  ion-ios-speedometer-outline  ion-ios-star  ion-ios-star-half  ion-ios-star-outline  ion-ios-stopwatch  ion-ios-stopwatch-outline  ion-ios-sunny  ion-ios-sunny-outline  ion-ios-telephone  ion-ios-telephone-outline  ion-ios-tennisball  ion-ios-tennisball-outline  ion-ios-thunderstorm  ion-ios-thunderstorm-outline  ion-ios-time  ion-ios-time-outline  ion-ios-timer  ion-ios-timer-outline  ion-ios-toggle  ion-ios-toggle-outline  ion-ios-trash  ion-ios-trash-outline  ion-ios-undo  ion-ios-undo-outline  ion-ios-unlocked  ion-ios-unlocked-outline  ion-ios-upload  ion-ios-upload-outline  ion-ios-videocam  ion-ios-videocam-outline  ion-ios-volume-high  ion-ios-volume-low  ion-ios-wineglass  ion-ios-wineglass-outline  ion-ios-world  ion-ios-world-outline  ion-ipad  ion-iphone  ion-ipod  ion-jet  ion-key  ion-knife  ion-laptop  ion-leaf  ion-levels  ion-lightbulb  ion-link  ion-load-a  ion-load-b  ion-load-c  ion-load-d  ion-location  ion-lock-combination  ion-locked  ion-log-in  ion-log-out  ion-loop  ion-magnet  ion-male  ion-man  ion-map  ion-medkit  ion-merge  ion-mic-a  ion-mic-b  ion-mic-c  ion-minus  ion-minus-circled  ion-minus-round  
ion-model-s  ion-monitor  ion-more  ion-mouse  ion-music-note  ion-navicon  ion-navicon-round  ion-navigate  ion-network  ion-no-smoking  ion-nuclear  ion-outlet  ion-paintbrush  ion-paintbucket  ion-paper-airplane  ion-paperclip  ion-pause  ion-person  ion-person-add  ion-person-stalker  ion-pie-graph  ion-pin  ion-pinpoint  ion-pizza  ion-plane  ion-planet  ion-play  ion-playstation  ion-plus  ion-plus-circled  ion-plus-round  ion-podium  ion-pound  ion-power  ion-pricetag  ion-pricetags  ion-printer  ion-pull-request  ion-qr-scanner  ion-quote  ion-radio-waves  ion-record  ion-refresh  ion-reply  ion-reply-all  ion-ribbon-a  ion-ribbon-b  ion-sad  ion-sad-outline  ion-scissors  ion-search  ion-settings  ion-share  ion-shuffle  ion-skip-backward  ion-skip-forward  ion-social-android  ion-social-android-outline  ion-social-angular  ion-social-angular-outline  ion-social-apple  ion-social-apple-outline  ion-social-bitcoin  ion-social-bitcoin-outline  ion-social-buffer  ion-social-buffer-outline  ion-social-chrome  ion-social-chrome-outline  ion-social-codepen  ion-social-codepen-outline  ion-social-css3  ion-social-css3-outline  ion-social-designernews  ion-social-designernews-outline  ion-social-dribbble  ion-social-dribbble-outline  ion-social-dropbox  ion-social-dropbox-outline  ion-social-euro  ion-social-euro-outline  ion-social-facebook  ion-social-facebook-outline  ion-social-foursquare  ion-social-foursquare-outline  ion-social-freebsd-devil  ion-social-github  ion-social-github-outline  ion-social-google  ion-social-google-outline  ion-social-googleplus  ion-social-googleplus-outline  ion-social-hackernews  ion-social-hackernews-outline  ion-social-html5  ion-social-html5-outline  ion-social-instagram  ion-social-instagram-outline  ion-social-javascript  ion-social-javascript-outline  ion-social-linkedin  ion-social-linkedin-outline  ion-social-markdown  
ion-social-nodejs  ion-social-octocat  ion-social-pinterest  ion-social-pinterest-outline  ion-social-python  ion-social-reddit  ion-social-reddit-outline  ion-social-rss  ion-social-rss-outline  ion-social-sass  ion-social-skype  ion-social-skype-outline  ion-social-snapchat  ion-social-snapchat-outline  ion-social-tumblr  ion-social-tumblr-outline  ion-social-tux  ion-social-twitch  ion-social-twitch-outline  ion-social-twitter  ion-social-twitter-outline  ion-social-usd  ion-social-usd-outline  ion-social-vimeo  ion-social-vimeo-outline  ion-social-whatsapp  ion-social-whatsapp-outline  ion-social-windows  ion-social-windows-outline  ion-social-wordpress  ion-social-wordpress-outline  ion-social-yahoo  ion-social-yahoo-outline  ion-social-yen  ion-social-yen-outline  ion-social-youtube  ion-social-youtube-outline  ion-soup-can  ion-soup-can-outline  ion-speakerphone  ion-speedometer  ion-spoon  ion-star  ion-stats-bars  ion-steam  ion-stop  ion-thermometer  ion-thumbsdown  ion-thumbsup  ion-toggle  ion-toggle-filled  ion-transgender  ion-trash-a  ion-trash-b  ion-trophy  ion-tshirt  ion-tshirt-outline  ion-umbrella  ion-university  ion-unlocked  ion-upload  ion-usb  ion-videocamera  ion-volume-high  ion-volume-low  ion-volume-medium  ion-volume-mute  ion-wand  ion-waterdrop  ion-wifi  ion-wineglass  ion-woman  ion-wrench  ion-xbox  ================================================ FILE: quine/src/main/resources/reference.conf ================================================ include classpath("quine-pekko-overrides") pekko { // This timeout controls the browsers timeout when waiting for API responses to return. The current value is arbitrary. http.server.request-timeout = 300 seconds // This timeout is used by the stream reading data for the S3 Ingest. 
http.client.stream-cancellation-delay = 10 seconds // SSE (Server-Sent Events) configuration for larger event payloads // These defaults can be overridden by user configuration http.sse { // Maximum size of a single SSE line (default in Pekko is 4096 bytes) max-line-size = 5242880 // 5 MB // Maximum size of a single SSE event (default in Pekko is 8192 bytes) // Must be larger than max-line-size max-event-size = 5242881 // 5 MB + 1 byte } coordinated-shutdown.exit-jvm = true } datastax-java-driver { advanced { connection { // NoNodeAvailableException is thrown when this is exceeded. // For more info, see: // https://community.datastax.com/questions/5204/approaches-to-accommodating-the-1024-connection-li.html max-requests-per-connection = 50000 } metrics { session.enabled = [cql-requests, bytes-sent, bytes-received] node.enabled = [pool.available-streams, pool.in-flight] } } } ================================================ FILE: quine/src/main/resources/web/browserconfig.xml ================================================ #da532c ================================================ FILE: quine/src/main/resources/web/quine-ui-startup.js ================================================ // Given some value meant to represent time, return either integer milliseconds or undefined function parseMillis(atTime) { if (atTime === undefined || atTime === null) return undefined; // Input is a string number var isPositiveNumberString = typeof (atTime) === "string" && atTime.match(/^\d+$/); if (isPositiveNumberString) return Number.parseInt(atTime); // Try to parse a date var dateStringMillis = Date.parse(atTime); if (!isNaN(dateStringMillis)) return dateStringMillis; return undefined; } var network = undefined; var urlParams = new URLSearchParams(window.location.search); // Template variable - replaced by backend with config value // WARNING: Do NOT change the 'true' literal below! 
The backend searches for the exact string // "/*{{DEFAULT_V2_API}}*/true" and replaces it with the config value (true or false). // See: BaseAppRoutes.scala:50 - content.replace("/*{{DEFAULT_V2_API}}*/true", defaultV2Api.toString) var defaultQueriesOverV2Api = /*{{DEFAULT_V2_API}}*/true; var apiPaths = ["dashboard", "v2docs", "docs"]; function deriveProxySafeBaseURI() { return apiPaths.reduce((incrementalDerivationString, terminalPath) => { var regexA = new RegExp(`${terminalPath}$`); var regexB = new RegExp(`${terminalPath}\/$`); return incrementalDerivationString.replace(regexA,"").replace(regexB,""); }, window.location.pathname); }; var derivedBaseURI = deriveProxySafeBaseURI(); window.onload = function() { quineBrowser.quineAppMount(document.getElementById("root"), { initialQuery: decodeURIComponent(window.location.hash.replace(/^#/, "")), isQueryBarVisible: urlParams.get("interactive") != "false", layout: urlParams.get("layout") || "graph", queriesOverWs: urlParams.get("wsQueries") != "false", queriesOverV2Api: urlParams.get("v2Api") !== null ? urlParams.get("v2Api") != "false" : defaultQueriesOverV2Api, queryHistoricalTime: parseMillis(urlParams.get("atTime")), onNetworkCreate: function(n) { network = n; }, documentationUrl: "docs/openapi.json?relative=true", documentationV2Url: "api/v2/openapi.json", baseURI: derivedBaseURI, serverUrl: derivedBaseURI.replace(/\/$/, ""), }); }; ================================================ FILE: quine/src/main/resources/web/quine-ui.html ================================================ Quine
================================================ FILE: quine/src/main/resources/web/site.webmanifest ================================================ { "name": "", "short_name": "", "icons": [ { "src": "/android-chrome-192x192.png", "sizes": "192x192", "type": "image/png" }, { "src": "/android-chrome-512x512.png", "sizes": "512x512", "type": "image/png" } ], "theme_color": "#ffffff", "background_color": "#ffffff", "display": "standalone" } ================================================ FILE: quine/src/main/scala/com/thatdot/quine/app/BaseApp.scala ================================================ package com.thatdot.quine.app import java.nio.charset.StandardCharsets.UTF_8 import scala.concurrent.{ExecutionContext, Future} import scala.util.{Failure, Try} import org.apache.pekko.stream.Materializer import cats.data.Validated.invalidNel import cats.data.ValidatedNel import endpoints4s.{Invalid, Valid, Validated} import io.circe.{Encoder, jawn} import com.thatdot.quine.app.QuineApp.{V2IngestStreamsKey, makeNamespaceMetaDataKey} import com.thatdot.quine.exceptions import com.thatdot.quine.exceptions.NamespaceNotFoundException import com.thatdot.quine.graph.{BaseGraph, MemberIdx, NamespaceId} import com.thatdot.quine.serialization.EncoderDecoder import com.thatdot.quine.util.BaseError /** Applications running over top of Quine should define an application state that extends this. * Then, individual settings can be stored here (for easy persistence, reset, etc). Under the hood, * this will take advantage of the persistor APIs for meta-data * * @param graph reference to the underlying graph */ abstract class BaseApp(graph: BaseGraph) { val defaultExecutionContext: ExecutionContext = graph.nodeDispatcherEC implicit val materializer: Materializer = graph.materializer /** Store a key-value pair that is relevant only for one particular app instance (i.e. 
"local") * * @note the value is serialized as the UTF-8 bytes of its JSON representation * @param key name of the setting * @param value setting value */ final protected def storeLocalMetaData[A: EncoderDecoder]( key: String, localMemberId: MemberIdx, value: A, ): Future[Unit] = graph.namespacePersistor.setLocalMetaData(key, localMemberId, Some(encodeMetaData(value))) /** Store a key-value pair that is relevant for the entire graph * * @note the value is serialized as the UTF-8 bytes of its JSON representation * @param key name of the setting * @param value setting value */ final protected def storeGlobalMetaData[A: EncoderDecoder](key: String, value: A): Future[Unit] = graph.namespacePersistor.setMetaData(key, Some(encodeMetaData(value))) final protected def deleteGlobalMetaData(key: String): Future[Unit] = graph.namespacePersistor.setMetaData(key, None) /** Serialize a value intended to be stored as metadata * * @param value the value to be serialized as the UTF-8 bytes of its JSON representation * @param schema an endpoints4s ujson schema derived to provide the string codec * @tparam A The type of the value to be encoded * @return The encoded value as a byte array */ final protected def encodeMetaData[A](value: A)(implicit encoderDecoder: EncoderDecoder[A]): Array[Byte] = encoderDecoder.encoder(value).noSpaces.getBytes(UTF_8) final protected def encodeMetaData[A](value: A, encoder: Encoder[A]): Array[Byte] = encoder(value).noSpaces.getBytes(UTF_8) /** Retrieve a value associated with a key which was stored for the local app * * @note the value is serialized as the UTF-8 bytes of its JSON representation * @param key name of the setting * @return the value, if found */ final protected def getLocalMetaData[A](key: String, localMemberId: MemberIdx)(implicit encoderDecoder: EncoderDecoder[A], ): Future[Option[A]] = graph.namespacePersistor .getLocalMetaData(key, localMemberId) .map { _.flatMap { jsonBytes => 
Some(validateMetaData(decodeMetaData(jsonBytes)(encoderDecoder))) // throws to fail the future } }(graph.system.dispatcher) /** Retrieve a value associated with a key which was stored for the entire graph * * @note the value is serialized as the UTF-8 bytes of its JSON representation * @param key name of the setting * @return the value, if found */ final protected def getGlobalMetaData[A](key: String)(implicit encoderDecoder: EncoderDecoder[A]): Future[Option[A]] = graph.namespacePersistor .getMetaData(key) .map { _.flatMap { jsonBytes => Some(validateMetaData(decodeMetaData(jsonBytes)(encoderDecoder))) // throws to fail the future } }(graph.system.dispatcher) /** Deserialize a value that was stored as metadata * * @param jsonBytes the serialized value, as the UTF-8 bytes of its JSON representation, to be deserialized * @param encoderDecoder the codec whose decoder is used to parse the JSON * @tparam A The type of the value to be decoded * @return The decoded value if the bytes parse as an `A`, otherwise the decoding error(s) */ final protected def decodeMetaData[A](jsonBytes: Array[Byte])(implicit encoderDecoder: EncoderDecoder[A], ): Validated[A] = Validated.fromEither(jawn.decodeByteArray(jsonBytes)(encoderDecoder.decoder).left.map(err => Seq(err.toString))) //Codec.sequentially(BaseApp.utf8Codec)(schema.stringCodec).decode(jsonBytes) /** A convenience method for unwrapping the decoded (validated) deserialized value. Throws an exception if invalid. 
* * @param decoded the deserialized metadata value; likely returned from `decodeMetaData` * @tparam A the type for which the bytes are being deserialized * @throws MetaDataDeserializationException if the bytes failed to be deserialized as the intended type * @return the deserialized value */ @throws[MetaDataDeserializationException] final def validateMetaData[A](decoded: Validated[A]): A = decoded match { case Valid(value) => value case Invalid(errs) => throw new MetaDataDeserializationException(errs.mkString("\n")) } /** Retrieve a value associated with a key stored for this local app, but write and return a default value * if the key is not already defined for the local app * * @note the value is serialized as the UTF-8 bytes of its JSON representation * @param key name of the setting * @param defaultValue default setting value * @return the (possibly updated) value */ final protected def getOrDefaultLocalMetaData[A: EncoderDecoder]( key: String, localMemberId: MemberIdx, defaultValue: => A, ): Future[A] = getLocalMetaData[A](key, localMemberId).flatMap { case Some(value) => Future.successful(value) case None => val defaulted = defaultValue storeLocalMetaData(key, localMemberId, defaulted).map(_ => defaulted)(graph.system.dispatcher) }(graph.system.dispatcher) protected def saveV2IngestsToPersistor[IngestWithStatusType: EncoderDecoder]( namespace: NamespaceId, thisMemberIdx: Int, ingests: Map[String, IngestWithStatusType], key: String = V2IngestStreamsKey, ): Future[Unit] = storeLocalMetaData[Map[String, IngestWithStatusType]]( makeNamespaceMetaDataKey(namespace, key), thisMemberIdx, ingests, )(EncoderDecoder.ofMap) protected def loadV2IngestsFromPersistor[IngestWithStatusType: EncoderDecoder]( thisMemberIdx: Int, key: String = V2IngestStreamsKey, )(implicit ex: ExecutionContext): Future[Map[NamespaceId, Map[String, IngestWithStatusType]]] = Future .sequence( getNamespaces.map(ns => getOrDefaultLocalMetaData[Map[String, IngestWithStatusType]]( makeNamespaceMetaDataKey(ns, key), thisMemberIdx, 
Map.empty[String, IngestWithStatusType], )(EncoderDecoder.ofMap).map(v => ns -> v), ), ) .map(_.toMap) /** Retrieve a value associated with a key stored for this local app, but write and return in a default value * if the key is not already defined for the local app. Upon encountering an unrecognized value, will attempt * to decode as type B and convert to type A. Used for backwards-compatible migrations. * * @note NOT threadsafe. Should be used in synchronized contexts * @note the value is serialized as the UTF-8 bytes of its JSON representation * @param key name of the setting * @param defaultValue default setting value * @param recovery a function converting a value from the fallback schema to the desired schema * @return the (possibly updated) value */ final protected def getOrDefaultLocalMetaDataWithFallback[A: EncoderDecoder, B: EncoderDecoder]( key: String, localMemberId: MemberIdx, defaultValue: => A, recovery: B => A, ): Future[A] = getLocalMetaData[A](key, localMemberId) .flatMap { case Some(value) => Future.successful(value) case None => val defaulted = defaultValue storeLocalMetaData(key, localMemberId, defaulted).map(_ => defaulted)(graph.system.dispatcher) }(graph.system.dispatcher) .recoverWith { case _: MetaDataDeserializationException => getLocalMetaData[B](key, localMemberId).flatMap { case Some(value) => Future.successful(recovery(value)) case None => val defaulted = defaultValue storeLocalMetaData(key, localMemberId, defaulted).map(_ => defaulted)(graph.system.dispatcher) }(graph.system.dispatcher) }(graph.nodeDispatcherEC) (graph.system.dispatcher) /** Retrieve a value associated with a key stored for the entire graph as a * whole, but write and return in a default value if the key is not already * defined. 
* * @note the value is serialized as the UTF-8 bytes of its JSON representation * @param key name of the setting * @param defaultValue default setting value * @return the (possibly updated) value */ final protected def getOrDefaultGlobalMetaData[A: EncoderDecoder](key: String, defaultValue: => A): Future[A] = getGlobalMetaData[A](key).flatMap { case Some(value) => Future.successful(value) case None => val defaulted = defaultValue storeGlobalMetaData(key, defaulted).map(_ => defaulted)(graph.system.dispatcher) }(graph.system.dispatcher) /** Instantiate a new namespace to store nodes separately. * @param namespace the name of the new namespace to be created * @param shouldWriteToPersistor True for all individual runtime operations. False during startup while rehydrating. * @return Future status according to persistence. Boolean indicates whether a change was made. */ def createNamespace(namespace: NamespaceId, shouldWriteToPersistor: Boolean = true): Future[Boolean] = Future.failed(new UnsupportedOperationException(s"Namespace management is not supported.")) /** Delete an existing namespace and all the data in it. * @param namespace the name of the namespace to be deleted * @return Future status according to persistence. Boolean indicates whether a change was made. */ def deleteNamespace(namespace: NamespaceId): Future[Boolean] = Future.failed(new UnsupportedOperationException(s"Namespace management is not supported.")) /** Reads the local cache of available namespaces. 
*/ def getNamespaces: collection.Set[NamespaceId] = graph.getNamespaces def onlyIfNamespaceExists[A](namespace: NamespaceId)(f: => Future[A]): Future[A] = if (getNamespaces.contains(namespace)) f else Future.failed(NamespaceNotFoundException(namespace)) def noneIfNoNamespace[A](namespace: NamespaceId)(f: => Option[A]): Option[A] = if (getNamespaces.contains(namespace)) f else None def failIfNoNamespace[A](namespace: NamespaceId)(f: => Try[A]): Try[A] = if (getNamespaces.contains(namespace)) f else Failure(exceptions.NamespaceNotFoundException(namespace)) def invalidIfNoNamespace[A](namespace: NamespaceId)(f: => ValidatedNel[BaseError, A]): ValidatedNel[BaseError, A] = if (getNamespaces.contains(namespace)) f else invalidNel(exceptions.NamespaceNotFoundException(namespace)) /** Validate that all persisted namespace names conform to the canonical rules * (1-16 lowercase alphanumeric characters starting with a letter). If any name is * non-conforming, log the offending names and shut down. This prevents startup with * namespace data that was created under older, looser validation rules. */ protected def validateNamespaceNames(names: Iterable[String]): Unit = BaseApp.findNonConformingNamespaces(names) match { case Nil => () case nonConforming => throw new IllegalStateException( s"Cannot start: namespace(s) ${nonConforming.mkString("'", "', '", "'")} do not match the required format " + "(1-16 characters, must start with a letter, lowercase letters and digits only). " + "Rename the storage artifacts before starting.", ) } } object BaseApp { /** Identify namespace names that don't conform to canonical rules. 
* * A conforming name must: * - Be 1-16 characters * - Start with a letter * - Contain only lowercase letters and digits * - Already be lowercased (no uppercase characters) * * @return the list of non-conforming names, empty if all are valid */ def findNonConformingNamespaces(names: Iterable[String]): List[String] = { import com.thatdot.quine.routes.exts.NamespaceParameter names.filter { s => val normalized = s.toLowerCase !NamespaceParameter.isValidNamespaceParameter(normalized) || s != normalized }.toList } } class MetaDataDeserializationException(msg: String) extends RuntimeException(msg) ================================================ FILE: quine/src/main/scala/com/thatdot/quine/app/CmdArgs.scala ================================================ package com.thatdot.quine.app import scopt.OEffect._ import scopt.OParser /** Data model for Quine command line program arguments. * * @param disableWebservice indicates if the web service should not be started * @param port indicates what port the web service should be started on * @param recipe specifies a recipe (by URL or file path) to be loaded and executed * @param recipeValues specifies recipe parameter substitution values * @param printVersion indicates the program should print the current version and exit * @param forceConfig indicates the typelevel configuration should be read and used verbatim, * without overrides that normally occur to persistence configuration * @param deleteDataFile indicates whether the local database file should be deleted when the program exits * (defaults to true; set to false by `--no-delete`) */ final case class CmdArgs( disableWebservice: Boolean = false, port: Option[Int] = None, recipe: Option[String] = None, recipeValues: Map[String, String] = Map.empty[String, String], printVersion: Boolean = false, forceConfig: Boolean = false, deleteDataFile: Boolean = true, ) object CmdArgs { /** Uses scopt library to parse command line arguments to the CmdArgs data model. 
*/ def apply(args: Array[String]): Either[String, CmdArgs] = { val builder = OParser.builder[CmdArgs] val parser = { import builder._ OParser.sequence( programName("quine"), head("Quine universal program"), opt[Unit]('W', "disable-web-service") .action((_, c) => c.copy(disableWebservice = true)) .text("disable Quine web service"), opt[Int]('p', "port") .action((port, c) => c.copy(port = Some(port))) .text("web service port (default is 8080)"), opt[String]('r', "recipe") .action((url, c) => c.copy(recipe = Some(url))) .valueName("name, file, or URL") .text("follow the specified recipe"), opt[Map[String, String]]('x', "recipe-value") .unbounded() .action((x, c) => c.copy(recipeValues = c.recipeValues ++ x)) .text("recipe parameter substitution") .valueName("key=value"), opt[Unit]("force-config") .action((_, c) => c.copy(forceConfig = true)) .text("disable recipe configuration defaults"), opt[Unit]("no-delete") .action((_, c) => c.copy(deleteDataFile = false)) .text("disable deleting data file when process exits"), help('h', "help"), opt[Unit]('v', "version") .action((_, c) => c.copy(printVersion = true)) .text("print Quine program version"), checkConfig { c => if (c.forceConfig && !c.deleteDataFile) { failure("use only one: --force-config, or --no-delete") } else if (c.disableWebservice && c.port.isDefined) { failure("use only one: --disable-web-service, or --port") } else { Right(()) } }, ) } OParser.runParser(parser, args, CmdArgs()) match { case (_, effects) if effects.nonEmpty => Left { effects collect { case DisplayToOut(msg: String) => msg case DisplayToErr(msg: String) => msg case ReportError(msg: String) => s"Error: $msg" case ReportWarning(msg: String) => s"Warning: $msg" } mkString "\n" } case (Some(config), _) => Right(config) case _ => Left("Error: unknown") // TODO } } } ================================================ FILE: quine/src/main/scala/com/thatdot/quine/app/ImproveQuine.scala ================================================ package 
com.thatdot.quine.app import java.net.NetworkInterface import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import java.security.MessageDigest import java.time.Instant import java.time.format.DateTimeFormatter import java.time.temporal.ChronoUnit import java.util.Base64.Encoder import java.util.{Base64, UUID} import scala.concurrent.duration._ import scala.concurrent.{ExecutionContext, Future} import scala.jdk.CollectionConverters._ import scala.util.{Success, Try} import org.apache.pekko.actor.{ActorSystem, Scheduler} import org.apache.pekko.http.scaladsl.Http import org.apache.pekko.http.scaladsl.model.{HttpEntity, HttpMethods, HttpRequest, Uri} import org.apache.pekko.pattern.retry import io.circe.generic.semiauto.deriveEncoder import io.circe.syntax._ import io.circe.{Encoder => CirceEncoder} import com.thatdot.common.logging.Log.{LazySafeLogging, LogConfig, Safe, SafeLoggableInterpolator, StrictSafeLogging} import com.thatdot.quine.app.routes.IngestStreamWithControl import com.thatdot.quine.routes.{IngestStreamConfiguration, RegisteredStandingQuery, StandingQueryResultOutputUserDef} /** Schedules and sends reportable activity telemetry. Performs core value derivations * and serves as a helper for the same purpose via controllers in clustered settings. 
*/ class ImproveQuine( service: String, version: String, persistorSlug: String, getSources: () => Future[Option[List[String]]], getSinks: () => Future[Option[List[String]]], recipe: Option[Recipe] = None, recipeCanonicalName: Option[String] = None, apiKey: () => Option[String] = () => None, )(implicit system: ActorSystem, logConfig: LogConfig) extends LazySafeLogging { import ImproveQuine._ /** Since MessageDigest is not thread-safe, each function should create an instance for its own use */ private def hasherInstance(): MessageDigest = MessageDigest.getInstance("SHA-256") private val base64: Encoder = Base64.getEncoder private def recipeContentsHashV1(recipe: RecipeV1): Array[Byte] = { val sha256: MessageDigest = hasherInstance() // Since this is not mission-critical, letting the JVM object hash function do the heavy lifting sha256.update(recipe.ingestStreams.hashCode().toByte) sha256.update(recipe.standingQueries.hashCode().toByte) sha256.update(recipe.nodeAppearances.hashCode().toByte) sha256.update(recipe.quickQueries.hashCode().toByte) sha256.update(recipe.sampleQueries.hashCode().toByte) sha256.update(recipe.statusQuery.hashCode().toByte) sha256.digest() } private def recipeContentsHashV2(recipe: RecipeV2.Recipe): Array[Byte] = { val sha256: MessageDigest = hasherInstance() sha256.update(recipe.ingestStreams.hashCode().toByte) sha256.update(recipe.standingQueries.hashCode().toByte) sha256.update(recipe.nodeAppearances.hashCode().toByte) sha256.update(recipe.quickQueries.hashCode().toByte) sha256.update(recipe.sampleQueries.hashCode().toByte) sha256.update(recipe.statusQuery.hashCode().toByte) sha256.digest() } private val recipeUsed: Boolean = recipe.isDefined private val recipeInfo: Option[RecipeInfo] = recipe.map { case Recipe.V1(r) => val sha256: MessageDigest = hasherInstance() RecipeInfo( base64.encodeToString(sha256.digest(r.title.getBytes(StandardCharsets.UTF_8))), base64.encodeToString(recipeContentsHashV1(r)), ) case Recipe.V2(r) => val sha256: 
MessageDigest = hasherInstance() RecipeInfo( base64.encodeToString(sha256.digest(r.title.getBytes(StandardCharsets.UTF_8))), base64.encodeToString(recipeContentsHashV2(r)), ) } private val invalidMacAddresses: Set[ByteBuffer] = Set( Array.fill[Byte](6)(0x00), Array.fill[Byte](6)(0xFF.toByte), ).map(ByteBuffer.wrap) private def hostMac(): Array[Byte] = NetworkInterface.getNetworkInterfaces.asScala .filter(_.getHardwareAddress != null) .map(nic => ByteBuffer.wrap(nic.getHardwareAddress)) .filter(address => !invalidMacAddresses.contains(address)) .toVector .sorted .headOption .getOrElse(ByteBuffer.wrap(Array.emptyByteArray)) .array() private val prefixBytes: Array[Byte] = "Quine_".getBytes(StandardCharsets.UTF_8) private def hostHash(): String = Try { val sha256: MessageDigest = hasherInstance() val mac = hostMac() // Salt the input to prevent a SHA256 of a MAC address from matching another system using a SHA256 of a MAC // address for extra anonymity. val prefixedBytes = Array.concat(prefixBytes, mac) val hash = sha256.digest(prefixedBytes) base64.encodeToString(hash) }.getOrElse("host_unavailable") protected val sessionId: UUID = UUID.randomUUID() protected val startTime: Instant = Instant.now() protected def send( event: Event, sources: Option[List[String]], sinks: Option[List[String]], sessionStartedAt: Instant = startTime, sessionIdentifier: UUID = sessionId, )(implicit system: ActorSystem, logConfig: LogConfig): Future[Unit] = TelemetryRequest( event = event, service = service, version = version, hostHash = hostHash(), sessionId = sessionIdentifier, uptime = sessionStartedAt.until(Instant.now(), ChronoUnit.SECONDS), persistor = persistorSlug, sources = sources, sinks = sinks, recipeUsed = recipeUsed, recipeCanonicalName = recipeCanonicalName, recipeInfo = recipeInfo, apiKey = apiKey(), // Call the function to get the current value ).run() def startup( sources: Option[List[String]], sinks: Option[List[String]], sessionStartedAt: Instant = startTime, 
sessionIdentifier: UUID = sessionId, ): Future[Unit] = send(InstanceStarted, sources, sinks, sessionStartedAt, sessionIdentifier) def heartbeat( sources: Option[List[String]], sinks: Option[List[String]], sessionStartedAt: Instant = startTime, sessionIdentifier: UUID = sessionId, ): Future[Unit] = send(InstanceHeartbeat, sources, sinks, sessionStartedAt, sessionIdentifier) /** A runnable for use in an actor system schedule that fires-and-forgets the heartbeat Future */ private val heartbeatRunnable: Runnable = () => { implicit val ec: ExecutionContext = ExecutionContext.parasitic val _ = for { sources <- getSources() sinks <- getSinks() _ <- heartbeat(sources, sinks) } yield () } /** Fire and forget function to send startup telemetry and schedule regular heartbeat events. */ def startTelemetry(): Unit = { logger.info(safe"Starting usage telemetry") implicit val ec: ExecutionContext = ExecutionContext.parasitic for { sources <- getSources() sinks <- getSinks() _ <- startup(sources, sinks) } yield () // Schedule run-up "instance.heartbeat" events runUpIntervals.foreach(system.scheduler.scheduleOnce(_, heartbeatRunnable)) // Schedule regular "instance.heartbeat" events system.scheduler.scheduleAtFixedRate(regularHeartbeatInterval, regularHeartbeatInterval)(heartbeatRunnable) // Intentionally discard the cancellables for the scheduled heartbeats. // In future these could be retained if desired. 
/** Constants, event types, wire formats and helpers used for anonymous usage telemetry. */
object ImproveQuine {

  /** Delays (measured from startup) at which one-off "run-up" heartbeats are scheduled */
  val runUpIntervals: List[FiniteDuration] = List(
    15.minutes,
    1.hours,
    3.hours,
    6.hours,
    12.hours,
  )

  /** Initial delay and period of the recurring heartbeat */
  val regularHeartbeatInterval: FiniteDuration = 1.day

  /** Type for the category of a telemetry event
    *
    * @param slug the string identifying the event in the serialized payload
    */
  sealed abstract class Event(val slug: String)

  /** Telemetry event when the application first starts */
  private case object InstanceStarted extends Event("instance.started")

  /** Telemetry event sent during a regular interval */
  private case object InstanceHeartbeat extends Event("instance.heartbeat")

  /** Hashed recipe identification included in the telemetry payload.
    * Field names are snake_case because they are serialized as-is by the derived encoder.
    */
  private[app] case class RecipeInfo(recipe_name_hash: String, recipe_contents_hash: String)
  private[app] object RecipeInfo {
    implicit val encoder: CirceEncoder[RecipeInfo] = deriveEncoder
  }

  /** JSON body of a telemetry event. Field names are the serialized key names (derived encoder). */
  private[app] case class TelemetryData(
    event: String,
    service: String,
    version: String,
    host_hash: String,
    time: String,
    session_id: String,
    uptime: Long,
    persistor: String,
    sources: Option[List[String]],
    sinks: Option[List[String]],
    recipe: Boolean,
    recipe_canonical_name: Option[String],
    recipe_info: Option[RecipeInfo],
    apiKey: Option[String],
  )
  private[app] object TelemetryData {
    implicit val encoder: CirceEncoder[TelemetryData] = deriveEncoder
  }

  /** Endpoint that telemetry events are POSTed to */
  private val eventUri: Uri = Uri("https://improve.quine.io/event")

  /** A single telemetry event, ready to be serialized and sent with [[run]]. */
  private case class TelemetryRequest(
    event: Event,
    service: String,
    version: String,
    hostHash: String,
    sessionId: UUID,
    uptime: Long,
    persistor: String,
    sources: Option[List[String]],
    sinks: Option[List[String]],
    recipeUsed: Boolean,
    recipeCanonicalName: Option[String],
    recipeInfo: Option[RecipeInfo],
    apiKey: Option[String],
  )(implicit system: ActorSystem)
      extends StrictSafeLogging {
    implicit private val executionContext: ExecutionContext = system.dispatcher
    implicit private val scheduler: Scheduler = system.scheduler

    /** Serialize this event and POST it to [[eventUri]], retrying on failure.
      *
      * @return a Future that always completes successfully once the attempts are finished;
      *         failures are deliberately swallowed because telemetry is best-effort
      */
    def run()(implicit logConfig: LogConfig): Future[Unit] = {
      val now = java.time.OffsetDateTime.now().format(DateTimeFormatter.ISO_OFFSET_DATE_TIME)

      val telemetryData = TelemetryData(
        event.slug,
        service,
        version,
        hostHash,
        now,
        sessionId.toString,
        uptime,
        persistor,
        sources,
        sinks,
        recipeUsed,
        recipeCanonicalName,
        recipeInfo,
        apiKey,
      )

      val body = telemetryData.asJson.noSpaces

      val send = () =>
        Http()
          .singleRequest(
            HttpRequest(
              method = HttpMethods.POST,
              uri = eventUri,
              entity = HttpEntity(body),
            ),
          )
          .map { response =>
            // The response body is not needed, but Pekko HTTP requires every response entity to be
            // consumed or discarded; otherwise the pooled connection stays back-pressured.
            val _ = response.discardEntityBytes()
            ()
          }

      logger.info(log"Sending anonymous usage data: ${Safe(body)}")
      retry(send, 3, 5.seconds)
        .transform(_ => Success(()))
    }
  }

  /** The distinct slugs of the settings of the given ingest streams.
    *
    * @param ingestStreams ingest streams keyed by name (the keys are not used)
    * @return each distinct slug once; order is whatever the intermediate `Set` yields
    */
  def sourcesFromIngestStreams(
    ingestStreams: Map[String, IngestStreamWithControl[IngestStreamConfiguration]],
  ): List[String] =
    ingestStreams.values
      .map(_.settings.slug)
      .toSet
      .toList

  /** Flatten a chain of Cypher outputs linked through `andThen` into a list, outermost first.
    * Any non-Cypher output is returned as a single-element list.
    */
  private def unrollCypherOutput(output: StandingQueryResultOutputUserDef): List[StandingQueryResultOutputUserDef] =
    output match {
      case cypherOutput: StandingQueryResultOutputUserDef.CypherQuery =>
        cypherOutput.andThen match {
          case None => List(cypherOutput)
          case Some(nextOutput) => cypherOutput :: unrollCypherOutput(nextOutput)
        }
      case otherOutput => List(otherOutput)
    }

  /** The distinct slugs of every output of the given standing queries, with chained Cypher outputs
    * unrolled (see [[unrollCypherOutput]]), in first-seen order.
    */
  def sinksFromStandingQueries(standingQueries: List[RegisteredStandingQuery]): List[String] =
    standingQueries
      .flatMap(_.outputs.values)
      .flatMap(unrollCypherOutput)
      .map(_.slug)
      .distinct

}
/** Entry point of the Quine application.
  *
  * The body runs top-to-bottom as the `App` initializer, so statement order matters:
  * command line -> recipe -> config -> graph -> file access policy -> application state ->
  * migrations -> persisted app data -> recipe interpreter -> web server(s) -> shutdown hooks.
  * Any fatal startup problem is reported and terminates the process via `sys.exit`.
  */
object Main extends App with LazySafeLogging {

  private val statusLines =
    new StatusLines(
      // This name comes from quine's logging.conf
      SafeLogger("thatdot.Interactive"),
      System.err,
    )

  /** Configuration for error message formatting */
  private val configErrorFormatterConfig = ErrorFormatterConfig(
    expectedRootKey = "quine",
    productName = "Quine",
    requiredFields = Set.empty,
    docsUrl = "https://quine.io/docs/",
  )

  // Warn if character encoding is unexpected
  if (Charset.defaultCharset != StandardCharsets.UTF_8) {
    statusLines.warn(
      log"System character encoding is ${Safe(Charset.defaultCharset)} - did you mean to specify -Dfile.encoding=UTF-8?",
    )
  }

  // Parse command line arguments.
  // On any failure, print messages and terminate process.
  val cmdArgs: CmdArgs = CmdArgs(args) match {
    case Right(cmdArgs) if cmdArgs.printVersion =>
      Console.err.println(s"Quine universal program version ${BuildInfo.version}")
      sys.exit(0)
    case Right(cmdArgs) => cmdArgs
    case Left(message) =>
      Console.err.println(message)
      sys.exit(1)
  }

  // If there's a recipe URL or file path, block and read it, apply substitutions, and fail fast.
  // Uses RecipeLoader to support both V1 and V2 recipes.
  val recipe: Option[Recipe] = cmdArgs.recipe.map { (recipeIdentifyingString: String) =>
    RecipeLoader.getAndSubstituteAny(recipeIdentifyingString, cmdArgs.recipeValues) valueOr { messages =>
      messages foreach Console.err.println
      sys.exit(1)
    }
  }

  // Extract V1 recipe for backward compatibility (QuineApp, file paths, etc.)
  val recipeV1: Option[RecipeV1] = recipe.collect { case Recipe.V1(r) => r }

  // Parse config for Quine and apply command line overrides.
  val config: QuineConfig = {
    // Regular HOCON loading of options (from java properties and `conf` files)
    val withoutOverrides = ConfigSource.default.load[QuineConfig] valueOr { failures =>
      Console.err.println(ErrorFormatterConfig.formatErrors(configErrorFormatterConfig, failures))
      sys.exit(1)
    }

    // Override webserver options
    import QuineConfig.{webserverEnabledLens, webserverPortLens}
    val withPortOverride = cmdArgs.port.fold(withoutOverrides)(webserverPortLens.set(withoutOverrides))
    // The command line can only turn the web service OFF. Without the flag, whatever the loaded
    // configuration says about `enabled` is honoured rather than being forced on.
    val withWebserverOverrides =
      if (cmdArgs.disableWebservice) webserverEnabledLens.set(withPortOverride)(false)
      else withPortOverride

    // Recipe overrides (unless --force-config command line flag is used)
    // Apply temp data file for both V1 and V2 recipes
    if (recipe.isDefined && !cmdArgs.forceConfig) {
      val tempDataFile: File = File.createTempFile("quine-", ".db")
      tempDataFile.delete()
      if (cmdArgs.deleteDataFile) {
        tempDataFile.deleteOnExit()
      } else {
        // Only print the data file name when NOT DELETING the temporary file
        statusLines.info(log"Using data path ${Safe(tempDataFile.getAbsolutePath)}")
      }
      withWebserverOverrides.copy(
        store = PersistenceAgentType.RocksDb(
          filepath = Some(tempDataFile),
        ),
      )
    } else withWebserverOverrides
  }
  implicit protected def logConfig: LogConfig = config.logConfig

  // Optionally print a message on startup
  if (BuildInfo.startupMessage.nonEmpty) {
    statusLines.warn(log"${Safe(BuildInfo.startupMessage)}")
  }

  logger.info {
    val maxHeapSize = sys.runtime.maxMemory match {
      case Long.MaxValue => "no max heap size"
      case maxBytes =>
        val maxGigaBytes = maxBytes.toDouble / 1024d / 1024d / 1024d
        NumberFormat.getInstance.format(maxGigaBytes) + "GiB max heap size"
    }
    val numCores = NumberFormat.getInstance.format(sys.runtime.availableProcessors.toLong)
    safe"Running ${Safe(BuildInfo.version)} with ${Safe(numCores)} available cores and ${Safe(maxHeapSize)}."
  }

  if (config.dumpConfig) {
    statusLines.info(log"${Safe(config.loadedConfigHocon)}")
  }

  val timeout: Timeout = config.timeout

  config.metricsReporters.foreach(Metrics.addReporter(_, "quine"))
  Metrics.startReporters()

  // Build the graph and sync the persisted app-state version, blocking up to `timeout`.
  val graph: GraphService =
    try Await
      .result(
        GraphService(
          persistorMaker = system => {
            val persistor =
              QuinePersistenceBuilder.instance.build(config.store, config.persistence)(system, logConfig)
            persistor.initializeOnce // Initialize the default namespace
            persistor
          },
          idProvider = config.id.idProvider,
          shardCount = config.shardCount,
          inMemorySoftNodeLimit = config.inMemorySoftNodeLimit,
          inMemoryHardNodeLimit = config.inMemoryHardNodeLimit,
          effectOrder = config.persistence.effectOrder,
          declineSleepWhenWriteWithinMillis = config.declineSleepWhenWriteWithin.toMillis,
          declineSleepWhenAccessWithinMillis = config.declineSleepWhenAccessWithin.toMillis,
          maxCatchUpSleepMillis = config.maxCatchUpSleep.toMillis,
          labelsProperty = config.labelsProperty,
          edgeCollectionFactory = config.edgeIteration.edgeCollectionFactory,
          metricRegistry = Metrics,
          enableDebugMetrics = config.metrics.enableDebugMetrics,
        ).flatMap(graph =>
          graph.namespacePersistor
            .syncVersion(
              "Quine app state",
              QuineApp.VersionKey,
              QuineApp.CurrentPersistenceVersion,
              () => QuineApp.quineAppIsEmpty(graph.namespacePersistor),
            )
            .map(_ => graph)(ExecutionContext.parasitic),
        )(ExecutionContext.parasitic),
        atMost = timeout.duration,
      )
    catch {
      case NonFatal(err) =>
        statusLines.error(log"Unable to start graph", err)
        sys.exit(1)
    }

  implicit val system: ActorSystem = graph.system
  val ec: ExecutionContext = graph.shardDispatcherEC

  // Create FileAccessPolicy once at startup (especially important for static mode which enumerates files)
  // Extract file paths from recipe to automatically allow them
  val recipeFilePaths: List[String] = recipe.toList.flatMap {
    case Recipe.V1(r) => r.extractFileIngestPaths
    case Recipe.V2(r) =>
      // Keep only file-backed ingest sources. The type is fully qualified because the simple
      // name `File` is already taken by the `java.io.File` import.
      r.ingestStreams.flatMap { ingestStream =>
        ingestStream.source match {
          case fileSource: com.thatdot.quine.app.v2api.definitions.ingest2.ApiIngest.IngestSource.File =>
            Some(fileSource.path)
          case _ => None
        }
      }
  }

  val fileAccessPolicy: FileAccessPolicy =
    FileAccessPolicy.fromConfigWithRecipePaths(
      config.fileIngest.allowedDirectories.getOrElse(List(".")),
      config.fileIngest.resolutionMode.getOrElse(ResolutionMode.Dynamic),
      recipeFilePaths,
    ) match {
      case cats.data.Validated.Valid(policy) => policy
      case cats.data.Validated.Invalid(errors) =>
        errors.toList.foreach { error =>
          statusLines.error(log"File ingest configuration error: ${Safe(error)}")
        }
        sys.exit(1)
    }

  val quineApp = new QuineApp(
    graph = graph,
    helpMakeQuineBetter = config.helpMakeQuineBetter,
    fileAccessPolicy = fileAccessPolicy,
    recipe = recipe,
    recipeCanonicalName = recipe.flatMap(_ => cmdArgs.recipe.flatMap(RecipeV1.getCanonicalName)),
  )

  // Initialize the namespaces and apply migrations
  val hydrateAndMigrate: Future[Either[MigrationError, Unit]] = {
    val allMigrations = migrations.instances.all
    val GoalVersion: MigrationVersion = allMigrations.last.to
    val currentVersionFut = MigrationVersion
      .getFrom(graph.namespacePersistor)
      .map(_.getOrElse(MigrationVersion(0)))(ExecutionContext.parasitic)
    currentVersionFut.flatMap[Either[MigrationError, Unit]] {
      case GoalVersion =>
        // we are already at our goal version, so we can just load namespaces
        quineApp.restoreNonDefaultNamespacesFromMetaData(ec).map(Right(_))(ExecutionContext.parasitic)
      case versionWentBackwards if versionWentBackwards > GoalVersion =>
        // the version we pulled from the persistor is greater than the `to` of the final migration we're aware of
        Future.successful(Left(MigrationError.PreviousMigrationTooAdvanced(versionWentBackwards, GoalVersion)))
      case currentVersion =>
        // the found version indicates we need to run at least one migration
        // TODO figure out which Migration.Apply instances to run based on the needed Migrations and the product
        //   running the migrations. For now, with one migration, and in Quine's main, we know what to run
        require(
          currentVersion == MigrationVersion(0) && GoalVersion == MigrationVersion(1),
          s"Unexpected migration versions (current: $currentVersion, goal: $GoalVersion)",
        )
        val migrationApply = new QuineMigrations.ApplyMultipleValuesRewrite(
          graph.namespacePersistor,
          graph.getNamespaces.toSet,
        )
        quineApp
          .restoreNonDefaultNamespacesFromMetaData(ec)
          .flatMap { _ =>
            migrationApply.run()(graph.dispatchers)
          }(graph.nodeDispatcherEC)
          .flatMap {
            case err @ Left(_) => Future.successful(err)
            case Right(_) =>
              // the migration succeeded, so we can set the version to the `to` version of the migration
              MigrationVersion
                .set(graph.namespacePersistor, migrationApply.migration.to)
                .map(Right(_))(ExecutionContext.parasitic)
          }(ExecutionContext.parasitic)
    }(graph.nodeDispatcherEC)
  }

  // if there was a migration error, present it to the user then exit
  Await.result(hydrateAndMigrate, timeout.duration).left.foreach { error: MigrationError =>
    error match {
      case includeDiagnosticInfo: Throwable =>
        statusLines.error(
          log"Encountered a migration error during startup. Shutting down." withException includeDiagnosticInfo,
        )
      case opaque =>
        statusLines.error(
          log"Encountered a migration error during startup. Shutting down. Error: ${opaque.message}",
        )
    }
    sys.exit(1)
  }

  val loadDataFut: Future[Unit] = quineApp.loadAppData(timeout, config.shouldResumeIngest)
  Await.result(loadDataFut, timeout.duration * 2)

  statusLines.info(log"Graph is ready")

  // Determine the bind address and resolvable URL for the web server, if enabled
  val bindAndResolvableAddresses: Option[(WebServerBindConfig, URL)] = Option.when(config.webserver.enabled) {
    // if a canonical URL is configured, use that for presentation (e.g. logging) purposes. Otherwise, infer
    // from the bind URL
    config.webserver -> config.webserverAdvertise.fold(config.webserver.guessResolvableUrl)(
      _.url(config.webserver.protocol),
    )
  }

  // Start the recipe (if any) and keep a handle so it can be cancelled during shutdown
  var recipeInterpreterTask: Option[Cancellable] = recipe.map {
    case Recipe.V1(r) =>
      val interpreter = RecipeInterpreter(statusLines, r, quineApp, graph, bindAndResolvableAddresses.map(_._2))(
        graph.idProvider,
      )
      interpreter.run(quineApp.thisMemberIdx)
      interpreter
    case Recipe.V2(r) =>
      val interpreter = RecipeInterpreterV2(
        statusLines,
        r,
        quineApp,
        graph,
        bindAndResolvableAddresses.map(_._2),
        quineApp.protobufSchemaCache,
      )(graph.idProvider)
      interpreter.run(quineApp.thisMemberIdx)
      interpreter
  }

  bindAndResolvableAddresses foreach { case (bindAddress, resolvableUrl) =>
    new QuineAppRoutes(graph, quineApp, config, resolvableUrl, timeout)(
      ExecutionContext.parasitic,
      logConfig,
    )
      .bindWebServer(
        bindAddress.address.asString,
        bindAddress.port.asInt,
        bindAddress.useTls,
        bindAddress.useMtls,
      )
      .onComplete {
        case Success(binding) =>
          binding.addToCoordinatedShutdown(hardTerminationDeadline = 30.seconds)
          statusLines.info(log"Quine web server available at ${Safe(resolvableUrl.toString)}")
          statusLines.info(log"Default API version: ${Safe(config.defaultApiVersion)}")
          quineApp.notifyWebServerStarted()
        case Failure(_) => // pekko will have logged a stacktrace to the debug logger
      }(ec)

    // Bind health endpoints if enabled
    if (bindAddress.useMtls.healthEndpoints.enabled) {
      val healthRoutes = new HealthAppRoutes(graph, quineApp, config, timeout)(ec, logConfig)
      healthRoutes
        .bindWebServer(
          "127.0.0.1",
          bindAddress.useMtls.healthEndpoints.port.asInt,
          useTls = false,
          useMTls = UseMtls(enabled = false),
        )
        .onComplete {
          case Success(binding) =>
            binding.addToCoordinatedShutdown(hardTerminationDeadline = 30.seconds)
            statusLines.info(
              log"Health endpoints available at http://127.0.0.1:${Safe(bindAddress.useMtls.healthEndpoints.port.asInt.toString)}",
            )
          case Failure(ex) =>
            statusLines.warn(
              log"Failed to start health endpoints on port ${Safe(bindAddress.useMtls.healthEndpoints.port.asInt.toString)}" withException ex,
            )
        }(ec)
    }
  }

  CoordinatedShutdown(system).addTask(CoordinatedShutdown.PhaseBeforeClusterShutdown, "Shutdown") { () =>
    statusLines.info(log"Quine is shutting down... ")
    try recipeInterpreterTask.foreach(_.cancel())
    catch {
      case NonFatal(e) =>
        statusLines.error(log"Graceful shutdown of Recipe interpreter encountered an error:", e)
    }
    implicit val ec = ExecutionContext.parasitic
    for {
      _ <- quineApp.shutdown()
      _ <- graph.shutdown()
    } yield {
      statusLines.info(log"Shutdown complete.")
      Done
    }
  }

  CoordinatedShutdown(system).addTask(CoordinatedShutdown.PhaseActorSystemTerminate, "Cleanup of reporters") { () =>
    Metrics.stopReporters()
    LoggerFactory.getILoggerFactory match {
      case context: LoggerContext => context.stop()
      case _ => ()
    }
    Future.successful(Done)
  }

  // Block the main thread for as long as the ActorSystem is running.
  try Await.ready(system.whenTerminated, Duration.Inf)
  catch { case _: InterruptedException => () }
}
object MeteredExecutors extends LazySafeLogging {

  // One instrumented executor per executor id, shared by every Configurator defined in this object
  private val instrumentedExecutors: Cache[String, InstrumentedExecutorService] = Scaffeine().build()

  /** Base class for the metered configurators: wraps whatever executor the `underlying` configurator
    * creates in an [[InstrumentedExecutorService]] registered against `registry.metricRegistry`.
    *
    * @param config        config handed to the parent [[ExecutorServiceConfigurator]] (and logged at debug level)
    * @param prerequisites dispatcher prerequisites handed to the parent [[ExecutorServiceConfigurator]]
    * @param underlying    configurator whose executors are wrapped
    * @param registry      source of the metric registry the wrapper reports to
    */
  sealed abstract class Configurator(
    config: TypesafeConfig,
    prerequisites: DispatcherPrerequisites,
    underlying: ExecutorServiceConfigurator,
    registry: HostQuineMetrics,
  ) extends ExecutorServiceConfigurator(config, prerequisites)
      with LazySafeLogging {
    implicit protected def logConfig: LogConfig

    // On construction, dump the effective config at debug level; config comments are only
    // rendered when trace logging is enabled as well.
    logger.whenDebugEnabled {
      var verbose = false
      logger.whenTraceEnabled {
        verbose = true
      }
      logger.debug(
        safe"Metered Configurator created with config read from ${Safe(config.origin())}: ${Safe(
          ConfigWriter[TypesafeConfig]
            .to(config)
            .render(
              ConfigRenderOptions.defaults().setComments(verbose).setOriginComments(false).setJson(false),
            ),
        )}",
      )
    }

    /** Returns a factory whose executor is looked up in (or created and added to) the shared cache
      * under `id`. On a cache miss the executor comes from `underlying`, wrapped for metering and
      * named after the executor id.
      */
    def createExecutorServiceFactory(id: String, threadFactory: ThreadFactory): ExecutorServiceFactory =
      new ExecutorServiceFactory {
        def createExecutorService: ExecutorService =
          // TODO consider making the cache sensitive to the provided threadFactory -- invalidating entries when
          // threadFactory changes so that the `underlying` delegate is always using the "latest" threadFactory
          instrumentedExecutors.get(
            id,
            executorId =>
              new InstrumentedExecutorService(
                underlying.createExecutorServiceFactory(executorId, threadFactory).createExecutorService,
                registry.metricRegistry,
                executorId,
              ),
          )
      }
  }

  /** Merges `config` with one of its own keys: the result is `config` itself, with the sub-config found
    * at `underlyingConfigKey` layered underneath as a fallback.
    *
    * pekko's AbstractDispatcher "normally" passes the full `config` to a custom Configurator, but it
    * special-cases pekko's own configurators, instead passing them only the part of the config under some
    * key. The config returned here defaults to the same behavior as AbstractDispatcher's scoping, but falls
    * back to what pekko's special casing would have provided.
    *
    * In effect, this allows using only a single config block for both the underlying configurator *and* the
    * metering wrapper itself, making it easier to switch between the two.
    *
    * NOTE(review): `getConfig` is expected to throw if `underlyingConfigKey` is absent -- confirm that every
    * dispatcher block using these configurators inherits that key from pekko's reference config.
    */
  private def mergeConfigWithUnderlying(config: TypesafeConfig, underlyingConfigKey: String): TypesafeConfig =
    config.withFallback(config.getConfig(underlyingConfigKey))

  /** Builds the [[HostQuineMetrics]] used by the metered configurators.
    *
    * Debug metrics are enabled according to the boolean at `quine.metrics.enable-debug-metrics`; a missing
    * key means `false`, and a value of the wrong type is logged as a warning and also treated as `false`.
    */
  def quineMetrics(config: TypesafeConfig)(implicit logConfig: LogConfig): HostQuineMetrics = {
    val ConfigPath = "quine.metrics.enable-debug-metrics"
    val useEnhancedMetrics: Boolean =
      try config.getBoolean(ConfigPath)
      catch {
        case _: ConfigException.Missing => false
        case wrongType: ConfigException.WrongType =>
          logger.warn(log"Found invalid setting for boolean config key ${Safe(ConfigPath)}" withException wrongType)
          false
      }

    // TODO the invariant below is violated by hard-coding the application here in otherwise shared code
    HostQuineMetrics(
      useEnhancedMetrics,
      Metrics,
      omitDefaultNamespace = true,
    ) // INV the metrics instance here matches the one used by the app's Main
  }

  /** An Executor that delegates execution to a Pekko [[ThreadPoolExecutorConfigurator]], wrapped in an
    * [[InstrumentedExecutorService]].
    *
    * @note this may be used by adding a line within any pekko "dispatcher" config block as follows:
    *       `executor = "com.thatdot.quine.app.MeteredExecutors$MeteredThreadPoolConfigurator"`.
    *       Options may still be passed to the underlying thread-pool-executor as normal
    * @see [[InstrumentedExecutorService]] for the metrics reported
    */
  final class MeteredThreadPoolConfigurator(config: TypesafeConfig, prerequisites: DispatcherPrerequisites)(implicit
    protected val logConfig: LogConfig,
  ) extends Configurator(
        mergeConfigWithUnderlying(config, "thread-pool-executor"),
        prerequisites,
        new ThreadPoolExecutorConfigurator(mergeConfigWithUnderlying(config, "thread-pool-executor"), prerequisites),
        quineMetrics(config),
      )

  /** An Executor that delegates execution to a Pekko [[ForkJoinExecutorConfigurator]], wrapped in an
    * [[InstrumentedExecutorService]].
    *
    * @note this may be used by adding a line within any pekko "dispatcher" config block as follows:
    *       `executor = "com.thatdot.quine.app.MeteredExecutors$MeteredForkJoinConfigurator"`.
    *       Options may still be passed to the underlying fork-join-executor as normal
    * @see [[InstrumentedExecutorService]] for the metrics reported
    */
  final class MeteredForkJoinConfigurator(config: TypesafeConfig, prerequisites: DispatcherPrerequisites)(implicit
    protected val logConfig: LogConfig,
  ) extends Configurator(
        mergeConfigWithUnderlying(config, "fork-join-executor"),
        prerequisites,
        new ForkJoinExecutorConfigurator(
          mergeConfigWithUnderlying(config, "fork-join-executor"),
          prerequisites,
        ),
        quineMetrics(config),
      )

  /** An Executor that delegates execution to a Pekko [[DefaultExecutorServiceConfigurator]], wrapped in an
    * [[InstrumentedExecutorService]].
    *
    * @note this may be used by adding a line within any pekko "dispatcher" config block as follows:
    *       `executor = "com.thatdot.quine.app.MeteredExecutors$MeteredDefaultConfigurator"`.
    *       Options may still be passed to the underlying default-executor as normal, except that
    *       default-executor.fallback is ignored in favor of MeteredForkJoin (chosen because the default value as of
    *       pekko 1.0.0 was fork-join-executor)
    */
  final class MeteredDefaultConfigurator(config: TypesafeConfig, prerequisites: DispatcherPrerequisites)(implicit
    protected val logConfig: LogConfig,
  ) extends Configurator(
        mergeConfigWithUnderlying(config, "default-executor"),
        prerequisites, {
          // Evaluated as a constructor argument, so `logger` here is the enclosing object's logger.
          // The warning is only emitted when no default execution context was supplied.
          if (prerequisites.defaultExecutionContext.isEmpty)
            logger.warn(
              safe"The default pekko executor should only be metered in conjunction with an explicit default executor" +
              safe" (this may be set at pekko.actor.default-dispatcher.default-executor). Defaulting to fork-join",
            )
          new DefaultExecutorServiceConfigurator(
            mergeConfigWithUnderlying(config, "default-executor"),
            prerequisites,
            new MeteredForkJoinConfigurator(
              config,
              prerequisites,
            ),
          )
        },
        quineMetrics(config),
      )

  // AffinityPoolConfigurator is private and @ApiMayChange as of 2.6.16, so there is no MeteredAffinityPoolConfigurator
}
org.apache.pekko.Done import org.apache.pekko.stream.KillSwitches import org.apache.pekko.stream.scaladsl.Keep import org.apache.pekko.util.Timeout import cats.Applicative import cats.data.{Validated, ValidatedNel} import cats.instances.future.catsStdInstancesForFuture import cats.syntax.all._ import com.thatdot.api.{v2 => Api2} import com.thatdot.common.logging.Log.{LazySafeLogging, LogConfig, Safe, SafeLoggableInterpolator} import com.thatdot.common.security.Secret import com.thatdot.quine.app.config.FileAccessPolicy import com.thatdot.quine.app.model.ingest.serialization.{CypherParseProtobuf, CypherToProtobuf} import com.thatdot.quine.app.model.ingest.{IngestSrcDef, QuineIngestSource} import com.thatdot.quine.app.model.ingest2.V2IngestEntities.{QuineIngestConfiguration, QuineIngestStreamWithStatus} import com.thatdot.quine.app.model.ingest2.{V1ToV2, V2IngestEntities} import com.thatdot.quine.app.routes._ import com.thatdot.quine.app.util.QuineLoggables._ import com.thatdot.quine.app.v2api.converters.ApiToStanding import com.thatdot.quine.app.v2api.definitions.query.{standing => V2ApiStanding} import com.thatdot.quine.compiler.cypher import com.thatdot.quine.compiler.cypher.{CypherStandingWiretap, registerUserDefinedProcedure} import com.thatdot.quine.graph.InvalidQueryPattern._ import com.thatdot.quine.graph.MasterStream.SqResultsExecToken import com.thatdot.quine.graph.StandingQueryPattern.{ DomainGraphNodeStandingQueryPattern, MultipleValuesQueryPattern, QuinePatternQueryPattern, } import com.thatdot.quine.graph.cypher.quinepattern.{OutputTarget => V2OutputTarget, QueryPlanner, RuntimeMode} import com.thatdot.quine.graph.metrics.HostQuineMetrics import com.thatdot.quine.graph.quinepattern.LoadQuery import com.thatdot.quine.graph.{ GraphService, MemberIdx, NamespaceId, PatternOrigin, StandingQueryId, StandingQueryInfo, defaultNamespaceId, namespaceFromString, namespaceToString, } import com.thatdot.quine.model.QuineIdProvider import 
com.thatdot.quine.persistor.{PrimePersistor, Version} import com.thatdot.quine.routes.IngestStreamStatus import com.thatdot.quine.serialization.{AvroSchemaCache, EncoderDecoder, ProtobufSchemaCache} import com.thatdot.quine.util.Log.implicits._ import com.thatdot.quine.util.{BaseError, SwitchMode} import com.thatdot.quine.{routes => V1} /** The Quine application state * * @param graph reference to the underlying graph */ final class QuineApp( graph: GraphService, helpMakeQuineBetter: Boolean, val fileAccessPolicy: FileAccessPolicy, recipe: Option[Recipe] = None, recipeCanonicalName: Option[String] = None, )(implicit val logConfig: LogConfig) extends BaseApp(graph) with AdministrationRoutesState with QueryUiConfigurationState with StandingQueryStoreV1 with StandingQueryInterfaceV2 with IngestStreamState with V1.QueryUiConfigurationSchemas with V1.StandingQuerySchemas with V1.IngestSchemas with EncoderDecoder.DeriveEndpoints4s with com.thatdot.quine.routes.exts.CirceJsonAnySchema with SchemaCache with LazySafeLogging { import QuineApp._ import com.thatdot.quine.app.StandingQueryResultOutput.OutputTarget implicit private[this] val idProvider: QuineIdProvider = graph.idProvider /** == Local state == * Notes on synchronization: * Accesses to the following collections must be threadsafe. Additionally, the persisted copy of these collections * (ie those accessed by `*Metadata` functions) must be kept in sync with the in-memory copy. Because all of these * functions are expected to have a low volume of usage, and thus don't need to be performance-optimized, we * aggressively synchronize on locks. In particular, synchronizing on the collection itself is not sufficient, because * the lock offered by `synchronize` is with respect to the locked *value*, not the locked *field* -- so locking on * a mutating variable does not result in a mutex. 
    * By contrast, locking on a dedicated lock object is more than is strictly necessary,
    * but represents a deliberate choice to simplify the synchronization logic at the cost of reduced performance,
    * as all these synchronization points should be low-volume.
    *
    * In the case of collections with only `get`/`set` functions, the @volatile annotation is sufficient to ensure the
    * thread-safety of `get`. `set` functions must synchronize on the collection's dedicated lock object (e.g.
    * `quickQueriesLock`) to ensure that setting both the in-memory and persisted copies of the collection happens
    * at the same time.
    *
    * Get/set example:
    * - `getQuickQueries` relies only on @volatile for its synchronization, because @volatile ensures all threads
    *   read the same state of the underlying `quickQueries` variable
    * - `setQuickQueries` is wrapped in a `...Lock.synchronized` to ensure that 2 simultaneous calls to `setQuickQueries`
    *   will not interleave their local and remote update steps. Without synchronized, execution (1) might set the local
    *   variable while execution (2) sets the persisted version
    *
    * In the case of collections with update (eg `add`/`remove`) semantics, all accesses must be synchronized
    * on the collection's dedicated lock object, because all accesses involve both a read and a write which might
    * race with concurrent executions.
    *
    * Add example:
    * - `addIngestStream` is wrapped in a `...Lock.synchronized` because the updates it makes to `ingestStreams` depend on
    *   the results of a read of `ingestStreams`. Thus, the read and the write must happen atomically with respect to
    *   other `addIngestStream` invocations. Additionally, the `synchronized` ensures the local and persisted copies of
    *   the collection are kept in sync (as in the get/set case)
    *
    * Additionally, note that each synchronized{} block forces execution synchronization of futures it invokes (ie,
    * each time a future is created, it is Await-ed).
/** Replace the sample queries, both in memory and in persisted metadata.
  *
  * Both writes happen inside one `sampleQueriesLock` critical section so that concurrent callers
  * cannot interleave their in-memory and persisted updates.
  *
  * @param newSampleQueries the complete new collection of sample queries
  */
def setSampleQueries(newSampleQueries: Vector[V1.SampleQuery]): Future[Unit] = {
  // In-memory copy first, then the persisted copy (reading back the field that was just written)
  def replaceThenPersist() = {
    sampleQueries = newSampleQueries
    storeGlobalMetaData(SampleQueriesKey, sampleQueries)
  }
  synchronizedFakeFuture(sampleQueriesLock)(replaceThenPersist())
}
synchronizedFakeFuture(nodeAppearancesLock) {
      // Store the rendered appearances, not the raw input, both in memory and in metadata.
      nodeAppearances = newNodeAppearances.map(QueryUiConfigurationState.renderNodeIcons)
      storeGlobalMetaData(NodeAppearancesKey, nodeAppearances)
    }

  /** Registers a new standing query (V2 API) named `queryName` in `inNamespace`.
    *
    * Inside `synchronizedFakeFuture(outputTargetsLock)` this: builds one output graph per workflow in
    * `standingQueryDefinition.outputs`, compiles the pattern for the requested mode, registers the
    * domain graph node package (produced only in DistinctId mode), creates the standing query,
    * records the per-output kill switches in `outputTargets`, and persists via
    * `storeStandingQueryOutputs2()`.
    *
    * @return `NotFound` (carrying the namespace name) if `outputTargets` has no entry for the
    *         namespace; `AlreadyExists` if `queryName` is already registered there; `NotFound`
    *         (carrying `queryName`) if `graph.standingQueries(inNamespace)` is empty at creation
    *         time; `Success` once `storeStandingQueryOutputs2()` completes.
    * @note `DistinctIdMustDistinct`, `MultipleValuesCantDistinct` and the `sys.error` calls are
    *       raised inside the `flatMap` callback below.
    */
  def addStandingQueryV2(
    queryName: String,
    inNamespace: NamespaceId,
    standingQueryDefinition: V2ApiStanding.StandingQuery.StandingQueryDefinition,
  ): Future[StandingQueryInterfaceV2.Result] = onlyIfNamespaceExists(inNamespace) {
    synchronizedFakeFuture(outputTargetsLock) {
      outputTargets
        .get(inNamespace)
        .fold(
          Future.successful[StandingQueryInterfaceV2.Result](
            StandingQueryInterfaceV2.Result.NotFound(namespaceToString(inNamespace)),
          ),
        ) { sqOutputTargets =>
          if (sqOutputTargets.contains(queryName)) {
            Future.successful(
              StandingQueryInterfaceV2.Result.AlreadyExists(queryName),
            )
          } else {
            val sqId = StandingQueryId.fresh()
            implicit val ec: ExecutionContext = graph.nodeDispatcherEC
            // Build (but do not run) one output graph per workflow, keyed by workflow name.
            Future
              .traverse(standingQueryDefinition.outputs.toVector) { apiWorkflow =>
                ApiToStanding(apiWorkflow, inNamespace)(graph, protobufSchemaCache).map(workflowInterpreter =>
                  apiWorkflow.name -> workflowInterpreter
                    .flow(graph)
                    .viaMat(KillSwitches.single)(Keep.right)
                    .map(_ => SqResultsExecToken(s"SQ: ${apiWorkflow.name} in: $inNamespace"))
                    .to(graph.masterStream.standingOutputsCompletionSink),
                )
              }
              .map(_.toMap)
              .flatMap { sqResultsConsumers =>
                // `dgnPackage` is Some only for DistinctId mode; the other modes yield None.
                val (pattern, dgnPackage) = standingQueryDefinition.pattern match {
                  case V2ApiStanding.StandingQueryPattern.Cypher(cypherQuery, mode) =>
                    mode match {
                      case V2ApiStanding.StandingQueryPattern.StandingQueryMode.DistinctId =>
                        val graphPattern = cypher.compileStandingQueryGraphPattern(cypherQuery)(graph.idProvider, logConfig)
                        val origin = PatternOrigin.GraphPattern(graphPattern, Some(cypherQuery))
                        if (!graphPattern.distinct) {
                          // TODO unit test this behavior
                          throw DistinctIdMustDistinct
                        }
                        val (branch, returnColumn) = graphPattern.compiledDomainGraphBranch(graph.labelsProperty)
                        val dgnPackage = branch.toDomainGraphNodePackage
                        val dgnPattern =
                          DomainGraphNodeStandingQueryPattern(
                            dgnPackage.dgnId,
                            returnColumn.formatAsString,
                            returnColumn.aliasedAs,
                            standingQueryDefinition.includeCancellations,
                            origin,
                          )
                        (dgnPattern, Some(dgnPackage))
                      case V2ApiStanding.StandingQueryPattern.StandingQueryMode.MultipleValues =>
                        val graphPattern = cypher.compileStandingQueryGraphPattern(cypherQuery)(graph.idProvider, logConfig)
                        val origin = PatternOrigin.GraphPattern(graphPattern, Some(cypherQuery))
                        if (graphPattern.distinct) throw MultipleValuesCantDistinct
                        val compiledQuery = graphPattern.compiledMultipleValuesStandingQuery(graph.labelsProperty, idProvider)
                        val sqv4Pattern = MultipleValuesQueryPattern(
                          compiledQuery,
                          standingQueryDefinition.includeCancellations,
                          origin,
                        )
                        (sqv4Pattern, None)
                      case V2ApiStanding.StandingQueryPattern.StandingQueryMode.QuinePattern =>
                        // QuinePattern mode uses the new parser and planner directly,
                        // bypassing the traditional StandingQueryPatterns validation
                        // Enabled only when the `qp.enabled` system property parses to `true`.
                        val maybeIsQPEnabled = for {
                          pv <- Option(System.getProperty("qp.enabled"))
                          b <- pv.toBooleanOption
                        } yield b

                        maybeIsQPEnabled match {
                          case Some(true) =>
                            val planned = QueryPlanner.planFromString(cypherQuery) match {
                              case Right(p) => p
                              case Left(error) => sys.error(s"Failed to compile query: $error")
                            }
                            val qpPattern = QuinePatternQueryPattern(
                              planned.plan,
                              RuntimeMode.Lazy,
                              planned.returnColumns,
                              planned.outputNameMapping,
                            )
                            (qpPattern, None)
                          case _ =>
                            sys.error("Quine pattern must be enabled using -Dqp.enabled=true to use this feature.")
                        }
                    }
                }
                // Register (and persist) the DGN package, if any, before creating the standing query.
                (dgnPackage match {
                  case Some(p) =>
                    graph.dgnRegistry.registerAndPersistDomainGraphNodePackage(p, sqId, skipPersistor = false)
                  case None => Future.unit
                }).flatMap { _ =>
                  graph
                    .standingQueries(inNamespace)
                    .fold(
                      Future
                        .successful[StandingQueryInterfaceV2.Result](
                          StandingQueryInterfaceV2.Result.NotFound(queryName),
                        ),
                    ) { sqns => // Ignore if namespace is no longer available.
val (sq, killSwitches) = sqns.createStandingQuery(
                        name = queryName,
                        pattern = pattern,
                        outputs = sqResultsConsumers,
                        queueBackpressureThreshold = standingQueryDefinition.inputBufferSize,
                        sqId = sqId,
                      )

                      // Pair every workflow definition with the kill switch returned for its name.
                      val outputsWithKillSwitches = standingQueryDefinition.outputs.map { workflow =>
                        workflow.name -> OutputTarget.V2(workflow, killSwitches(workflow.name))
                      }.toMap
                      val updatedInnerMap = sqOutputTargets + (queryName -> (sq.query.id -> outputsWithKillSwitches))
                      outputTargets += inNamespace -> updatedInnerMap
                      storeStandingQueryOutputs2().map(_ => StandingQueryInterfaceV2.Result.Success)(
                        ExecutionContext.parasitic,
                      )
                    }
                }(graph.system.dispatcher)
              }
          }
        }
    }
  }

  /** Registers a new standing query (V1 API) named `queryName` in `inNamespace`.
    *
    * Inside `synchronizedFakeFuture(outputTargetsLock)` this: builds a result-handling sink per
    * entry of `query.outputs`, compiles the pattern for the requested mode, registers the domain
    * graph node package (produced only in DistinctId mode), creates the standing query, sends a
    * `LoadQuery` message to `graph.getLoader` when the created query's pattern is a
    * `QuinePatternQueryPattern`, records the per-output kill switches in `outputTargets`, and
    * persists via `storeStandingQueryOutputs1()`.
    *
    * @return `false` if `outputTargets` has no entry for the namespace, if `queryName` is already
    *         registered there, or if `graph.standingQueries(inNamespace)` is empty at creation time;
    *         `true` once `storeStandingQueryOutputs1()` completes.
    * @note Unlike the V2 variant, the pattern is compiled directly in the locked block rather than
    *       inside a Future callback, so `DistinctIdMustDistinct`, `MultipleValuesCantDistinct` and
    *       the `sys.error` calls are raised from that block.
    */
  def addStandingQuery(
    queryName: FriendlySQName,
    inNamespace: NamespaceId,
    query: V1.StandingQueryDefinition,
  ): Future[Boolean] = onlyIfNamespaceExists(inNamespace) {
    synchronizedFakeFuture(outputTargetsLock) {
      outputTargets.get(inNamespace).fold(Future.successful(false)) { namespaceTargets =>
        if (namespaceTargets.contains(queryName)) Future.successful(false)
        else {
          val sqId = StandingQueryId.fresh()
          val sqResultsConsumers = query.outputs.map { case (outputName, outputDefinition) =>
            outputName -> StandingQueryResultOutput
              .resultHandlingSink(outputName, inNamespace, outputDefinition, graph)(protobufSchemaCache, logConfig)
          }
          // `dgnPackage` is Some only for DistinctId mode; the other modes yield None.
          val (pattern, dgnPackage) = query.pattern match {
            case V1.StandingQueryPattern.Cypher(cypherQuery, mode) =>
              mode match {
                case V1.StandingQueryPattern.StandingQueryMode.DistinctId =>
                  val graphPattern = cypher.compileStandingQueryGraphPattern(cypherQuery)(graph.idProvider, logConfig)
                  val origin = PatternOrigin.GraphPattern(graphPattern, Some(cypherQuery))
                  if (!graphPattern.distinct) {
                    // TODO unit test this behavior
                    throw DistinctIdMustDistinct
                  }
                  val (branch, returnColumn) = graphPattern.compiledDomainGraphBranch(graph.labelsProperty)
                  val dgnPackage = branch.toDomainGraphNodePackage
                  val dgnPattern = DomainGraphNodeStandingQueryPattern(
                    dgnPackage.dgnId,
                    returnColumn.formatAsString,
                    returnColumn.aliasedAs,
                    query.includeCancellations,
                    origin,
                  )
                  (dgnPattern, Some(dgnPackage))
                case V1.StandingQueryPattern.StandingQueryMode.MultipleValues =>
                  val graphPattern = cypher.compileStandingQueryGraphPattern(cypherQuery)(graph.idProvider, logConfig)
                  val origin = PatternOrigin.GraphPattern(graphPattern, Some(cypherQuery))
                  if (graphPattern.distinct) throw MultipleValuesCantDistinct
                  val compiledQuery = graphPattern.compiledMultipleValuesStandingQuery(graph.labelsProperty, idProvider)
                  val sqv4Pattern = MultipleValuesQueryPattern(compiledQuery, query.includeCancellations, origin)
                  (sqv4Pattern, None)
                case V1.StandingQueryPattern.StandingQueryMode.QuinePattern =>
                  // QuinePattern mode uses the new parser and planner directly,
                  // bypassing the traditional StandingQueryPatterns validation
                  // Enabled only when the `qp.enabled` system property parses to `true`.
                  val maybeIsQPEnabled = for {
                    pv <- Option(System.getProperty("qp.enabled"))
                    b <- pv.toBooleanOption
                  } yield b

                  maybeIsQPEnabled match {
                    case Some(true) =>
                      val planned = QueryPlanner.planFromString(cypherQuery) match {
                        case Right(p) => p
                        case Left(error) => sys.error(s"Failed to compile query: $error")
                      }
                      val qpPattern = QuinePatternQueryPattern(
                        planned.plan,
                        RuntimeMode.Lazy,
                        planned.returnColumns,
                        planned.outputNameMapping,
                      )
                      (qpPattern, None)
                    case _ =>
                      sys.error("Quine pattern must be enabled using -Dqp.enabled=true to use this feature.")
                  }
              }
          }
          // Register (and persist) the DGN package, if any, before creating the standing query.
          (dgnPackage match {
            case Some(p) => graph.dgnRegistry.registerAndPersistDomainGraphNodePackage(p, sqId, skipPersistor = false)
            case None => Future.unit
          }).flatMap { _ =>
            graph
              .standingQueries(inNamespace)
              .fold(Future.successful(false)) { sqns => // Ignore if namespace is no longer available.
                val (sq, killSwitches) = sqns.createStandingQuery(
                  queryName,
                  pattern,
                  outputs = sqResultsConsumers,
                  queueBackpressureThreshold = query.inputBufferSize,
                  shouldCalculateResultHashCode = query.shouldCalculateResultHashCode,
                  sqId = sqId,
                )

                // QuinePattern standing queries are additionally handed to the graph's loader.
                sq.query.queryPattern match {
                  case QuinePatternQueryPattern(queryPlanV2, mode, returnColumns, outputNameMapping) =>
                    graph.getLoader ! LoadQuery(
                      sq.query.id,
                      queryPlanV2,
                      mode,
                      Map.empty,
                      inNamespace,
                      V2OutputTarget.StandingQuerySink(sq.query.id, inNamespace),
                      returnColumns,
                      outputNameMapping,
                      // `atTime` is `None` by default (current time)—this is where we would
                      // pass in `atTime` for historically aware Standing Queries (if we wanted to do that)
                    )
                  case _ => // Non-QuinePattern queries don't need additional loading
                }

                // Pair every output definition with the kill switch returned for its name.
                val outputsWithKillSwitches = query.outputs.map { case (name, out) =>
                  name -> OutputTarget.V1(out, killSwitches(name))
                }
                val updatedInnerMap = namespaceTargets + (queryName -> (sq.query.id -> outputsWithKillSwitches))
                outputTargets += inNamespace -> updatedInnerMap
                storeStandingQueryOutputs1().map(_ => true)(ExecutionContext.parasitic)
              }
          }(graph.system.dispatcher)
        }
      }
    }
  }

  /** Cancels an existing standing query and returns its V2 representation.
    *
    * Only outputs stored as `OutputTarget.V2` are included in the returned definition.
    *
    * @return `None` when `outputTargets` has no entry for the namespace/query, or when
    *         `graph.standingQueries(inNamespace)` or its `cancelStandingQuery(sqId)` yields `None`.
    *         Otherwise the cancelled query, combined (via `productL`) with
    *         `storeStandingQueryOutputs()`.
    */
  def cancelStandingQueryV2(
    queryName: String,
    inNamespace: NamespaceId,
  ): Future[Option[V2ApiStanding.StandingQuery.RegisteredStandingQuery]] = onlyIfNamespaceExists(inNamespace) {
    synchronizedFakeFuture(outputTargetsLock) {
      val cancelledSqState = for {
        (sqId, outputs: Map[SQOutputName, OutputTarget]) <- outputTargets.get(inNamespace).flatMap(_.get(queryName))
        v2Outputs = outputs.collect { case (_, target: OutputTarget.V2) => target.definition }
        cancelledSq <- graph.standingQueries(inNamespace).flatMap(_.cancelStandingQuery(sqId))
      } yield {
        // Remove key from the inner map:
        outputTargets += inNamespace -> (outputTargets(inNamespace) - queryName)

        // Map to return type
        cancelledSq.map { case (internalSq, startTime, bufferSize) =>
          makeRegisteredStandingQueryV2(
            internal = internalSq,
            inNamespace = inNamespace,
            outputs = v2Outputs.toSeq,
            startTime =
startTime,
            bufferSize = bufferSize,
            metrics = graph.metrics,
          )
        }(graph.system.dispatcher)
      }
      // must be implicit for cats sequence
      implicit val applicative: Applicative[Future] = catsStdInstancesForFuture(ExecutionContext.parasitic)
      // Option[Future[A]] -> Future[Option[A]]; `productL` keeps that result while also running the store.
      cancelledSqState.sequence productL storeStandingQueryOutputs()
    }
  }

  /** Cancels an existing standing query.
    *
    * @return Future succeeds/fails when the storing of the updated collection of SQs succeeds/fails. The Option is
    * `None` when the SQ or namespace doesn't exist. The inner `V1.RegisteredStandingQuery` is the definition of the
    * successfully removed standing query.
    */
  def cancelStandingQuery(
    queryName: String,
    inNamespace: NamespaceId,
  ): Future[Option[V1.RegisteredStandingQuery]] = onlyIfNamespaceExists(inNamespace) {
    synchronizedFakeFuture(outputTargetsLock) {
      val cancelledSqState: Option[Future[V1.RegisteredStandingQuery]] = for {
        (sqId, outputs) <- outputTargets.get(inNamespace).flatMap(_.get(queryName))
        // Only outputs stored as V1 targets are reported back to the caller.
        v1Outputs = outputs.collect { case (name, target: OutputTarget.V1) => name -> target.definition }
        cancelledSq <- graph.standingQueries(inNamespace).flatMap(_.cancelStandingQuery(sqId))
      } yield {
        // Remove key from the inner map:
        outputTargets += inNamespace -> (outputTargets(inNamespace) - queryName)

        // Map to return type
        cancelledSq.map { case (internalSq, startTime, bufferSize) =>
          makeRegisteredStandingQuery(
            internal = internalSq,
            inNamespace = inNamespace,
            outputs = v1Outputs,
            startTime = startTime,
            bufferSize = bufferSize,
            metrics = graph.metrics,
          )
        }(graph.system.dispatcher)
      }
      // must be implicit for cats sequence
      implicit val applicative: Applicative[Future] = catsStdInstancesForFuture(ExecutionContext.parasitic)
      // Option[Future[A]] -> Future[Option[A]]; `productL` keeps that result while also running the store.
      cancelledSqState.sequence productL storeStandingQueryOutputs()
    }
  }

  /** Source descriptions passed to [[ImproveQuine]] (see `initializeTelemetry`), derived from the
    * default namespace's ingest streams. Always an already-completed `Some`.
    */
  private def getSources: Future[Option[List[String]]] =
    Future.successful(Some(ImproveQuine.sourcesFromIngestStreams(getIngestStreams(defaultNamespaceId))))

  /** Sink descriptions passed to [[ImproveQuine]] (see `initializeTelemetry`), derived from the
    * default namespace's standing queries.
    */
  private def getSinks: Future[Option[List[String]]] =
getStandingQueries(defaultNamespaceId)
      .map(ImproveQuine.sinksFromStandingQueries)(ExecutionContext.parasitic)
      .map(Some(_))(ExecutionContext.parasitic)

  /** Adds a new output workflow (V2 API) to an existing standing query.
    *
    * @return `NotFound` (carrying `queryName`) when `outputTargets` has no entry for the
    *         namespace/query or the graph has no results hub for it; `AlreadyExists` (carrying
    *         `outputName`) when an output with that name is already registered; `Success` once
    *         `storeStandingQueryOutputs2()` completes after the new output stream has been started.
    */
  def addStandingQueryOutputV2(
    queryName: String,
    outputName: String,
    inNamespace: NamespaceId,
    workflow: V2ApiStanding.StandingQueryResultWorkflow,
  ): Future[StandingQueryInterfaceV2.Result] = onlyIfNamespaceExists(inNamespace) {
    synchronizedFakeFuture(outputTargetsLock) {
      val optionFut = for {
        (sqId, outputs) <- outputTargets.get(inNamespace).flatMap(_.get(queryName))
        sqResultsHub <- graph.standingQueries(inNamespace).flatMap(_.standingResultsHub(sqId))
      } yield if (outputs.contains(outputName)) {
        Future.successful(StandingQueryInterfaceV2.Result.AlreadyExists(outputName))
      } else {
        ApiToStanding(workflow, inNamespace)(graph, protobufSchemaCache).flatMap { workflowInterpreter =>
          // Materialize the new output stream, keeping its kill switch so it can be shut down later.
          val killSwitch = sqResultsHub
            .viaMat(KillSwitches.single)(Keep.right)
            .via(workflowInterpreter.flow(graph)(logConfig))
            .map(_ => SqResultsExecToken(s"SQ: $outputName in: $inNamespace"))
            .to(graph.masterStream.standingOutputsCompletionSink)
            .run()

          val updatedInnerMap = outputTargets(inNamespace) +
            (queryName -> (sqId -> (outputs + (outputName -> OutputTarget.V2(workflow, killSwitch)))))
          outputTargets += inNamespace -> updatedInnerMap
          storeStandingQueryOutputs2().map(_ => StandingQueryInterfaceV2.Result.Success)(ExecutionContext.parasitic)
        }(graph.nodeDispatcherEC)
      }
      optionFut.getOrElse(Future.successful(StandingQueryInterfaceV2.Result.NotFound(queryName)))
    }
  }

  /** Adds a new user-defined output handler to an existing standing query.
    *
    * @return Future succeeds/fails when the storing of SQs succeeds/fails.
The Option is None when the SQ or
    * namespace doesn't exist. The Boolean indicates whether an output with that name was successfully added (false if
    * the out name is already in use).
    */
  def addStandingQueryOutput(
    queryName: String,
    outputName: String,
    inNamespace: NamespaceId,
    sqResultOutput: V1.StandingQueryResultOutputUserDef,
  ): Future[Option[Boolean]] = onlyIfNamespaceExists(inNamespace) {
    synchronizedFakeFuture(outputTargetsLock) {
      val optionFut = for {
        (sqId, outputs) <- outputTargets.get(inNamespace).flatMap(_.get(queryName))
        sqResultsHub <- graph.standingQueries(inNamespace).flatMap(_.standingResultsHub(sqId))
      } yield if (outputs.contains(outputName)) {
        Future.successful(false)
      } else {
        // Materialize the new output stream
        val killSwitch = sqResultsHub.runWith(
          StandingQueryResultOutput.resultHandlingSink(outputName, inNamespace, sqResultOutput, graph)(
            protobufSchemaCache,
            logConfig,
          ),
        )
        val updatedInnerMap = outputTargets(inNamespace) +
          (queryName -> (sqId -> (outputs + (outputName -> OutputTarget.V1(sqResultOutput, killSwitch)))))
        outputTargets += inNamespace -> updatedInnerMap
        storeStandingQueryOutputs1().map(_ => true)(ExecutionContext.parasitic)
      }
      // must be implicit for cats sequence
      implicit val futureApplicative: Applicative[Future] = catsStdInstancesForFuture(ExecutionContext.parasitic)
      optionFut.sequence
    }
  }

  /** Removes a V2 output workflow by name from an existing standing query.
    *
    * When the output exists and is stored as an `OutputTarget.V2`, its kill switch is shut down and
    * it is dropped from `outputTargets`. `storeStandingQueryOutputs2()` is invoked in every case.
    *
    * @return Future succeeds/fails when the storing of SQ outputs succeeds/fails. The Option is None
    *         when the SQ or namespace has no entry in `outputTargets`, or when the SQ has no V2
    *         output with that name. The inner workflow is the definition that was removed.
    */
  def removeStandingQueryOutputV2(
    queryName: String,
    outputName: String,
    inNamespace: NamespaceId,
  ): Future[Option[V2ApiStanding.StandingQueryResultWorkflow]] = onlyIfNamespaceExists(inNamespace) {
    synchronizedFakeFuture(outputTargetsLock) {
      val outputOpt = for {
        (sqId, outputs) <- outputTargets.get(inNamespace).flatMap(_.get(queryName))
        OutputTarget.V2(output, killSwitch) <- outputs.get(outputName)
      } yield {
        killSwitch.shutdown()
        val updatedInnerMap = outputTargets(inNamespace) + (queryName -> (sqId -> (outputs - outputName)))
        outputTargets += inNamespace -> updatedInnerMap
        output
      }
      // Return the persistence Future itself (as the V1 variant does). Previously it was created and
      // then discarded in favour of `Future.successful(outputOpt)`, so the result neither waited for
      // the store to finish nor reported its failure.
      storeStandingQueryOutputs2().map(_ => outputOpt)(ExecutionContext.parasitic)
    }
  }

  /** Removes a standing query output handler by name from an existing standing query.
    *
    * @return Future succeeds/fails when the storing of SQs succeeds/fails. The Option is None when the SQ or
    * namespace doesn't exist, or if the SQ does not have an output with that name. The inner
    * `V1.StandingQueryResultOutputUserDef` is the output that was successfully removed.
    */
  def removeStandingQueryOutput(
    queryName: String,
    outputName: String,
    inNamespace: NamespaceId,
  ): Future[Option[V1.StandingQueryResultOutputUserDef]] = onlyIfNamespaceExists(inNamespace) {
    synchronizedFakeFuture(outputTargetsLock) {
      val outputOpt = for {
        (sqId, outputs) <- outputTargets.get(inNamespace).flatMap(_.get(queryName))
        // Only outputs stored as V1 targets match; anything else makes `outputOpt` None.
        OutputTarget.V1(output, killSwitch) <- outputs.get(outputName)
      } yield {
        killSwitch.shutdown()
        val updatedInnerMap = outputTargets(inNamespace) + (queryName -> (sqId -> (outputs - outputName)))
        outputTargets += inNamespace -> updatedInnerMap
        output
      }
      storeStandingQueryOutputs1().map(_ => outputOpt)(ExecutionContext.parasitic)
    }
  }

  /** Lists every standing query recorded for `inNamespace`, in its V2 representation. */
  def getStandingQueriesV2(
    inNamespace: NamespaceId,
  ): Future[List[V2ApiStanding.StandingQuery.RegisteredStandingQuery]] =
    getStandingQueriesWithNames2(Nil, inNamespace)

  /** Looks up a single standing query by name, in its V2 representation.
    *
    * @return `None` when the lookup for `queryName` yields no live standing query.
    */
  def getStandingQueryV2(
    queryName: String,
    inNamespace: NamespaceId,
  ): Future[Option[V2ApiStanding.StandingQuery.RegisteredStandingQuery]] =
    getStandingQueriesWithNames2(List(queryName), inNamespace).map(_.headOption)(graph.system.dispatcher)

  /** Get standing queries live on the graph with the specified names
    *
    * @param queryNames which standing queries to retrieve, empty list corresponds to all SQs
    * @return queries registered on the graph. Future never fails. List contains each live
    *         `V2ApiStanding.StandingQuery.RegisteredStandingQuery`.
*/
  private def getStandingQueriesWithNames2(
    queryNames: List[String],
    inNamespace: NamespaceId,
  ): Future[List[V2ApiStanding.StandingQuery.RegisteredStandingQuery]] = onlyIfNamespaceExists(inNamespace) {
    synchronizedFakeFuture(outputTargetsLock) {
      val namespaceTargets = outputTargets.get(inNamespace)
      // An empty request means "every standing query recorded for this namespace".
      val requestedNames =
        if (queryNames.isEmpty) namespaceTargets.map(_.keys).getOrElse(Iterable.empty)
        else queryNames

      val registered = requestedNames.flatMap { queryName =>
        for {
          (sqId, allOutputs) <- namespaceTargets.flatMap(_.get(queryName))
          (internalSq, startTime, bufferSize) <- graph
            .standingQueries(inNamespace)
            .flatMap(_.listStandingQueries.get(sqId))
        } yield {
          // Only outputs stored as V2 targets are reported.
          val v2Definitions = allOutputs.collect { case (_, out: OutputTarget.V2) => out.definition }.toSeq
          makeRegisteredStandingQueryV2(
            internal = internalSq,
            inNamespace = inNamespace,
            outputs = v2Definitions,
            startTime = startTime,
            bufferSize = bufferSize,
            metrics = graph.metrics,
          )
        }
      }
      Future.successful(registered.toList)
    }
  }

  /** Lists every standing query recorded for `inNamespace`, in its V1 representation. */
  def getStandingQueries(inNamespace: NamespaceId): Future[List[V1.RegisteredStandingQuery]] =
    onlyIfNamespaceExists(inNamespace) {
      val everyQuery: List[String] = Nil
      getStandingQueriesWithNames(everyQuery, inNamespace)
    }

  /** Looks up a single standing query by name, in its V1 representation.
    *
    * @return `None` when the lookup for `queryName` yields no live standing query.
    */
  def getStandingQuery(queryName: String, inNamespace: NamespaceId): Future[Option[V1.RegisteredStandingQuery]] =
    onlyIfNamespaceExists(inNamespace) {
      val matches = getStandingQueriesWithNames(List(queryName), inNamespace)
      matches.map(_.headOption)(graph.system.dispatcher)
    }

  /** Get standing queries live on the graph with the specified names
    *
    * @param queryNames which standing queries to retrieve, empty list corresponds to all SQs
    * @return queries registered on the graph. Future never fails. List contains each live `V1.RegisteredStandingQuery`.
*/
  private def getStandingQueriesWithNames(
    queryNames: List[String],
    inNamespace: NamespaceId,
  ): Future[List[V1.RegisteredStandingQuery]] = onlyIfNamespaceExists(inNamespace) {
    synchronizedFakeFuture(outputTargetsLock) {
      val matchingInfo = for {
        // An empty request means "every standing query recorded for this namespace".
        queryName <- queryNames match {
          case Nil => outputTargets.get(inNamespace).map(_.keys).getOrElse(Iterable.empty)
          case names => names
        }
        (sqId, outputs) <- outputTargets.get(inNamespace).flatMap(_.get(queryName))
        // Only outputs stored as V1 targets are reported.
        v1Outputs = outputs.collect { case (name, target: OutputTarget.V1) => name -> target.definition }
        // Names with no live standing query on the graph are dropped here.
        (internalSq, startTime, bufferSize) <- graph
          .standingQueries(inNamespace)
          .flatMap(_.listStandingQueries.get(sqId))
      } yield makeRegisteredStandingQuery(
        internalSq,
        inNamespace,
        v1Outputs,
        startTime,
        bufferSize,
        graph.metrics,
      )
      Future.successful(matchingInfo.toList)
    }
  }

  /** Looks up the id recorded in `outputTargets` for `queryName`.
    *
    * Reads `outputTargets` directly, without taking `outputTargetsLock`.
    */
  def getStandingQueryIdV2(queryName: String, inNamespace: NamespaceId): Option[StandingQueryId] =
    noneIfNoNamespace(inNamespace) {
      outputTargets.get(inNamespace).flatMap(_.get(queryName)).map(_._1)
    }

  /** Looks up the id recorded in `outputTargets` for `queryName`.
    *
    * Same lookup as `getStandingQueryIdV2`; reads `outputTargets` without taking `outputTargetsLock`.
    */
  def getStandingQueryId(queryName: String, inNamespace: NamespaceId): Option[StandingQueryId] =
    noneIfNoNamespace(inNamespace) {
      outputTargets.get(inNamespace).flatMap(_.get(queryName)).map(_._1)
    }

  /** Builds a callback that, given an ingest stream's termination signal, stops `metrics` and logs
    * the outcome once the signal completes.
    *
    * @param name    ingest stream name, used only in the log messages
    * @param metrics metrics to stop (at `Instant.now`) when the stream finishes
    * @param ec      execution context on which the completion handler runs
    */
  def registerTerminationHooks(name: String, metrics: IngestMetrics)(ec: ExecutionContext): Future[Done] => Unit = {
    termSignal =>
      termSignal.onComplete {
        case Failure(err) =>
          val now = Instant.now
          metrics.stop(now)
          logger.error(
            log"Ingest stream '${Safe(name)}' has failed after ${Safe(metrics.millisSinceStart(now))}ms" withException err,
          )

        case Success(_) =>
          val now = Instant.now
          metrics.stop(now)
          logger.info(
            safe"Ingest stream '${Safe(name)}' successfully completed after ${Safe(metrics.millisSinceStart(now))}ms",
          )
      }(ec)
  }

  // Schema caches passed to ingest/output construction and to the protobuf Cypher procedures in this class.
  val protobufSchemaCache: ProtobufSchemaCache = new ProtobufSchemaCache.AsyncLoading(graph.dispatchers)
  val avroSchemaCache: AvroSchemaCache = new AvroSchemaCache.AsyncLoading(graph.dispatchers)

  /** Creates and starts a V1-configured ingest stream named `name` in `intoNamespace`.
    *
    * @return `Success(false)` when `ingestStreams` has no entry for the namespace or already has
    *         an ingest with this name; `Success(true)` once the stream has been started (and, if
    *         `shouldSaveMetadata`, its metadata synced); a `Failure` wrapping
    *         `InvalidStreamConfiguration` when the source definition does not validate.
    * @note `sys.error` is raised for the QuinePattern file formats unless the `qp.enabled` system
    *       property parses to `true`. The `memberIdx` parameter is not referenced in the body;
    *       metadata is synced with `thisMemberIdx`.
    */
  def addIngestStream(
    name: String,
    settings:
V1.IngestStreamConfiguration,
    intoNamespace: NamespaceId,
    previousStatus: Option[V1.IngestStreamStatus], // previousStatus is None if stream was not restored at all
    shouldResumeRestoredIngests: Boolean,
    timeout: Timeout,
    shouldSaveMetadata: Boolean = true,
    memberIdx: Option[MemberIdx] = Some(thisMemberIdx),
  ): Try[Boolean] = failIfNoNamespace(intoNamespace) {

    // A missing or unparseable `qp.enabled` property counts as disabled.
    val isQPEnabled = sys.props.get("qp.enabled").flatMap(_.toBooleanOption) getOrElse false

    // Reject the experimental QuinePattern file formats unless the feature is enabled.
    settings match {
      case fileIngest: V1.FileIngest =>
        fileIngest.format match {
          case _: V1.FileIngestFormat.QuinePatternLine =>
            if (!isQPEnabled) {
              sys.error("To use this experimental feature, you must set the `qp.enabled` property to `true`.")
            }
          case _: V1.FileIngestFormat.QuinePatternJson =>
            if (!isQPEnabled) {
              sys.error("To use this experimental feature, you must set the `qp.enabled` property to `true`.")
            }
          case _ => logger.trace(safe"Not using QuinePattern")
        }
      case _ => logger.trace(safe"Not using QuinePattern")
    }

    blocking(ingestStreamsLock.synchronized {
      ingestStreams.get(intoNamespace) match {
        case None => Success(false)
        case Some(ingests) if ingests.contains(name) => Success(false)
        case Some(ingests) =>
          // Decide whether the stream's valve starts open or closed, and the status to report.
          val (initialValveSwitchMode, initialStatus) = previousStatus match {
            case None =>
              // This is a freshly-created ingest, so there is no status to restore
              SwitchMode.Open -> V1.IngestStreamStatus.Running
            case Some(lastKnownStatus) =>
              val newStatus = V1.IngestStreamStatus.decideRestoredStatus(lastKnownStatus, shouldResumeRestoredIngests)
              val switchMode = newStatus.position match {
                case V1.ValvePosition.Open => SwitchMode.Open
                case V1.ValvePosition.Closed => SwitchMode.Close
              }
              switchMode -> newStatus
          }

          val src: ValidatedNel[IngestName, QuineIngestSource] =
            IngestSrcDef
              .createIngestSrcDef(
                name,
                intoNamespace,
                settings,
                initialValveSwitchMode,
                fileAccessPolicy,
              )(graph, protobufSchemaCache, logConfig)

          src
            .leftMap(errs => V1.IngestStreamConfiguration.InvalidStreamConfiguration(errs))
            .map { ingestSrcDef =>

              val metrics =
                IngestMetrics(Instant.now, None, ingestSrcDef.meter)
              val ingestSrc = ingestSrcDef.stream(
                intoNamespace,
                registerTerminationHooks = registerTerminationHooks(name, metrics)(graph.nodeDispatcherEC),
              )

              val streamDefWithControl: IngestStreamWithControl[UnifiedIngestConfiguration] =
                IngestStreamWithControl(
                  UnifiedIngestConfiguration(Right(settings)),
                  metrics,
                  () => ingestSrcDef.getControl.map(_.valveHandle)(ExecutionContext.parasitic),
                  () => ingestSrcDef.getControl.map(_.termSignal)(ExecutionContext.parasitic),
                  close = () => {
                    ingestSrcDef.getControl.flatMap(c => c.terminate())(ExecutionContext.parasitic)
                    () // Intentional fire and forget
                  },
                  initialStatus,
                )

              // Record the stream in state before running it, then (optionally) persist the metadata.
              val newNamespaceIngests = ingests + (name -> streamDefWithControl)
              ingestStreams += intoNamespace -> newNamespaceIngests

              ingestSrc.runWith(graph.masterStream.ingestCompletionsSink)

              // Blocks for up to `timeout` while still holding `ingestStreamsLock`.
              if (shouldSaveMetadata)
                Await.result(
                  syncIngestStreamsMetaData(thisMemberIdx),
                  timeout.duration,
                )

              true
            }
            .toEither
            .toTry
      }
    })
  }

  /** Create ingest stream using updated V2 Ingest api.
*/
  // The whole body is the strict argument of `Future.successful`, so it runs synchronously in the
  // caller before the returned Future exists. Validation errors are returned as `Left` of their messages.
  override def addV2IngestStream(
    name: IngestName,
    settings: QuineIngestConfiguration,
    intoNamespace: NamespaceId,
    timeout: Timeout,
    memberIdx: MemberIdx,
  )(implicit logConfig: LogConfig): Future[Either[Seq[String], Unit]] = Future.successful {
    invalidIfNoNamespace(intoNamespace) {
      blocking(ingestStreamsLock.synchronized {
        val meter = IngestMetered.ingestMeter(intoNamespace, name, graph.metrics)
        val metrics = IngestMetrics(Instant.now, None, meter)

        val validatedSrc = createV2IngestSource(
          name,
          settings,
          intoNamespace,
          None,
          shouldResumeRestoredIngests = false, // This is always a new ingest, so this shouldn't matter
          metrics,
          meter,
          graph,
        )(protobufSchemaCache, avroSchemaCache, logConfig)

        validatedSrc.map { quineIngestSrc =>
          val streamSource = quineIngestSrc.stream(
            intoNamespace,
            registerTerminationHooks(name, metrics)(graph.nodeDispatcherEC),
          )

          streamSource.runWith(graph.masterStream.ingestCompletionsSink)

          // Blocks for up to `timeout` while still holding `ingestStreamsLock`.
          Await.result(
            syncIngestStreamsMetaData(memberIdx),
            timeout.duration,
          )
          Right(())
        }
      })
    }.fold(
      errors => Left(errors.map(err => err.getMessage).toNev.toVector),
      success => success,
    )
  }

  /** Creates and starts a new V2-configured ingest stream, then syncs ingest metadata for
    * `thisMemberIdx`, blocking for up to `timeout`.
    *
    * @return the validation result of `createV2IngestSource` (or of `invalidIfNoNamespace`), with
    *         the valid side mapped to `Unit` after the stream has been started.
    */
  override def createV2IngestStream(
    name: IngestName,
    settings: QuineIngestConfiguration,
    intoNamespace: NamespaceId,
    timeout: Timeout,
  )(implicit logConfig: LogConfig): ValidatedNel[BaseError, Unit] =
    invalidIfNoNamespace(intoNamespace) {
      blocking(ingestStreamsLock.synchronized {
        val meter = IngestMetered.ingestMeter(intoNamespace, name, graph.metrics)
        val metrics = IngestMetrics(Instant.now, None, meter)

        val validatedSrc = createV2IngestSource(
          name,
          settings,
          intoNamespace,
          previousStatus = None,
          shouldResumeRestoredIngests = false,
          metrics = metrics,
          meter = meter,
          graph = graph,
        )(protobufSchemaCache, avroSchemaCache, logConfig)

        validatedSrc.map { quineIngestSrc =>
          val streamSource = quineIngestSrc.stream(
            intoNamespace,
            registerTerminationHooks(name, metrics)(graph.nodeDispatcherEC),
          )

          streamSource.runWith(graph.masterStream.ingestCompletionsSink)

          Await.result(
syncIngestStreamsMetaData(thisMemberIdx),
            timeout.duration,
          )
          ()
        }
      })
    }

  /** Re-creates and starts a V2-configured ingest stream from its previously known status.
    *
    * Unlike `createV2IngestStream`, this does not sync ingest metadata after starting the stream;
    * the `timeout` and `thisMemberIdx` parameters are not referenced in the body.
    */
  override def restoreV2IngestStream(
    name: String,
    settings: QuineIngestConfiguration,
    intoNamespace: NamespaceId,
    previousStatus: Option[IngestStreamStatus],
    shouldResumeRestoredIngests: Boolean,
    timeout: Timeout,
    thisMemberIdx: MemberIdx,
  )(implicit logConfig: LogConfig): ValidatedNel[BaseError, Unit] =
    invalidIfNoNamespace(intoNamespace) {
      blocking(ingestStreamsLock.synchronized {
        val meter = IngestMetered.ingestMeter(intoNamespace, name, graph.metrics)
        val metrics = IngestMetrics(Instant.now, None, meter)

        val validatedSrc = createV2IngestSource(
          name,
          settings,
          intoNamespace,
          previousStatus,
          shouldResumeRestoredIngests,
          metrics,
          meter,
          graph,
        )(protobufSchemaCache, avroSchemaCache, logConfig)

        validatedSrc.map { quineIngestSrc =>
          val streamSource = quineIngestSrc.stream(
            intoNamespace,
            registerTerminationHooks(name, metrics)(graph.nodeDispatcherEC),
          )
          streamSource.runWith(graph.masterStream.ingestCompletionsSink)
          ()
        }
      })
    }

  /** Looks up one ingest stream in state and converts it to the V2 info model with its name attached.
    *
    * @return `None` when the stream is not in state or the conversion yields `None`. The
    *         `memberIdx` parameter is not referenced in the body.
    */
  def getV2IngestStream(
    name: String,
    namespace: NamespaceId,
    memberIdx: MemberIdx,
  )(implicit logConfig: LogConfig): Future[Option[V2IngestEntities.IngestStreamInfoWithName]] =
    getIngestStreamFromState(name, namespace)
      .fold[Future[Option[V2IngestEntities.IngestStreamInfoWithName]]](Future.successful(None))(stream =>
        unifiedIngestStreamToInternalModel(stream).map(
          _.map(_.withName(name)),
        )(ExecutionContext.parasitic),
      )

  /** Returns the namespace's ingest streams with their settings converted via `asV1Config`.
    *
    * @return an empty map when `namespace` is not among `getNamespaces`.
    */
  def getIngestStreams(namespace: NamespaceId): Map[String, IngestStreamWithControl[V1.IngestStreamConfiguration]] =
    if (getNamespaces.contains(namespace))
      getIngestStreamsFromState(namespace).view
        .mapValues(isc => isc.copy(settings = isc.settings.asV1Config))
        .toMap
    else
      Map.empty

  /** Returns the namespace's ingest streams converted to the V2 info model, keyed by name.
    *
    * Streams whose conversion yields `None` are omitted; the result is empty when `namespace` is
    * not among `getNamespaces`. The `memberIdx` parameter is not referenced in the body.
    */
  def getV2IngestStreams(
    namespace: NamespaceId,
    memberIdx: MemberIdx,
  ): Future[Map[String, V2IngestEntities.IngestStreamInfo]] =
    if (getNamespaces.contains(namespace))
      Future
        .traverse(getIngestStreamsFromState(namespace).toSeq) { case (name, isc) =>
unifiedIngestStreamToInternalModel(isc).map(maybeInfo => name -> maybeInfo)(ExecutionContext.parasitic)
        }(implicitly, ExecutionContext.parasitic)
        // Keep only the streams for which a V2 model could be produced.
        .map(mapWithOptions => mapWithOptions.collect { case (name, Some(info)) => name -> info }.toMap)(
          graph.nodeDispatcherEC,
        )
    else
      Future.successful(Map.empty)

  /** Pairs every ingest stream in `namespace` with its current status.
    *
    * Streams whose unified settings hold a V2 config (`Left`) come back as
    * `Right(QuineIngestStreamWithStatus)`; those holding a V1 config (`Right`) come back as
    * `Left(V1.IngestStreamWithStatus)`.
    */
  protected def getIngestStreamsWithStatus(
    namespace: NamespaceId,
  ): Future[Map[IngestName, Either[V1.IngestStreamWithStatus, QuineIngestStreamWithStatus]]] =
    onlyIfNamespaceExists(namespace) {
      implicit val ec: ExecutionContext = graph.nodeDispatcherEC
      getIngestStreamsFromState(namespace).toList
        .traverse { case (name, isc) =>
          for {
            status <- isc.status
          } yield (
            name, {
              isc.settings.config match {
                case Left(v2Settings) => Right(QuineIngestStreamWithStatus(v2Settings, Some(status)))
                case Right(v1Settings) => Left(V1.IngestStreamWithStatus(v1Settings, Some(status)))
              }
            },
          )
        }
        .map(_.toMap)(ExecutionContext.parasitic)
    }

  /** Persists the current ingest streams (with status) of every namespace for member `thisMemberId`.
    *
    * Per namespace, V1-configured streams are written with `storeLocalMetaData` under the
    * namespace's `IngestStreamsKey`, then V2-configured streams with `saveV2IngestsToPersistor`.
    */
  private def syncIngestStreamsMetaData(thisMemberId: Int): Future[Unit] = {
    import Secret.Unsafe._
    implicit val ec: ExecutionContext = graph.nodeDispatcherEC
    Future
      .sequence(
        getNamespaces.map(namespace =>
          for {
            streamsWithStatus <- getIngestStreamsWithStatus(namespace)
            // Split into V1 (Left) and V2 (Right) streams; each kind is stored separately.
            (v1StreamsWithStatus, v2StreamsWithStatus) = streamsWithStatus.partitionMap {
              case (name, Left(v1)) => Left((name, v1))
              case (name, Right(v2)) => Right((name, v2))
            }
            _ <- storeLocalMetaData[Map[String, V1.IngestStreamWithStatus]](
              makeNamespaceMetaDataKey(namespace, IngestStreamsKey),
              thisMemberId,
              v1StreamsWithStatus.toMap,
            )
            _ <- saveV2IngestsToPersistor(
              namespace,
              thisMemberId,
              v2StreamsWithStatus.toMap,
            )(QuinePreservingCodecs.ingestStreamWithStatusCodec)
          } yield (),
        ),
      )
      .map(_ => ())
  }

  /** Removes an ingest stream from state and syncs ingest metadata, blocking for up to
    * `QuineApp.ConfigApiTimeout`.
    *
    * This method does not itself close or terminate the removed stream.
    *
    * @return the removed stream with its settings converted via `asV1Config`; `None` when the
    *         stream is not in state. Any exception thrown inside the `Try` (including from the
    *         awaited metadata sync) is swallowed and also yields `None`.
    */
  def removeIngestStream(
    name: String,
    namespace: NamespaceId,
  ): Option[IngestStreamWithControl[V1.IngestStreamConfiguration]] = noneIfNoNamespace(namespace) {
    Try {
      blocking(ingestStreamsLock.synchronized {
        ingestStreams.get(namespace).flatMap(_.get(name)).map { stream =>
          ingestStreams += namespace -> (ingestStreams(namespace) - name)
          Await.result(
            syncIngestStreamsMetaData(thisMemberIdx),
            QuineApp.ConfigApiTimeout,
          )
          stream
        }
      })
    }.toOption.flatten.map(isc => isc.copy(settings = isc.settings.asV1Config))
  }

  /** Terminates an ingest stream, removes it from state, and syncs ingest metadata.
    *
    * @return `None` when the stream is not in state. Otherwise the stream's V2 info with its final
    *         status and the termination result as `message`, available after the metadata sync,
    *         the final status, and the termination have all completed. The `memberIdx` parameter
    *         is not referenced in the body.
    */
  def removeV2IngestStream(
    name: String,
    namespace: NamespaceId,
    memberIdx: MemberIdx,
  ): Future[Option[V2IngestEntities.IngestStreamInfoWithName]] = graph.requiredGraphIsReadyFuture {
    blocking(ingestStreamsLock.synchronized {
      ingestStreams
        .get(namespace)
        .flatMap(_.get(name))
        .map { stream =>
          // Start the status read and the termination before the stream is dropped from state.
          val finalStatusFut = stream.status.map(determineFinalStatus)(ExecutionContext.parasitic)
          val terminationFut = terminateIngestStream(stream)
          ingestStreams += namespace -> (ingestStreams(namespace) - name)
          syncIngestStreamsMetaData(thisMemberIdx)
            .flatMap(_ =>
              finalStatusFut
                .zip(terminationFut)
                .flatMap { case (finalStatus, maybeErr) =>
                  unifiedIngestStreamToInternalModel(stream)
                    .map(
                      _.map(_.withName(name).copy(status = V1ToV2(finalStatus), message = maybeErr)),
                    )(ExecutionContext.parasitic)
                }(ExecutionContext.parasitic),
            )(graph.nodeDispatcherEC)
        }
        .fold(ifEmpty = Future.successful[Option[V2IngestEntities.IngestStreamInfoWithName]](None))(identity)
    })
  }

  /** Pauses an ingest stream by setting its pause state to `SwitchMode.Close`.
    * The `memberIdx` parameter is not referenced in the body.
    */
  def pauseV2IngestStream(
    name: String,
    namespace: NamespaceId,
    memberIdx: MemberIdx,
  ): Future[Option[V2IngestEntities.IngestStreamInfoWithName]] =
    graph.requiredGraphIsReadyFuture {
      setIngestStreamPauseState(name, namespace, SwitchMode.Close)
    }

  /** Unpauses an ingest stream by setting its pause state to `SwitchMode.Open`.
    * The `memberIdx` parameter is not referenced in the body.
    */
  def unpauseV2IngestStream(
    name: String,
    namespace: NamespaceId,
    memberIdx: MemberIdx,
  ): Future[Option[V2IngestEntities.IngestStreamInfoWithName]] =
    graph.requiredGraphIsReadyFuture {
      setIngestStreamPauseState(name, namespace, SwitchMode.Open)
    }

  /** == Utilities == */

  /** Closes every ingest stream in every namespace and removes its ingest meter.
    *
    * State (`ingestStreams`) is not modified here.
    */
  private def stopAllIngestStreams(): Future[Unit] = {
    implicit val ec: ExecutionContext = graph.nodeDispatcherEC
    Future
      .traverse(ingestStreams.toList) { case (ns, ingestMap) =>
        Future.sequence(ingestMap.map { case (name, ingest) =>
          IngestMetered.removeIngestMeter(ns, name, graph.metrics)
          ingest.close()
ingest.terminated().recover { case _ => Future.successful(Done) }
        })
      }(implicitly, graph.system.dispatcher)
      .map(_ => ())(graph.system.dispatcher)
  }

  /** Report telemetry unless the user has opted out.
    * This needs to be loaded after the webserver is started; if not, the initial telemetry
    * startup message may not get sent.
    *
    * @param testOnlyImproveQuine ⚠️ only for testing: this [unfortunate] approach makes it possible,
    *                             with limited refactoring, to observe the effects of an [[ImproveQuine]]
    *                             class when the relationship between it and the Quine App is the
    *                             effectful relationship under test
    */
  private def initializeTelemetry(testOnlyImproveQuine: Option[ImproveQuine]): Unit =
    if (helpMakeQuineBetter) {
      // Use the injected instance when one is supplied; otherwise build the real reporter.
      val iq = testOnlyImproveQuine.getOrElse {
        new ImproveQuine(
          service = "Quine",
          version = BuildInfo.version,
          persistorSlug = graph.namespacePersistor.slug,
          getSources = () => getSources,
          getSinks = () => getSinks,
          recipe = recipe,
          recipeCanonicalName = recipeCanonicalName,
        )(system = graph.system, logConfig = logConfig)
      }
      iq.startTelemetry()
    }

  /** Notifies this Quine App that the web server has started.
    * Intended to enable the App to execute tasks that are not
    * safe to execute until the web server has started.
    *
    * @param testOnlyImproveQuine ⚠️ only for testing: this [unfortunate] approach makes it possible,
    *                             with limited refactoring, to observe the effects of an [[ImproveQuine]]
    *                             class when the relationship between it and the Quine App is the
    *                             effectful relationship under test
    */
  def notifyWebServerStarted(testOnlyImproveQuine: Option[ImproveQuine] = None): Unit =
    initializeTelemetry(testOnlyImproveQuine)

  /** Prepare for a shutdown
    *
    * Ingest metadata is synced first and the streams are stopped second, so the persisted metadata
    * is from before the streams are stopped.
    */
  def shutdown()(implicit ec: ExecutionContext): Future[Unit] =
    for {
      _ <- syncIngestStreamsMetaData(thisMemberIdx)
      _ <- stopAllIngestStreams() // ... but don't update what is saved to disk
    } yield ()

  /** Re-creates every non-default namespace listed in global metadata under `NonDefaultNamespacesKey`.
    *
    * Names are passed to `validateNamespaceNames` first; namespaces are created with
    * `shouldWriteToPersistor = false`. The final `require` fails the returned Future unless every
    * `createNamespace` call yielded `true`.
    */
  def restoreNonDefaultNamespacesFromMetaData(implicit ec: ExecutionContext): Future[Unit] =
    getOrDefaultGlobalMetaData(NonDefaultNamespacesKey, List.empty[String])
      .flatMap { nss =>
        validateNamespaceNames(nss)
        Future.traverse(nss)(n => createNamespace(namespaceFromString(n), shouldWriteToPersistor = false))
      }
      .map(rs => require(rs.forall(identity), "Some namespaces could not be restored from persistence."))

  /** Load all the state from the persistor
    *
    * Not threadsafe, but we wait for this to complete before serving up the API.
    *
    * @param timeout used repeatedly for individual calls to get metadata when restoring ingest streams.
    * @param shouldResumeIngest should restored ingest streams be resumed
    * @return A Future that success/fails indicating whether or not state was successfully restored (if any).
    */
  def loadAppData(timeout: Timeout, shouldResumeIngest: Boolean): Future[Unit] = {
    implicit val ec: ExecutionContext = graph.system.dispatcher
    val sampleQueriesFut =
      getOrDefaultGlobalMetaData(SampleQueriesKey, V1.SampleQuery.defaults)
    val quickQueriesFut = getOrDefaultGlobalMetaData(QuickQueriesKey, V1.UiNodeQuickQuery.defaults)
    val nodeAppearancesFut = getOrDefaultGlobalMetaData(NodeAppearancesKey, V1.UiNodeAppearance.defaults)

    // Register all user-defined procedures that require app/graph information (the rest will be loaded
    // when the first query is compiled by the [[resolveCalls]] step of the Cypher compilation pipeline)
    registerUserDefinedProcedure(
      new CypherParseProtobuf(protobufSchemaCache),
    )
    registerUserDefinedProcedure(
      new CypherToProtobuf(protobufSchemaCache),
    )
    registerUserDefinedProcedure(
      new CypherStandingWiretap((queryName, namespace) => getStandingQueryId(queryName, namespace)),
    )

    val standingQueryOutputsFut = {
      import Secret.Unsafe._
      Future
        .sequence(
          getNamespaces.map(ns =>
            getOrDefaultGlobalMetaData(
              makeNamespaceMetaDataKey(ns, StandingQueryOutputsKey),
              Map.empty: V1StandingQueryDataMap,
)(sqOutputs1MapPersistenceCodec).map(ns -> _), ), ) .map(_.toMap) } val standingQueryOutputs2DataFut = { import Secret.Unsafe._ Future .sequence( getNamespaces.map(ns => getOrDefaultGlobalMetaData( makeNamespaceMetaDataKey(ns, V2StandingQueryOutputsKey), Map.empty: V2StandingQueryDataMap, )(sqOutputs2PersistenceCodec).map(ns -> _), ), ) .map(_.toMap) } // Constructing an output 2 interpreter is asynchronous. It is chained onto the async read of the data version // rather than done as a synchronous step afterward like it is for the V1 outputs. val standingQueryOutput2Fut = standingQueryOutputs2DataFut.flatMap { nsMap => Future .traverse(nsMap.toVector) { case (ns, queryOutputs) => val queriesWithResultHubs = queryOutputs .map { case (queryName, (sqId, outputToWorkflowDef)) => (queryName, sqId, outputToWorkflowDef, graph.standingQueries(ns).flatMap(_.standingResultsHub(sqId))) } .collect { case (queryName, sqId, outputToWorkflowDef, Some(resultHub)) => (queryName, sqId, outputToWorkflowDef, resultHub) } Future .traverse(queriesWithResultHubs.toVector) { case (queryName, sqId, outputToWorkflowDef, resultHub) => Future .traverse(outputToWorkflowDef.toVector) { case (outputName, workflowDef) => ApiToStanding(workflowDef, ns)(graph, protobufSchemaCache).map { workflowInterpreter => val killSwitch = resultHub .viaMat(KillSwitches.single)(Keep.right) .via(workflowInterpreter.flow(graph)(logConfig)) .map(_ => SqResultsExecToken(s"SQ: $outputName in: $ns")) .to(graph.masterStream.standingOutputsCompletionSink) .run() outputName -> OutputTarget.V2(workflowDef, killSwitch) } } .map { outputNameToV2TargetPairs => val outputsMap = outputNameToV2TargetPairs.toMap queryName -> (sqId, outputsMap) } } .map(queryNameToSqIdAndOutputTargetPairs => ns -> queryNameToSqIdAndOutputTargetPairs.toMap) } .map(nsToQueryOutput2TargetPairs => nsToQueryOutput2TargetPairs.toMap) } val ingestStreamFut = Future .sequence( getNamespaces.map(ns => getOrDefaultLocalMetaDataWithFallback[Map[IngestName, 
V1.IngestStreamWithStatus], Map[ IngestName, V1.IngestStreamConfiguration, ]]( makeNamespaceMetaDataKey(ns, IngestStreamsKey), thisMemberIdx, Map.empty[IngestName, V1.IngestStreamWithStatus], _.view.mapValues(i => V1.IngestStreamWithStatus(config = i, status = None)).toMap, ).map(v => ns -> v), ), ) .map(_.toMap) val v2IngestStreamFut = { import Secret.Unsafe._ loadV2IngestsFromPersistor(thisMemberIdx)( QuinePreservingCodecs.ingestStreamWithStatusCodec, implicitly, ) } for { sq <- sampleQueriesFut qq <- quickQueriesFut na <- nodeAppearancesFut so <- standingQueryOutputsFut so2 <- standingQueryOutput2Fut is <- ingestStreamFut is2 <- v2IngestStreamFut } yield { sampleQueries = sq quickQueries = qq nodeAppearances = na // Note: SQs on _the graph_ are restored and started during GraphService initialization. // This sections restores the external handler for those results that publishes to outside systems. val v1OutputNamespaces = so.flatMap { case (namespace, outputTarget) => graph .standingQueries(namespace) .map { sqns => // Silently ignores any SQs in an absent namespace. 
val restoredOutputTargets = outputTarget .map { case (sqName, (sqId, outputsStored)) => (sqName, (sqId, outputsStored, sqns.standingResultsHub(sqId))) } .collect { case (sqName, (sqId, outputsStored, Some(sqResultSource))) => val outputs = outputsStored.map { case (outputName, sqResultOutput) => // Attach the SQ result source to each consumer and track completion tokens in the masterStream val killSwitch = sqResultSource.runWith( StandingQueryResultOutput.resultHandlingSink(outputName, namespace, sqResultOutput, graph)( protobufSchemaCache, logConfig, ), ) outputName -> OutputTarget.V1(sqResultOutput, killSwitch) } sqName -> (sqId -> outputs) } Map(namespace -> restoredOutputTargets) } .getOrElse(Map.empty) } outputTargets = mergeOutputNamespaces(v1OutputNamespaces, so2) is.foreach { case (namespace, ingestMap) => ingestMap.foreach { case (name, ingest) => addIngestStream( name, ingest.config, namespace, previousStatus = ingest.status, shouldResumeIngest, timeout, shouldSaveMetadata = false, // We're restoring what was saved. 
Some(thisMemberIdx),
          ) match {
            case Success(true) => ()
            case Success(false) =>
              logger.error(
                safe"Duplicate ingest stream attempted to start with name: ${Safe(name)} and settings: ${ingest.config}",
              )
            case Failure(e) =>
              logger.error(
                log"Error when restoring ingest stream: ${Safe(name)} with settings: ${ingest.config}" withException e,
              )
          }
        }
      }
      is2.foreach { case (namespace, ingestMap) =>
        ingestMap.foreach { case (name, ingest) =>
          // Use the FileAccessPolicy that was computed at app startup
          // This validates restored ingests against the current configuration
          restoreV2IngestStream(
            name,
            ingest.config,
            namespace,
            previousStatus = ingest.status,
            shouldResumeRestoredIngests = shouldResumeIngest,
            timeout = timeout,
            thisMemberIdx = thisMemberIdx,
          ) match {
            case Validated.Valid(_) => ()
            case Validated.Invalid(e) =>
              logger.error(
                log"Errors when restoring ingest stream: ${Safe(name)} with settings: ${ingest.config}" withException e.head,
              )
          }
        }
      }
    }
  }

  /** Persist both the V1 and the V2 standing query output definitions.
    *
    * Both writes are started before either result is inspected. The returned Future completes only
    * once both writes have completed, and it fails if either write fails (previously the Future of
    * the V1 write was discarded, so its failure went unnoticed).
    *
    * @return a Future that completes when both the V1 and V2 outputs have been stored
    */
  private[this] def storeStandingQueryOutputs(): Future[Unit] = {
    val v1Stored = storeStandingQueryOutputs1()
    val v2Stored = storeStandingQueryOutputs2()
    // Chain rather than discard: the caller must observe a failure of either write.
    v1Stored.flatMap(_ => v2Stored)(ExecutionContext.parasitic)
  }

  /** Store, per namespace, the V1 output definitions of every standing query under
    * `StandingQueryOutputsKey`. V2 outputs are filtered out by the `collect`.
    */
  private[this] def storeStandingQueryOutputs1(): Future[Unit] = {
    import Secret.Unsafe._
    implicit val ec = graph.system.dispatcher
    Future
      .sequence(outputTargets.map { case (ns, targets) =>
        storeGlobalMetaData(
          makeNamespaceMetaDataKey(ns, StandingQueryOutputsKey),
          targets.map { case (name, (id, outputsMap)) =>
            name -> (id -> outputsMap.collect { case (outputName, OutputTarget.V1(definition, _)) =>
              outputName -> definition
            })
          },
        )(sqOutputs1MapPersistenceCodec)
      })
      .map(_ => ())(ExecutionContext.parasitic)
  }

  /** Store, per namespace, the V2 output definitions of every standing query under
    * `V2StandingQueryOutputsKey`. V1 outputs are filtered out by the `collect`.
    */
  private[this] def storeStandingQueryOutputs2(): Future[Unit] = {
    import Secret.Unsafe._
    implicit val ec = graph.system.dispatcher
    Future
      .sequence(outputTargets.map { case (ns, targets) =>
        storeGlobalMetaData(
          makeNamespaceMetaDataKey(ns, V2StandingQueryOutputsKey),
          targets.map { case (name, (id, outputsMap)) =>
            name -> (id -> outputsMap.collect { case (outputName, OutputTarget.V2(definition, _)) =>
outputName -> definition }) }, )(sqOutputs2PersistenceCodec) }) .map(_ => ())(ExecutionContext.parasitic) } } object QuineApp { final val VersionKey = "quine_app_state_version" final val SampleQueriesKey = "sample_queries" final val QuickQueriesKey = "quick_queries" final val NodeAppearancesKey = "node_appearances" final val StandingQueryOutputsKey = "standing_query_outputs" final val V2StandingQueryOutputsKey = "v2_standing_query_outputs" final val IngestStreamsKey = "ingest_streams" final val V2IngestStreamsKey = "v2_ingest_streams" final val NonDefaultNamespacesKey = "live_namespaces" final val ThrottleMasterStreamKey = "throttle_master_stream" final val DisableThrottleMasterStreamKey = "disable_throttle_master_stream" type FriendlySQName = String type SQOutputName = String import com.thatdot.quine.app.StandingQueryResultOutput.OutputTarget private type OutputTargetsV1 = Map[SQOutputName, OutputTarget.V1] private type QueryOutputTargetsV1 = Map[FriendlySQName, (StandingQueryId, OutputTargetsV1)] private type NamespaceOutputTargetsV1 = Map[NamespaceId, QueryOutputTargetsV1] private type OutputTargetsV2 = Map[SQOutputName, OutputTarget.V2] private type QueryOutputTargetsV2 = Map[FriendlySQName, (StandingQueryId, OutputTargetsV2)] private type NamespaceOutputTargetsV2 = Map[NamespaceId, QueryOutputTargetsV2] private type OutputTargets = Map[SQOutputName, OutputTarget] private type QueryOutputTargets = Map[FriendlySQName, (StandingQueryId, OutputTargets)] private type NamespaceOutputTargets = Map[NamespaceId, QueryOutputTargets] import com.thatdot.quine.app.v2api.{definitions => Api2Defs} private type V2StandingQueryDataMap = Map[FriendlySQName, (StandingQueryId, Map[SQOutputName, Api2Defs.query.standing.StandingQueryResultWorkflow])] /** Type alias for V1 standing query data map (matches the type used in persistence). 
*/
  private[app] type V1StandingQueryDataMap =
    Map[FriendlySQName, (StandingQueryId, Map[SQOutputName, V1.StandingQueryResultOutputUserDef])]

  // `StandingQueryId` lives in `quine-core`, which should stay free of codec concerns, so its
  // Circe codecs are declared here instead; persistence and cluster communication both use them.
  private[app] val standingQueryIdEncoder: io.circe.Encoder[StandingQueryId] =
    io.circe.Encoder[UUID].contramap(sqId => sqId.uuid)
  private[app] val standingQueryIdDecoder: io.circe.Decoder[StandingQueryId] =
    io.circe.Decoder[UUID].map(uuid => StandingQueryId(uuid))

  /** Persistence codec for a single V1 standing query output definition.
    *
    * Built from the credential-preserving schema, so secrets are written out as-is rather than
    * redacted and therefore survive a round-trip through the persistor.
    * The `Secret.UnsafeAccess` witness (`import Secret.Unsafe._`) forces call sites to opt in
    * explicitly to this unsafe access.
    */
  def sqOutputs1PersistenceCodec(implicit
    ev: Secret.UnsafeAccess,
  ): EncoderDecoder[V1.StandingQueryResultOutputUserDef] = {
    val schema = V1.PreservingStandingQuerySchemas.standingQueryResultOutputSchema
    EncoderDecoder.ofEncodeDecode[V1.StandingQueryResultOutputUserDef](schema.encoder, schema.decoder)
  }

  /** Codec for persistence of V1 standing query data map (full persistence type).
    * Uses preserving encoder so credentials survive round-trip (not redacted).
    * Requires witness (`import Secret.Unsafe._`) to call, making unsafe access explicit at call sites.
*/ def sqOutputs1MapPersistenceCodec(implicit ev: Secret.UnsafeAccess): EncoderDecoder[V1StandingQueryDataMap] = { // Schema derivation context with preserving output schema and genericRecord for StandingQueryId/tuples object Schemas extends V1.StandingQuerySchemas with endpoints4s.circe.JsonSchemas with endpoints4s.generic.JsonSchemas with com.thatdot.quine.routes.exts.CirceJsonAnySchema { // Override to preserve credentials (not redact) implicit override lazy val secretSchema: JsonSchema[Secret] = stringJsonSchema(format = None).xmap(Secret.apply)(_.unsafeValue) // Re-derive schemas that depend on secretSchema implicit override lazy val awsCredentialsSchema: Record[V1.AwsCredentials] = genericRecord[V1.AwsCredentials] implicit override lazy val standingQueryResultOutputSchema: Tagged[V1.StandingQueryResultOutputUserDef] = lazyTagged(V1.StandingQueryResultOutputUserDef.title)(genericTagged[V1.StandingQueryResultOutputUserDef]) // Derive using genericRecord, matching the original pre-22f99d13f format implicit val sqIdSchema: Record[StandingQueryId] = genericRecord[StandingQueryId] implicit val tupSchema: Record[(StandingQueryId, Map[SQOutputName, V1.StandingQueryResultOutputUserDef])] = genericRecord[(StandingQueryId, Map[SQOutputName, V1.StandingQueryResultOutputUserDef])] val mapSchema: JsonSchema[V1StandingQueryDataMap] = mapJsonSchema(tupSchema) } EncoderDecoder.ofEncodeDecode(Schemas.mapSchema.encoder, Schemas.mapSchema.decoder) } /** Codec for persistence of V2 standing query outputs. * Uses preserving encoder so credentials survive round-trip (not redacted). * Requires witness (`import Secret.Unsafe._`) to call. 
*/
  def sqOutputs2PersistenceCodec(implicit ev: Secret.UnsafeAccess): EncoderDecoder[V2StandingQueryDataMap] = {
    import io.circe.{Decoder, Encoder}
    // Local implicits that steer derivation of the map codec: the workflow encoder is the
    // credential-preserving one, and StandingQueryId uses the codecs declared in this object.
    implicit val workflowPreservingEnc: Encoder[Api2Defs.query.standing.StandingQueryResultWorkflow] =
      Api2Defs.query.standing.StandingQueryResultWorkflow.preservingEncoder
    implicit val sqIdEncInstance: Encoder[StandingQueryId] = standingQueryIdEncoder
    implicit val sqIdDecInstance: Decoder[StandingQueryId] = standingQueryIdDecoder
    EncoderDecoder.ofEncodeDecode
  }

  /** Build the metadata key under which per-namespace data is stored.
    *
    * The default namespace uses `basedOnKey` unchanged; any other namespace uses `basedOnKey`
    * followed by a hyphen and the namespace's string form.
    *
    * @see GlobalPersistor.setLocalMetaData for where a local identifier is prepended to these keys with a hyphen.
    */
  def makeNamespaceMetaDataKey(namespace: NamespaceId, basedOnKey: String): String = {
    // Example storage keys: "standing_query_outputs-myNamespace" or for default: "standing_query_outputs"
    val namespaceSuffix = namespace.map(_ => "-" + namespaceToString(namespace)).getOrElse("")
    basedOnKey + namespaceSuffix
  }

  // Upper bound on how long a configuring API call (e.g., "add ingest query" or "update node
  // appearances") is allowed to run
  final val ConfigApiTimeout: FiniteDuration = 30.seconds

  /** Aggressively synchronize a unit of work returning a Future, and block on the Future's completion
    *
    * Multiple executions of synchronizedFakeFuture are guaranteed to not interleave any effects represented by their
    * arguments. This is used to ensure that local and persisted effects within `synchronizeMe` are fully applied
    * without interleaving. For certain persistors, such as Cassandra, synchronization (without an Await) would be
    * sufficient, because the Cassandra persistor guarantees that effects started in sequence will be applied in the
    * same sequence.
    *
    * NB while this does inherit the reentrance properties of `synchronized`, this function might still be prone to
    * deadlocking! Use with *extreme* caution!
*/ private[app] def synchronizedFakeFuture[T](lock: AnyRef)(synchronizeMe: => Future[T]): Future[T] = blocking( lock.synchronized( Await.ready(synchronizeMe: Future[T], QuineApp.ConfigApiTimeout), ), ) /** Version to track schemas saved by Quine app state * * Remember to increment this if schemas in Quine app state evolve in * backwards incompatible ways. */ final val CurrentPersistenceVersion: Version = Version(1, 2, 0) def quineAppIsEmpty(persistenceAgent: PrimePersistor): Future[Boolean] = { val metaDataKeys = List(SampleQueriesKey, QuickQueriesKey, NodeAppearancesKey, StandingQueryOutputsKey, IngestStreamsKey) Future.foldLeft( metaDataKeys.map(k => persistenceAgent.getMetaData(k).map(_.isEmpty)(ExecutionContext.parasitic)), )(true)(_ && _)(ExecutionContext.parasitic) } import com.thatdot.quine._ /** Aggregate Quine SQ outputs and Quine standing query into a user-facing SQ * * @note this includes only local information/metrics! * @param internal Quine representation of the SQ * @param outputs SQ outputs registered on the query * @param startTime when the query was started (or re-started) * @param bufferSize number of elements buffered in the SQ output queue * @param metrics Quine metrics object */ private def makeRegisteredStandingQuery( internal: StandingQueryInfo, inNamespace: NamespaceId, outputs: Map[String, V1.StandingQueryResultOutputUserDef], startTime: Instant, bufferSize: Int, metrics: HostQuineMetrics, ): V1.RegisteredStandingQuery = { val mode = internal.queryPattern match { case _: graph.StandingQueryPattern.DomainGraphNodeStandingQueryPattern => V1.StandingQueryPattern.StandingQueryMode.DistinctId case _: graph.StandingQueryPattern.MultipleValuesQueryPattern => V1.StandingQueryPattern.StandingQueryMode.MultipleValues case _: graph.StandingQueryPattern.QuinePatternQueryPattern => V1.StandingQueryPattern.StandingQueryMode.QuinePattern } val pattern = internal.queryPattern.origin match { case graph.PatternOrigin.GraphPattern(_, Some(cypherQuery)) => 
Some(V1.StandingQueryPattern.Cypher(cypherQuery, mode)) case _ => None } val meter = metrics.standingQueryResultMeter(inNamespace, internal.name) val outputHashCode = metrics.standingQueryResultHashCode(internal.id) V1.RegisteredStandingQuery( internal.name, internal.id.uuid, pattern, outputs, internal.queryPattern.includeCancellation, internal.queueBackpressureThreshold, stats = Map( "local" -> V1.StandingQueryStats( rates = V1.RatesSummary( count = meter.getCount, oneMinute = meter.getOneMinuteRate, fiveMinute = meter.getFiveMinuteRate, fifteenMinute = meter.getFifteenMinuteRate, overall = meter.getMeanRate, ), startTime, MILLIS.between(startTime, Instant.now()), bufferSize, outputHashCode.sum.toString, ), ), ) } /** Aggregate Quine SQ outputs and Quine standing query into a user-facing SQ, V2 * * @note this includes only local information/metrics! * @param internal Quine representation of the SQ * @param outputs SQ outputs registered on the query * @param startTime when the query was started (or re-started) * @param bufferSize number of elements buffered in the SQ output queue * @param metrics Quine metrics object */ private def makeRegisteredStandingQueryV2( internal: StandingQueryInfo, inNamespace: NamespaceId, outputs: Seq[V2ApiStanding.StandingQueryResultWorkflow], startTime: Instant, bufferSize: Int, metrics: HostQuineMetrics, ): V2ApiStanding.StandingQuery.RegisteredStandingQuery = { // TODO Make a decision here about return type; // - should callers manage getting this to a "data model" representation? // - should this simply return the "data model" representation? // - should this return an envelope of (spec, status, meta)? // > honestly, is "registered" anything but a piece of "status" or "meta" on otherwise the same spec? 
// Note that the near-equivalent of this work in QEApp is in getStandingQueriesWithNames2; it does not _need_ the // same question to be answered, since there's no extraction like this, it just doesn't need to transform the data // model back into the "internal [object] model" anymore. val mode = internal.queryPattern match { case _: graph.StandingQueryPattern.DomainGraphNodeStandingQueryPattern => V2ApiStanding.StandingQueryPattern.StandingQueryMode.DistinctId case _: graph.StandingQueryPattern.MultipleValuesQueryPattern => V2ApiStanding.StandingQueryPattern.StandingQueryMode.MultipleValues case _: graph.StandingQueryPattern.QuinePatternQueryPattern => V2ApiStanding.StandingQueryPattern.StandingQueryMode.QuinePattern } val pattern: Option[V2ApiStanding.StandingQueryPattern] = internal.queryPattern.origin match { case graph.PatternOrigin.GraphPattern(_, Some(cypherQuery)) => Some(V2ApiStanding.StandingQueryPattern.Cypher(cypherQuery, mode)) case _ => None } val meter = metrics.standingQueryResultMeter(inNamespace, internal.name) val outputHashCode = metrics.standingQueryResultHashCode(internal.id) V2ApiStanding.StandingQuery.RegisteredStandingQuery( name = internal.name, internalId = internal.id.uuid, pattern = pattern, outputs = outputs, includeCancellations = internal.queryPattern.includeCancellation, inputBufferSize = internal.queueBackpressureThreshold, stats = Map( "local" -> V2ApiStanding.StandingQueryStats( rates = Api2.RatesSummary( count = meter.getCount, oneMinute = meter.getOneMinuteRate, fiveMinute = meter.getFiveMinuteRate, fifteenMinute = meter.getFifteenMinuteRate, overall = meter.getMeanRate, ), startTime = startTime, totalRuntime = MILLIS.between(startTime, Instant.now()), bufferSize = bufferSize, outputHashCode = outputHashCode.sum, ), ), ) } private def mergeOutputNamespaces( outputV1Namespaces: NamespaceOutputTargetsV1, outputV2Namespaces: NamespaceOutputTargetsV2, ): NamespaceOutputTargets = { val namespaces = outputV1Namespaces.keySet ++ 
outputV2Namespaces.keySet
    namespaces.foldLeft(Map.empty: NamespaceOutputTargets) { case (nsMap, ns) =>
      val v1Queries = outputV1Namespaces.getOrElse(ns, Map.empty)
      val v2Queries = outputV2Namespaces.getOrElse(ns, Map.empty)
      nsMap + (ns -> mergeOutputQueries(v1Queries, v2Queries))
    }
  }

  /** Combine the V1 and V2 output targets of one namespace into a single map keyed by query name.
    *
    * A query present on both sides keeps the standing query id recorded on the V1 side and gets
    * the union of both output maps; a query present on only one side is carried over as-is.
    */
  private def mergeOutputQueries(
    outputV1Queries: QueryOutputTargetsV1,
    outputV2Queries: QueryOutputTargetsV2,
  ): QueryOutputTargets = {
    val allQueryNames = outputV1Queries.keySet ++ outputV2Queries.keySet
    allQueryNames.iterator.flatMap { queryName =>
      val merged: Option[(StandingQueryId, OutputTargets)] =
        (outputV1Queries.get(queryName), outputV2Queries.get(queryName)) match {
          case (Some((v1Id, v1Outputs)), Some((_, v2Outputs))) => Some((v1Id, v1Outputs ++ v2Outputs))
          case (Some((v1Id, v1Outputs)), None) => Some((v1Id, v1Outputs))
          case (None, Some((v2Id, v2Outputs))) => Some((v2Id, v2Outputs))
          // Every name comes from one of the two key sets, so this contributes nothing.
          case (None, None) => None
        }
      merged.map(entry => queryName -> entry)
    }.toMap
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/QuineAppIngestControl.scala
================================================
package com.thatdot.quine.app

import scala.concurrent.{ExecutionContext, Future}

import org.apache.pekko.Done
import org.apache.pekko.stream.UniqueKillSwitch

import org.apache.pekko

import com.thatdot.quine.graph.IngestControl
import com.thatdot.quine.util.{SwitchMode, ValveSwitch}

/** Handle for pausing, resuming and terminating an ingest stream. */
sealed trait QuineAppIngestControl extends IngestControl {
  val valveHandle: ValveSwitch
  val termSignal: Future[Done]
  def pause(): Future[Boolean]
  def unpause(): Future[Boolean]
  def terminate(): Future[Done]
}

/** Ingest control backed by a valve (pause/unpause) and a [[ShutdownSwitch]] (terminate). */
final case class ControlSwitches(shutdownSwitch: ShutdownSwitch, valveHandle: ValveSwitch, termSignal: Future[Done])
    extends QuineAppIngestControl {
  // Pausing closes the valve; unpausing opens it again.
  def pause(): Future[Boolean] = valveHandle.flip(SwitchMode.Close)
  def unpause(): Future[Boolean] = valveHandle.flip(SwitchMode.Open)
  def terminate(): Future[Done] = shutdownSwitch.terminate(termSignal)
}

/** This allows us to generalize
over ingests where we're manually adding pekko stream kill switches and libraries
  * (such as kafka) that provide a stream with a library class wrapping a kill switch.
  */
trait ShutdownSwitch {

  /** Trigger shutdown of the underlying stream.
    *
    * @param termSignal the stream's termination signal, supplied by the caller
    * @return a Future derived from `termSignal`; how it is derived is up to the implementation
    */
  def terminate(termSignal: Future[Done]): Future[Done]
}

/** [[ShutdownSwitch]] for streams that carry a Pekko `UniqueKillSwitch`. */
case class PekkoKillSwitch(killSwitch: UniqueKillSwitch) extends ShutdownSwitch {

  /** Calls `shutdown()` on the kill switch, then returns `termSignal` unchanged. */
  def terminate(termSignal: Future[Done]): Future[Done] = {
    killSwitch.shutdown()
    termSignal
  }
}

/** [[ShutdownSwitch]] for streams controlled through a Pekko Kafka `Consumer.Control`. */
case class KafkaKillSwitch(killSwitch: pekko.kafka.scaladsl.Consumer.Control) extends ShutdownSwitch {

  /** Delegates to `drainAndShutdown`, passing `termSignal` and the parasitic execution context. */
  def terminate(termSignal: Future[Done]): Future[Done] =
    killSwitch.drainAndShutdown(termSignal)(ExecutionContext.parasitic)
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/QuinePreservingCodecs.scala
================================================
package com.thatdot.quine.app

import io.circe.Encoder

import com.thatdot.common.security.Secret
import com.thatdot.quine.app.model.ingest2.V2IngestEntities.QuineIngestStreamWithStatus
import com.thatdot.quine.serialization.EncoderDecoder

/** Quine codecs that preserve credentials (instead of redacting them).
  *
  * WARNING: Only use for persistence, NEVER for API responses.
  *
  * == Background ==
  *
  * Quine API types derive encoders that redact `Secret` values using `Secret.toString` which
  * produces "Secret(****)". This is correct for API responses but would break persistence
  * by destroying credentials.
  *
  * == Solution ==
  *
  * Each type provides a `preservingEncoder` method that emits the actual credential value.
  * This object wires those preserving encoders into complete codecs for persistence.
  *
  * == Usage ==
  *
  * All codec methods require `import Secret.Unsafe._` at the call site:
  * {{{
  * import Secret.Unsafe._
  * val codec = QuinePreservingCodecs.ingestStreamWithStatusCodec
  * }}}
  */
object QuinePreservingCodecs {

  /** Codec for `QuineIngestStreamWithStatus` persistence.
    * Requires witness (`import Secret.Unsafe._`) to call.
*/ def ingestStreamWithStatusCodec(implicit ev: Secret.UnsafeAccess, ): EncoderDecoder[QuineIngestStreamWithStatus] = { implicit val enc: Encoder[QuineIngestStreamWithStatus] = QuineIngestStreamWithStatus.preservingEncoder EncoderDecoder.ofEncodeDecode } } ================================================ FILE: quine/src/main/scala/com/thatdot/quine/app/Recipe.scala ================================================ package com.thatdot.quine.app import java.io.{File, FileNotFoundException} import java.net.HttpURLConnection.{HTTP_MOVED_PERM, HTTP_MOVED_TEMP} import java.net.{HttpURLConnection, MalformedURLException, URL, URLEncoder} import scala.util.Using import scala.util.control.Exception.catching import cats.data.{EitherNel, Validated, ValidatedNel} import cats.implicits._ import endpoints4s.generic.docs import io.circe import io.circe.DecodingFailure.Reason.WrongTypeExpectation import io.circe.Error.showError import io.circe.generic.extras.Configuration import io.circe.generic.extras.semiauto._ import io.circe.{Decoder, DecodingFailure, Json} import org.snakeyaml.engine.v2.api.YamlUnicodeReader import com.thatdot.common.security.Secret import com.thatdot.quine.routes.StandingQueryResultOutputUserDef._ import com.thatdot.quine.routes._ import com.thatdot.quine.routes.exts.CirceJsonAnySchema // ───────────────────────────────────────────────────────────────────────────── // Recipe: Unified type for V1 and V2 recipes // ───────────────────────────────────────────────────────────────────────────── /** Represents either a V1 or V2 recipe */ sealed trait Recipe { def version: Int def title: String def contributor: Option[String] def summary: Option[String] def description: Option[String] def iconImage: Option[String] } object Recipe { /** V1 recipe wrapper */ final case class V1(recipe: RecipeV1) extends Recipe { def version: Int = recipe.version def title: String = recipe.title def contributor: Option[String] = recipe.contributor def summary: Option[String] = 
recipe.summary def description: Option[String] = recipe.description def iconImage: Option[String] = recipe.iconImage } /** V2 recipe wrapper */ final case class V2(recipe: RecipeV2.Recipe) extends Recipe { def version: Int = recipe.version def title: String = recipe.title def contributor: Option[String] = recipe.contributor def summary: Option[String] = recipe.summary def description: Option[String] = recipe.description def iconImage: Option[String] = recipe.iconImage } /** Get a recipe (V1 or V2) by URL, file path, or canonical name */ def get(recipeIdentifyingString: String): Either[Seq[String], Recipe] = RecipeLoader.getAny(recipeIdentifyingString) /** Get a recipe and apply variable substitutions */ def getAndSubstitute(recipeIdentifyingString: String, values: Map[String, String]): Either[Seq[String], Recipe] = RecipeLoader.getAndSubstituteAny(recipeIdentifyingString, values) } // ───────────────────────────────────────────────────────────────────────────── // RecipeLoader: Handles version detection and loading // ───────────────────────────────────────────────────────────────────────────── /** Handles loading recipes with version detection */ object RecipeLoader { import io.circe.Error.showError val recipeFileExtensions: List[String] = List(".json", ".yaml", ".yml") private val recipeRedirectServiceUrlPrefix = "https://recipes.quine.io/" private val requiredRecipeContentUrlPrefix = "https://raw.githubusercontent.com/thatdot/quine/main/" /** Detect the version from a parsed JSON document */ def detectVersion(json: Json): Either[String, Int] = json.hcursor.downField("version").as[Int].leftMap(_ => "Missing or invalid 'version' field in recipe") /** Parse JSON as V1 recipe */ def parseV1(json: Json): Either[Seq[String], RecipeV1] = RecipeV1.fromJson(json).leftMap(_.toList.map(showError.show)) /** Parse JSON as V2 recipe */ def parseV2(json: Json): Either[Seq[String], RecipeV2.Recipe] = 
RecipeV2.Recipe.decoder.decodeAccumulating(json.hcursor).toEither.leftMap(_.toList.map(showError.show))

  /** Resolve a recipe identifying string to a URL.
    *
    * Resolution is attempted in this order:
    *  1. If the string parses as a URL, that URL is used as-is.
    *  1. Otherwise, if it ends (ignoring case) with one of [[recipeFileExtensions]], it is treated
    *     as a local file path.
    *  1. Otherwise it is treated as a canonical recipe name: it is URL-encoded, appended to the
    *     recipe redirect service prefix, and the service is queried without following redirects.
    *     Only a permanent redirect (HTTP 301) whose `Location` header starts with the required
    *     recipe content prefix is accepted.
    *
    * @param recipeIdentifyingString URL, file path, or canonical recipe name
    * @return the resolved URL, or the error messages explaining why resolution failed. Exceptions
    *         raised while talking to the redirect service are reported as a `Left` as well.
    */
  def resolveToUrl(recipeIdentifyingString: String): Either[Seq[String], URL] =
    catching(classOf[MalformedURLException]).opt(new URL(recipeIdentifyingString)) match {
      case Some(url: URL) => Right(url)
      case None if recipeFileExtensions.exists(recipeIdentifyingString.toLowerCase.endsWith(_)) =>
        Right(new File(recipeIdentifyingString).toURI.toURL)
      case None =>
        val recipeIdentifyingStringUrlEncoded: String = URLEncoder.encode(recipeIdentifyingString, "UTF-8")
        val urlToRedirectService = new URL(recipeRedirectServiceUrlPrefix + recipeIdentifyingStringUrlEncoded)
        // Lets `Using` release the connection by disconnecting it.
        implicit val releaseableHttpURLConnection: Using.Releasable[HttpURLConnection] =
          (resource: HttpURLConnection) => resource.disconnect()
        Using(urlToRedirectService.openConnection.asInstanceOf[HttpURLConnection]) { http: HttpURLConnection =>
          http.setInstanceFollowRedirects(false)
          http.getResponseCode match {
            case HTTP_MOVED_PERM =>
              // `getHeaderField` returns null when the header is absent; report that explicitly
              // instead of letting it surface as a NullPointerException message.
              Option(http.getHeaderField("Location")) match {
                case Some(location) if location.startsWith(requiredRecipeContentUrlPrefix) =>
                  Right(new URL(location))
                case Some(location) =>
                  Left(Seq(s"Unexpected redirect URL $location"))
                case None =>
                  Left(Seq(s"Redirect from URL $urlToRedirectService did not include a Location header"))
              }
            case HTTP_MOVED_TEMP =>
              Left(Seq(s"Recipe $recipeIdentifyingString does not exist; please visit https://quine.io/recipes"))
            case c @ _ => Left(Seq(s"Unexpected response code $c from URL $urlToRedirectService"))
          }
        }.toEither.left.map(e => Seq(e.toString)).joinRight
    }

  /** Load JSON from a URL.
    *
    * The stream is read with the YAML parser and closed afterwards. A missing file and any other
    * non-fatal exception are turned into error messages rather than thrown.
    */
  def loadJson(url: URL): Either[Seq[String], Json] = Either
    .catchNonFatal(
      Using.resource(url.openStream)(inStream =>
        circe.yaml.v12.Parser.default.parse(new YamlUnicodeReader(inStream)).leftMap(e => Seq(showError.show(e))),
      ),
    )
    .leftMap {
      case _: FileNotFoundException => Seq(s"Cannot find recipe file at ${url.getFile}")
      case e => Seq(e.toString)
    }
    .flatten

  /** Get a recipe (V1 or V2) with automatic version detection */
  def getAny(recipeIdentifyingString: String):
Either[Seq[String], Recipe] = for { url <- resolveToUrl(recipeIdentifyingString) json <- loadJson(url) version <- detectVersion(json).leftMap(Seq(_)) recipe <- version match { case 1 => for { r <- parseV1(json) _ <- RecipeV1.validateRecipeCurrentVersion(r) } yield Recipe.V1(r) case 2 => parseV2(json).map(Recipe.V2(_)) case other => Left(Seq(s"Unsupported recipe version: $other. Supported versions are 1 and 2.")) } } yield recipe /** Get a recipe and apply variable substitutions */ def getAndSubstituteAny(recipeIdentifyingString: String, values: Map[String, String]): Either[Seq[String], Recipe] = for { recipe <- getAny(recipeIdentifyingString) substituted <- recipe match { case Recipe.V1(r) => RecipeV1 .validatedNelToEitherStrings[RecipeV1, RecipeV1.UnboundVariableError]( RecipeV1.applySubstitutions(r, values), e => s"Missing required parameter ${e.name}; use --recipe-value ${e.name}=", ) .map(Recipe.V1(_)) case Recipe.V2(r) => RecipeV1 .validatedNelToEitherStrings[RecipeV2.Recipe, RecipeV2.UnboundVariableError]( RecipeV2.applySubstitutions(r, values), e => s"Missing required parameter ${e.name}; use --recipe-value ${e.name}=", ) .map(Recipe.V2(_)) } } yield substituted } // ───────────────────────────────────────────────────────────────────────────── // RecipeV1 (original V1 implementation) // ───────────────────────────────────────────────────────────────────────────── @docs("A specification of a Quine Recipe") final case class RecipeV1( @docs("Schema version (only supported value is 1)") version: Int = RecipeV1.currentVersion, @docs("Identifies the Recipe but is not necessarily unique") title: String = "RECIPE", @docs( "URL to social profile of the person or organization responsible for this Recipe", ) contributor: Option[String], @docs("Brief copy about this Recipe") summary: Option[String], @docs("Longer form copy about this Recipe") description: Option[String], @docs("URL to image asset for this Recipe") iconImage: Option[String], @docs("Ingest Streams that 
load data into the graph") ingestStreams: List[IngestStreamConfiguration] = List(), @docs( "Standing Queries that respond to graph updates by computing aggregates and trigger actions", ) standingQueries: List[StandingQueryDefinition] = List(), @docs("For web UI customization") nodeAppearances: List[UiNodeAppearance] = List(), @docs("For web UI customization") quickQueries: List[UiNodeQuickQuery] = List(), @docs("For web UI customization") sampleQueries: List[SampleQuery] = List(), @docs("Cypher query to be run periodically while Recipe is running") statusQuery: Option[StatusQuery], ) { def isVersion(testVersion: Int): Boolean = version == testVersion /** Extract all file paths from FileIngest configurations in this recipe */ def extractFileIngestPaths: List[String] = ingestStreams.collect { case ingest: FileIngest => ingest.path } } @docs("A Cypher query to be run periodically while Recipe is running") final case class StatusQuery(cypherQuery: String) private object RecipeSchema extends endpoints4s.circe.JsonSchemas with endpoints4s.generic.JsonSchemas with IngestSchemas with StandingQuerySchemas with QueryUiConfigurationSchemas with CirceJsonAnySchema { implicit lazy val printQuerySchema: Record[StatusQuery] = genericRecord[StatusQuery] } object RecipeV1 { import RecipeSchema._ implicit def endpointRecordToDecoder[A](implicit record: Record[A]): Decoder[A] = record.decoder // This isn't actually used anywhere else, but if we mark it `private` scalac thinks it unused and // emits a warning. 
/** Decodes a [[RecipeV1]] from already-parsed JSON.
  *
  * A JSON value that is not an object is rejected with a single `DecodingFailure`.
  * An object is decoded with `decodeAccumulating`, so the decoding failures it
  * reports are returned together.
  *
  * @param json parsed recipe document
  * @return the decoded recipe, or a non-empty list of circe errors
  */
def fromJson(json: Json): EitherNel[circe.Error, RecipeV1] =
  json.asObject
    .toRightNel(DecodingFailure(WrongTypeExpectation("object", json), List()))
    .flatMap(_ => recipeDecoder.decodeAccumulating(json.hcursor).toEither)
implicit class Subs(s: String) { def subs: ValidatedNel[UnboundVariableError, String] = applySubstitution(s, values) } implicit class SubSecret(s: Secret) { import Secret.Unsafe._ def subs: ValidatedNel[UnboundVariableError, Secret] = applySubstitution(s.unsafeValue, values).map(Secret.apply) } implicit class SubCreds(c: AwsCredentials) { def subs: ValidatedNel[UnboundVariableError, AwsCredentials] = ( c.accessKeyId.subs, c.secretAccessKey.subs, ).mapN(AwsCredentials(_, _)) } implicit class SubRegion(r: AwsRegion) { def subs: ValidatedNel[UnboundVariableError, AwsRegion] = (r.region.subs).map(AwsRegion(_)) } implicit class SubStandingQueryOutputSubs(soo: StandingQueryResultOutputUserDef) { def subs: ValidatedNel[UnboundVariableError, StandingQueryResultOutputUserDef] = soo match { case Drop => Validated.valid(Drop) case q: InternalQueue => Validated.valid(q) case PostToEndpoint(url, parallelism, onlyPositiveMatchData, headers, structure) => ( url.subs, headers.toList.traverse { case (k, v) => v.subs.map(k -> _) }.map(_.toMap), ).mapN(PostToEndpoint(_, parallelism, onlyPositiveMatchData, _, structure)) case WriteToKafka( topic, bootstrapServers, format, kafkaProperties, sslKeystorePassword, sslTruststorePassword, sslKeyPassword, saslJaasConfig, structure, ) => ( topic.subs, bootstrapServers.subs, ).mapN( WriteToKafka( _, _, format, kafkaProperties, sslKeystorePassword, sslTruststorePassword, sslKeyPassword, saslJaasConfig, structure, ), ) case WriteToSNS(credentialsOpt, regionOpt, topic, structure) => ( credentialsOpt.traverse(_.subs), regionOpt.traverse(_.subs), topic.subs, ).mapN(WriteToSNS(_, _, _, structure)) case PrintToStandardOut(logLevel, logMode, structure) => Validated.valid(PrintToStandardOut(logLevel, logMode, structure)) case WriteToFile(path, structure) => ( path.subs ).map(WriteToFile(_, structure)) case PostToSlack(hookUrl, onlyPositiveMatchData, intervalSeconds) => ( hookUrl.subs ).map(PostToSlack(_, onlyPositiveMatchData, intervalSeconds)) case 
StandingQueryResultOutputUserDef .CypherQuery(query, parameter, parallelism, andThen, allowAllNodeScan, shouldRetry, structure) => ( query.subs, andThen.traverse(_.subs), ).mapN( StandingQueryResultOutputUserDef.CypherQuery( _, parameter, parallelism, _, allowAllNodeScan, shouldRetry, structure, ), ) case QuinePatternQuery(query, parameter, parallelism, andThen, structure) => (query.subs, andThen.traverse(_.subs)).mapN( StandingQueryResultOutputUserDef.QuinePatternQuery(_, parameter, parallelism, _, structure), ) case WriteToKinesis( credentialsOpt, regionOpt, streamName, format, kinesisParallelism, kinesisMaxBatchSize, kinesisMaxRecordsPerSecond, kinesisMaxBytesPerSecond, structure, ) => ( credentialsOpt.traverse(_.subs), regionOpt.traverse(_.subs), streamName.subs, ).mapN( WriteToKinesis( _, _, _, format, kinesisParallelism, kinesisMaxBatchSize, kinesisMaxRecordsPerSecond, kinesisMaxBytesPerSecond, structure, ), ) } } implicit class IngestStreamsConfigurationSubs(soo: IngestStreamConfiguration) { def subs: ValidatedNel[UnboundVariableError, IngestStreamConfiguration] = soo match { case KafkaIngest( format, topics, parallelism, bootstrapServers, groupId, securityProtocol, autoCommitIntervalMs, autoOffsetReset, kafkaProperties, endingOffset, maximumPerSecond, recordEncodingTypes, sslKeystorePassword, sslTruststorePassword, sslKeyPassword, saslJaasConfig, ) => ( bootstrapServers.subs ).map( KafkaIngest( format, topics, parallelism, _, groupId, securityProtocol, autoCommitIntervalMs, autoOffsetReset, kafkaProperties, endingOffset, maximumPerSecond, recordEncodingTypes, sslKeystorePassword, sslTruststorePassword, sslKeyPassword, saslJaasConfig, ), ) case KinesisIngest( format, streamName, shardIds, parallelism, credentials, region, iteratorType, numRetries, maximumPerSecond, recordEncodingTypes, ) => ( streamName.subs, credentials.traverse(_.subs), region.traverse(_.subs), ).mapN( KinesisIngest( format, _, shardIds, parallelism, _, _, iteratorType, numRetries, 
maximumPerSecond, recordEncodingTypes, ), ) case KinesisKCLIngest( format, applicationName, kinesisStreamName, parallelism, credentials, region, initialPosition, numRetries, maximumPerSecond, recordEncodingTypes, schedulerSourceSettings, checkpointSettings, advancedSettings, ) => ( kinesisStreamName.subs, credentials.traverse(_.subs), region.traverse(_.subs), ).mapN( KinesisKCLIngest( format, applicationName, _, parallelism, _, _, initialPosition, numRetries, maximumPerSecond, recordEncodingTypes, schedulerSourceSettings, checkpointSettings, advancedSettings, ), ) case ServerSentEventsIngest(format, url, parallelism, maximumPerSecond, recordEncodingTypes) => ( url.subs ).map(ServerSentEventsIngest(format, _, parallelism, maximumPerSecond, recordEncodingTypes)) case SQSIngest( format, queueUrl, readParallelism, writeParallelism, credentialsOpt, regionOpt, deleteReadMessages, maximumPerSecond, recordEncodingTypes, ) => ( queueUrl.subs, credentialsOpt.traverse(_.subs), regionOpt.traverse(_.subs), ).mapN( SQSIngest( format, _, readParallelism, writeParallelism, _, _, deleteReadMessages, maximumPerSecond, recordEncodingTypes, ), ) case WebsocketSimpleStartupIngest( format, wsUrl, initMessages, keepAliveProtocol, parallelism, encoding, ) => ( wsUrl.subs, initMessages.toList.traverse(_.subs), ).mapN( WebsocketSimpleStartupIngest( format, _, _, keepAliveProtocol, parallelism, encoding, ), ) case FileIngest( format, path, encoding, parallelism, maximumLineSize, startAtOffset, ingestLimit, maximumPerSecond, fileIngestMode, ) => ( path.subs ).map( FileIngest( format, _, encoding, parallelism, maximumLineSize, startAtOffset, ingestLimit, maximumPerSecond, fileIngestMode, ), ) case i: S3Ingest => Validated.valid(i) case i: StandardInputIngest => Validated.valid(i) case i: NumberIteratorIngest => Validated.valid(i) } } // Return a copy of the recipe. // Selected fields are token substituted by invoking subs. 
/** Minimal token substitution for a single string.
  *
  *  - A string that does not begin with '$' is returned unchanged.
  *  - A string that begins with "$$" is an escape: the first '$' is dropped and
  *    the remainder is returned without any lookup.
  *  - Any other string beginning with '$' names a variable (the text after the
  *    '$'). Its value is looked up in `values`, and a missing entry yields an
  *    [[UnboundVariableError]] carrying that name.
  *
  * Substitution inside a string is not supported; only a leading '$' is recognised.
  *
  * @param input raw string from the recipe
  * @param values variable names mapped to their replacement values
  */
def applySubstitution(input: String, values: Map[String, String]): ValidatedNel[UnboundVariableError, String] =
  if (!input.startsWith("$")) Validated.valid(input)
  else {
    val afterFirstDollar = input.substring(1)
    if (afterFirstDollar.startsWith("$")) Validated.valid(afterFirstDollar)
    else values.get(afterFirstDollar).toValidNel(UnboundVariableError(afterFirstDollar))
  }
/** Synchronously maps a string that identifies a Recipe to the actual Recipe
  * content as a parsed and validated document.
  *
  * The string is resolved as follows:
  *  1. A string that is a valid URL is used as a URL
  *  2. A string that is not a valid URL and ends with one of `recipeFileExtensions`
  *     is treated as a local file
  *  3. Any other string is treated as a Recipe canonical name
  *
  * A canonical name is resolved by asking the redirect service at recipes.quine.io
  * for its permanent redirect. The redirect target is only accepted when it
  * starts with the required raw.githubusercontent.com prefix.
  *
  * Any errors are converted to a sequence of user-facing messages.
  *
  * @param recipeIdentifyingString URL, file path, or canonical name of the recipe
  * @return the parsed recipe (which must be the current V1 version), or error messages
  */
def get(recipeIdentifyingString: String): Either[Seq[String], RecipeV1] = {
  val recipeRedirectServiceUrlPrefix = "https://recipes.quine.io/"
  val requiredRecipeContentUrlPrefix = "https://raw.githubusercontent.com/thatdot/quine/main/"
  for {
    urlToRecipeContent <- catching(classOf[MalformedURLException]).opt(new URL(recipeIdentifyingString)) match {
      case Some(url: URL) => Right(url)
      case None if recipeFileExtensions exists (recipeIdentifyingString.toLowerCase.endsWith(_)) =>
        Right(new File(recipeIdentifyingString).toURI.toURL)
      case None =>
        val recipeIdentifyingStringUrlEncoded: String =
          URLEncoder.encode(recipeIdentifyingString, "UTF-8")
        val urlToRedirectService = new URL(recipeRedirectServiceUrlPrefix + recipeIdentifyingStringUrlEncoded)
        // HttpURLConnection is not AutoCloseable; tell `Using` to release it via disconnect().
        implicit val releaseableHttpURLConnection: Using.Releasable[HttpURLConnection] =
          (resource: HttpURLConnection) => resource.disconnect()
        Using(urlToRedirectService.openConnection.asInstanceOf[HttpURLConnection]) { http: HttpURLConnection =>
          // The redirect is inspected, not followed, so the target can be validated first.
          http.setInstanceFollowRedirects(false)
          http.getResponseCode match {
            case HTTP_MOVED_PERM =>
              // getHeaderField returns null when the header is absent; report that
              // explicitly instead of failing with a NullPointerException.
              Option(http.getHeaderField("Location")) match {
                case None =>
                  Left(Seq(s"Redirect from URL $urlToRedirectService did not include a Location header"))
                case Some(location) if !location.startsWith(requiredRecipeContentUrlPrefix) =>
                  Left(Seq(s"Unexpected redirect URL $location"))
                case Some(location) =>
                  Right(new URL(location))
              }
            // Redirect service indicates not found using HTTP 302 Temporary Redirect
            case HTTP_MOVED_TEMP =>
              Left(Seq(s"Recipe $recipeIdentifyingString does not exist; please visit https://quine.io/recipes"))
            case c @ _ => Left(Seq(s"Unexpected response code $c from URL $urlToRedirectService"))
          }
        }.toEither.left.map(e => Seq(e.toString)).joinRight
    }
    // Reading and parsing can each fail: exceptions while opening/reading are caught
    // by catchNonFatal, parse errors come back as a Left from the parser itself.
    json <- Either
      .catchNonFatal(
        Using.resource(urlToRecipeContent.openStream)(inStream =>
          circe.yaml.v12.Parser.default.parse(new YamlUnicodeReader(inStream)).leftMap(e => Seq(showError.show(e))),
        ),
      )
      .leftMap {
        case _: FileNotFoundException => Seq(s"Cannot find recipe file at ${urlToRecipeContent.getFile}")
        case e => Seq(e.toString)
      }
      .flatten
    recipe <- fromJson(json).leftMap(_.toList.map(showError.show))
    _ <- validateRecipeCurrentVersion(recipe)
  } yield recipe
}
/** Cancels every task started by this interpreter.
  *
  * `cancel()` is invoked on each task in turn, whatever the earlier ones returned.
  *
  * @return true when at least one task's `cancel()` returned true
  */
override def cancel(): Boolean = {
  val cancelOutcomes = tasks.map(_.cancel())
  cancelOutcomes.contains(true)
}
/** Applies the recipe to the application state and starts the progress reporters.
  *
  * In order:
  *  1. registers node appearances, quick queries and sample queries (when non-empty)
  *  2. creates each standing query as `STANDING-<n>`, blocking up to 5 seconds per query
  *  3. creates each ingest stream as `INGEST-<n>`
  *  4. if the recipe defines a status query, logs its URL (when a web server URI is
  *     known) and schedules it to be run periodically
  *
  * Failures are reported through `statusLines`; they do not abort the remaining steps.
  * Every reporter that is started is added to `tasks` so that `cancel()` can stop it.
  *
  * @param memberIdx member index forwarded to the ingest streams that are created
  */
def run(memberIdx: MemberIdx)(implicit logConfig: LogConfig): Unit = {

  if (recipe.nodeAppearances.nonEmpty) {
    statusLines.info(log"Using ${Safe(recipe.nodeAppearances.length)} node appearances")
    appState.setNodeAppearances(recipe.nodeAppearances.toVector)
  }
  if (recipe.quickQueries.nonEmpty) {
    statusLines.info(log"Using ${Safe(recipe.quickQueries.length)} quick queries")
    appState.setQuickQueries(recipe.quickQueries.toVector)
  }
  if (recipe.sampleQueries.nonEmpty) {
    statusLines.info(log"Using ${Safe(recipe.sampleQueries.length)} sample queries")
    appState.setSampleQueries(recipe.sampleQueries.toVector)
  }

  // Create Standing Queries
  for {
    (standingQueryDefinition, i) <- recipe.standingQueries.zipWithIndex
  } {
    val standingQueryName = s"STANDING-${i + 1}"
    val addStandingQueryResult: Future[Boolean] = appState.addStandingQuery(
      standingQueryName,
      namespace,
      standingQueryDefinition,
    )
    try if (!Await.result(addStandingQueryResult, 5.seconds)) {
      statusLines.error(log"Standing Query ${Safe(standingQueryName)} already exists")
    } else {
      statusLines.info(log"Running Standing Query ${Safe(standingQueryName)}")
      tasks +:= standingQueryProgressReporter(statusLines, appState, graphService, standingQueryName)
    } catch {
      case NonFatal(ex) =>
        statusLines.error(
          log"Failed creating Standing Query ${Safe(standingQueryName)}: ${standingQueryDefinition}",
          ex,
        )
    }
    ()
  }

  // Create Ingest Streams
  for {
    (ingestStream, i) <- recipe.ingestStreams.zipWithIndex
  } {
    val ingestStreamName = s"INGEST-${i + 1}"
    appState.addIngestStream(
      ingestStreamName,
      ingestStream,
      namespace,
      previousStatus = None,
      shouldResumeRestoredIngests = false,
      timeout = 5.seconds,
      memberIdx = Some(memberIdx),
    ) match {
      case Failure(ex) =>
        statusLines.error(
          log"Failed creating Ingest Stream ${Safe(ingestStreamName)}\n${ingestStream}",
          ex,
        )
      case Success(false) =>
        statusLines.error(log"Ingest Stream ${Safe(ingestStreamName)} already exists")
      case Success(true) =>
        statusLines.info(log"Running Ingest Stream ${Safe(ingestStreamName)}")
        tasks +:= ingestStreamProgressReporter(statusLines, appState, graphService, ingestStreamName)
    }
  }

  // If status query is defined, print a URL with the query and schedule the query to be executed and printed.
  // This is done once, outside the ingest loop, so the status query runs exactly once
  // no matter how many ingest streams (including none) the recipe defines.
  for {
    statusQuery @ StatusQuery(cypherQuery) <- recipe.statusQuery
  } {
    for {
      url <- quineWebserverUri
    } statusLines.info(
      log"Status query URL is ${Safe(
        Uri
          .from(
            scheme = url.getProtocol,
            userinfo = Option(url.getUserInfo).getOrElse(""),
            host = url.getHost,
            port = url.getPort,
            path = url.getPath,
            queryString = None,
            fragment = Some(cypherQuery),
          )
          .toString,
      )}",
    )
    tasks +:= statusQueryProgressReporter(statusLines, graphService, statusQuery)
  }
}
/** Schedules the recipe's status query to run repeatedly and logs its result
  * whenever the formatted result differs from the previously logged one.
  *
  * Each run blocks the scheduler callback for up to 5 seconds while collecting at
  * most `printQueryMaxResults` rows. A timeout is logged as a warning and the
  * schedule continues. Any other non-fatal failure (including a failure to compile
  * the query) is logged as an error and the schedule is cancelled, so the failure
  * is visible in the status output rather than only ending the schedule silently.
  *
  * @param statusLines sink for the formatted results and any warnings/errors
  * @param graphService graph against which the query is run
  * @param statusQuery the Cypher query to execute
  * @param interval initial delay and delay between consecutive runs
  * @return handle that cancels the scheduled runs
  */
private def statusQueryProgressReporter(
  statusLines: StatusLines,
  graphService: CypherOpsGraph,
  statusQuery: StatusQuery,
  interval: FiniteDuration = 5.seconds,
)(implicit idProvider: QuineIdProvider, logConfig: LogConfig): Cancellable = {
  val actorSystem = graphService.system
  val changed = new OnChanged[String]
  // lazy so the callback below can refer to the task in order to cancel it.
  lazy val task: Cancellable = actorSystem.scheduler.scheduleWithFixedDelay(
    initialDelay = interval,
    delay = interval,
  ) { () =>
    try {
      // Query creation is inside the try so that its failures are reported as well.
      val queryResults: RunningCypherQuery = com.thatdot.quine.compiler.cypher.queryCypherValues(
        queryText = statusQuery.cypherQuery,
        namespace = namespace,
      )(graphService)
      val resultContent: Seq[Seq[Value]] =
        Await.result(
          queryResults.results
            .take(printQueryMaxResults)
            .toMat(Sink.seq)(Keep.right)
            .named("recipe-status-query")
            .run()(graphService.materializer),
          5.seconds,
        )
      changed(queryResultToString(queryResults, resultContent))(s =>
        // s is a query result, and therefore PII, but the entire point of a status query is to
        // repeatedly log this value, so we'll treat that as implied consent to log.
        statusLines.info(log"${Safe(s)}"),
      )
    } catch {
      case _: TimeoutException => statusLines.warn(log"Status query timed out")
      case NonFatal(ex) =>
        // An exception escaping a scheduled Runnable would end the schedule anyway;
        // report the reason first and then stop explicitly.
        statusLines.error(log"Status query failed; no further status query results will be reported", ex)
        task.cancel()
        ()
    }
  }(graphService.system.dispatcher)
  task
}
/** Invokes a callback only when the supplied value differs from the value supplied
  * on the previous invocation.
  *
  * Typical use: periodically producing a status message but only logging it when
  * the text has changed. The very first invocation always triggers the callback.
  * The stored value is swapped atomically (`AtomicReference.getAndSet`), so the
  * instance may be shared between threads.
  *
  * @tparam T the value type; change is detected with `==` equality
  */
class OnChanged[T] {
  // None until a first value has been seen.
  private val mostRecent: AtomicReference[Option[T]] = new AtomicReference(None)

  /** Records `value` as the most recent one and calls `callback(value)` unless it
    * equals the value recorded by the preceding call.
    */
  def apply(value: T)(callback: T => Unit): Unit =
    mostRecent.getAndSet(Some(value)) match {
      case Some(previous) if previous == value => ()
      case _ => callback(value)
    }
}
/** Cancels every task started by this interpreter.
  *
  * Every task's `cancel()` is called; none is skipped because an earlier one
  * already reported success.
  *
  * @return true when at least one task's `cancel()` returned true
  */
override def cancel(): Boolean = {
  val successfullyCancelled = tasks.count(_.cancel())
  successfullyCancelled > 0
}
/** Reports whether every task started by this interpreter is cancelled.
  *
  * @return true when no task reports `isCancelled == false` (also true when there are no tasks)
  */
override def isCancelled: Boolean = !tasks.exists(task => !task.isCancelled)
namespace, apiSqDef) try Await.result(addResult, 5.seconds) match { case StandingQueryInterfaceV2.Result.Success => statusLines.info(log"Running Standing Query ${Safe(standingQueryName)}") tasks +:= standingQueryProgressReporter(statusLines, appState, graphService, standingQueryName) case StandingQueryInterfaceV2.Result.AlreadyExists(_) => statusLines.error(log"Standing Query ${Safe(standingQueryName)} already exists") case StandingQueryInterfaceV2.Result.NotFound(msg) => statusLines.error(log"Namespace not found: ${Safe(msg)}") } catch { case NonFatal(ex) => statusLines.error( log"Failed creating Standing Query ${Safe(standingQueryName)}", ex, ) } } // Create Ingest Streams using V2 API for { (ingestStream, ingestIndex) <- recipe.ingestStreams.zipWithIndex } { val ingestStreamName = ingestStream.name.getOrElse(s"ingest-stream-$ingestIndex") // Convert recipe ingest to V2 internal model val v2IngestSource = ApiToIngest(ingestStream.source) val onStreamError = ingestStream.onStreamError .map(ApiToIngest.apply) .getOrElse(V2IngestEntities.LogStreamError) val v2IngestConfig = QuineIngestConfiguration( name = ingestStreamName, source = v2IngestSource, query = ingestStream.query, parameter = ingestStream.parameter, transformation = None, // TODO: handle transformation conversion parallelism = ingestStream.parallelism, maxPerSecond = ingestStream.maxPerSecond, onRecordError = ingestStream.onRecordError, onStreamError = onStreamError, ) val result: Future[Either[Seq[String], Unit]] = appState.addV2IngestStream( name = ingestStreamName, settings = v2IngestConfig, intoNamespace = namespace, timeout = 5.seconds, memberIdx = memberIdx, ) try Await.result(result, 10.seconds) match { case Left(errors) => statusLines.error( log"Failed creating Ingest Stream ${Safe(ingestStreamName)}: ${Safe(errors.mkString(", "))}", ) case Right(_) => statusLines.info(log"Running Ingest Stream ${Safe(ingestStreamName)}") tasks +:= ingestStreamProgressReporter(statusLines, appState, graphService, 
/** Starts a fixed-rate task that keeps one status line up to date with the result
  * count of the named standing query.
  *
  * On every tick the standing query is looked up asynchronously. When the lookup
  * fails, or the standing query no longer exists, an error is logged, the task is
  * cancelled and its status line is removed. Otherwise the status line is updated
  * with the sum of the `rates.count` values found in the standing query's `stats`.
  *
  * @param statusLines sink for the status line and error messages
  * @param appState application state used to look up the standing query
  * @param graph graph whose actor system provides the scheduler and dispatcher
  * @param standingQueryName name of the standing query to report on
  * @param interval initial delay and period of the reporting task
  * @return handle that cancels the reporting task
  */
private def standingQueryProgressReporter(
  statusLines: StatusLines,
  appState: RecipeInterpreterV2.RecipeStateV2,
  graph: BaseGraph,
  standingQueryName: String,
  interval: FiniteDuration = 1.second,
)(implicit logConfig: LogConfig): Cancellable = {
  val actorSystem = graph.system
  val dispatcher = actorSystem.dispatcher
  val statusLine = statusLines.create()

  // Shared shutdown path for both failure cases; `task` is the lazy val below.
  def stopReporting(): Unit = {
    task.cancel()
    statusLines.remove(statusLine)
    ()
  }

  lazy val task: Cancellable = actorSystem.scheduler.scheduleAtFixedRate(
    initialDelay = interval,
    interval = interval,
  ) { () =>
    val lookup = appState.getStandingQueryV2(standingQueryName, namespace)
    lookup.onComplete {
      case Success(Some(standingQuery)) =>
        val standingQueryStatsCount = standingQuery.stats.values.view.map(_.rates.count).sum
        statusLines.update(statusLine, s"$standingQueryName count $standingQueryStatsCount")
      case Success(None) =>
        statusLines.error(log"Failed getting Standing Query ${Safe(standingQueryName)} (it does not exist)")
        stopReporting()
      case Failure(ex) =>
        statusLines.error(log"Failed getting Standing Query ${Safe(standingQueryName)}" withException ex)
        stopReporting()
    }(dispatcher)
  }(dispatcher)
  task
}
/** Formats query results into a multi-line string designed to be easily human-readable.
  *
  * Each result record is rendered as its own block: a header line carrying the record's 1-based
  * position, one `column | value` line per column (values rendered as compact JSON), and a
  * footer rule. Column names are padded or truncated to a common width capped at 20 characters;
  * the header is between 40 and 200 characters wide.
  *
  * Empty inputs are tolerated: a query without columns or a record without values yields a
  * block with no value lines instead of throwing.
  *
  * @param queryResults  the running query; only its column names are read here
  * @param resultContent the records to render, one inner sequence of values per record
  * @return the rendered blocks joined by the platform line separator (empty when there are no records)
  */
private def queryResultToString(queryResults: RunningCypherQuery, resultContent: Seq[Seq[Value]])(implicit
  idProvider: QuineIdProvider,
  logConfig: LogConfig,
): String = {
  val eol = System.lineSeparator()
  val separator = " | "
  val columnNameFixedWidthMax = 20
  val headerLengthMin = 40
  val headerLengthMax = 200

  def repeated(s: String, times: Int): String = s * times

  // Pads with `padding` or truncates so that the result is exactly `length` characters long.
  def fixedLength(s: String, length: Int, padding: Char): String =
    s.padTo(length, padding).take(length)

  // Depends only on the query's columns, so compute it once rather than per record.
  // `maxOption` keeps a query with no columns from throwing.
  val columnNameFixedWidth = Math.min(
    queryResults.columns.map(_.name.length).maxOption.getOrElse(0),
    columnNameFixedWidthMax,
  )

  resultContent.zipWithIndex
    .map { case (resultRecord, resultRecordIndex) =>
      val valueStrings = resultRecord.map(Value.toJson(_).noSpaces)
      // `maxOption` keeps a record with no values from throwing.
      val valueStringMaxLength = valueStrings.map(_.length).maxOption.getOrElse(0)
      val header = fixedLength(
        s"---[ Status Query result ${resultRecordIndex + 1} ]",
        Math.max(
          headerLengthMin,
          Math.min(columnNameFixedWidth + valueStringMaxLength + separator.length, headerLengthMax),
        ),
        '-',
      )
      // The "+" lines up with the "|" of the separator in the value lines above it.
      val footer =
        repeated("-", columnNameFixedWidth + 1) + "+" + repeated("-", header.length - columnNameFixedWidth - 2)
      val valueLines = queryResults.columns.zip(valueStrings).map { case (columnName, value) =>
        fixedLength(columnName.name, columnNameFixedWidth, ' ') + separator + value
      }
      header + eol + valueLines.mkString(eol) + eol + footer
    }
    .mkString(eol)
}
object RecipePackage {

  /** Build a [[RecipePackage]] from a recipe file on disk.
    *
    * The canonical name is the file name without its extension. The file name must consist of
    * exactly one name part and one extension separated by a single dot, and the extension must
    * be `yml`, `yaml` or `json`.
    *
    * @param file path at which the recipe file is located
    * @return package of all information about the recipe
    * @throws java.lang.IllegalArgumentException if the file name is not accepted or the content
    *                                            cannot be decoded as a recipe
    */
  def fromFile(file: Path): RecipePackage = {
    val acceptedExtensions = Seq("yml", "yaml", "json")

    // Derive the canonical name, rejecting anything that is not `<name>.<accepted extension>`
    val canonicalName = file.getFileName.toString.split('.') match {
      case Array(baseName, extension) if acceptedExtensions.contains(extension) => baseName
      case _ =>
        throw new IllegalArgumentException(
          s"File $file does not have an accepted recipe extension",
        )
    }

    // Read the raw text; it is kept verbatim in the resulting package
    val rawSource = Files.readString(file)

    // Decode, reporting every accumulated decoding error at once
    val parsedRecipe = circe.yaml.v12.parser
      .decodeAccumulating[RecipeV1](rawSource)
      .valueOr { errs =>
        throw new IllegalArgumentException("Malformed recipe: \n" + errs.toList.mkString("\n"))
      }

    RecipePackage(canonicalName, parsedRecipe, rawSource)
  }
}
OnStreamErrorHandler, Transformation, } import com.thatdot.quine.app.v2api.definitions.outputs.QuineDestinationSteps import com.thatdot.quine.app.v2api.definitions.query.standing.{ Predicate, StandingQueryPattern, StandingQueryResultTransformation, } /** V2 Recipe Schema - aligned with V2 API structure */ object RecipeV2 { // Use the same configuration as the V2 API types (with "type" discriminator) // This ensures proper decoding of nested sealed traits like IngestSource, StandingQueryPattern, etc. implicit private val circeConfig: Configuration = Configuration.default.withDefaults.withDiscriminator("type") val currentVersion: Int = 2 // ───────────────────────────────────────────────────────────────────────────── // Ingest Stream Configuration (V2 style) // ───────────────────────────────────────────────────────────────────────────── @title("V2 Ingest Stream Configuration") @description("Configuration for a data ingest stream in V2 recipe format.") final case class IngestStreamV2( @description("Optional name identifying the ingest stream. 
If not provided, a name will be generated.") name: Option[String] = None, @description("Data source configuration.") source: IngestSource, @description("Cypher query to execute on each record.") query: String, @description("Name of the Cypher parameter to populate with the input value.") @default("that") parameter: String = "that", @description("Optional JavaScript transformation function to pre-process input before Cypher query.") transformation: Option[Transformation] = None, @description("Maximum number of records to process at once.") @default(16) parallelism: Int = 16, @description("Maximum number of records to process per second.") maxPerSecond: Option[Int] = None, @description("Action to take on a single failed record.") @default(OnRecordErrorHandler()) onRecordError: OnRecordErrorHandler = OnRecordErrorHandler(), @description("Action to take on a failure of the input stream.") onStreamError: Option[OnStreamErrorHandler] = None, ) object IngestStreamV2 { implicit val encoder: Encoder[IngestStreamV2] = deriveConfiguredEncoder implicit val decoder: Decoder[IngestStreamV2] = deriveConfiguredDecoder implicit lazy val schema: Schema[IngestStreamV2] = Schema.derived } // ───────────────────────────────────────────────────────────────────────────── // Standing Query Configuration (V2 workflow style) // ───────────────────────────────────────────────────────────────────────────── @title("Result Enrichment Cypher Query") @description("A Cypher query used to enrich standing query results.") final case class ResultEnrichmentQuery( @description("Cypher query to execute for enrichment.") query: String, @description("Name of the Cypher parameter to assign incoming data to.") @default("that") parameter: String = "that", ) object ResultEnrichmentQuery { implicit val encoder: Encoder[ResultEnrichmentQuery] = deriveConfiguredEncoder implicit val decoder: Decoder[ResultEnrichmentQuery] = deriveConfiguredDecoder implicit lazy val schema: Schema[ResultEnrichmentQuery] = 
Schema.derived } @title("Standing Query Result Workflow") @description( """A workflow comprising steps toward sending data derived from StandingQueryResults to destinations. |The workflow steps are processed in order: filter → preEnrichmentTransformation → resultEnrichment → destinations.""".stripMargin, ) final case class StandingQueryResultWorkflowV2( @description("Optional name for this output workflow. If not provided, a name will be generated.") name: Option[String] = None, @description("Optional filter to apply to results before processing.") filter: Option[Predicate] = None, @description("Optional transformation to apply to results before enrichment.") preEnrichmentTransformation: Option[StandingQueryResultTransformation] = None, @description("Optional Cypher query to enrich results.") resultEnrichment: Option[ResultEnrichmentQuery] = None, @description("Destinations to send the processed results to (at least one required).") destinations: NonEmptyList[QuineDestinationSteps], ) object StandingQueryResultWorkflowV2 { import com.thatdot.api.v2.schema.ThirdPartySchemas.cats._ implicit val encoder: Encoder[StandingQueryResultWorkflowV2] = deriveConfiguredEncoder implicit val decoder: Decoder[StandingQueryResultWorkflowV2] = deriveConfiguredDecoder implicit lazy val schema: Schema[StandingQueryResultWorkflowV2] = Schema.derived } @title("V2 Standing Query Definition") @description("A standing query definition in V2 recipe format with workflow-based outputs.") final case class StandingQueryDefinitionV2( @description("Optional name for this Standing Query. 
If not provided, a name will be generated.") name: Option[String] = None, @description("Pattern to match in the graph.") pattern: StandingQueryPattern, @description("Output workflows to process results.") @default(Seq.empty) outputs: Seq[StandingQueryResultWorkflowV2] = Seq.empty, @description("Whether or not to include cancellations in the results.") @default(false) includeCancellations: Boolean = false, @description("How many Standing Query results to buffer before backpressuring.") @default(32) inputBufferSize: Int = 32, ) object StandingQueryDefinitionV2 { implicit val encoder: Encoder[StandingQueryDefinitionV2] = deriveConfiguredEncoder implicit val decoder: Decoder[StandingQueryDefinitionV2] = deriveConfiguredDecoder implicit lazy val schema: Schema[StandingQueryDefinitionV2] = Schema.derived } // ───────────────────────────────────────────────────────────────────────────── // Status Query (same as V1) // ───────────────────────────────────────────────────────────────────────────── @title("Status Query") @description("A Cypher query to be run periodically while Recipe is running.") final case class StatusQueryV2( @description("Cypher query to execute periodically.") cypherQuery: String, ) object StatusQueryV2 { implicit val encoder: Encoder[StatusQueryV2] = deriveConfiguredEncoder implicit val decoder: Decoder[StatusQueryV2] = deriveConfiguredDecoder implicit lazy val schema: Schema[StatusQueryV2] = Schema.derived } // ───────────────────────────────────────────────────────────────────────────── // Main Recipe V2 Case Class // ───────────────────────────────────────────────────────────────────────────── @title("Quine Recipe V2") @description("A specification of a Quine Recipe using V2 API structure.") final case class Recipe( @description("Schema version (must be 2 for V2 recipes).") @default(currentVersion) version: Int = currentVersion, @description("Identifies the Recipe but is not necessarily unique.") title: String, @description("URL to social profile of 
/** Resolve a single, possibly variable-bearing, recipe string.
  *
  *  - A string starting with `$$` is an escape: the first `$` is dropped and the rest is
  *    returned as-is.
  *  - Any other string starting with `$` is a variable reference: the text after the `$` is
  *    looked up in `values`.
  *  - Every other string is returned unchanged.
  *
  * @param input  the string to resolve
  * @param values variable bindings, keyed by variable name (without the leading `$`)
  * @return the resolved string, or an [[UnboundVariableError]] naming the variable that has no binding
  */
def applySubstitution(input: String, values: Map[String, String]): ValidatedNel[UnboundVariableError, String] =
  input match {
    case escaped if escaped.startsWith("$$") =>
      cats.data.Validated.valid(escaped.drop(1))
    case reference if reference.startsWith("$") =>
      val variableName = reference.drop(1)
      values.get(variableName).toValidNel(UnboundVariableError(variableName))
    case literal =>
      cats.data.Validated.valid(literal)
  }
(ws.url.subs, ws.initMessages.toList.traverse(_.subs)).mapN((u, m) => ws.copy(url = u, initMessages = m)) case kin: IngestSource.Kinesis => (kin.streamName.subs, kin.credentials.traverse(_.subs), kin.region.traverse(_.subs)).mapN((s, c, r) => kin.copy(streamName = s, credentials = c, region = r), ) case kcl: IngestSource.KinesisKCL => (kcl.kinesisStreamName.subs, kcl.credentials.traverse(_.subs), kcl.region.traverse(_.subs)).mapN((s, c, r) => kcl.copy(kinesisStreamName = s, credentials = c, region = r), ) case other => Validated.valid(other) } // Substitute in destination steps def substituteDestination(dest: QuineDestinationSteps): ValidatedNel[UnboundVariableError, QuineDestinationSteps] = dest match { case f: QuineDestinationSteps.File => f.path.subs.map(p => f.copy(path = p)) case h: QuineDestinationSteps.HttpEndpoint => h.url.subs.map(u => h.copy(url = u)) case k: QuineDestinationSteps.Kafka => (k.topic.subs, k.bootstrapServers.subs).mapN((t, bs) => k.copy(topic = t, bootstrapServers = bs)) case kin: QuineDestinationSteps.Kinesis => (kin.streamName.subs, kin.credentials.traverse(_.subs), kin.region.traverse(_.subs)).mapN((s, c, r) => kin.copy(streamName = s, credentials = c, region = r), ) case sns: QuineDestinationSteps.SNS => (sns.topic.subs, sns.credentials.traverse(_.subs), sns.region.traverse(_.subs)).mapN((t, c, r) => sns.copy(topic = t, credentials = c, region = r), ) case cq: QuineDestinationSteps.CypherQuery => cq.query.subs.map(q => cq.copy(query = q)) case sl: QuineDestinationSteps.Slack => sl.hookUrl.subs.map(u => sl.copy(hookUrl = u)) case other => Validated.valid(other) } // Substitute in workflows def substituteWorkflow( wf: StandingQueryResultWorkflowV2, ): ValidatedNel[UnboundVariableError, StandingQueryResultWorkflowV2] = { val enrichmentSubs = wf.resultEnrichment.traverse(e => e.query.subs.map(q => e.copy(query = q))) val destsSubs = wf.destinations.traverse(substituteDestination) (enrichmentSubs, destsSubs).mapN((e, d) => 
/** Supplies both a [[ProtobufSchemaCache]] and an [[AvroSchemaCache]]. */
trait SchemaCache {

  /** The Protobuf schema cache made available by this instance. */
  def protobufSchemaCache: ProtobufSchemaCache

  /** The Avro schema cache made available by this instance. */
  def avroSchemaCache: AvroSchemaCache
}
/** Builds the flow that handles standing query results for one output definition.
  *
  * The concrete flow is chosen by the type of `output`. Outputs that can chain further outputs
  * (Cypher query and Quine pattern outputs) are handed a function that calls back into this
  * method. The resulting flow is named `sq-output-<name>`.
  *
  * @param name        name of the Standing Query Output
  * @param inNamespace the namespace running this standing query
  * @param output      configuration for handling the results
  * @param graph       reference to the graph
  * @return a flow emitting one execution token per handled result
  */
private def resultHandlingFlow(
  name: String,
  inNamespace: NamespaceId,
  output: StandingQueryResultOutputUserDef,
  graph: CypherOpsGraph,
)(implicit
  protobufSchemaCache: ProtobufSchemaCache,
  logConfig: LogConfig,
): Flow[StandingQueryResult, SqResultsExecToken, NotUsed] = {
  val execToken = SqResultsExecToken(s"SQ: $name in: $inNamespace")

  // Closures can't have implicit arguments in scala 2.13, so flatten the arguments list.
  // Shared by every output kind that may chain into a further output.
  def createRecursiveOutput(
    name: String,
    inNamespace: NamespaceId,
    output: StandingQueryResultOutputUserDef,
    graph: CypherOpsGraph,
    protobufSchemaCache: ProtobufSchemaCache,
    logConfig: LogConfig,
  ): Flow[StandingQueryResult, SqResultsExecToken, NotUsed] =
    resultHandlingFlow(name, inNamespace, output, graph)(protobufSchemaCache, logConfig)

  output match {
    case Drop =>
      DropOutput.flow(name, inNamespace, output, graph)

    case internalQueue: InternalQueue =>
      Flow[StandingQueryResult].map { result =>
        internalQueue.results
          .asInstanceOf[AtomicReference[Vector[StandingQueryResult]]] // ugh. gross.
          .getAndUpdate(queued => queued :+ result)
        execToken
      // TODO: Note that enqueuing a result does not properly respect the spirit of `execToken` in that the work
      //       of processing the result in the queue has not been done before emitting the token. But this
      //       `InternalQueue` is only meant for internal testing.
      }

    case webhook: PostToEndpoint =>
      new PostToEndpointOutput(webhook).flow(name, inNamespace, output, graph)

    case kafka: WriteToKafka =>
      new KafkaOutput(kafka).flow(name, inNamespace, output, graph)

    case kinesis: WriteToKinesis =>
      new KinesisOutput(kinesis).flow(name, inNamespace, output, graph)

    case sns: WriteToSNS =>
      new SnsOutput(sns).flow(name, inNamespace, output, graph)

    case stdOut: PrintToStandardOut =>
      new ConsoleLoggingOutput(stdOut).flow(name, inNamespace, output, graph)

    case file: WriteToFile =>
      new FileOutput(file).flow(name, inNamespace, output, graph)

    case slack: PostToSlack =>
      new SlackOutput(slack).flow(name, inNamespace, output, graph)

    case cypher: CypherQuery =>
      new CypherQueryOutput(cypher, createRecursiveOutput).flow(name, inNamespace, output, graph)

    case quinePattern: QuinePatternQuery =>
      new QuinePatternOutput(quinePattern, createRecursiveOutput).flow(name, inNamespace, output, graph)
  }
}.named(s"sq-output-$name")
/** Builds a flow that serializes standing query results to bytes in the requested format.
  *
  *  - JSON: every result is rendered as compact JSON and encoded as UTF-8. The charset is given
  *    explicitly so that the bytes do not depend on the JVM's default encoding.
  *  - Protobuf: only positive matches are serialized. The message descriptor is resolved
  *    asynchronously from the schema cache; results that cannot be converted are logged at
  *    warn level and skipped.
  *
  * @param name      name of the Standing Query output (used in log messages)
  * @param format    the serialization format
  * @param graph     graph supplying the ID provider and the materializer's execution context
  * @param structure structure used when rendering a result as JSON
  * @return a flow from results to their serialized bytes
  */
def serialized(
  name: String,
  format: OutputFormat,
  graph: BaseGraph,
  structure: StandingQueryResultStructure,
)(implicit
  protobufSchemaCache: ProtobufSchemaCache,
  logConfig: LogConfig,
): Flow[StandingQueryResult, Array[Byte], NotUsed] =
  format match {
    case OutputFormat.JSON =>
      Flow[StandingQueryResult].map(
        _.toJson(structure)(graph.idProvider, logConfig).noSpaces
          .getBytes(java.nio.charset.StandardCharsets.UTF_8),
      )
    case OutputFormat.Protobuf(schemaUrl, typeName) =>
      val serializer: Future[QuineValueToProtobuf] =
        protobufSchemaCache
          .getMessageDescriptor(filenameOrUrl(schemaUrl), typeName, flushOnFail = true)
          .map(new QuineValueToProtobuf(_))(
            graph.materializer.executionContext, // this is effectively part of stream materialization
          )
      // Repeats the serializer forever once it is available, so it can be zipped with each result
      val serializerRepeated: Source[QuineValueToProtobuf, Future[NotUsed]] = Source.futureSource(
        serializer
          .map(Source.repeat[QuineValueToProtobuf])(graph.materializer.executionContext),
      )
      Flow[StandingQueryResult]
        .filter(_.meta.isPositiveMatch)
        .zip(serializerRepeated)
        .map { case (result, serializer) =>
          serializer
            .toProtobufBytes(result.data)
            .leftMap { (err: ConversionFailure) =>
              logger.warn(
                log"""On Standing Query output: ${Safe(name)}, can't serialize provided datum: $result
                     |to protobuf type: ${Safe(typeName)}. Skipping datum. Error: ${err.toString}
                     |""".cleanLines,
              )
            }
        }
        // Conversion failures were logged above; only successful conversions are emitted
        .collect { case Right(value) => value }
  }
/** Summarizes a batch of JSON-encoded results as a Slack message.
  *
  * A result counts as a cancellation when `isInferredCancellation` holds for it; every other
  * result (including one without any `meta`) counts as a new, positive result.
  *
  * @param results the JSON results to summarize
  * @return `None` for an empty batch, a [[NewResult]] for exactly one positive result, a
  *         [[CancelledResult]] for exactly one cancellation, and a [[MultipleUpdates]] otherwise
  */
def apply(results: Seq[Json]): Option[SlackSerializable] = {
  // `partition` puts the elements that SATISFY the predicate first, so cancellations come first.
  val (cancellations, positiveResults) = results.partition(isInferredCancellation)
  // `Seq(...)` patterns match any Seq implementation, not only `List`.
  (positiveResults, cancellations) match {
    case (Seq(), Seq()) => None
    case (Seq(singlePositive), Seq()) => Some(NewResult(singlePositive))
    case (Seq(), Seq(singleCancellation)) => Some(CancelledResult(singleCancellation))
    case _ => Some(MultipleUpdates(positiveResults, cancellations))
  }
}
/** Slack message summarizing several standing query updates at once.
  *
  * New results are rendered first, then cancellations:
  *  - one new result is shown in full under the single-result header;
  *  - several new results are shown as the first one in full, preceded by the total count;
  *  - one cancellation is shown in full; several are only counted in a header.
  *
  * @param newResults       JSON for the new (positive) results
  * @param newCancellations JSON for the cancelled results
  */
final private case class MultipleUpdates(
  newResults: Seq[Json],
  newCancellations: Seq[Json],
) extends SlackSerializable {
  import SlackSerializable._

  private val newResultsBlocks: Vector[Json] = newResults match {
    case Seq() => Vector.empty
    case Seq(jData) =>
      Vector(
        NewResult.header,
        NewResult(jData).slackBlock,
      )
    case result +: _ =>
      Vector(
        Json.obj(
          "type" -> "header",
          "text" -> Json.obj(
            "type" -> "plain_text",
            "text" -> "New Standing Query Results",
          ),
        ),
        Json.obj(
          "type" -> "section",
          "text" -> Json.obj(
            "type" -> "mrkdwn",
            // Note: "Latest" is a side effect of presumed list-prepending at batching call site.
            // The count is the total number of new results, including the one displayed.
            "text" -> s"Latest result of ${newResults.size}:",
          ),
        ),
        NewResult(result).slackBlock,
      )
    // Not expected to be reachable: the cases above cover empty and non-empty sequences.
    case _ => throw new Exception(s"Unexpected value $newResults")
  }

  private val cancellationBlocks: Vector[Json] = newCancellations match {
    case Seq() => Vector.empty
    case Seq(jData) =>
      Vector(
        CancelledResult.header,
        CancelledResult(jData).slackBlock,
      )
    case cancellations =>
      Vector(
        Json.obj(
          "type" -> "header",
          "text" -> Json.obj(
            "type" -> "plain_text",
            "text" -> s"Standing Query Results Cancelled: ${cancellations.size}",
          ),
        ),
      )
  }

  override def slackJson: String = Json
    .obj(
      "text" -> "New Standing Query Updates",
      "blocks" -> Json.fromValues(newResultsBlocks ++ cancellationBlocks),
    )
    .noSpaces
}
/** Logs messages and keeps a set of "status lines" rendered beneath the log output on an
  * interactive terminal.
  *
  * @param logger         logger that receives every info/warn/error message
  * @param realtimeOutput stream the status lines (and ANSI cursor control codes) are printed to
  */
class StatusLines(
  logger: SafeLogger,
  realtimeOutput: PrintStream,
) {

  /** Logs an informational message and refreshes the status lines display.
    * @param message the message to log
    */
  def info(message: SafeInterpolator)(implicit logConfig: LogConfig): Unit = {
    logger.info(message)
    refreshStatusLines()
  }

  /** Logs a warning message and refreshes the status lines display.
    * @param message the message to log
    */
  def warn(message: SafeInterpolator)(implicit logConfig: LogConfig): Unit = {
    logger.warn(message)
    refreshStatusLines()
  }

  /** Logs a warning message with its cause and refreshes the status lines display.
    * @param message the message to log
    * @param t       the throwable to attach to the log entry
    */
  def warn(message: SafeInterpolator, t: Throwable)(implicit logConfig: LogConfig): Unit = {
    logger.warn(message withException t)
    refreshStatusLines()
  }

  /** Logs an error message and refreshes the status lines display.
    * @param message the message to log
    */
  def error(message: SafeInterpolator)(implicit logConfig: LogConfig): Unit = {
    logger.error(message)
    refreshStatusLines()
  }

  /** Logs an error message with its cause and refreshes the status lines display.
    * @param message the message to log
    * @param t       the throwable to attach to the log entry
    */
  def error(message: SafeInterpolator, t: Throwable)(implicit logConfig: LogConfig): Unit = {
    logger.error(message withException t)
    refreshStatusLines()
  }

  /** Opaque handle identifying one status line; instances are compared by identity. */
  class StatusLine

  // Using LinkedHashMap so that status messages will be printed in insertion order.
  // Every access (read or write) must hold `messages.synchronized`.
  private val messages: mutable.LinkedHashMap[StatusLine, String] = mutable.LinkedHashMap.empty[StatusLine, String]

  /** True when the JVM has an attached console; status lines are only drawn in that case. */
  val isInteractive: Boolean = System.console() != null

  /** Registers a new status line and redraws.
    * @param message initial text (blank text is tracked but not drawn)
    * @return the handle to pass to [[update]] and [[remove]]
    */
  def create(message: String = ""): StatusLine = {
    val statusLine = new StatusLine
    blocking(messages.synchronized {
      messages += statusLine -> message
    })
    refreshStatusLines()
    statusLine
  }

  /** Replaces the text of a status line (adding it if it is not currently tracked) and redraws. */
  def update(statusLine: StatusLine, message: String): Unit = {
    blocking(messages.synchronized {
      messages += statusLine -> message
    })
    refreshStatusLines()
  }

  /** Stops tracking a status line and redraws, clearing the terminal line it used to occupy. */
  def remove(statusLine: StatusLine): Unit = {
    blocking(messages.synchronized {
      messages -= statusLine
    })
    refreshStatusLines(clearExtraLine = true)
  }

  /** Prints status lines as follows: an empty line, then the status lines, then
    * the cursor is moved to the leftmost column of the blank line.
    *
    * Printing is serialized on `this`; the messages themselves are copied out under the
    * `messages` lock first, so a concurrent create/update/remove can never modify the map while
    * it is being iterated here. No caller holds the `messages` lock while calling this method,
    * so the `this` -> `messages` acquisition order cannot deadlock.
    *
    * @param clearExtraLine set to true after removing a status line, to account for
    *                       the line that needs to be cleared
    */
  private def refreshStatusLines(clearExtraLine: Boolean = false): Unit = this.synchronized {
    // We should not print status lines at all if we are not in an interactive shell
    if (isInteractive) {
      val currentMessages: Vector[String] = blocking(messages.synchronized(messages.values.toVector))
      // And we do not need to refresh status lines if there are no status messages to print or clear
      if (clearExtraLine || currentMessages.nonEmpty) {
        val up1 = "\u001b[1A"
        val erase = "\u001b[K"
        val home = "\r"
        val homeErase = home + erase
        realtimeOutput.println(homeErase)
        val statuses = currentMessages.filter(_.trim != "")
        for { status <- statuses } realtimeOutput.println(s"$homeErase | => $status")
        if (clearExtraLine) realtimeOutput.print(homeErase)
        // Move back up over everything just printed: the blank line plus one line per status
        for { _ <- 1 to statuses.length + 1 } realtimeOutput.print(up1)
      }
    }
  }
}
/** Mask a sensitive string value
  *
  * The result is always the fixed prefix `****`. Values longer than four characters additionally
  * keep their last four characters, so `"e67008aa-c018-440b-8f74-5be9d448bf9e"` becomes
  * `"****bf9e"` (no separator is inserted between the mask and the visible suffix).
  *
  * @param value The value to mask
  * @return `"****"` for values of four characters or fewer, otherwise `"****"` followed by the
  *         last four characters of `value`
  */
private def maskValue(value: String): String = {
  val mask = "****"
  val visibleSuffixLength = 4
  value match {
    // Too short to reveal any part of it
    case short if short.length <= visibleSuffixLength => mask
    case long => s"$mask${long.takeRight(visibleSuffixLength)}"
  }
}
/** Configuration choice of which edge collection implementation to use */
sealed abstract class EdgeIteration {

  /** A function that builds a fresh edge collection for the given ID */
  def edgeCollectionFactory: QuineId => SyncEdgeCollection
}

object EdgeIteration {

  /** Backed by [[UnorderedEdgeCollection]] */
  case object Unordered extends EdgeIteration {
    def edgeCollectionFactory: QuineId => SyncEdgeCollection =
      (qid: QuineId) => new UnorderedEdgeCollection(qid)
  }

  /** Backed by [[ReverseOrderedEdgeCollection]] */
  case object ReverseInsertion extends EdgeIteration {
    def edgeCollectionFactory: QuineId => SyncEdgeCollection =
      (qid: QuineId) => new ReverseOrderedEdgeCollection(qid)
  }

  /** Pureconfig support, derived from the case objects above */
  implicit val edgeIterationConfigConvert: ConfigConvert[EdgeIteration] =
    deriveEnumerationConvert[EdgeIteration]
}
object FileAccessPolicy extends LazySafeLogging {

  /** Create a FileAccessPolicy from configured directories plus recipe file paths
    *
    * Recipe file paths are automatically allowed by extracting their parent directories
    * and adding them to the allowed directories list.
    *
    * @param allowedDirectories Configured allowlist of directories (absolute, or relative to the working directory)
    * @param resolutionMode File resolution mode (static or dynamic)
    * @param recipeFilePaths File paths from recipe ingest streams
    * @return Validated FileAccessPolicy with canonicalized paths and (for static mode) allowed files.
    *         Directories that do not exist are skipped; a path that exists but is not a directory,
    *         or that cannot be resolved, is reported as an error.
    */
  def fromConfigWithRecipePaths(
    allowedDirectories: List[String],
    resolutionMode: ResolutionMode,
    recipeFilePaths: List[String],
  )(implicit logConfig: LogConfig): ValidatedNel[String, FileAccessPolicy] = {
    // Extract parent directories from recipe file paths
    val recipeDirectories = recipeFilePaths.flatMap { filePath =>
      try {
        val realPath = Paths.get(filePath).toRealPath()
        // getParent is null for a path with no parent (e.g. a filesystem root)
        Option(realPath.getParent).map(_.toString)
      } catch {
        case e: Exception =>
          logger.error(log"Could not load folder of recipe data because of error" withException e)
          None
      }
    }.distinct

    // Merge recipe directories with configured directories
    val mergedDirectories = (allowedDirectories ++ recipeDirectories).distinct

    // Validate and canonicalize all directory paths
    val validatedPaths: ValidatedNel[String, List[Path]] = mergedDirectories
      .map { dirString =>
        try {
          val path = Paths.get(dirString)
          val absolutePath = if (path.isAbsolute) path else path.toAbsolutePath
          val canonicalPath = absolutePath.normalize()
          if (!Files.exists(canonicalPath)) {
            // This is usually because the user is using the default file_ingests/, but didn't create that folder
            // This is fine if the user doesn't want file ingests
            logger.debug(
              log"Allowed directory does not exist: ${Safe(dirString)} (resolved to: ${Safe(canonicalPath.toString)})",
            )
            List.empty[Path].validNel[String]
          } else {
            val realPath = canonicalPath.toRealPath()
            if (!Files.isDirectory(realPath)) {
              s"Allowed directory path is not a directory: $dirString (resolved to: $realPath)"
                .invalidNel[List[Path]]
            } else {
              List(realPath).validNel[String]
            }
          }
        } catch {
          case e: Exception =>
            s"Invalid allowed directory path: $dirString - ${e.getMessage}"
              .invalidNel[List[Path]]
        }
      }
      .sequence
      .map(_.flatten)

    validatedPaths.map { paths =>
      // For static mode, enumerate all files in allowed directories at startup
      // Only files directly in the directory (not subdirectories) are allowed
      val allowedFiles = resolutionMode match {
        case ResolutionMode.Static =>
          paths.flatMap { dir =>
            try {
              import scala.jdk.CollectionConverters._
              // Files.list holds an open directory handle until the stream is closed, so the
              // listing is fully materialized into a Set inside the managed scope.
              scala.util.Using.resource(Files.list(dir)) { entries =>
                entries
                  .iterator()
                  .asScala
                  .filter(Files.isRegularFile(_))
                  .map(_.toRealPath())
                  .toSet
              }
            } catch {
              case e: Exception =>
                logger.info(
                  log"Could not enumerate files of an allowed directory at startup. Its files will not be loaded"
                  withException e,
                )
                Set.empty[Path]
            }
          }.toSet
        case ResolutionMode.Dynamic =>
          Set.empty[Path]
      }

      FileAccessPolicy(paths, resolutionMode, allowedFiles)
    }
  }

  /** Validate a file path against the access policy
    *
    * The path is resolved to its real path (so it must exist and be resolvable), and is accepted
    * only when its parent directory is exactly one of the allowed directories. In static
    * resolution mode the file must additionally be one of the files recorded in the policy.
    *
    * @param pathString The file path to validate
    * @param policy The file access policy
    * @return Validated real Path, or a [[FileIngestSecurityException]] describing the rejection
    */
  def validatePath(pathString: String, policy: FileAccessPolicy): ValidatedNel[BaseError, Path] =
    try {
      val path = Paths.get(pathString)
      val absolutePath = if (path.isAbsolute) path else path.toAbsolutePath
      val realPath = absolutePath.toRealPath()

      // Handle allowlist scenarios
      if (policy.allowedDirectories.isEmpty) {
        // Empty allowlist = deny all file ingests
        FileIngestSecurityException(
          s"File path not allowed: $pathString (resolved to: $realPath). " +
          s"No allowed directories configured (empty allowlist denies all file ingests).",
        ).invalidNel[Path]
      } else {
        // Check if the file's parent directory exactly matches one of the allowed directories
        // Subdirectories are NOT allowed - only files directly in the allowed directory
        val parentDir = Option(realPath.getParent)
        val isAllowed = parentDir.exists { parent =>
          policy.allowedDirectories.exists { allowedDir =>
            parent.equals(allowedDir)
          }
        }

        if (!isAllowed) {
          val parentDirStr = parentDir.map(_.toString).getOrElse("(no parent)")
          FileIngestSecurityException(
            s"File path not allowed: $pathString (resolved to: $realPath, parent: $parentDirStr).",
          )
            .invalidNel[Path]
        } else {
          // For static mode, check if file was present at startup
          policy.resolutionMode match {
            case ResolutionMode.Static =>
              if (policy.allowedFiles.contains(realPath)) {
                realPath.validNel
              } else {
                FileIngestSecurityException(
                  s"File not allowed in static resolution mode: $pathString (resolved to: $realPath). " +
                  s"Only files present at startup are allowed.",
                )
                  .invalidNel[Path]
              }
            case ResolutionMode.Dynamic =>
              // Dynamic mode allows any files in allowed directories (even files added after startup)
              realPath.validNel
          }
        }
      }
    } catch {
      case e: Exception =>
        FileIngestSecurityException(s"Invalid file path: $pathString - ${e.getMessage}")
          .invalidNel[Path]
    }
}
/** Configuration choice of how node IDs are represented */
sealed abstract class IdProviderType {

  /** Whether the resulting provider should be wrapped in [[WithExplicitPositions]] */
  val partitioned: Boolean

  /** Build the ID provider for this configuration: the unpartitioned provider, wrapped in
    * [[WithExplicitPositions]] when `partitioned` is set.
    */
  def idProvider(implicit logConfig: LogConfig): QuineIdProvider =
    createUnpartitioned match {
      case unwrapped if partitioned => WithExplicitPositions(unwrapped)
      case unwrapped => unwrapped
    }

  /** Build the provider for this configuration without any partition wrapping */
  protected def createUnpartitioned: QuineIdProvider
}

object IdProviderType extends PureconfigInstances {

  /** Long-based IDs: random when no starting value is configured, otherwise a provider seeded
    * with the configured value. (`scala.Long` is spelled out because this class shadows `Long`.)
    */
  final case class Long(
    consecutiveStart: Option[scala.Long],
    partitioned: Boolean = false,
  ) extends IdProviderType {
    def createUnpartitioned: QuineIdProvider =
      consecutiveStart.fold[QuineIdProvider](QuineIdRandomLongProvider)(start => QuineIdLongProvider(start))
  }

  /** IDs from [[QuineUUIDProvider]] */
  final case class UUID(partitioned: Boolean = false) extends IdProviderType {
    def createUnpartitioned = QuineUUIDProvider
  }

  /** IDs from a [[Uuid3Provider]] in the given namespace (the nil UUID unless configured) */
  final case class Uuid3(
    namespace: ju.UUID = UUID4s.NIL.asJava(),
    partitioned: Boolean = false,
  ) extends IdProviderType {
    def createUnpartitioned: Uuid3Provider = Uuid3Provider(namespace)
  }

  /** IDs from [[Uuid4Provider]] */
  final case class Uuid4(partitioned: Boolean = false) extends IdProviderType {
    def createUnpartitioned = Uuid4Provider
  }

  /** IDs from a [[Uuid5Provider]] in the given namespace (the nil UUID unless configured) */
  final case class Uuid5(
    namespace: ju.UUID = UUID4s.NIL.asJava(),
    partitioned: Boolean = false,
  ) extends IdProviderType {
    def createUnpartitioned: Uuid5Provider = Uuid5Provider(namespace)
  }

  /** IDs from [[IdentityIdProvider]] */
  final case class ByteArray(partitioned: Boolean = false) extends IdProviderType {
    def createUnpartitioned = IdentityIdProvider
  }

  // Pureconfig instances: one per variant, then the one for the whole hierarchy
  implicit val longConfigConvert: ConfigConvert[Long] = deriveConvert[Long]
  implicit val uuidConfigConvert: ConfigConvert[UUID] = deriveConvert[UUID]
  implicit val uuid3ConfigConvert: ConfigConvert[Uuid3] = deriveConvert[Uuid3]
  implicit val uuid4ConfigConvert: ConfigConvert[Uuid4] = deriveConvert[Uuid4]
  implicit val uuid5ConfigConvert: ConfigConvert[Uuid5] = deriveConvert[Uuid5]
  implicit val byteArrayConfigConvert: ConfigConvert[ByteArray] = deriveConvert[ByteArray]

  implicit val configConvert: ConfigConvert[IdProviderType] = deriveConvert[IdProviderType]
}
object MetricsReporter extends PureconfigInstances {
  // This is so 'Slf4j' doesn't get turned into 'slf-4j' by the default impl.
  // Locale.ROOT keeps the mapping independent of the JVM's default locale (with a Turkish
  // default locale, "Influxdb".toLowerCase would otherwise produce a dotless 'ı').
  implicit val metricsReporterNameHint: FieldCoproductHint[MetricsReporter] =
    new FieldCoproductHint[MetricsReporter]("type") {
      override def fieldValue(name: String): String = name.toLowerCase(java.util.Locale.ROOT)
    }

  /** Reports the registry's metrics over JMX */
  case object Jmx extends MetricsReporter {
    def register(registry: MetricRegistry, namespace: String): ReporterWrapper =
      new JmxReporterWrapper(JmxReporter.forRegistry(registry).build())
  }

  /** A reporter that is started on a fixed reporting period */
  sealed abstract class PeriodicReporter extends MetricsReporter {

    /** Interval at which the wrapped reporter is started */
    def period: FiniteDuration

    /** Wrap a scheduled reporter so that starting it uses [[period]] */
    protected def wrapReporter(reporter: ScheduledReporter): ReporterWrapper =
      new ScheduledReporterWrapper(period, reporter)
  }

  /** Writes metrics as CSV files into `logDirectory` */
  final case class Csv(period: FiniteDuration, logDirectory: File) extends PeriodicReporter {
    def register(registry: MetricRegistry, namespace: String): ReporterWrapper = {
      // mkdirs (rather than mkdir) so that a configured path with missing parent directories is
      // created in full instead of silently not being created at all
      logDirectory.mkdirs()
      wrapReporter(CsvReporter.forRegistry(registry).build(logDirectory))
    }
  }

  /** Writes metrics to the SLF4J logger named `loggerName` */
  final case class Slf4j(period: FiniteDuration, loggerName: String = "metrics") extends PeriodicReporter {
    def register(registry: MetricRegistry, namespace: String): ReporterWrapper =
      wrapReporter(
        Slf4jReporter.forRegistry(registry).outputTo(LoggerFactory.getLogger(loggerName)).build(),
      )
  }

  /** Sends metrics to InfluxDB over HTTP, tagging every measurement with `member_id = namespace` */
  final case class Influxdb(
    period: FiniteDuration,
    database: String = "metrics",
    scheme: String = "http",
    host: String = "localhost",
    port: Int = 8086,
    user: Option[String] = None,
    password: Option[String] = None,
  ) extends PeriodicReporter {
    def register(registry: MetricRegistry, namespace: String): ReporterWrapper =
      wrapReporter(
        InfluxdbReporter
          .forRegistry(registry)
          .protocol(
            // The protocol constructor takes nullable credentials, hence orNull
            new HttpInfluxdbProtocol(scheme, host, port, user.orNull, password.orNull, database),
          )
          .withAutoCreateDB(true)
          .transformer(new TagInfluxMetrics(Map("member_id" -> namespace)))
          .build(),
      )
  }

  implicit val jmxConfigConvert: ConfigConvert[Jmx.type] = deriveConvert[Jmx.type]
  implicit val csvConfigConvert: ConfigConvert[Csv] = deriveConvert[Csv]
  implicit val slf4jConfigConvert: ConfigConvert[Slf4j] = deriveConvert[Slf4j]
  implicit val influxdbConfigConvert: ConfigConvert[Influxdb] = deriveConvert[Influxdb]

  implicit val configConvert: ConfigConvert[MetricsReporter] = deriveConvert[MetricsReporter]
}
/** Default Cassandra endpoints, read from the `CASSANDRA_ENDPOINTS` environment variable.
  *
  * The variable is a comma-separated list of `host` or `host:port` entries; entries without a
  * port use [[defaultCassandraPort]]. Whitespace around entries is ignored and blank entries are
  * dropped, so `"a:9042, b:9042"` and `"a:9042,,b:9042"` both yield two endpoints. When the
  * variable is unset, `localhost:9042` is used.
  *
  * @return the parsed (unresolved) endpoint addresses, possibly empty if the variable contains
  *         no usable entries
  */
def defaultCassandraAddress: List[InetSocketAddress] =
  sys.env
    .getOrElse("CASSANDRA_ENDPOINTS", "localhost:9042")
    .split(',')
    .iterator
    .map(_.trim)
    .filter(_.nonEmpty)
    .map(Address.parseHostAndPort(_, defaultCassandraPort))
    .toList
/** Configuration for the AWS Keyspaces persistor.
  *
  * Construction fails with an assertion error when `readConsistency` is not one of `ONE`,
  * `LOCAL_ONE` or `LOCAL_QUORUM`.
  *
  * @param keyspace keyspace name; defaults to the `CASSANDRA_KEYSPACE` environment variable when set
  * @param awsRegion optional AWS region
  * @param awsRoleArn optional AWS role ARN
  * @param readConsistency consistency level for reads (restricted as described above)
  * @param writeTimeout timeout for writes
  * @param readTimeout timeout for reads
  * @param shouldCreateTables presumably whether missing tables are created at startup — confirm against the persistor
  * @param shouldCreateKeyspace presumably whether a missing keyspace is created at startup — confirm against the persistor
  * @param bloomFilterSize size of the bloom filter, if enabled
  * @param snapshotPartMaxSizeBytes presumably the maximum size of one stored snapshot part — confirm against the persistor
  */
final case class Keyspaces(
  keyspace: Option[String] = sys.env.get("CASSANDRA_KEYSPACE"),
  awsRegion: Option[Region] = None,
  awsRoleArn: Option[String] = None,
  readConsistency: ConsistencyLevel = ConsistencyLevel.LOCAL_QUORUM,
  writeTimeout: FiniteDuration = 10.seconds,
  readTimeout: FiniteDuration = 10.seconds,
  shouldCreateTables: Boolean = true,
  shouldCreateKeyspace: Boolean = true,
  bloomFilterSize: Option[Long] = None,
  snapshotPartMaxSizeBytes: Int = 1000000,
) extends PersistenceAgentType(isLocal = false, "keyspaces") {
  private val supportedReadConsistencies: Set[ConsistencyLevel] =
    Set(ConsistencyLevel.ONE, ConsistencyLevel.LOCAL_ONE, ConsistencyLevel.LOCAL_QUORUM)
  assert(
    supportedReadConsistencies.contains(readConsistency),
    "AWS Keyspaces only supports read consistency levels: " + supportedReadConsistencies.mkString(", "),
  )
}
/** Pureconfig support for [[ConsistencyLevel]], expressed through the [[DefaultConsistencyLevel]]
  * Java enum: reading yields one of the enum's values, and writing a level that is not one of
  * the enum's values fails with an error.
  */
implicit val consistencyLevelConvert: ConfigConvert[ConsistencyLevel] = {
  // Read the enum, then widen the result to the ConsistencyLevel interface
  val levelReader: ConfigReader[ConsistencyLevel] =
    ConfigReader.javaEnumReader[DefaultConsistencyLevel].map[ConsistencyLevel](level => level)

  // Narrow back to the enum for writing; anything else cannot be represented in config
  val levelWriter: ConfigWriter[ConsistencyLevel] =
    ConfigWriter.javaEnumWriter[DefaultConsistencyLevel].contramap[ConsistencyLevel] {
      case builtIn: DefaultConsistencyLevel => builtIn
      case custom => sys.error("Can't serialize custom consistency level:" + custom)
    }

  ConfigConvert(levelReader, levelWriter)
}
keyspacesConfigConvert: ConfigConvert[Keyspaces] = deriveConvert[Keyspaces] implicit val clickHouseConfigConvert: ConfigConvert[ClickHouse] = deriveConvert[ClickHouse] implicit lazy val configConvert: ConfigConvert[PersistenceAgentType] = deriveConvert[PersistenceAgentType] } ================================================ FILE: quine/src/main/scala/com/thatdot/quine/app/config/PersistenceBuilder.scala ================================================ package com.thatdot.quine.app.config import java.io.File import java.util.Properties import scala.concurrent.Await import scala.concurrent.duration._ import org.apache.pekko.actor.ActorSystem import com.thatdot.common.logging.Log.LogConfig import com.thatdot.quine.app.Metrics import com.thatdot.quine.app.config.PersistenceAgentType._ import com.thatdot.quine.persistor._ import com.thatdot.quine.persistor.cassandra.aws.PrimeKeyspacesPersistor import com.thatdot.quine.persistor.cassandra.support.CassandraStatementSettings import com.thatdot.quine.persistor.cassandra.vanilla.PrimeCassandraPersistor import com.thatdot.quine.util.QuineDispatchers /** Type aliases for the builder functions used by PersistenceBuilder. * Each builder function takes specific configuration and returns a PrimePersistor. 
/** Builds a [[PrimePersistor]] from configuration by delegating to one builder function per
  * persistence type.
  *
  * Every builder has a default taken from the companion object, so a product only needs to
  * supply the two default locations and whichever builders it wants to replace.
  *
  * @param defaultKeyspace        keyspace passed to the Cassandra and Keyspaces builders when the
  *                               configuration does not name one
  * @param defaultRocksDbFilepath file path handed to the RocksDB builder alongside its configuration
  * @param buildEmpty             builder used for [[PersistenceAgentType.Empty]]
  * @param buildInMemory          builder used for [[PersistenceAgentType.InMemory]]
  * @param buildRocksDb           builder used for [[PersistenceAgentType.RocksDb]]
  * @param buildMapDb             builder used for [[PersistenceAgentType.MapDb]]
  * @param buildCassandra         builder used for [[PersistenceAgentType.Cassandra]]
  * @param buildKeyspaces         builder used for [[PersistenceAgentType.Keyspaces]]
  * @param buildClickHouse        builder used for [[PersistenceAgentType.ClickHouse]]
  */
case class PersistenceBuilder(
  defaultKeyspace: String,
  defaultRocksDbFilepath: File,
  buildEmpty: EmptyBuilder = PersistenceBuilder.defaultBuildEmpty,
  buildInMemory: InMemoryBuilder = PersistenceBuilder.defaultBuildInMemory,
  buildRocksDb: RocksDbBuilder = PersistenceBuilder.defaultBuildRocksDb,
  buildMapDb: MapDbBuilder = PersistenceBuilder.defaultBuildMapDb,
  buildCassandra: CassandraBuilder = PersistenceBuilder.defaultBuildCassandra,
  buildKeyspaces: KeyspacesBuilder = PersistenceBuilder.defaultBuildKeyspaces,
  buildClickHouse: ClickHouseBuilder = PersistenceBuilder.defaultBuildClickHouse,
) {

  /** Build the persistor for the configured persistence type.
    *
    * @param pt                which kind of persistence to build, with its type-specific settings
    * @param persistenceConfig settings shared by every persistence type
    * @return the persistor produced by the builder matching `pt`
    */
  def build(pt: PersistenceAgentType, persistenceConfig: PersistenceConfig)(implicit
    system: ActorSystem,
    logConfig: LogConfig,
  ): PrimePersistor = {
    // Created up front for every persistence type, although only some builders receive it
    val dispatchers = new QuineDispatchers(system)

    // Explicitly configured keyspace, falling back to this product's default
    def keyspaceOrDefault(configured: Option[String]): String = configured.getOrElse(defaultKeyspace)

    pt match {
      case Empty =>
        buildEmpty(persistenceConfig, system, logConfig)
      case InMemory =>
        buildInMemory(persistenceConfig, system, logConfig)
      case rocksDb: RocksDb =>
        buildRocksDb(rocksDb, persistenceConfig, defaultRocksDbFilepath, dispatchers, system, logConfig)
      case mapDb: MapDb =>
        buildMapDb(mapDb, persistenceConfig, dispatchers, system, logConfig)
      case cassandra: Cassandra =>
        buildCassandra(cassandra, persistenceConfig, keyspaceOrDefault(cassandra.keyspace), system, logConfig)
      case keyspaces: Keyspaces =>
        buildKeyspaces(keyspaces, persistenceConfig, keyspaceOrDefault(keyspaces.keyspace), system, logConfig)
      case clickHouse: ClickHouse =>
        buildClickHouse(clickHouse, persistenceConfig, system, logConfig)
    }
  }
}
*/ val defaultBuildRocksDb: RocksDbBuilder = { (r, persistenceConfig, defaultFilepath, quineDispatchers, system, logConfig) => implicit val s: ActorSystem = system implicit val lc: LogConfig = logConfig new RocksDbPrimePersistor( r.createParentDir, r.filepath.getOrElse(defaultFilepath), r.writeAheadLog, r.syncAllWrites, new Properties(), persistenceConfig, r.bloomFilterSize, quineDispatchers.blockingDispatcherEC, ) } /** Default builder for MapDB persistence. */ val defaultBuildMapDb: MapDbBuilder = { (m, persistenceConfig, quineDispatchers, system, logConfig) => implicit val s: ActorSystem = system implicit val lc: LogConfig = logConfig m.filepath match { case Some(path) => new PersistedMapDbPrimePersistor( m.createParentDir, path, m.writeAheadLog, m.numberPartitions, m.commitInterval, Metrics, persistenceConfig, m.bloomFilterSize, quineDispatchers, ) case None => new TempMapDbPrimePersistor( m.writeAheadLog, m.numberPartitions, m.commitInterval, Metrics, persistenceConfig, m.bloomFilterSize, quineDispatchers, ) } } /** Default builder for Cassandra persistence. */ val defaultBuildCassandra: CassandraBuilder = { (c, persistenceConfig, keyspace, system, logConfig) => implicit val s: ActorSystem = system implicit val lc: LogConfig = logConfig Await.result( PrimeCassandraPersistor.create( persistenceConfig, c.bloomFilterSize, c.endpoints, c.localDatacenter, c.replicationFactor, keyspace, c.shouldCreateKeyspace, c.shouldCreateTables, CassandraStatementSettings(c.readConsistency, c.readTimeout), CassandraStatementSettings(c.writeConsistency, c.writeTimeout), c.snapshotPartMaxSizeBytes, Some(Metrics), ), 90.seconds, ) } /** Default builder for AWS Keyspaces persistence. 
*/ val defaultBuildKeyspaces: KeyspacesBuilder = { (c, persistenceConfig, keyspace, system, logConfig) => implicit val s: ActorSystem = system implicit val lc: LogConfig = logConfig Await.result( PrimeKeyspacesPersistor.create( persistenceConfig, c.bloomFilterSize, keyspace, c.awsRegion, c.awsRoleArn, CassandraStatementSettings(c.readConsistency, c.readTimeout), c.writeTimeout, c.shouldCreateKeyspace, c.shouldCreateTables, Some(Metrics), c.snapshotPartMaxSizeBytes, ), 91.seconds, ) } /** Default builder for ClickHouse persistence. * By default, ClickHouse is not available - only in Enterprise. */ val defaultBuildClickHouse: ClickHouseBuilder = { (_, _, _, _) => throw new IllegalArgumentException( "ClickHouse is not available in this product. If you are interested in using ClickHouse, please contact us.", ) } } ================================================ FILE: quine/src/main/scala/com/thatdot/quine/app/config/PureconfigInstances.scala ================================================ package com.thatdot.quine.app.config import scala.concurrent.duration.FiniteDuration import scala.jdk.CollectionConverters._ import org.apache.pekko.util.Timeout import pureconfig.BasicReaders.stringConfigReader import pureconfig.error.CannotConvert import pureconfig.generic.ProductHint import pureconfig.generic.semiauto.{deriveConvert, deriveEnumerationConvert} import pureconfig.{ConfigConvert, ConfigReader, ConfigWriter} import com.thatdot.common.logging.Log.{LogConfig, RedactHide, RedactMethod} import com.thatdot.quine.persistor.{EventEffectOrder, PersistenceConfig, PersistenceSchedule} import com.thatdot.quine.util.Config._ import com.thatdot.quine.util.{Host, Port} /** Collection of implicits for helping implicit resolution of pureconfig schemas */ trait PureconfigInstances { // Unknown keys should be errors implicit def sealedProductHint[T]: ProductHint[T] = ProductHint[T](allowUnknownKeys = false) implicit val timeoutConvert: ConfigConvert[Timeout] = 
ConfigConvert[FiniteDuration].xmap(Timeout(_), _.duration)

  implicit val persistenceScheduleConvert: ConfigConvert[PersistenceSchedule] =
    deriveEnumerationConvert[PersistenceSchedule]

  implicit val effectOrderConvert: ConfigConvert[EventEffectOrder] =
    deriveEnumerationConvert[EventEffectOrder]

  implicit val persistenceConfigConvert: ConfigConvert[PersistenceConfig] =
    deriveConvert[PersistenceConfig]

  // RedactMethod is a sealed trait with only RedactHide case object
  // Uses type discriminator (e.g., redactor { type = redact-hide })
  implicit val redactHideConvert: ConfigConvert[RedactHide.type] =
    deriveConvert[RedactHide.type]
  implicit val redactMethodConvert: ConfigConvert[RedactMethod] =
    deriveConvert[RedactMethod]
  implicit val logConfigConvert: ConfigConvert[LogConfig] =
    deriveConvert[LogConfig]

  implicit val symbolConvert: ConfigConvert[Symbol] =
    ConfigConvert[String].xmap(Symbol(_), _.name)

  // Host and port values pass through the special-value substitution helpers before being wrapped
  implicit val hostConvert: ConfigConvert[Host] =
    ConfigConvert[String].xmap(s => Host(replaceHostSpecialValues(s)), _.asString)
  implicit val portConvert: ConfigConvert[Port] =
    ConfigConvert[Int].xmap(i => Port(replacePortSpecialValue(i)), _.asInt)

  import software.amazon.awssdk.regions.Region

  /** Every region known to the AWS SDK, keyed by its id */
  private val regions = Region.regions.asScala.map(r => r.id -> r).toMap

  /** Reads an AWS region from its id, ignoring case.
    *
    * The input is lower-cased with `Locale.ROOT` rather than the JVM default locale: under a
    * locale with special casing rules (e.g. Turkish, where `I` lower-cases to a dotless `ı`) a
    * default-locale `toLowerCase` could turn a valid upper-case id into a string that no longer
    * matches any key of `regions`.
    */
  implicit val regionReader: ConfigReader[Region] = ConfigReader.fromNonEmptyString(s =>
    regions
      .get(s.toLowerCase(java.util.Locale.ROOT))
      .toRight(CannotConvert(s, "Region", "expected one of " + regions.keys.mkString(", "))),
  )

  /** Writes a region back out as its id */
  implicit val regionWriter: ConfigWriter[Region] = ConfigWriter.toString(_.id)
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/config/QuineConfig.scala
================================================
package com.thatdot.quine.app.config

import scala.concurrent.duration.{Duration, DurationInt, FiniteDuration}

import org.apache.pekko.util.Timeout

import com.typesafe.config.{Config, ConfigObject}
import pureconfig._
import pureconfig.generic.ProductHint
import
pureconfig.generic.semiauto.deriveConvert import shapeless.{Lens, lens} import com.thatdot.common.logging.Log.LogConfig import com.thatdot.quine.persistor.PersistenceConfig /** Top-level config for Quine * * See `documented_config.conf` inside the test resources for documentation */ final case class QuineConfig( dumpConfig: Boolean = false, timeout: Timeout = Timeout(120.seconds), inMemorySoftNodeLimit: Option[Int] = Some(10000), inMemoryHardNodeLimit: Option[Int] = Some(75000), declineSleepWhenWriteWithin: FiniteDuration = 100.millis, declineSleepWhenAccessWithin: FiniteDuration = Duration.Zero, maxCatchUpSleep: FiniteDuration = 2000.millis, webserver: WebServerBindConfig = WebServerBindConfig(), webserverAdvertise: Option[WebserverAdvertiseConfig] = None, shouldResumeIngest: Boolean = false, shardCount: Int = 4, id: IdProviderType = IdProviderType.UUID(), edgeIteration: EdgeIteration = EdgeIteration.ReverseInsertion, store: PersistenceAgentType = PersistenceAgentType.RocksDb(), persistence: PersistenceConfig = PersistenceConfig(), labelsProperty: Symbol = Symbol("__LABEL"), metricsReporters: List[MetricsReporter] = List(MetricsReporter.Jmx), metrics: MetricsConfig = MetricsConfig(), helpMakeQuineBetter: Boolean = true, defaultApiVersion: String = "v1", logConfig: LogConfig = LogConfig.permissive, fileIngest: FileIngestConfig = FileIngestConfig(), ) extends BaseConfig { def configVal: Config = ConfigWriter[QuineConfig].to(this).asInstanceOf[ConfigObject].toConfig } object QuineConfig extends PureconfigInstances { val webserverLens: Lens[QuineConfig, WebServerBindConfig] = lens[QuineConfig] >> Symbol("webserver") val webserverPortLens: Lens[QuineConfig, Int] = webserverLens >> Symbol("port") >> Symbol("asInt") val webserverEnabledLens: Lens[QuineConfig, Boolean] = webserverLens >> Symbol("enabled") val charArrayReader: ConfigReader[Array[Char]] = ConfigReader[String].map(_.toCharArray) val charArrayWriter: ConfigWriter[Array[Char]] = 
ConfigWriter[String].contramap(new String(_)) implicit val configConvert: ConfigConvert[QuineConfig] = { implicit val configConvert = deriveConvert[QuineConfig] // This class is necessary to make sure our config is always situated at the `quine` root case class QuineConfigRoot(quine: QuineConfig = QuineConfig()) // Allow other top-level keys that are not "quine" implicit val topLevelProductHint: ProductHint[QuineConfigRoot] = ProductHint[QuineConfigRoot](allowUnknownKeys = true) deriveConvert[QuineConfigRoot].xmap[QuineConfig](_.quine, QuineConfigRoot(_)) } } ================================================ FILE: quine/src/main/scala/com/thatdot/quine/app/config/QuinePersistenceBuilder.scala ================================================ package com.thatdot.quine.app.config import java.io.File /** Persistence builder instance for Quine. * * Uses Quine-specific defaults: * - Uses "quine" as the default Cassandra keyspace name * - Uses "quine.db" as the default RocksDB file path * - ClickHouse throws an error (Enterprise-only feature) */ object QuinePersistenceBuilder { val instance: PersistenceBuilder = PersistenceBuilder( defaultKeyspace = "quine", defaultRocksDbFilepath = new File("quine.db"), buildClickHouse = { (_, _, _, _) => throw new IllegalArgumentException( "ClickHouse is not available in Quine. 
If you are interested in using ClickHouse, please contact us to discuss upgrading to Quine Enterprise.", ) }, ) } ================================================ FILE: quine/src/main/scala/com/thatdot/quine/app/config/WebServerConfig.scala ================================================ package com.thatdot.quine.app.config import java.io.File import java.net.{InetAddress, URL} import org.apache.pekko.http.scaladsl.model.Uri import pureconfig.generic.semiauto.deriveConvert import pureconfig.{ConfigConvert, ConfigReader, ConfigWriter} import com.thatdot.quine.app.config.WebServerBindConfig.{KeystorePasswordEnvVar, KeystorePathEnvVar} import com.thatdot.quine.util.{Host, Port} final case class SslConfig(path: File, password: Array[Char]) object SslConfig extends PureconfigInstances { implicit val configConvert: ConfigConvert[SslConfig] = { implicit val charArrayReader: ConfigReader[Array[Char]] = QuineConfig.charArrayReader implicit val charArrayWriter: ConfigWriter[Array[Char]] = QuineConfig.charArrayWriter deriveConvert[SslConfig] } } final case class MtlsTrustStore(path: File, password: String) object MtlsTrustStore extends PureconfigInstances { implicit val configConvert: ConfigConvert[MtlsTrustStore] = deriveConvert[MtlsTrustStore] } final case class MtlsHealthEndpoints( enabled: Boolean = false, port: Port = Port(8081), ) object MtlsHealthEndpoints extends PureconfigInstances { implicit val configConvert: ConfigConvert[MtlsHealthEndpoints] = deriveConvert[MtlsHealthEndpoints] } final case class UseMtls( enabled: Boolean = false, trustStore: Option[MtlsTrustStore] = None, healthEndpoints: MtlsHealthEndpoints = MtlsHealthEndpoints(), ) object UseMtls extends PureconfigInstances { implicit val configConvert: ConfigConvert[UseMtls] = deriveConvert[UseMtls] } final case class WebServerBindConfig( address: Host = Host("0.0.0.0"), port: Port = Port(8080), enabled: Boolean = true, useTls: Boolean = sys.env.contains(KeystorePathEnvVar) && 
sys.env.contains(KeystorePasswordEnvVar),
  useMtls: UseMtls = UseMtls(),
) {

  /** URL scheme implied by the TLS setting */
  def protocol: String = if (useTls) "https" else "http"

  /** Best-effort URL at which a user on this machine can reach the bound web server.
    *
    * @return a URL built from [[protocol]], the bind host (or the loopback address when the bind
    *         host is a wildcard address) and the bind port, with an empty path
    */
  def guessResolvableUrl: URL = {
    val bindHost: Uri.Host = Uri.Host(address.asString)
    // If the host of the bindUri is set to wildcard (INADDR_ANY and IN6ADDR_ANY) - i.e. "0.0.0.0" or "::"
    // present the URL as "localhost" to the user. This is necessary because while
    // INADDR_ANY as a source address means "bind to all interfaces", it cannot necessarily be
    // used as a destination address
    //
    // `headOption` rather than `head`: if the host yields no addresses at all, fall through to
    // the configured host instead of failing with a NoSuchElementException.
    val bindsToWildcard = bindHost.inetAddresses.headOption.exists(_.isAnyLocalAddress)
    val resolveableHost =
      if (bindsToWildcard) Uri.Host(InetAddress.getLoopbackAddress)
      else bindHost

    new URL(protocol, resolveableHost.address, port.asInt, "")
  }
}

object WebServerBindConfig extends PureconfigInstances {
  // Names of the environment variables whose joint presence turns `useTls` on by default
  val KeystorePathEnvVar = "SSL_KEYSTORE_PATH"
  val KeystorePasswordEnvVar = "SSL_KEYSTORE_PASSWORD"

  implicit val configConvert: ConfigConvert[WebServerBindConfig] = deriveConvert[WebServerBindConfig]
}

final case class WebserverAdvertiseConfig(
  address: Host,
  port: Port,
  path: Option[String] = None,
) {

  /** Advertised URL for the given protocol; a missing `path` is rendered as an empty path */
  def url(protocol: String): URL =
    new URL(protocol, address.asString, port.asInt, path.getOrElse(""))
}

object WebserverAdvertiseConfig extends PureconfigInstances {
  implicit val configConvert: ConfigConvert[WebserverAdvertiseConfig] = deriveConvert[WebserverAdvertiseConfig]
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/config/errors/ConfigErrorFormatter.scala
================================================
package com.thatdot.quine.app.config.errors

import cats.data.NonEmptyList
import pureconfig.error.{ConfigReaderFailure, ConfigReaderFailures, ConvertFailure, FailureReason}

/** Configuration for error message formatting */
final case class ErrorFormatterConfig(
  expectedRootKey: String,
  productName: String,
  requiredFields: Set[String],
  docsUrl: String,
)

object ErrorFormatterConfig {

  /** Format configuration errors with automatically detected startup context
*/ def formatErrors(config: ErrorFormatterConfig, failures: ConfigReaderFailures): String = { val context = StartupContext( configFile = sys.props.get("config.file"), isJar = !sys.props.get("java.class.path").exists(_.contains("sbt-launch")), ) new ConfigErrorFormatter(config, context).messageFor(failures) } } sealed trait ConfigError { def format(config: ErrorFormatterConfig, context: StartupContext): String } object ConfigError { case object MissingRootBlock extends ConfigError { override def format(config: ErrorFormatterConfig, context: StartupContext): String = { val basicMessage = s"""Configuration error: Missing '${config.expectedRootKey}' configuration block. | |${config.productName} requires all configuration to be nested under a '${config.expectedRootKey}' block.""".stripMargin val guidance = context match { case StartupContext(Some(file), _) => s""" |Configuration file: $file | |Ensure it has the correct structure: | ${config.expectedRootKey} { | # your configuration here | }""".stripMargin case StartupContext(None, true) => s""" |Running from JAR without a config file. | |You must either: | 1. Provide a config file: -Dconfig.file= | 2. Set required properties: -D${config.expectedRootKey}.license-key=""".stripMargin case StartupContext(None, false) => s""" |Provide configuration via: | 1. application.conf in your classpath | 2. System properties: -D${config.expectedRootKey}.license-key= | 3. Config file: -Dconfig.file=""".stripMargin } basicMessage + guidance + s"\n\nFor more details, see: ${config.docsUrl}" } } final case class MissingRequiredField(fieldName: String) extends ConfigError { override def format(config: ErrorFormatterConfig, context: StartupContext): String = { val kebabFieldName = toKebabCase(fieldName) s"""Configuration error: Missing required '$kebabFieldName'. | |${config.productName} requires a valid $kebabFieldName to start. 
| |Add it to your configuration file: | ${config.expectedRootKey} { | $kebabFieldName = "" | } | |Or set it as a system property: | -D${config.expectedRootKey}.$kebabFieldName= | |For more details, see: ${config.docsUrl}""".stripMargin } } final case class Invalid(path: String, found: String, expected: Set[String]) extends ConfigError { override def format(config: ErrorFormatterConfig, context: StartupContext): String = { val pathDisplay = if (path.isEmpty) "root" else s"'$path'" val expectedDisplay = if (expected.size == 1) expected.head else expected.mkString(" or ") s"""Configuration error: Invalid type at $pathDisplay. | |Expected: $expectedDisplay |Found: $found |${contextGuidance(context, config)}""".stripMargin } } final case class UnknownConfigKey(path: String, key: String) extends ConfigError { override def format(config: ErrorFormatterConfig, context: StartupContext): String = { val fullPath = if (path.isEmpty) key else s"$path.$key" s"""Configuration error: Unknown configuration key '$fullPath'. | |This key is not recognized by ${config.productName}. |Check for typos or consult the documentation. |${contextGuidance(context, config)}""".stripMargin } } /** Unclassified error - we couldn't parse/classify this failure. * Retains original failure for debugging. 
*/
  final case class UnclassifiedError(
    description: String,
    originalFailure: Option[ConfigReaderFailure] = None,
  ) extends ConfigError {
    override def format(config: ErrorFormatterConfig, context: StartupContext): String =
      description + "\n" + contextGuidance(context, config)
  }

  /** Trailing guidance shared by several error messages: the config file in use (when one was
    * given) followed by the documentation link.
    */
  private def contextGuidance(
    context: StartupContext,
    config: ErrorFormatterConfig,
  ): String = context.configFile match {
    case Some(file) => s"\nConfiguration file: $file\nSee: ${config.docsUrl}"
    case None => s"\nSee: ${config.docsUrl}"
  }

  /** Convert a camelCase field name to kebab-case (e.g. `licenseKey` becomes `license-key`).
    *
    * A hyphen is inserted at each lower-case-letter/upper-case-letter boundary, then the whole
    * string is lower-cased. Lower-casing uses `Locale.ROOT` so the result does not depend on the
    * JVM default locale: under Turkish casing rules, for instance, `I` would otherwise become a
    * dotless `ı` and the produced key would not match the real configuration key.
    *
    * @param camelCase field name in camelCase
    * @return the kebab-case form of the name
    */
  private[errors] def toKebabCase(camelCase: String): String =
    camelCase.replaceAll("([a-z])([A-Z])", "$1-$2").toLowerCase(java.util.Locale.ROOT)
}

/** Context about how the app was started */
final case class StartupContext(
  configFile: Option[String],
  isJar: Boolean,
)

/** Formats config errors with context. */
class ConfigErrorFormatter(
  config: ErrorFormatterConfig,
  context: StartupContext,
) {

  /** Generate user-friendly error message for all configuration failures.
    * Processes each failure individually and combines them intelligently.
    */
  def messageFor(failures: ConfigReaderFailures): String = {
    // ConfigReaderFailures is guaranteed non-empty by PureConfig
    val errorTypes = NonEmptyList.fromListUnsafe(failures.toList.map(classifyFailure))
    combineMessages(errorTypes)
  }

  // A single error is rendered on its own; several errors get a count header and a numbered list
  private def combineMessages(errorTypes: NonEmptyList[ConfigError]): String =
    errorTypes match {
      case NonEmptyList(single, Nil) =>
        single.format(config, context)
      case multiple =>
        val header = s"Found ${multiple.size} configuration errors:\n"
        val formattedErrors = multiple.toList.zipWithIndex.map { case (error, idx) =>
          s"${idx + 1}.
${error.format(config, context)}" } header + formattedErrors.mkString("\n\n") } private def classifyFailure(failure: ConfigReaderFailure): ConfigError = failure match { case ConvertFailure(reason, _, path) if path.isEmpty && isKeyNotFound(reason, config.expectedRootKey) => ConfigError.MissingRootBlock case ConvertFailure(reason, _, path) => config.requiredFields .collectFirst { case fieldName if isKeyNotFound(reason, ConfigError.toKebabCase(fieldName)) => ConfigError.MissingRequiredField(fieldName) } .getOrElse(ConfigErrorFormatter.classifyUnknownFailure(reason, path)) case other => ConfigError.UnclassifiedError(other.description, Some(other)) } private def isKeyNotFound(reason: FailureReason, expectedKey: String): Boolean = { val desc = reason.description // Match exact key or parent keys (e.g., "thatdot" when expecting "thatdot.novelty") desc.contains(s"Key not found: '$expectedKey'") || expectedKey.split('.').exists(part => desc.contains(s"Key not found: '$part'")) } } object ConfigErrorFormatter { /** Classify failures that don't match known patterns. * Parses the failure reason description and maps to appropriate ConfigError types. 
*/
  private def classifyUnknownFailure(reason: FailureReason, path: String): ConfigError = {
    val desc = reason.description
    // Fallback used by every branch that cannot pull the expected pieces out of the description
    def unclassified: ConfigError = ConfigError.UnclassifiedError(desc, None)

    val looksLikeTypeMismatch = desc.contains("Expected type") || desc.contains("Wrong type")

    if (looksLikeTypeMismatch) {
      // Both the found and the expected type must be extractable, otherwise fall back
      extractBetween(desc, "Found ", " ")
        .orElse(extractBetween(desc, "found ", " "))
        .flatMap { found =>
          extractBetween(desc, "Expected type ", ".").map { expected =>
            ConfigError.Invalid(path, found, Set(expected)): ConfigError
          }
        }
        .getOrElse(unclassified)
    } else if (desc.contains("Unknown key")) {
      extractBetween(desc, "Unknown key '", "'")
        .fold(unclassified)(key => ConfigError.UnknownConfigKey(path, key))
    } else {
      unclassified
    }
  }

  /** Text found between the first occurrence of `start` and the next occurrence of `end`.
    *
    * @return `None` when `start` does not occur in `text`; otherwise the text following `start`,
    *         cut at the first `end` after it, or running to the end of `text` when `end` is absent
    */
  private def extractBetween(text: String, start: String, end: String): Option[String] = {
    val startIdx = text.indexOf(start)
    if (startIdx < 0) None
    else {
      val afterStart = text.substring(startIdx + start.length)
      val endIdx = afterStart.indexOf(end)
      Some(if (endIdx >= 0) afterStart.substring(0, endIdx) else afterStart)
    }
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/data/QuineDataFoldablesFrom.scala
================================================
package com.thatdot.quine.app.data

import com.thatdot.data.{DataFoldableFrom, DataFolderTo}
import com.thatdot.quine.graph.cypher
import com.thatdot.quine.graph.cypher.Expr
import com.thatdot.quine.model.{QuineIdProvider, QuineValue}
import com.thatdot.quine.serialization.data.QuineSerializationFoldablesFrom

object QuineDataFoldablesFrom {
  implicit val cypherValueDataFoldable: DataFoldableFrom[cypher.Value] = new DataFoldableFrom[cypher.Value] {
    def fold[B](value: cypher.Value, folder: DataFolderTo[B]): B = value match {
      case Expr.Null => folder.nullValue
      case number: Expr.Number =>
        number match {
          case
Expr.Integer(long) => folder.integer(long) case Expr.Floating(double) => folder.floating(double) case Expr.Null => folder.nullValue } case bool: Expr.Bool => bool match { case Expr.True => folder.trueValue case Expr.False => folder.falseValue case Expr.Null => folder.nullValue } case value: Expr.PropertyValue => value match { case Expr.Str(string) => folder.string(string) case Expr.Integer(long) => folder.integer(long) case Expr.Floating(double) => folder.floating(double) case Expr.True => folder.trueValue case Expr.False => folder.falseValue case Expr.Bytes(b, _) => folder.bytes(b) case Expr.List(list) => val builder = folder.vectorBuilder() list.foreach(v => builder.add(fold(v, folder))) builder.finish() case Expr.Map(map) => val builder = folder.mapBuilder() map.foreach { case (k, v) => builder.add(k, fold(v, folder)) } builder.finish() case Expr.LocalDateTime(localDateTime) => folder.localDateTime(localDateTime) case Expr.Date(date) => folder.date(date) case Expr.Time(offsetTime) => folder.time(offsetTime) case Expr.LocalTime(localTime) => folder.localTime(localTime) case Expr.DateTime(zonedDateTime) => folder.zonedDateTime(zonedDateTime) case Expr.Duration(duration) => folder.duration(duration) } case other @ (Expr.Node(_, _, _) | Expr.Relationship(_, _, _, _) | Expr.Path(_, _)) => throw new Exception(s"Fold conversion not supported for $other") } } def quineValueDataFoldable(implicit idProvider: QuineIdProvider): DataFoldableFrom[QuineValue] = QuineSerializationFoldablesFrom.quineValueDataFoldableFrom } ================================================ FILE: quine/src/main/scala/com/thatdot/quine/app/data/QuineDataFoldersTo.scala ================================================ package com.thatdot.quine.app.data import java.time.{Duration, LocalDate, LocalDateTime, LocalTime, OffsetTime, ZonedDateTime} import scala.collection.immutable.SortedMap import com.thatdot.data.DataFolderTo import com.thatdot.quine.graph.cypher import 
com.thatdot.quine.graph.cypher.{Expr => ce}

object QuineDataFoldersTo {

  /** Folder that builds Cypher runtime values: every fold callback wraps its argument in the
    * corresponding `Expr` constructor.
    */
  implicit val cypherValueFolder: DataFolderTo[cypher.Value] = new DataFolderTo[cypher.Value] {

    // Constants
    def nullValue: cypher.Value = ce.Null
    def trueValue: cypher.Value = ce.True
    def falseValue: cypher.Value = ce.False

    // Scalars
    def integer(l: Long): cypher.Value = ce.Integer(l)
    def floating(d: Double): cypher.Value = ce.Floating(d)
    def string(s: String): cypher.Value = ce.Str(s)

    // Byte arrays produced by a fold are never flagged as representing an id
    def bytes(b: Array[Byte]): cypher.Value = ce.Bytes(b, representsId = false)

    // Temporal values
    def date(d: LocalDate): cypher.Value = ce.Date(d)
    def time(t: OffsetTime): cypher.Value = ce.Time(t)
    def localTime(t: LocalTime): cypher.Value = ce.LocalTime(t)
    def localDateTime(ldt: LocalDateTime): cypher.Value = ce.LocalDateTime(ldt)
    def zonedDateTime(zdt: ZonedDateTime): cypher.Value = ce.DateTime(zdt)
    def duration(d: Duration): cypher.Value = ce.Duration(d)

    /** Fresh builder collecting elements, in insertion order, into a Cypher list */
    def vectorBuilder(): DataFolderTo.CollectionBuilder[cypher.Value] = {
      val collected = Vector.newBuilder[cypher.Value]
      new DataFolderTo.CollectionBuilder[cypher.Value] {
        def add(a: cypher.Value): Unit = collected += a
        def finish(): cypher.Value = ce.List(collected.result())
      }
    }

    /** Fresh builder collecting key/value pairs into a Cypher map backed by a `SortedMap` */
    def mapBuilder(): DataFolderTo.MapBuilder[cypher.Value] = {
      val entries = SortedMap.newBuilder[String, cypher.Value]
      new DataFolderTo.MapBuilder[cypher.Value] {
        def add(key: String, value: cypher.Value): Unit = entries += ((key, value))
        def finish(): cypher.Value = ce.Map(entries.result())
      }
    }
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/migrations/Migration.scala
================================================
package com.thatdot.quine.app.migrations

import scala.concurrent.Future

import com.thatdot.quine.migrations.{MigrationError, MigrationVersion}
import com.thatdot.quine.util.ComputeAndBlockingExecutionContext

/** A migration represents a need to change the state of the system from one version to the next.
* Note that the migration itself may be applied differently by different products, so the typeclass * pattern is used to define how to apply a migration (see [[Migration.Apply]]). * This trait is itself defined in the least common "application" package, i.e., the Quine application * itself. Conceptually, it's close to belonging in quine-core, but as quine-core is supposed to be * completely unaware of external systems, and many/most migrations will be dealing with external systems, * the interface and utilities are defined in an application package instead. */ trait Migration { val from: MigrationVersion @deprecatedOverriding( "Are you sure you want to introduce a migration that skips versions? If so, suppress this warning", "1.7.0", ) def to: MigrationVersion = MigrationVersion(from.version + 1) } object Migration { /** Typeclass for applying a migration. This is used to define how to apply a migration to a specific * product. The caller should ensure that `run` is only called when the current system version is at least * [[migration.from]]. 
* Migrations should be idempotent, so that they can be rerun if necessary, for example, due to network
    * failures or races from multiple clustered application instances
    */
  trait Apply[M <: Migration] {
    val migration: M
    def run()(implicit ecs: ComputeAndBlockingExecutionContext): Future[Either[MigrationError, Unit]]
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/migrations/QuineMigrations.scala
================================================
package com.thatdot.quine.app.migrations

import scala.concurrent.{ExecutionContext, Future}

import com.thatdot.common.logging.Log._
import com.thatdot.quine.app.migrations.instances.MultipleValuesRewrite
import com.thatdot.quine.graph.{NamespaceId, StandingQueryPattern}
import com.thatdot.quine.migrations.MigrationError
import com.thatdot.quine.persistor.cassandra.{CassandraPersistor, StandingQueryStatesDefinition}
import com.thatdot.quine.persistor.{
  EmptyPersistor,
  InMemoryPersistor,
  PrimePersistor,
  RocksDbPersistor,
  WrappedPersistenceAgent,
}
import com.thatdot.quine.util.ComputeAndBlockingExecutionContext

/** [[Migration.Apply]] instances for the Quine application. These may be reused by the other Quine-based applications
  * if appropriate.
  */
object QuineMigrations {

  /** Applies the [[MultipleValuesRewrite]] migration.
    *
    * This implementation never rewrites persisted data. It only inspects the persistor and, when
    * MultipleValues-related state is found, reports a [[MigrationError.UserInterventionRequired]] that tells
    * the operator how to clear that state by hand.
    *
    * @param persistor  the persistor whose default and per-namespace agents are inspected
    * @param namespaces the namespaces whose persisted state is checked
    */
  class ApplyMultipleValuesRewrite(val persistor: PrimePersistor, val namespaces: Set[NamespaceId])
      extends Migration.Apply[MultipleValuesRewrite.type] {
    val migration = MultipleValuesRewrite

    /** Check whether the persistor holds MultipleValues state.
      *
      * @return `Right(())` when nothing needs migrating; `Left` with a [[MigrationError]] when the persistor
      *         lookup reported one or when the user must intervene manually
      */
    def run()(implicit
      ecs: ComputeAndBlockingExecutionContext,
    ): Future[Either[MigrationError, Unit]] = {
      val defaultPersistor = WrappedPersistenceAgent.unwrap(persistor.getDefault)
      // check the persistor type before doing any persistor lookups -- if it's a persistor that _can't_ have
      // relevant state, we can skip the lookups.
      val persistorTypeMayNeedMigration: Boolean = defaultPersistor match {
        // persistors with no backing storage can't have any relevant state to migrate
        case _: EmptyPersistor => false
        case _: InMemoryPersistor => false
        case _ => true
      }
      if (!persistorTypeMayNeedMigration) Future.successful(Right(()))
      else {
        // The migration is relevant to the configured persistor, so we need to inspect the persistor's state
        // to determine if the migration is necessary.
        val needsMigrationFut = ApplyMultipleValuesRewrite.needsMigration(persistor, namespaces)
        needsMigrationFut
          .map(_.flatMap { needsMigration =>
            if (!needsMigration) {
              Right(())
            } else {
              // prefix to the (persistor-dependent) message
              val adviceContext =
                "Incompatible MultipleValues standing query states detected from a previous version of Quine."
              // persistor-dependent message
              val userAdvice = ApplyMultipleValuesRewrite.persistorSpecificAdvice(persistor, namespaces)
              // suffix to the (persistor-dependent) message
              val changeReference =
                "See https://github.com/thatdot/quine/releases/tag/v1.7.0 for complete change notes"
              Left(
                MigrationError.UserInterventionRequired(
                  safe"${Safe(adviceContext)} ${Safe(userAdvice)}" + safe"\n" + safe"${Safe(changeReference)}",
                ),
              )
            }
          })(ecs.nodeDispatcherEC)
      }
    }
  }

  object ApplyMultipleValuesRewrite {

    /** True when any standing query registered in any namespace uses a MultipleValues pattern. */
    private[this] def anyMultipleValuesQueriesRegistered(persistor: PrimePersistor)(implicit
      ecs: ComputeAndBlockingExecutionContext,
    ): Future[Boolean] =
      persistor
        .getAllStandingQueries()
        .map(
          _.values.flatten // consider all sqs from all namespaces
            .map(_.queryPattern)
            .exists {
              case _: StandingQueryPattern.MultipleValuesQueryPattern => true
              case _ => false
            },
        )(ecs.nodeDispatcherEC)

    /** True when at least one of the given namespaces has persisted MultipleValues states.
      *
      * Namespaces are queried one after another and the chain stops issuing lookups once one reports `true`.
      * Any failure is wrapped in a [[MigrationError.PersistorError]]. The wrapping is applied once, after the
      * fold, so a failure is not wrapped again for each remaining namespace.
      */
    private[this] def anyNamespaceHasMultipleValuesStates(
      persistor: PrimePersistor,
      namespaces: Set[NamespaceId],
    ): Future[Boolean] =
      namespaces.toSeq
        .flatMap(persistor.apply)
        .foldLeft(Future.successful(false))((foundMultipleValuesStatesFut, nextPersistor) =>
          foundMultipleValuesStatesFut
            .flatMap {
              case true => Future.successful(true)
              case false => nextPersistor.containsMultipleValuesStates()
            }(ExecutionContext.parasitic),
        )
        .recoverWith { case err: Throwable =>
          Future.failed(
            new MigrationError.PersistorError(
              err,
            ),
          )
        }(ExecutionContext.parasitic)

    /** Perform persistor lookups to see if the persistor contains any multiplevalues-related state
      *
      * @return `Right(true)` when a MultipleValues standing query is registered or MultipleValues states are
      *         persisted, `Right(false)` when neither is found, and `Left` when a lookup failed with a
      *         [[MigrationError]]. Failures of any other type are left in the returned future.
      */
    def needsMigration(persistor: PrimePersistor, namespaces: Set[NamespaceId])(implicit
      ecs: ComputeAndBlockingExecutionContext,
    ): Future[Either[MigrationError, Boolean]] =
      anyMultipleValuesQueriesRegistered(persistor)
        .flatMap {
          case true => Future.successful(Right(true))
          case false =>
            anyNamespaceHasMultipleValuesStates(persistor, namespaces)
              .map(Right(_))(ExecutionContext.parasitic)
        }(ecs.nodeDispatcherEC)
        .recover { case err: MigrationError =>
          Left(err)
        }(ExecutionContext.parasitic)

    /** Build the persistor-specific part of the "user intervention required" message. */
    private def persistorSpecificAdvice(persistor: PrimePersistor, namespaces: Set[NamespaceId]) =
      WrappedPersistenceAgent.unwrap(persistor.getDefault) match {
        case cass: CassandraPersistor =>
          // In case we don't have a keyspace connected, we can still give a sensible message and let the user
          // do their own string substitution
          val (keyspace, keyspaceExplanation) = cass.keyspace.fold(
            "<keyspace>" -> "\n(where <keyspace> is the name of your configured keyspace).",
          )(
            _ -> "",
          )
          """In order to continue using your persisted data in Cassandra, please run the previous version of
            |Quine and use the API to remove all standing queries with the `MultipleValues` mode. Then,
            |before starting the updated version of Quine, remove all incompatible feature-specific data from
            |the Cassandra persistor using the following CQL command[s]:""".stripMargin.replace('\n', ' ') +
          namespaces.toSeq
            .map(new StandingQueryStatesDefinition(_).name)
            .map(tableName => s"  TRUNCATE TABLE $keyspace.$tableName;")
            .mkString(start = "\n", sep = "\n", end = keyspaceExplanation)
        case _: RocksDbPersistor =>
          s"""The RocksDB-type persistor does not support side-channel updates, so migration is not possible at
             |this time. Please remove the following directory/directories before restarting Quine:
             |""".stripMargin.replace('\n', ' ').trim +
          namespaces.toSeq
            .flatMap(persistor.apply)
            .map(namespaced => WrappedPersistenceAgent.unwrap(namespaced))
            // Match on the type instead of casting, so a namespaced persistor that is not RocksDB-backed is
            // skipped rather than failing while the advice message is being built.
            .collect { case rocks: RocksDbPersistor => s"  ${rocks.filePath}" }
            .mkString(start = "\n", sep = "\n", end = "")
        case badNewsPersistor =>
          s"""The ${badNewsPersistor.getClass.getName}-type persistor does not
             |support side-channel updates, so no migration is possible at this time. Please remove
             |the persistor's stored data and restart Quine.""".stripMargin
      }
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/migrations/instances/MultipleValuesRewrite.scala
================================================
package com.thatdot.quine.app.migrations.instances

import com.thatdot.quine.app.migrations.Migration
import com.thatdot.quine.migrations.MigrationVersion

/** The MultipleValues rewrite introduced in Quine 1.7.0 */
object MultipleValuesRewrite extends Migration {
  val from: MigrationVersion = MigrationVersion(0)
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/migrations/instances/package.scala
================================================
package com.thatdot.quine.app.migrations

/** This package contains an object for each feature that may require an out-of-band migration
  * step.
Each object must extend [[Migration]] and be a singleton (`object`).
  *
  * See [[Migration.Apply]]
  */
package object instances {

  /** Every known migration, listed in the order in which it applies. */
  val all: Seq[Migration] = Seq(MultipleValuesRewrite)

  // Fail at initialization unless each migration's `from` version equals its position in `all`.
  require(
    all.iterator.zipWithIndex.forall { case (registered, position) => registered.from.version == position },
    "Migrations must be contiguous and in-order",
  )
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/README.md
================================================
## Models

This package contains internal object models. They implement the effects described in the user facing API.

We are considering renaming or replacing this package due to the word "models" suggesting "data models" to some readers. This could take the form of extracting each sub-package to separate top level SBT projects, or just picking a replacement word.

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest/ContentDelimitedIngestSrcDef.scala
================================================
package com.thatdot.quine.app.model.ingest

import scala.util.Success

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.connectors.csv.scaladsl.{CsvParsing, CsvToMap}
import org.apache.pekko.stream.scaladsl.{Flow, Framing, Source}
import org.apache.pekko.util.ByteString

import com.thatdot.common.logging.Log._
import com.thatdot.quine.app.model.ingest.serialization.{
  CypherJsonInputFormat,
  CypherRawInputFormat,
  CypherStringInputFormat,
  ImportFormat,
  QuinePatternJsonInputFormat,
  QuinePatternRawInputFormat,
  QuinePatternStringInputFormat,
}
import com.thatdot.quine.graph.cypher.Value
import com.thatdot.quine.graph.{CypherOpsGraph, NamespaceId, cypher}
import com.thatdot.quine.routes.FileIngestFormat
import com.thatdot.quine.routes.FileIngestFormat.{
  CypherCsv,
  CypherJson,
  CypherLine,
  QuinePatternCsv,
  QuinePatternJson,
  QuinePatternLine,
}
import com.thatdot.quine.util.SwitchMode

/** Ingest source runtime that
requires managing its own record delimitation -- for example, line-based ingests or CSV
  *
  * The first `startAtOffset` records are dropped and, when `ingestLimit` is set, at most that many of the
  * following records are let through (see [[bounded]]).
  */
abstract class ContentDelimitedIngestSrcDef(
  initialSwitchMode: SwitchMode,
  format: ImportFormat,
  src: Source[ByteString, NotUsed],
  encodingString: String,
  parallelism: Int,
  startAtOffset: Long,
  ingestLimit: Option[Long],
  maxPerSecond: Option[Int],
  name: String,
  intoNamespace: NamespaceId,
)(implicit graph: CypherOpsGraph)
    extends RawValuesIngestSrcDef(format, initialSwitchMode, parallelism, maxPerSecond, Seq(), name, intoNamespace) {

  /** The charset to decode records with, and the flow that converts the raw input into that charset. */
  val (charset, transcode) = IngestSrcDef.getTranscoder(encodingString)

  /** Skip `startAtOffset` records, then pass through at most `ingestLimit` records (all of them if unset). */
  def bounded[A]: Flow[A, A, NotUsed] = ingestLimit match {
    case None => Flow[A].drop(startAtOffset)
    case Some(limit) => Flow[A].drop(startAtOffset).take(limit)
  }
}

/** Ingest source runtime that delimits its records by newline characters in the input stream */
abstract class LineDelimitedIngestSrcDef[A](
  initialSwitchMode: SwitchMode,
  format: ImportFormat,
  src: Source[ByteString, NotUsed],
  encodingString: String,
  parallelism: Int,
  maximumLineSize: Int,
  startAtOffset: Long,
  ingestLimit: Option[Long],
  maxPerSecond: Option[Int],
  name: String,
  intoNamespace: NamespaceId,
)(implicit graph: CypherOpsGraph)
    extends ContentDelimitedIngestSrcDef(
      initialSwitchMode,
      format,
      src,
      encodingString,
      parallelism,
      startAtOffset,
      ingestLimit,
      maxPerSecond,
      name,
      intoNamespace,
    ) {

  type InputType = ByteString

  /** Split the byte stream on `\n`, dropping one trailing `\r` from each frame so CRLF input also works. */
  val newLineDelimited: Flow[ByteString, ByteString, NotUsed] = Framing
    .delimiter(ByteString("\n"), maximumLineSize, allowTruncation = true)
    .map(line => if (!line.isEmpty && line.last == '\r') line.dropRight(1) else line)

  def rawBytes(value: ByteString): Array[Byte] = value.toArray
}

/** CSV ingest whose rows are handed to a [[QuinePatternRawInputFormat]] query. */
case class QuinePatternCsvIngestSrcDef(
  initialSwitchMode: SwitchMode,
  format: FileIngestFormat.QuinePatternCsv,
  src: Source[ByteString, NotUsed],
  encodingString: String,
  parallelism: Int,
  maximumLineSize: Int,
  startAtOffset: Long,
  ingestLimit: Option[Long],
  maxPerSecond: Option[Int],
  override val name: String,
  override val intoNamespace: NamespaceId,
)(implicit val graph: CypherOpsGraph, val logConfig: LogConfig)
    extends ContentDelimitedIngestSrcDef(
      initialSwitchMode,
      new QuinePatternRawInputFormat(format.query, format.parameter),
      src,
      encodingString,
      parallelism,
      startAtOffset,
      ingestLimit,
      maxPerSecond,
      name,
      intoNamespace,
    ) {

  type InputType = List[ByteString] // csv row

  def source(): Source[List[ByteString], NotUsed] = src
    .via(
      CsvParsing.lineScanner(format.delimiter.byte, format.quoteChar.byte, format.escapeChar.byte, maximumLineSize),
    )
    .via(bounded)

  /** Turn CSV rows into Cypher values.
    *
    * @param headerDef `Right(names)` maps each row onto the given column names; `Left(true)` builds the map
    *                  with `CsvToMap.toMap()`; `Left(false)` emits each row as a list of strings
    */
  def csvHeadersFlow(headerDef: Either[Boolean, List[String]]): Flow[List[ByteString], Value, NotUsed] =
    headerDef match {
      case Right(h) =>
        CsvToMap
          .withHeaders(h: _*)
          .map(m => cypher.Expr.Map(m.view.mapValues(bs => cypher.Expr.Str(bs.decodeString(charset)))))
      case Left(true) =>
        CsvToMap
          .toMap()
          .map(m => cypher.Expr.Map(m.view.mapValues(bs => cypher.Expr.Str(bs.decodeString(charset)))))
      case Left(false) =>
        Flow[List[ByteString]]
          .map(l => cypher.Expr.List(l.map(bs => cypher.Expr.Str(bs.decodeString(charset))).toVector))
    }

  override val deserializeAndMeter: Flow[List[ByteString], TryDeserialized, NotUsed] =
    Flow[List[ByteString]]
      // NB when using headers, the record count here will consider the header-defining row as a "record". Since Quine
      // metrics are only heuristic, this is an acceptable trade-off for simpler code.
      .wireTap(bs => meter.mark(bs.map(_.length).sum))
      .via(csvHeadersFlow(format.headers))
      // Here the empty list is a placeholder for the original
      // value in the TryDeserialized response value. Since this
      // is only used in errors and this is a success response,
      // it's not necessary to populate it.
      .map((t: Value) => (Success(t), Nil))

  /** Define a way to extract raw bytes from a single input event
    *
    * The row's fields are joined with the configured delimiter byte; an empty row yields an empty array.
    */
  def rawBytes(value: List[ByteString]): Array[Byte] = {
    // should never be used anyways since csv defines its own deserializeAndMeter
    logger.debug(
      safe"""${Safe(getClass.getSimpleName)}.rawBytes was called: this function has an inefficient
            |implementation but should not be accessible during normal operation.""".cleanLines,
    )
    ContentDelimitedIngestSrcDef.joinCsvRow(value, format.delimiter.byte)
  }
}

/** CSV ingest whose rows are handed to a [[CypherRawInputFormat]] query. */
case class CsvIngestSrcDef(
  initialSwitchMode: SwitchMode,
  format: FileIngestFormat.CypherCsv,
  src: Source[ByteString, NotUsed],
  encodingString: String,
  parallelism: Int,
  maximumLineSize: Int,
  startAtOffset: Long,
  ingestLimit: Option[Long],
  maxPerSecond: Option[Int],
  override val name: String,
  override val intoNamespace: NamespaceId,
)(implicit val graph: CypherOpsGraph, val logConfig: LogConfig)
    extends ContentDelimitedIngestSrcDef(
      initialSwitchMode,
      new CypherRawInputFormat(format.query, format.parameter),
      src,
      encodingString,
      parallelism,
      startAtOffset,
      ingestLimit,
      maxPerSecond,
      name,
      intoNamespace,
    ) {

  type InputType = List[ByteString] // csv row

  def source(): Source[List[ByteString], NotUsed] = src
    .via(
      CsvParsing.lineScanner(format.delimiter.byte, format.quoteChar.byte, format.escapeChar.byte, maximumLineSize),
    )
    .via(bounded)

  /** Turn CSV rows into Cypher values.
    *
    * @param headerDef `Right(names)` maps each row onto the given column names; `Left(true)` builds the map
    *                  with `CsvToMap.toMap()`; `Left(false)` emits each row as a list of strings
    */
  def csvHeadersFlow(headerDef: Either[Boolean, List[String]]): Flow[List[ByteString], Value, NotUsed] =
    headerDef match {
      case Right(h) =>
        CsvToMap
          .withHeaders(h: _*)
          .map(m => cypher.Expr.Map(m.view.mapValues(bs => cypher.Expr.Str(bs.decodeString(charset)))))
      case Left(true) =>
        CsvToMap
          .toMap()
          .map(m => cypher.Expr.Map(m.view.mapValues(bs => cypher.Expr.Str(bs.decodeString(charset)))))
      case Left(false) =>
        Flow[List[ByteString]]
          .map(l => cypher.Expr.List(l.map(bs => cypher.Expr.Str(bs.decodeString(charset))).toVector))
    }

  override val deserializeAndMeter: Flow[List[ByteString], TryDeserialized, NotUsed] =
    Flow[List[ByteString]]
      // NB when using headers, the record count here will consider the header-defining row as a "record". Since Quine
      // metrics are only heuristic, this is an acceptable trade-off for simpler code.
      .wireTap(bs => meter.mark(bs.map(_.length).sum))
      .via(csvHeadersFlow(format.headers))
      // Here the empty list is a placeholder for the original
      // value in the TryDeserialized response value. Since this
      // is only used in errors and this is a success response,
      // it's not necessary to populate it.
      .map((t: Value) => (Success(t), Nil))

  /** Define a way to extract raw bytes from a single input event
    *
    * The row's fields are joined with the configured delimiter byte; an empty row yields an empty array.
    */
  def rawBytes(value: List[ByteString]): Array[Byte] = {
    // should never be used anyways since csv defines its own deserializeAndMeter
    logger.debug(
      safe"""${Safe(getClass.getSimpleName)}.rawBytes was called: this function has an inefficient
            |implementation but should not be accessible during normal operation.""".cleanLines,
    )
    ContentDelimitedIngestSrcDef.joinCsvRow(value, format.delimiter.byte)
  }
}

/** Line-delimited ingest that passes each line to a [[CypherStringInputFormat]]. */
case class StringIngestSrcDef(
  initialSwitchMode: SwitchMode,
  format: CypherStringInputFormat,
  src: Source[ByteString, NotUsed],
  encodingString: String,
  parallelism: Int,
  maximumLineSize: Int,
  startAtOffset: Long,
  ingestLimit: Option[Long],
  maxPerSecond: Option[Int],
  override val name: String,
  override val intoNamespace: NamespaceId,
)(implicit val graph: CypherOpsGraph, val logConfig: LogConfig)
    extends LineDelimitedIngestSrcDef[cypher.Value](
      initialSwitchMode,
      format,
      src,
      encodingString,
      parallelism,
      maximumLineSize,
      startAtOffset,
      ingestLimit,
      maxPerSecond,
      name,
      intoNamespace,
    ) {

  def source(): Source[ByteString, NotUsed] = src
    .via(transcode)
    .via(newLineDelimited)
    .via(bounded)

}

/** Line-delimited ingest that passes each line to a [[QuinePatternStringInputFormat]]. */
case class QPStringIngestSrcDef(
  initialSwitchMode: SwitchMode,
  format: QuinePatternStringInputFormat,
  src: Source[ByteString, NotUsed],
  encodingString: String,
  parallelism: Int,
  maximumLineSize: Int,
  startAtOffset: Long,
  ingestLimit: Option[Long],
  maxPerSecond: Option[Int],
  override val name: String,
  override val intoNamespace: NamespaceId,
)(implicit val graph: CypherOpsGraph, val logConfig: LogConfig)
    extends LineDelimitedIngestSrcDef(
      initialSwitchMode,
      format,
      src,
      encodingString,
      parallelism,
      maximumLineSize,
      startAtOffset,
      ingestLimit,
      maxPerSecond,
      name,
      intoNamespace,
    ) {

  def source(): Source[ByteString, NotUsed] = src
    .via(transcode)
    .via(newLineDelimited)
    .via(bounded)

}

/** Line-delimited ingest that passes each line to a [[CypherJsonInputFormat]]. */
case class JsonLinesIngestSrcDef(
  initialSwitchMode: SwitchMode,
  format: CypherJsonInputFormat,
  src: Source[ByteString, NotUsed],
  encodingString: String,
  parallelism: Int,
  maximumLineSize: Int,
  startAtOffset: Long,
  ingestLimit: Option[Long],
  maxPerSecond: Option[Int],
  override val name: String,
  override val intoNamespace: NamespaceId,
)(implicit val graph: CypherOpsGraph, protected val logConfig: LogConfig)
    extends LineDelimitedIngestSrcDef(
      initialSwitchMode,
      format,
      src,
      encodingString,
      parallelism,
      maximumLineSize,
      startAtOffset,
      ingestLimit,
      maxPerSecond,
      name,
      intoNamespace,
    ) {

  def source(): Source[ByteString, NotUsed] = src
    .via(transcode)
    .via(newLineDelimited)
    .via(bounded)

  override def rawBytes(value: ByteString): Array[Byte] = value.toArray

}

/** Line-delimited ingest that passes each line to a [[QuinePatternJsonInputFormat]]. */
case class QPJsonLinesIngestSrcDef(
  initialSwitchMode: SwitchMode,
  format: QuinePatternJsonInputFormat,
  src: Source[ByteString, NotUsed],
  encodingString: String,
  parallelism: Int,
  maximumLineSize: Int,
  startAtOffset: Long,
  ingestLimit: Option[Long],
  maxPerSecond: Option[Int],
  override val name: String,
  override val intoNamespace: NamespaceId,
)(implicit val graph: CypherOpsGraph, val logConfig: LogConfig)
    extends LineDelimitedIngestSrcDef(
      initialSwitchMode,
      format,
      src,
      encodingString,
      parallelism,
      maximumLineSize,
      startAtOffset,
      ingestLimit,
      maxPerSecond,
      name,
      intoNamespace,
    ) {

  def source(): Source[ByteString, NotUsed] = src
    .via(transcode)
    .via(newLineDelimited)
    .via(bounded)

  override def rawBytes(value: ByteString): Array[Byte] = value.toArray

}

object ContentDelimitedIngestSrcDef {

  /** Join the fields of one CSV row with `delimiter` between them.
    *
    * Uses a single builder, so the work is linear in the row's byte count. An empty row yields an empty
    * array rather than throwing, which `List.reduce` would do.
    */
  private[ingest] def joinCsvRow(row: List[ByteString], delimiter: Byte): Array[Byte] = row match {
    case Nil => Array.emptyByteArray
    case firstField :: remainingFields =>
      val joined = ByteString.createBuilder
      joined ++= firstField
      remainingFields.foreach { field =>
        joined += delimiter
        joined ++= field
      }
      joined.result().toArray
  }

  /** Build the content-delimited ingest runtime that matches the requested file `format`. */
  def apply[A](
    initialSwitchMode: SwitchMode,
    format: FileIngestFormat,
    src: Source[ByteString, NotUsed],
    encodingString: String,
    parallelism: Int,
    maximumLineSize: Int,
    startAtOffset: Long,
    ingestLimit: Option[Long],
    maxPerSecond: Option[Int],
    name: String,
    intoNamespace: NamespaceId,
  )(implicit graph: CypherOpsGraph, logConfig: LogConfig): ContentDelimitedIngestSrcDef =
    format match {
      case CypherLine(query, parameter) =>
        StringIngestSrcDef(
          initialSwitchMode,
          new CypherStringInputFormat(query, parameter, encodingString),
          src,
          encodingString,
          parallelism,
          maximumLineSize,
          startAtOffset,
          ingestLimit,
          maxPerSecond,
          name,
          intoNamespace,
        )
      case QuinePatternLine(query, parameter) =>
        QPStringIngestSrcDef(
          initialSwitchMode,
          new QuinePatternStringInputFormat(query, parameter, encodingString),
          src,
          encodingString,
          parallelism,
          maximumLineSize,
          startAtOffset,
          ingestLimit,
          maxPerSecond,
          name,
          intoNamespace,
        )
      case CypherJson(query, parameter) =>
        JsonLinesIngestSrcDef(
          initialSwitchMode,
          new CypherJsonInputFormat(query, parameter),
          src,
          encodingString,
          parallelism,
          maximumLineSize,
          startAtOffset,
          ingestLimit,
          maxPerSecond,
          name,
          intoNamespace,
        )
      case QuinePatternJson(query, parameter) =>
        QPJsonLinesIngestSrcDef(
          initialSwitchMode,
          new QuinePatternJsonInputFormat(query, parameter),
          src,
          encodingString,
          parallelism,
          maximumLineSize,
          startAtOffset,
          ingestLimit,
          maxPerSecond,
          name,
          intoNamespace,
        )
      case cv @ CypherCsv(_, _, _, _, _, _) =>
        CsvIngestSrcDef(
          initialSwitchMode,
          cv,
          src,
          encodingString,
          parallelism,
          maximumLineSize,
          startAtOffset,
          ingestLimit,
          maxPerSecond,
          name,
          intoNamespace,
        )
      case qpcv @ QuinePatternCsv(_, _, _, _, _, _) =>
        QuinePatternCsvIngestSrcDef(
          initialSwitchMode,
          qpcv,
          src,
          encodingString,
          parallelism,
          maximumLineSize,
          startAtOffset,
          ingestLimit,
          maxPerSecond,
          name,
          intoNamespace,
        )
    }
}

================================================
FILE:
quine/src/main/scala/com/thatdot/quine/app/model/ingest/IngestSrcDef.scala
================================================
package com.thatdot.quine.app.model.ingest

import java.nio.charset.{Charset, StandardCharsets}

import scala.concurrent.duration.{Duration, DurationInt}
import scala.concurrent.{Await, ExecutionContext, Future, Promise}
import scala.util.{Failure, Success, Try}

import org.apache.pekko.actor.ActorSystem
import org.apache.pekko.stream.connectors.s3.scaladsl.S3
import org.apache.pekko.stream.connectors.s3.{ObjectMetadata, S3Attributes, S3Ext, S3Settings}
import org.apache.pekko.stream.connectors.text.scaladsl.TextFlow
import org.apache.pekko.stream.scaladsl.{Flow, Keep, RestartSource, Source, StreamConverters}
import org.apache.pekko.stream.{KillSwitches, RestartSettings}
import org.apache.pekko.util.ByteString
import org.apache.pekko.{Done, NotUsed}

import cats.data.ValidatedNel
import cats.implicits.catsSyntaxValidatedId
import com.codahale.metrics.Timer
import org.apache.kafka.common.KafkaException
import software.amazon.awssdk.core.exception.SdkException

import com.thatdot.common.logging.Log.{LazySafeLogging, LogConfig, Safe, SafeLoggableInterpolator}
import com.thatdot.quine.app.config.FileAccessPolicy
import com.thatdot.quine.app.model.ingest.serialization._
import com.thatdot.quine.app.model.ingest.util.AwsOps
import com.thatdot.quine.app.model.ingest2.sources.FileSource
import com.thatdot.quine.app.routes.{IngestMeter, IngestMetered}
import com.thatdot.quine.app.{ControlSwitches, PekkoKillSwitch, QuineAppIngestControl, ShutdownSwitch}
import com.thatdot.quine.graph.MasterStream.IngestSrcExecToken
import com.thatdot.quine.graph.cypher.{Value => CypherValue}
import com.thatdot.quine.graph.metrics.implicits.TimeFuture
import com.thatdot.quine.graph.{CypherOpsGraph, NamespaceId}
import com.thatdot.quine.routes._
import com.thatdot.quine.serialization.ProtobufSchemaCache
import com.thatdot.quine.util.Log.implicits._
import com.thatdot.quine.util.StringInput.filenameOrUrl
import com.thatdot.quine.util.{SwitchMode, Valve, ValveSwitch}

/** This represents the minimum functionality that is used to insert values into a CypherOps graph. */
trait QuineIngestSource extends LazySafeLogging {

  val name: String
  implicit val graph: CypherOpsGraph

  // Written by `setControl` (which may run on different threads) and read by `getControl`. `@volatile` makes
  // the most recent write visible to readers; concurrent `setControl` calls remain last-writer-wins.
  @volatile private var ingestControl: Option[Future[QuineAppIngestControl]] = None
  // Completed by the first control handed to `setControl`; serves `getControl` until `ingestControl` is set.
  private val controlPromise: Promise[QuineAppIngestControl] = Promise()

  val meter: IngestMeter

  /** Fully assembled stream with the following operations applied:
    *
    * - restart settings
    * - shutdown switch
    * - valve
    * - throttle
    * - write to graph
    * - ack
    */
  def stream(
    intoNamespace: NamespaceId,
    registerTerminationHooks: Future[Done] => Unit,
  ): Source[IngestSrcExecToken, NotUsed]

  /** MaxPerSecond rate limiting.
    *
    * The returned flow always ends with `graph.ingestThrottleFlow`; when `maximumPerSecond` is set, a
    * per-second throttle is applied in front of it.
    */
  def throttle[A](graph: CypherOpsGraph, maximumPerSecond: Option[Int]): Flow[A, A, NotUsed] =
    maximumPerSecond match {
      case Some(perSec) => Flow[A].throttle(perSec, 1.second).via(graph.ingestThrottleFlow)
      case None => graph.ingestThrottleFlow
    }

  /** Restart policy for the ingest stream: a fixed 10-second backoff, at most 3 restarts within 31 seconds,
    * and only for Kafka or AWS SDK exceptions.
    */
  val restartSettings: RestartSettings =
    RestartSettings(minBackoff = 10.seconds, maxBackoff = 10.seconds, 2.0)
      .withMaxRestarts(3, 31.seconds)
      .withRestartOn {
        case _: KafkaException => true
        case _: SdkException => true
        case _ => false
      }

  /** Update the ingest's control handle and register termination hooks. This may be called multiple times if the
    * initial stream construction fails (up to the `restartSettings` defined above), and will be called from different
    * threads.
    *
    * @param control                  the control handle of the newly materialized stream
    * @param desiredSwitchMode        the valve position to flip to once the control is available
    * @param registerTerminationHooks callback that receives the stream's termination signal
    */
  protected def setControl(
    control: Future[QuineAppIngestControl],
    desiredSwitchMode: SwitchMode,
    registerTerminationHooks: Future[Done] => Unit,
  ): Unit = {

    val streamMaterializerEc = graph.materializer.executionContext

    // Ensure valve is opened if required and termination hooks are registered
    control.foreach(c =>
      c.valveHandle
        .flip(desiredSwitchMode)
        // a stream that has already detached has no valve left to flip; treat that as "not flipped"
        .recover { case _: org.apache.pekko.stream.StreamDetachedException => false }(streamMaterializerEc),
    )(graph.nodeDispatcherEC)
    // `foreach` rather than `map`: the registration is a pure side effect and its result was discarded
    control.foreach(c => registerTerminationHooks(c.termSignal))(graph.nodeDispatcherEC)

    // Set the appropriate ref and deferred ingest control
    control.onComplete { result =>
      val controlsSuccessfullyAttached = controlPromise.tryComplete(result)
      if (!controlsSuccessfullyAttached) {
        logger.warn(
          safe"""Ingest stream: ${Safe(name)} was materialized more than once. Control handles for pausing,
                |resuming, and terminating the stream may be unavailable (usually temporary).""".cleanLines,
        )
      }
    }(streamMaterializerEc)
    // TODO not fully threadsafe: concurrent calls race and the last writer wins
    ingestControl = Some(control)
  }

  /** The control most recently passed to [[setControl]], or, before any call, a future that completes with
    * the first control once one is set.
    */
  def getControl: Future[QuineAppIngestControl] =
    ingestControl.getOrElse(controlPromise.future)
}

/** Definition of an ingest that performs the actions
  *    sourceWithShutdown -> throttle -> writeToGraph -> ack
  *    @see [[stream]]
  *
  * Because some libraries define a source as simply a flow of raw values,
  * and some (e.g. Kafka, Pulsar) define sources with other functionality
  * already applied (source of values and a control switch), there are 2 places
  * provided to extend with additional ingest types:
  *
  * [[IngestSrcDef]] builds a stream from sourceWithShutdown: Source[TryDeserialized, ShutdownSwitch]
  * This requires a source of deserialized values. The source is responsible for
  * defining metering, since that requires access to the original values.
  *
  * [[RawValuesIngestSrcDef]] builds from source of raw values: Source[InputType, NotUsed].
  * That is, defined by a stream of uninterpreted inputs.
The RawValues ingest
  * is responsible for defining how results will be deserialized from raw bytes.
  */
abstract class IngestSrcDef(
  format: ImportFormat,
  initialSwitchMode: SwitchMode,
  parallelism: Int,
  maxPerSecond: Option[Int],
  val name: String,
  val intoNamespace: NamespaceId,
)(implicit graph: CypherOpsGraph)
    extends QuineIngestSource
    with LazySafeLogging {
  implicit protected def logConfig: LogConfig

  implicit val system: ActorSystem = graph.system
  val isSingleHost: Boolean = graph.isSingleHost

  /** The type of a single value to be ingested. Data sources will be defined
    * as suppliers of this type.
    */
  type InputType

  /** A base type that is carried through streams that includes both the
    * (possibly) deserialized value as well as the original input.
    * The original input is carried through for later ack-ing or other
    * reference.
    */
  type TryDeserialized = (Try[CypherValue], InputType)

  final val meter: IngestMeter = IngestMetered.ingestMeter(intoNamespace, name, graph.metrics)

  /** A source of deserialized values along with a control. Ingest types
    * that provide a source of raw types should extend [[RawValuesIngestSrcDef]]
    * instead of this class.
    */
  def sourceWithShutdown(): Source[TryDeserialized, ShutdownSwitch]

  /** MaxPerSecond rate limiting.
    *
    * Delegates to the two-argument `throttle`, which already ends with `graph.ingestThrottleFlow` in both of
    * its branches. That flow is therefore not appended a second time here, matching what [[stream]] applies.
    */
  def throttle[B](): Flow[B, B, NotUsed] = throttle[B](graph, maxPerSecond)

  /** Default no-op implementation */
  val ack: Flow[TryDeserialized, Done, NotUsed] = Flow[TryDeserialized].map(_ => Done)

  /** Extend for by-instance naming (e.g. to include url) */
  val ingestToken: IngestSrcExecToken = IngestSrcExecToken(name)

  /** Write successful values to the graph.
    *
    * A successfully deserialized record is written through `format` and timed with the ingest query timer.
    * A record that failed to deserialize is logged as a warning and passed along unchanged, so the stream
    * keeps consuming.
    */
  protected def writeSuccessValues(intoNamespace: NamespaceId)(record: TryDeserialized): Future[TryDeserialized] =
    record match {
      case (Success(deserializedRecord), _) =>
        graph.metrics
          .ingestQueryTimer(intoNamespace, name)
          .time(
            format
              .writeValueToGraph(graph, intoNamespace, deserializedRecord)
              .map(_ => record)(ExecutionContext.parasitic),
          )
      case failedAttempt @ (Failure(deserializationError), sourceRecord @ _) =>
        // TODO QU-1379 make this behavior configurable between "Log and keep consuming" vs
        //  "halt the stream on corrupted records"
        // If stream should halt on error:
        // Future.failed(deserializationError)
        // If stream should log and keep consuming:
        logger.warn(
          log"""Ingest ${Safe(name)} in namespace ${Safe(intoNamespace)}
               |failed to deserialize ingested record: ${sourceRecord.toString}
               |""".cleanLines withException deserializationError,
        )
        Future.successful(failedAttempt)
    }

  /** If the input value is properly deserialized, insert into the graph, otherwise
    * propagate the error.
    */
  def writeToGraph(intoNamespace: NamespaceId): Flow[TryDeserialized, TryDeserialized, NotUsed] =
    Flow[TryDeserialized].mapAsyncUnordered(parallelism)(writeSuccessValues(intoNamespace))

  /** Assembled stream definition.
    *
    * The whole pipeline is built inside a restarting source, so a restart materializes a fresh valve and
    * kill switch and calls `setControl` again with the new handles.
    */
  def stream(
    intoNamespace: NamespaceId,
    registerTerminationHooks: Future[Done] => Unit,
  ): Source[IngestSrcExecToken, NotUsed] =
    RestartSource.onFailuresWithBackoff(restartSettings) { () =>
      sourceWithShutdown()
        .viaMat(Valve(initialSwitchMode))(Keep.both)
        .via(throttle(graph, maxPerSecond))
        .via(writeToGraph(intoNamespace))
        .via(ack)
        .map(_ => ingestToken)
        .watchTermination() { case ((a: ShutdownSwitch, b: Future[ValveSwitch]), c: Future[Done]) =>
          b.map(v => ControlSwitches(a, v, c))(ExecutionContext.parasitic)
        }
        .mapMaterializedValue(c => setControl(c, initialSwitchMode, registerTerminationHooks))
        .named(name)
    }

}

/** Define an ingest from the definition of a Source of InputType.
*/
abstract class RawValuesIngestSrcDef[A](
  format: ImportFormat,
  initialSwitchMode: SwitchMode,
  parallelism: Int,
  maxPerSecond: Option[Int],
  decoders: Seq[ContentDecoder],
  name: String,
  intoNamespace: NamespaceId,
)(implicit graph: CypherOpsGraph)
    extends IngestSrcDef(format, initialSwitchMode, parallelism, maxPerSecond, name, intoNamespace) {

  // Timer handed to the format on every deserialization call below.
  private val deserializationTimer: Timer = meter.unmanagedDeserializationTimer

  /** Try to deserialize a value of InputType into a CypherValue.  This method
    * also meters the raw byte length of the input.
    *
    * The metered length is that of the bytes returned by `rawBytes`, taken before `decoders` are applied.
    * The original input is carried alongside the deserialization result.
    */
  val deserializeAndMeter: Flow[InputType, TryDeserialized, NotUsed] =
    Flow[InputType].map { input: InputType =>
      val bytes = rawBytes(input)
      meter.mark(bytes.length)
      val decoded = ContentDecoder.decode(decoders, bytes)
      (
        format.importMessageSafeBytes(
          decoded,
          graph.isSingleHost,
          deserializationTimer,
        ),
        input,
      )
    }

  /** Define a way to extract raw bytes from a single input event */
  def rawBytes(value: InputType): Array[Byte]

  /** Define a data source */
  def source(): Source[InputType, NotUsed]

  /** Default value source is defined as a combination of the raw source and kill switch.
    * IngestSrcDef types that need to alter this behavior should extend [[IngestSrcDef]].
    */
  def sourceWithShutdown(): Source[TryDeserialized, ShutdownSwitch] =
    source()
      .viaMat(KillSwitches.single)(Keep.right)
      .mapMaterializedValue(ks => PekkoKillSwitch(ks))
      .via(deserializeAndMeter)

}

object IngestSrcDef extends LazySafeLogging {

  /** Select the [[ImportFormat]] implementation for a streamed record format.
    *
    * The protobuf case blocks the calling thread, with no timeout, until the message descriptor is loaded.
    */
  private def importFormatFor(
    label: StreamedRecordFormat,
  )(implicit protobufSchemaCache: ProtobufSchemaCache, logConfig: LogConfig): ImportFormat =
    label match {
      case StreamedRecordFormat.QuinePatternJson(query, parameter) =>
        new QuinePatternJsonInputFormat(query, parameter)
      case StreamedRecordFormat.CypherJson(query, parameter) =>
        new CypherJsonInputFormat(query, parameter)
      case StreamedRecordFormat.CypherProtobuf(query, parameter, schemaUrl, typeName) =>
        // this is a blocking call, but it should only actually block until the first time a type is successfully
        // loaded. This was left as blocking because lifting the effect to a broader context would mean either:
        // - making ingest startup async, which would require extensive changes to QuineApp, startup, and potentially
        //   clustering protocols, OR
        // - making the decode bytes step of ingest async, which violates the Kafka API's expectation that a
        //   `org.apache.kafka.common.serialization.Deserializer` is synchronous.
        val descriptor = Await.result(
          protobufSchemaCache.getMessageDescriptor(filenameOrUrl(schemaUrl), typeName, flushOnFail = true),
          Duration.Inf,
        )
        new ProtobufInputFormat(query, parameter, new ProtobufParser(descriptor))
      case StreamedRecordFormat.CypherRaw(query, parameter) =>
        new CypherRawInputFormat(query, parameter)
      case StreamedRecordFormat.Drop => new TestOnlyDrop()
    }

  /* Identify by name the character set that should be assumed, along with a possible
   * transcoding flow needed to reach that encoding. Although we want to support all character
   * sets, this is quite difficult when our framing methods are designed to work over byte
   * sequences. Thankfully, for content-delimited formats, since we frame over only a small
   * number of delimiters, we can overfit to a small subset of very common encodings which:
   *
   *  - share the same single-byte representation for these delimiter characters
   *  - those single-byte representations can't occur anywhere else in the string's bytes
   *
   * For all other character sets, we first transcode to UTF-8.
   *
   * Returns the charset that downstream decoding should use, paired with the flow that brings the raw input
   * into that charset: an identity flow for UTF-8, ISO-8859-1 and US-ASCII, and a transcoding flow (plus a
   * logged warning) otherwise. `Charset.forName` throws if the name is illegal or unsupported.
   *
   * TODO: optimize ingest for other character sets (transcoding is not cheap)
   */
  def getTranscoder(charsetName: String): (Charset, Flow[ByteString, ByteString, NotUsed]) =
    Charset.forName(charsetName) match {
      case userCharset @ (StandardCharsets.UTF_8 | StandardCharsets.ISO_8859_1 | StandardCharsets.US_ASCII) =>
        userCharset -> Flow[ByteString]
      case otherCharset =>
        logger.warn(
          safe"Charset-sensitive ingest does not directly support ${Safe(otherCharset)} - transcoding through UTF-8 first",
        )
        StandardCharsets.UTF_8 -> TextFlow.transcoding(otherCharset, StandardCharsets.UTF_8)
    }

  /** Build the ingest runtime described by `settings`.
    *
    * Each configuration case is unpacked positionally and forwarded to the matching source definition.
    * File ingests are first checked through `FileSource.srcFromIngest`, whose errors become the invalid
    * side of the result. The Kafka case returns whatever `KafkaSrcDef` produces. Every other case is
    * wrapped with `.valid`.
    *
    * @param name              name of the ingest; also used as the Kafka group id when none is configured
    * @param intoNamespace     namespace the ingest writes into
    * @param settings          the ingest configuration to realize
    * @param initialSwitchMode initial position of the ingest's valve
    * @param fileAccessPolicy  policy passed to `FileSource.srcFromIngest` for file ingests
    * @return the constructed ingest definition, or the accumulated error messages
    */
  def createIngestSrcDef(
    name: String,
    intoNamespace: NamespaceId,
    settings: IngestStreamConfiguration,
    initialSwitchMode: SwitchMode,
    fileAccessPolicy: FileAccessPolicy,
  )(implicit
    graph: CypherOpsGraph,
    protobufSchemaCache: ProtobufSchemaCache,
    logConfig: LogConfig,
  ): ValidatedNel[String, IngestSrcDef] = settings match {
    case KafkaIngest(
          format,
          topics,
          parallelism,
          bootstrapServers,
          groupId,
          securityProtocol,
          autoCommitIntervalMs,
          autoOffsetReset,
          kafkaProperties,
          endingOffset,
          maxPerSecond,
          recordEncodings,
          sslKeystorePassword,
          sslTruststorePassword,
          sslKeyPassword,
          saslJaasConfig,
        ) =>
      KafkaSrcDef(
        name,
        intoNamespace,
        topics,
        bootstrapServers,
        groupId.getOrElse(name),
        importFormatFor(format),
        initialSwitchMode,
        parallelism,
        securityProtocol,
        autoCommitIntervalMs,
        autoOffsetReset,
        kafkaProperties,
        endingOffset,
        maxPerSecond,
        recordEncodings.map(ContentDecoder.apply),
        sslKeystorePassword,
        sslTruststorePassword,
        sslKeyPassword,
        saslJaasConfig,
      )

    case KinesisIngest(
          format: StreamedRecordFormat,
          streamName,
          shardIds,
          parallelism,
          creds,
          region,
          iteratorType,
          numRetries,
          maxPerSecond,
          recordEncodings,
        ) =>
      KinesisSrcDef(
        name,
        intoNamespace,
        streamName,
        shardIds,
        importFormatFor(format),
        initialSwitchMode,
        parallelism,
        creds,
        region,
        iteratorType,
        numRetries,
        maxPerSecond,
        recordEncodings.map(ContentDecoder.apply),
      ).valid

    case KinesisKCLIngest(
          format: StreamedRecordFormat,
          applicationName,
          kinesisStreamName: String,
          parallelism,
          creds,
          region,
          initialPosition,
          numRetries,
          maxPerSecond,
          recordEncodings,
          schedulerSourceSettings,
          checkpointSettings,
          advancedSettings,
        ) =>
      KinesisKclSrcDef(
        name,
        intoNamespace,
        applicationName,
        kinesisStreamName,
        importFormatFor(format),
        initialSwitchMode,
        parallelism,
        creds,
        region,
        initialPosition,
        numRetries,
        maxPerSecond,
        recordEncodings.map(ContentDecoder.apply),
        schedulerSourceSettings,
        checkpointSettings,
        advancedSettings,
      ).valid

    case ServerSentEventsIngest(format, url, parallelism, maxPerSecond, recordEncodings) =>
      ServerSentEventsSrcDef(
        name,
        intoNamespace,
        url,
        importFormatFor(format),
        initialSwitchMode,
        parallelism,
        maxPerSecond,
        recordEncodings.map(ContentDecoder.apply),
      ).valid

    case SQSIngest(
          format,
          queueURL,
          readParallelism,
          writeParallelism,
          credentialsOpt,
          regionOpt,
          deleteReadMessages,
          maxPerSecond,
          recordEncodings,
        ) =>
      SqsStreamSrcDef(
        name,
        intoNamespace,
        queueURL,
        importFormatFor(format),
        initialSwitchMode,
        readParallelism,
        writeParallelism,
        credentialsOpt,
        regionOpt,
        deleteReadMessages,
        maxPerSecond,
        recordEncodings.map(ContentDecoder.apply),
      ).valid

    case WebsocketSimpleStartupIngest(
          format,
          wsUrl,
          initMessages,
          keepAliveProtocol,
          parallelism,
          encoding,
        ) =>
      WebsocketSimpleStartupSrcDef(
        name,
        intoNamespace,
        importFormatFor(format),
        wsUrl,
        initMessages,
        keepAliveProtocol,
        parallelism,
        encoding,
        initialSwitchMode,
      ).valid

    case FileIngest(
          format,
          path,
          encodingString,
          parallelism,
          maximumLineSize,
          startAtOffset,
          ingestLimit,
          maxPerSecond,
          fileIngestMode,
        ) =>
      // Only a source accepted by `srcFromIngest` reaches the ingest definition; its errors are reported
      // by message.
      FileSource
        .srcFromIngest(path, fileIngestMode, fileAccessPolicy)
        .leftMap(_.map(_.getMessage))
        .andThen { validatedSource =>
          ContentDelimitedIngestSrcDef
            .apply(
              initialSwitchMode,
              format,
              validatedSource,
              encodingString,
              parallelism,
              maximumLineSize,
              startAtOffset,
              ingestLimit,
              maxPerSecond,
              name,
              intoNamespace,
            )
            .valid
        }

    case S3Ingest(
          format,
          bucketName,
          key,
          encoding,
          parallelism,
          credsOpt,
          maxLineSize,
          offset,
          ingestLimit,
          maxPerSecond,
        ) =>
      val source: Source[ByteString, NotUsed] = {
        val downloadStream: Source[ByteString, Future[ObjectMetadata]] = credsOpt match {
          case None =>
            S3.getObject(bucketName, key)
          case creds @ Some(_) =>
            // TODO: See example: https://stackoverflow.com/questions/61938052/alpakka-s3-connection-issue
            val settings: S3Settings =
              S3Ext(graph.system).settings.withCredentialsProvider(AwsOps.staticCredentialsProvider(creds))
            val attributes = S3Attributes.settings(settings)
            S3.getObject(bucketName, key).withAttributes(attributes)
        }
        // the object metadata future is not needed downstream
        downloadStream.mapMaterializedValue(_ => NotUsed)
      }
      ContentDelimitedIngestSrcDef(
        initialSwitchMode,
        format,
        source,
        encoding,
        parallelism,
        maxLineSize,
        offset,
        ingestLimit,
        maxPerSecond,
        name,
        intoNamespace,
      ).valid // TODO move what validations can be done ahead, ahead.

    case StandardInputIngest(
          format,
          encodingString,
          parallelism,
          maximumLineSize,
          maxPerSecond,
        ) =>
      ContentDelimitedIngestSrcDef
        .apply(
          initialSwitchMode,
          format,
          StreamConverters.fromInputStream(() => System.in).mapMaterializedValue(_ => NotUsed),
          encodingString,
          parallelism,
          maximumLineSize,
          startAtOffset = 0L,
          ingestLimit = None,
          maxPerSecond,
          name,
          intoNamespace,
        )
        .valid

    case NumberIteratorIngest(format, startAt, ingestLimit, throttlePerSecond, parallelism) =>
      ContentDelimitedIngestSrcDef
        .apply(
          initialSwitchMode,
          format,
          // unbounded source of consecutive numbers starting at `startAt`, one per newline-terminated line
          Source.unfold(startAt)(l => Some(l + 1 -> ByteString(l.toString + "\n"))),
          StandardCharsets.UTF_8.name(),
          parallelism,
          1000,
          0,
          ingestLimit,
          throttlePerSecond,
          name,
          intoNamespace,
        )
        .valid
  }

}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest/KafkaSrcDef.scala
================================================
package com.thatdot.quine.app.model.ingest

import scala.concurrent.duration.{Duration, FiniteDuration, MILLISECONDS}
import scala.util.Try

import org.apache.pekko.kafka.scaladsl.{Committer, Consumer}
import org.apache.pekko.kafka.{
  CommitDelivery,
  CommitterSettings,
  ConsumerMessage,
  ConsumerSettings,
  Subscription,
  Subscriptions => KafkaSubscriptions,
}
import org.apache.pekko.stream.scaladsl.{Flow, Source}
import org.apache.pekko.{Done, NotUsed}

import cats.data.ValidatedNel
import cats.implicits.catsSyntaxOption
import com.codahale.metrics.Timer
import org.apache.kafka.clients.CommonClientConfigs.SECURITY_PROTOCOL_CONFIG
import org.apache.kafka.clients.consumer.ConsumerConfig.AUTO_OFFSET_RESET_CONFIG
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, Deserializer}

import com.thatdot.common.logging.Log.{LazySafeLogging, LogConfig, Safe, SafeLoggableInterpolator}
import com.thatdot.common.security.Secret
import com.thatdot.quine.app.KafkaKillSwitch
import
com.thatdot.quine.app.model.ingest.serialization.{ContentDecoder, ImportFormat}
import com.thatdot.quine.app.model.ingest.util.KafkaSettingsValidator
import com.thatdot.quine.graph.cypher.Value
import com.thatdot.quine.graph.{CypherOpsGraph, NamespaceId}
import com.thatdot.quine.routes.{
  KafkaAutoOffsetReset,
  KafkaIngest,
  KafkaOffsetCommitting,
  KafkaSecurityProtocol,
  SaslJaasConfig,
}
import com.thatdot.quine.util.SwitchMode

/** Factory for Kafka-backed ingest source definitions.
  *
  * `apply` validates the effective consumer properties and then builds either a [[KafkaSrcDef.NonCommitting]]
  * or a [[KafkaSrcDef.Committing]] source, depending on whether explicit offset committing was requested.
  */
object KafkaSrcDef extends LazySafeLogging {

  /** Stream values where we won't need to retain committable offset information */
  type NoOffset = ConsumerRecord[Array[Byte], Try[Value]]

  /** Stream values where we'll retain committable offset information */
  type WithOffset = ConsumerMessage.CommittableMessage[Array[Byte], Try[Value]]

  /** Log warnings for any kafkaProperties keys that will be overridden by typed Secret params. */
  private def warnOnOverriddenProperties(
    kafkaProperties: KafkaIngest.KafkaProperties,
    sslKeystorePassword: Option[Secret],
    sslTruststorePassword: Option[Secret],
    sslKeyPassword: Option[Secret],
    saslJaasConfig: Option[SaslJaasConfig],
  ): Unit = {
    // Only the keys whose typed parameter is actually set can override anything
    val typedSecretKeys: Set[String] = Set.empty ++
      sslKeystorePassword.map(_ => "ssl.keystore.password") ++
      sslTruststorePassword.map(_ => "ssl.truststore.password") ++
      sslKeyPassword.map(_ => "ssl.key.password") ++
      saslJaasConfig.map(_ => "sasl.jaas.config")

    val overriddenKeys = kafkaProperties.keySet.intersect(typedSecretKeys)
    overriddenKeys.foreach { key =>
      logger.warn(
        safe"Kafka property '${Safe(key)}' in kafkaProperties will be overridden by typed Secret parameter. " +
        safe"Remove '${Safe(key)}' from kafkaProperties to suppress this warning.",
      )
    }
  }

  /** Merge typed secret params into Kafka properties. Typed params take precedence.
    *
    * Only the parameters that are defined contribute an entry to the returned map.
    */
  private def effectiveSecretProperties(
    sslKeystorePassword: Option[Secret],
    sslTruststorePassword: Option[Secret],
    sslKeyPassword: Option[Secret],
    saslJaasConfig: Option[SaslJaasConfig],
  ): Map[String, String] = {
    import Secret.Unsafe._
    Map.empty ++
    sslKeystorePassword.map("ssl.keystore.password" -> _.unsafeValue) ++
    sslTruststorePassword.map("ssl.truststore.password" -> _.unsafeValue) ++
    sslKeyPassword.map("ssl.key.password" -> _.unsafeValue) ++
    saslJaasConfig.map("sasl.jaas.config" -> SaslJaasConfig.toJaasConfigString(_))
  }

  /** Assemble the Kafka [[ConsumerSettings]] used by both source variants.
    *
    * Values are deserialized by decoding the raw bytes with `decoders` and passing the result to
    * `format.importMessageSafeBytes`; keys are kept as raw byte arrays. The final property map is layered as
    * `kafkaProperties`, then the typed secret properties, then the auto-offset-reset and security-protocol
    * entries, so later layers win on key collisions.
    */
  private def buildConsumerSettings(
    format: ImportFormat,
    isSingleHost: Boolean,
    bootstrapServers: String,
    groupId: String,
    autoOffsetReset: KafkaAutoOffsetReset,
    kafkaProperties: KafkaIngest.KafkaProperties,
    securityProtocol: KafkaSecurityProtocol,
    sslKeystorePassword: Option[Secret],
    sslTruststorePassword: Option[Secret],
    sslKeyPassword: Option[Secret],
    saslJaasConfig: Option[SaslJaasConfig],
    decoders: Seq[ContentDecoder],
    deserializationTimer: Timer,
  )(implicit graph: CypherOpsGraph): ConsumerSettings[Array[Byte], Try[Value]] = {

    val deserializer: Deserializer[Try[Value]] =
      (_: String, data: Array[Byte]) => {
        format.importMessageSafeBytes(ContentDecoder.decode(decoders, data), isSingleHost, deserializationTimer)
      }

    val keyDeserializer: ByteArrayDeserializer = new ByteArrayDeserializer() //NO-OP

    warnOnOverriddenProperties(
      kafkaProperties,
      sslKeystorePassword,
      sslTruststorePassword,
      sslKeyPassword,
      saslJaasConfig,
    )

    // Only the redacted rendering of the SASL config is ever logged
    saslJaasConfig.foreach { config =>
      logger.info(safe"Kafka SASL config: ${Safe(SaslJaasConfig.toRedactedString(config))}")
    }

    val secretProps =
      effectiveSecretProperties(sslKeystorePassword, sslTruststorePassword, sslKeyPassword, saslJaasConfig)

    // Create Map of kafka properties: combination of user passed properties from `kafkaProperties`
    // as well as those templated by `KafkaAutoOffsetReset` and `KafkaSecurityProtocol`
    // NOTE: This divergence between how kafka properties are set should be resolved, most likely by removing
    // `KafkaAutoOffsetReset`, `KafkaSecurityProtocol`, and `KafkaOffsetCommitting.AutoCommit`
    // in favor of `KafkaIngest.KafkaProperties`. Additionally, the current "template" properties override those in kafkaProperties
    val properties = kafkaProperties ++ secretProps ++ Map(
      AUTO_OFFSET_RESET_CONFIG -> autoOffsetReset.name,
      SECURITY_PROTOCOL_CONFIG -> securityProtocol.name,
    )

    ConsumerSettings(graph.system, keyDeserializer, deserializer)
      .withBootstrapServers(bootstrapServers)
      .withGroupId(groupId)
      // Note: The ConsumerSettings stop-timeout delays stopping the Kafka Consumer
      // and the stream, but when using drainAndShutdown that delay is not required and can be set to zero (as below).
      // https://pekko.apache.org/docs/pekko-connectors-kafka/current/consumer.html#draining-control
      // We're calling .drainAndShutdown on the Kafka [[Consumer.Control]]
      .withStopTimeout(Duration.Zero)
      .withProperties(properties)
  }

  /** Validate the Kafka settings and build the matching ingest source definition.
    *
    * A topic list becomes a topic subscription; explicit partition assignments become a manual assignment of
    * every listed (topic, partition) pair. When `offsetCommitting` is `None` a [[NonCommitting]] source is built,
    * when it is an `ExplicitCommit` a [[Committing]] source is built.
    *
    * @return the source definition, or the complaints reported by [[KafkaSettingsValidator]]
    */
  def apply(
    name: String,
    intoNamespace: NamespaceId,
    topics: Either[KafkaIngest.Topics, KafkaIngest.PartitionAssignments],
    bootstrapServers: String,
    groupId: String,
    format: ImportFormat,
    initialSwitchMode: SwitchMode,
    parallelism: Int = 2,
    securityProtocol: KafkaSecurityProtocol,
    offsetCommitting: Option[KafkaOffsetCommitting],
    autoOffsetReset: KafkaAutoOffsetReset,
    kafkaProperties: KafkaIngest.KafkaProperties,
    endingOffset: Option[Long],
    maxPerSecond: Option[Int],
    decoders: Seq[ContentDecoder],
    sslKeystorePassword: Option[Secret],
    sslTruststorePassword: Option[Secret],
    sslKeyPassword: Option[Secret],
    saslJaasConfig: Option[SaslJaasConfig],
  )(implicit
    graph: CypherOpsGraph,
    logConfig: LogConfig,
  ): ValidatedNel[KafkaSettingsValidator.ErrorString, IngestSrcDef] = {
    val isSingleHost: Boolean = graph.isSingleHost
    val subscription: Subscription = topics.fold(
      KafkaSubscriptions.topics,
      assignments =>
        KafkaSubscriptions.assignment(
          (
            for {
              (topic, partitions) <- assignments
              partition <- partitions
            } yield new TopicPartition(topic, partition)
          ).toSet,
        ),
    )

    val consumerSettings: ConsumerSettings[Array[Byte], Try[Value]] =
      buildConsumerSettings(
        format,
        isSingleHost,
        bootstrapServers,
        groupId,
        autoOffsetReset,
        kafkaProperties,
        securityProtocol,
        sslKeystorePassword,
        sslTruststorePassword,
        sslKeyPassword,
        saslJaasConfig,
        decoders,
        graph.metrics.ingestDeserializationTimer(intoNamespace, name),
      )

    // Any complaints become the Invalid side; no complaints yields Valid(()) which is mapped below
    val complaintsFromValidator: ValidatedNel[String, Unit] =
      KafkaSettingsValidator
        .validateInput(consumerSettings.properties, assumeConfigIsFinal = true)
        .toInvalid(())

    complaintsFromValidator.map { _ =>
      offsetCommitting match {
        case None =>
          val consumer: Source[NoOffset, Consumer.Control] = Consumer.plainSource(consumerSettings, subscription)
          NonCommitting(
            name,
            intoNamespace,
            format,
            initialSwitchMode,
            parallelism,
            consumer,
            endingOffset,
            maxPerSecond,
            decoders,
          )
        case Some(koc @ KafkaOffsetCommitting.ExplicitCommit(_, _, _, _)) =>
          val consumer: Source[WithOffset, Consumer.Control] =
            Consumer.committableSource(consumerSettings, subscription)
          Committing(
            name,
            intoNamespace,
            format,
            initialSwitchMode,
            parallelism,
            consumer,
            endingOffset,
            maxPerSecond,
            koc,
            decoders,
          )
      }
    }
  }

  /** Kafka type that does not ack offset information.
    *
    * Records whose value is `null` are logged and dropped before they reach downstream processing.
    */
  case class NonCommitting(
    override val name: String,
    override val intoNamespace: NamespaceId,
    format: ImportFormat,
    initialSwitchMode: SwitchMode,
    parallelism: Int = 2,
    kafkaConsumer: Source[NoOffset, Consumer.Control],
    endingOffset: Option[Long],
    maxPerSecond: Option[Int],
    decoders: Seq[ContentDecoder],
  )(implicit val graph: CypherOpsGraph, val logConfig: LogConfig)
      extends IngestSrcDef(
        format,
        initialSwitchMode,
        parallelism,
        maxPerSecond,
        s"$name (Kafka ingest)",
        intoNamespace,
      ) {

    type InputType = NoOffset

    override def sourceWithShutdown(): Source[(Try[Value], NoOffset), KafkaKillSwitch] =
      endingOffset
        .fold(kafkaConsumer)(o => kafkaConsumer.takeWhile(r => r.offset() <= o))
        // Kafka reports a serialized value size of -1 for a null (tombstone) value. Clamp to 0 so that a
        // record that is about to be dropped never hands a negative byte count to the meter.
        // NOTE(review): assumes `meter.mark` accumulates the byte count it is given - confirm in IngestSrcDef
        .wireTap((o: NoOffset) => meter.mark(math.max(0, o.serializedValueSize())))
        .mapMaterializedValue(KafkaKillSwitch)
        .wireTap((o: NoOffset) =>
          if (o.value() == null) {
            logger.info(log"Dropping empty value from Kafka ingest($name) with offset=${o.offset().toString}")
          },
        )
        // Empty value()'s can show up in kafka from a tombstone message, and kafka doesn't call the provided
        // deserializer instead forwarding a null instead of a Try[CypherValue]
        // We should handle this because downstream processing assumes that the value of `output` is of type Try
        // The choice we decided on was to drop such messages.
        .filter(_.value() != null)
        .map((o: NoOffset) => (o.value(), o))
  }

  /** Kafka type with ack.
    *
    * Records whose value is `null` are logged and dropped; offsets of the remaining records are committed in
    * batches by the `ack` flow according to the `koc` settings.
    */
  case class Committing(
    override val name: String,
    override val intoNamespace: NamespaceId,
    format: ImportFormat,
    initialSwitchMode: SwitchMode,
    parallelism: Int = 2,
    kafkaConsumer: Source[WithOffset, Consumer.Control],
    endingOffset: Option[Long],
    maxPerSecond: Option[Int],
    koc: KafkaOffsetCommitting.ExplicitCommit,
    decoders: Seq[ContentDecoder],
  )(implicit val graph: CypherOpsGraph, val logConfig: LogConfig)
      extends IngestSrcDef(
        format,
        initialSwitchMode,
        parallelism,
        maxPerSecond,
        s"$name (Kafka ingest)",
        intoNamespace,
      ) {
    type InputType = WithOffset

    override def sourceWithShutdown(): Source[TryDeserialized, KafkaKillSwitch] =
      endingOffset
        .fold(kafkaConsumer)(o => kafkaConsumer.takeWhile(r => r.record.offset() <= o))
        // Kafka reports a serialized value size of -1 for a null (tombstone) value. Clamp to 0 so that a
        // record that is about to be dropped never hands a negative byte count to the meter.
        // NOTE(review): assumes `meter.mark` accumulates the byte count it is given - confirm in IngestSrcDef
        .wireTap((o: WithOffset) => meter.mark(math.max(0, o.record.serializedValueSize())))
        .mapMaterializedValue(KafkaKillSwitch)
        .wireTap((o: WithOffset) =>
          if (o.record.value() == null) {
            logger.info(log"Dropping empty value from Kafka ingest($name) with offset=${o.record.offset().toString}")
          },
        )
        // Empty record.value()'s can show up in kafka from a tombstone message, and kafka doesn't call the provided
        // deserializer instead forwarding a null instead of a Try[CypherValue]
        // We should handle this because downstream processing assumes that the value of `output` is of type Try
        // The choice we decided on was to drop such messages.
        .filter(_.record.value() != null)
        .map((o: WithOffset) => (o.record.value(), o))

    /** For ack-ing source override the default mapAsyncUnordered behavior.
      */
    override def writeToGraph(intoNamespace: NamespaceId): Flow[TryDeserialized, TryDeserialized, NotUsed] =
      Flow[TryDeserialized].mapAsync(parallelism)(writeSuccessValues(intoNamespace))

    /** Commits the offset of every element that reaches it, batched per the `koc` settings. */
    override val ack: Flow[TryDeserialized, Done, NotUsed] = {
      val committer: Flow[ConsumerMessage.Committable, ConsumerMessage.CommittableOffsetBatch, NotUsed] =
        Committer
          .batchFlow(
            CommitterSettings(system)
              .withMaxBatch(koc.maxBatch)
              .withMaxInterval(FiniteDuration(koc.maxIntervalMillis.toLong, MILLISECONDS))
              .withParallelism(koc.parallelism)
              .withDelivery(
                if (koc.waitForCommitConfirmation) CommitDelivery.WaitForAck else CommitDelivery.SendAndForget,
              ),
          )

      // Note - In cases where we are in ExplicitCommit mode with CommitDelivery.WaitForAck _and_ there is an
      // endingOffset set, we will get an org.apache.pekko.kafka.CommitTimeoutException here, since the commit delivery
      // is batched and it's possible to have remaining commit offsets remaining that don't get sent.
      //
      // e.g. partition holds 1000 values, we set koc.maxBatch=100, and endingOffset to 150. Last ack sent will
      // be 100, last 50 will not be sent.
      Flow[TryDeserialized]
        .map(_._2.committableOffset)
        .via(committer)
        .map(_ => Done)
    }
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest/KinesisKclSrcDef.scala
================================================
package com.thatdot.quine.app.model.ingest

import java.net.InetAddress
import java.nio.ByteBuffer
import java.util.{Calendar, Optional, UUID}

import scala.concurrent.duration._
import scala.jdk.CollectionConverters._
import scala.util.Try

import org.apache.pekko.stream.connectors.kinesis.scaladsl.KinesisSchedulerSource
import org.apache.pekko.stream.connectors.kinesis.{
  CommittableRecord,
  KinesisSchedulerCheckpointSettings,
  KinesisSchedulerSourceSettings,
}
import org.apache.pekko.stream.scaladsl.{Flow, Source}
import org.apache.pekko.{Done, NotUsed}

import software.amazon.awssdk.awscore.retry.AwsRetryStrategy
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration
import software.amazon.awssdk.http.async.SdkAsyncHttpClient
import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient
import software.amazon.awssdk.retries.StandardRetryStrategy
import software.amazon.awssdk.services.cloudwatch.CloudWatchAsyncClient
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient
import software.amazon.awssdk.services.dynamodb.model.BillingMode
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient
import software.amazon.kinesis.common.{ConfigsBuilder, InitialPositionInStream, InitialPositionInStreamExtended}
import software.amazon.kinesis.coordinator.CoordinatorConfig.ClientVersionConfig
import software.amazon.kinesis.coordinator.Scheduler
import software.amazon.kinesis.leases.{NoOpShardPrioritization, ParentsFirstShardPrioritization}
import software.amazon.kinesis.metrics.MetricsLevel
import software.amazon.kinesis.processor.{ShardRecordProcessorFactory, SingleStreamTracker}
import software.amazon.kinesis.retrieval.fanout.FanOutConfig
import
software.amazon.kinesis.retrieval.polling.PollingConfig

import com.thatdot.common.logging.Log.LogConfig
import com.thatdot.quine.app.model.ingest.serialization.{ContentDecoder, ImportFormat}
import com.thatdot.quine.app.model.ingest.util.AwsOps
import com.thatdot.quine.app.model.ingest.util.AwsOps.AwsBuilderOps
import com.thatdot.quine.graph.MasterStream.IngestSrcExecToken
import com.thatdot.quine.graph.cypher.Value
import com.thatdot.quine.graph.{CypherOpsGraph, NamespaceId}
import com.thatdot.quine.routes.KinesisIngest.RetrievalSpecificConfig
import com.thatdot.quine.routes.{AwsCredentials, AwsRegion, KinesisIngest}
import com.thatdot.quine.util.SwitchMode

/** The definition of a source stream from Amazon Kinesis using the Kinesis Client Library (KCL).
  *
  * @param name               The unique, human-facing name of the ingest stream
  * @param intoNamespace      The namespace (database) into which the data is ingested
  * @param applicationName    The name of the application as seen by KCL and its accompanying DynamoDB instance
  * @param streamName         The Kinesis stream name
  * @param format             The [[ImportFormat]] describing how to parse bytes read from Kinesis
  * @param initialSwitchMode  The initial mode that controls whether ingestion is active or paused
  * @param parallelism        How many concurrent writes should be performed on the database
  * @param credentialsOpt     The AWS credentials to access the stream (if None, default credentials are used)
  * @param regionOpt          The AWS region in which the stream resides (if None, default region is used)
  * @param initialPosition    The initial position from which KCL will consume from a Kinesis stream (e.g., LATEST, TRIM_HORIZON)
  * @param numRetries         How many times to retry on ingest failures
  * @param maxPerSecond       Optional rate limit (records per second). If None, no explicit rate limit is applied
  * @param decoders           A sequence of [[ContentDecoder]] instances for transforming the ingested data
  * @param schedulerSettings  Optional buffer size and backpressure timeout for the scheduler source (library
  *                           defaults are used for anything not provided)
  * @param checkpointSettings Settings controlling how checkpoints are managed for this stream. If None, or if
  *                           `disableCheckpointing` is set, records are acknowledged without checkpointing
  * @param advancedSettings   Optional overrides applied on top of the KCL `ConfigsBuilder` defaults
  */
final case class KinesisKclSrcDef(
  override val name: String,
  override val intoNamespace: NamespaceId,
  applicationName: String,
  streamName: String,
  format: ImportFormat,
  initialSwitchMode: SwitchMode,
  parallelism: Int = 2,
  credentialsOpt: Option[AwsCredentials],
  regionOpt: Option[AwsRegion],
  initialPosition: KinesisIngest.InitialPosition,
  numRetries: Int,
  maxPerSecond: Option[Int],
  decoders: Seq[ContentDecoder],
  schedulerSettings: Option[KinesisIngest.KinesisSchedulerSourceSettings],
  checkpointSettings: Option[KinesisIngest.KinesisCheckpointSettings],
  advancedSettings: Option[KinesisIngest.KCLConfiguration],
)(implicit val graph: CypherOpsGraph, protected val logConfig: LogConfig)
    extends RawValuesIngestSrcDef(
      format,
      initialSwitchMode,
      parallelism,
      maxPerSecond,
      decoders,
      s"$name (Kinesis ingest)",
      intoNamespace,
    ) {
  import KinesisKclSrcDef._

  type InputType = CommittableRecord

  override val ingestToken: IngestSrcExecToken = IngestSrcExecToken(format.label)

  /** Copies the payload bytes out of the record's data buffer. */
  def rawBytes(record: CommittableRecord): Array[Byte] = recordBufferToArray(record.record.data())

  /** Builds the AWS clients, the KCL `Scheduler` factory and the resulting record source.
    *
    * The three service clients are closed when the actor system terminates.
    */
  def source(): Source[CommittableRecord, NotUsed] = {
    // NOTE(review): the Kinesis client gets its own HTTP client while DynamoDB and CloudWatch share
    // `httpClient`; only the service clients (not the HTTP clients) are registered for closing below.
    val httpClient = buildAsyncHttpClient
    val kinesisClient = buildAsyncClient(buildAsyncHttpClient, credentialsOpt, regionOpt, numRetries)
    val dynamoClient: DynamoDbAsyncClient = DynamoDbAsyncClient.builder
      .credentials(credentialsOpt)
      .httpClient(httpClient)
      .region(regionOpt)
      .build
    val cloudWatchClient: CloudWatchAsyncClient = CloudWatchAsyncClient.builder
      .credentials(credentialsOpt)
      .httpClient(httpClient)
      .region(regionOpt)
      .build

    Seq(kinesisClient, dynamoClient, cloudWatchClient).foreach { client =>
      graph.system.registerOnTermination(client.close())
    }

    // Start from the library defaults and apply only the values supplied through the API
    val schedulerSourceSettings: KinesisSchedulerSourceSettings = schedulerSettings
      .map { apiKinesisSchedulerSourceSettings =>
        val base = KinesisSchedulerSourceSettings.defaults
        val withSize = apiKinesisSchedulerSourceSettings.bufferSize.fold(base)(base.withBufferSize)
        val withSizeAndTimeout = apiKinesisSchedulerSourceSettings.backpressureTimeoutMillis.fold(withSize) { t =>
          withSize.withBackpressureTimeout(java.time.Duration.ofMillis(t))
        }
        withSizeAndTimeout
      }
      .getOrElse(KinesisSchedulerSourceSettings.defaults)

    val builder: ShardRecordProcessorFactory => Scheduler = recordProcessorFactory => {

      // Configuration settings point to set the initial stream position used below in the Scheduler
      val initialPositionInStream: InitialPositionInStreamExtended = initialPosition match {
        case KinesisIngest.InitialPosition.Latest =>
          InitialPositionInStreamExtended.newInitialPosition(InitialPositionInStream.LATEST)
        case KinesisIngest.InitialPosition.TrimHorizon =>
          InitialPositionInStreamExtended.newInitialPosition(InitialPositionInStream.TRIM_HORIZON)
        case KinesisIngest.InitialPosition.AtTimestamp(year, month, dayOfMonth, hour, minute, second) =>
          // The fields are interpreted in the JVM's default time zone (Calendar.getInstance()).
          val cal = Calendar.getInstance()
          cal.set(year, month - 1, dayOfMonth, hour, minute, second)
          // The six-argument `set` retains every other field, so without this the millisecond part would be
          // whatever the wall clock happened to show, making the start position vary between runs.
          cal.set(Calendar.MILLISECOND, 0)
          InitialPositionInStreamExtended.newInitialPositionAtTimestamp(cal.getTime)
        case _ =>
          throw new IllegalArgumentException(
            s"Only Latest, TrimHorizon, and AtTimestamp are valid Iterator Types when using the KCL version of Kinesis",
          ) // will be caught as an "Invalid" (400) below
      }

      val streamTracker = new SingleStreamTracker(streamName, initialPositionInStream)
      val workerId = advancedSettings
        .flatMap(_.configsBuilder.flatMap(_.workerIdentifier))
        .getOrElse(s"${InetAddress.getLocalHost.getHostName}:${UUID.randomUUID()}")
      val configsBuilder = new ConfigsBuilder(
        streamTracker,
        applicationName,
        kinesisClient,
        dynamoClient,
        cloudWatchClient,
        workerId,
        recordProcessorFactory,
      )

      // `ConfigsBuilder#tableName` may only be set after construction, but we
      // need to do it before the rest of the `advancedSettings` traversal
      advancedSettings.foreach(_.configsBuilder.foreach(_.tableName.foreach(configsBuilder.tableName)))

      val leaseManagementConfig = configsBuilder.leaseManagementConfig
        // This should be covered by `streamTracker`, but this is to be safe since we're
        // not providing an override in the abbreviated `LeaseManagementConfig` API schema
        .initialPositionInStream(initialPositionInStream)
      val processorConfig = configsBuilder.processorConfig
      val coordinatorConfig = configsBuilder.coordinatorConfig
      val lifecycleConfig = configsBuilder.lifecycleConfig
      val retrievalConfig = configsBuilder.retrievalConfig
      val metricsConfig = configsBuilder.metricsConfig

      // Apply each API-provided override onto the corresponding KCL config object; absent values leave the
      // KCL default untouched.
      advancedSettings.foreach { apiKclConfig =>
        apiKclConfig.leaseManagementConfig.foreach { apiLeaseConfig =>
          apiLeaseConfig.failoverTimeMillis.foreach(leaseManagementConfig.failoverTimeMillis)
          apiLeaseConfig.shardSyncIntervalMillis.foreach(leaseManagementConfig.shardSyncIntervalMillis)
          apiLeaseConfig.cleanupLeasesUponShardCompletion.foreach(
            leaseManagementConfig.cleanupLeasesUponShardCompletion,
          )
          apiLeaseConfig.ignoreUnexpectedChildShards.foreach(leaseManagementConfig.ignoreUnexpectedChildShards)
          apiLeaseConfig.maxLeasesForWorker.foreach(leaseManagementConfig.maxLeasesForWorker)
          apiLeaseConfig.maxLeaseRenewalThreads.foreach(value => leaseManagementConfig.maxLeaseRenewalThreads(value))
          apiLeaseConfig.billingMode.foreach {
            case KinesisIngest.BillingMode.PROVISIONED =>
              leaseManagementConfig.billingMode(BillingMode.PROVISIONED)
            case KinesisIngest.BillingMode.PAY_PER_REQUEST =>
              leaseManagementConfig.billingMode(BillingMode.PAY_PER_REQUEST)
            case KinesisIngest.BillingMode.UNKNOWN_TO_SDK_VERSION =>
              leaseManagementConfig.billingMode(BillingMode.UNKNOWN_TO_SDK_VERSION)
          }
          apiLeaseConfig.initialLeaseTableReadCapacity.foreach(leaseManagementConfig.initialLeaseTableReadCapacity)
          apiLeaseConfig.initialLeaseTableWriteCapacity.foreach(leaseManagementConfig.initialLeaseTableWriteCapacity)
          // Begin setting workerUtilizationAwareAssignmentConfig
          val workerUtilizationAwareAssignmentConfig = leaseManagementConfig.workerUtilizationAwareAssignmentConfig()
          apiLeaseConfig.reBalanceThresholdPercentage.foreach(
            workerUtilizationAwareAssignmentConfig.reBalanceThresholdPercentage,
          )
          apiLeaseConfig.dampeningPercentage.foreach(workerUtilizationAwareAssignmentConfig.dampeningPercentage)
          apiLeaseConfig.allowThroughputOvershoot.foreach(
            workerUtilizationAwareAssignmentConfig.allowThroughputOvershoot,
          )
          apiLeaseConfig.disableWorkerMetrics.foreach(workerUtilizationAwareAssignmentConfig.disableWorkerMetrics)
          apiLeaseConfig.maxThroughputPerHostKBps.foreach(
            workerUtilizationAwareAssignmentConfig.maxThroughputPerHostKBps,
          )
          // Finalize setting workerUtilizationAwareAssignmentConfig by updating its value in the leaseManagementConfig
          leaseManagementConfig.workerUtilizationAwareAssignmentConfig(workerUtilizationAwareAssignmentConfig)

          val gracefulLeaseHandoffConfig = leaseManagementConfig.gracefulLeaseHandoffConfig()
          apiLeaseConfig.isGracefulLeaseHandoffEnabled.foreach(
            gracefulLeaseHandoffConfig.isGracefulLeaseHandoffEnabled,
          )
          apiLeaseConfig.gracefulLeaseHandoffTimeoutMillis.foreach(
            gracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis,
          )
          leaseManagementConfig.gracefulLeaseHandoffConfig(gracefulLeaseHandoffConfig)
        }

        apiKclConfig.retrievalSpecificConfig
          .map {
            case RetrievalSpecificConfig.FanOutConfig(
                  consumerArn,
                  consumerName,
                  maxDescribeStreamSummaryRetries,
                  maxDescribeStreamConsumerRetries,
                  registerStreamConsumerRetries,
                  retryBackoffMillis,
                ) =>
              val fanOutConfig = new FanOutConfig(kinesisClient)
              fanOutConfig.streamName(streamName)
              consumerArn.foreach(fanOutConfig.consumerArn)
              consumerName.foreach(fanOutConfig.consumerName)
              maxDescribeStreamSummaryRetries.foreach(fanOutConfig.maxDescribeStreamSummaryRetries)
              maxDescribeStreamConsumerRetries.foreach(fanOutConfig.maxDescribeStreamConsumerRetries)
              registerStreamConsumerRetries.foreach(fanOutConfig.registerStreamConsumerRetries)
              retryBackoffMillis.foreach(fanOutConfig.retryBackoffMillis)
              fanOutConfig

            case RetrievalSpecificConfig.PollingConfig(
                  maxRecords,
                  retryGetRecordsInSeconds,
                  maxGetRecordsThreadPool,
                  idleTimeBetweenReadsInMillis,
                ) =>
              val pollingConfig = new PollingConfig(streamName, kinesisClient)
              maxRecords.foreach(pollingConfig.maxRecords)
              // It's tempting to always set the config value for Optional types, using RichOption or some such,
              // but we really only want to set something other than the library default if one is provided via the API
              maxGetRecordsThreadPool.foreach(value => pollingConfig.maxGetRecordsThreadPool(Optional.of(value)))
              retryGetRecordsInSeconds.foreach(value => pollingConfig.retryGetRecordsInSeconds(Optional.of(value)))
              idleTimeBetweenReadsInMillis.foreach(pollingConfig.idleTimeBetweenReadsInMillis)
              pollingConfig
          }
          .foreach(retrievalConfig.retrievalSpecificConfig)

        apiKclConfig.processorConfig.foreach { apiProcessorConfig =>
          apiProcessorConfig.callProcessRecordsEvenForEmptyRecordList.foreach(
            processorConfig.callProcessRecordsEvenForEmptyRecordList,
          )
        }

        apiKclConfig.coordinatorConfig.foreach { apiCoordinatorConfig =>
          apiCoordinatorConfig.parentShardPollIntervalMillis.foreach(coordinatorConfig.parentShardPollIntervalMillis)
          apiCoordinatorConfig.skipShardSyncAtWorkerInitializationIfLeasesExist.foreach(
            coordinatorConfig.skipShardSyncAtWorkerInitializationIfLeasesExist,
          )
          apiCoordinatorConfig.shardPrioritization.foreach {
            case KinesisIngest.ShardPrioritization.ParentsFirstShardPrioritization(maxDepth) =>
              coordinatorConfig.shardPrioritization(new ParentsFirstShardPrioritization(maxDepth))
            case KinesisIngest.ShardPrioritization.NoOpShardPrioritization =>
              coordinatorConfig.shardPrioritization(new NoOpShardPrioritization())
          }
          apiCoordinatorConfig.clientVersionConfig.foreach {
            case KinesisIngest.ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X =>
              coordinatorConfig.clientVersionConfig(ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X)
            case KinesisIngest.ClientVersionConfig.CLIENT_VERSION_CONFIG_3X =>
              coordinatorConfig.clientVersionConfig(ClientVersionConfig.CLIENT_VERSION_CONFIG_3X)
          }
        }

        apiKclConfig.lifecycleConfig.foreach { apiLifecycleConfig =>
          apiLifecycleConfig.taskBackoffTimeMillis.foreach(lifecycleConfig.taskBackoffTimeMillis)
          // It's tempting to always set the config value for Optional types, using RichOption or some such,
          // but we really only want to set something other than the library default if one is provided via the API
          apiLifecycleConfig.logWarningForTaskAfterMillis.foreach(value =>
            lifecycleConfig.logWarningForTaskAfterMillis(Optional.of(value)),
          )
        }

        apiKclConfig.retrievalConfig.foreach { apiRetrievalConfig =>
          apiRetrievalConfig.listShardsBackoffTimeInMillis.foreach(retrievalConfig.listShardsBackoffTimeInMillis)
          apiRetrievalConfig.maxListShardsRetryAttempts.foreach(retrievalConfig.maxListShardsRetryAttempts)
        }

        apiKclConfig.metricsConfig.foreach { apiMetricsConfig =>
          apiMetricsConfig.metricsBufferTimeMillis.foreach(metricsConfig.metricsBufferTimeMillis)
          apiMetricsConfig.metricsMaxQueueSize.foreach(metricsConfig.metricsMaxQueueSize)
          apiMetricsConfig.metricsLevel.foreach {
            case KinesisIngest.MetricsLevel.NONE => metricsConfig.metricsLevel(MetricsLevel.NONE)
            case KinesisIngest.MetricsLevel.SUMMARY => metricsConfig.metricsLevel(MetricsLevel.SUMMARY)
            case KinesisIngest.MetricsLevel.DETAILED => metricsConfig.metricsLevel(MetricsLevel.DETAILED)
          }
          apiMetricsConfig.metricsEnabledDimensions.foreach(values =>
            metricsConfig.metricsEnabledDimensions(new java.util.HashSet(values.map(_.value).asJava)),
          )
        }
      }

      // Note: Currently, this config is the only one built within the configs builder
      // that is not affected by the `advancedSettings` traversal above. That makes
      // sense because we also have `checkpointSettings` at the same level, but the
      // reasons that we don't build a `checkpointConfig` from that parameter are:
      // 1. Those settings are used for `KinesisSchedulerCheckpointSettings` in the
      //    `ack` flow, and that purpose is distinct from this checkpoint config's
      //    purpose, so we probably don't want to re-use those values for discrete
      //    things.
      // 2. At a glance, the only way to build a checkpoint config other than the
      //    parameterless default one built within the configs builder at this
      //    accessor is to build a `DynamoDBCheckpointer` via its factory, and that
      //    is no small task.
      val checkpointConfig = configsBuilder.checkpointConfig

      new Scheduler(
        checkpointConfig,
        coordinatorConfig,
        leaseManagementConfig,
        lifecycleConfig,
        metricsConfig,
        processorConfig,
        retrievalConfig,
      )
    }

    val source = KinesisSchedulerSource(builder, schedulerSourceSettings)
    source.mapMaterializedValue(_ => NotUsed)
  }

  /** Acknowledges processed records.
    *
    * When checkpoint settings are provided and checkpointing is not disabled, records pass through the KCL
    * checkpoint flow (API values override the library defaults individually). Otherwise - no settings, or
    * `disableCheckpointing` set - each record is simply mapped to `Done` without checkpointing.
    */
  override val ack: Flow[(Try[Value], CommittableRecord), Done, NotUsed] = {
    val defaultSettings: KinesisSchedulerCheckpointSettings = KinesisSchedulerCheckpointSettings.defaults
    checkpointSettings
      // Previously `disableCheckpointing = true` fell back to checkpointing with the default settings,
      // i.e. the flag could never actually turn checkpointing off.
      .filterNot(_.disableCheckpointing)
      .map { apiSettings =>
        KinesisSchedulerCheckpointSettings
          .apply(
            apiSettings.maxBatchSize.getOrElse(defaultSettings.maxBatchSize),
            apiSettings.maxBatchWaitMillis.map(Duration(_, MILLISECONDS)).getOrElse(defaultSettings.maxBatchWait),
          )
      }
      .map(
        KinesisSchedulerSource
          .checkpointRecordsFlow(_)
          .contramap[(Try[Value], CommittableRecord)]({ case (_, cr) => cr })
          .map(_ => Done),
      )
      .getOrElse(Flow[(Try[Value], CommittableRecord)].map(_ => Done))
  }
}

object KinesisKclSrcDef {

  /** Converts the supplied [[ByteBuffer]] to an `Array[Byte]`.
    * A new byte array is allocated and populated by reading from a duplication of the buffer.
    *
    * @param data The [[ByteBuffer]] to convert
    * @return A corresponding array of bytes
    */
  private def recordBufferToArray(data: ByteBuffer): Array[Byte] = {
    // Duplicate in case something else was using the position information
    val duplicateBuffer = data.duplicate()
    val bytes = new Array[Byte](duplicateBuffer.remaining())
    duplicateBuffer.get(bytes)
    bytes
  }

  /** Builds a new Netty-based async HTTP client; every call returns a fresh instance. */
  def buildAsyncHttpClient: SdkAsyncHttpClient =
    NettyNioAsyncHttpClient.builder.maxConcurrency(AwsOps.httpConcurrencyPerClient).build()

  /** Builds a Kinesis async client on top of the given HTTP client.
    *
    * NOTE(review): `numRetries` is passed as the retry strategy's `maxAttempts`; confirm whether the SDK counts
    * the initial attempt in that number.
    */
  def buildAsyncClient(
    httpClient: SdkAsyncHttpClient,
    credentialsOpt: Option[AwsCredentials],
    regionOpt: Option[AwsRegion],
    numRetries: Int,
  ): KinesisAsyncClient = {
    val retryStrategy: StandardRetryStrategy = AwsRetryStrategy
      .standardRetryStrategy()
      .toBuilder
      .maxAttempts(numRetries)
      .build()
    val builder = KinesisAsyncClient
      .builder()
      .credentials(credentialsOpt)
      .region(regionOpt)
      .httpClient(httpClient)
      .overrideConfiguration(
        ClientOverrideConfiguration
          .builder()
          .retryStrategy(retryStrategy)
          .build(),
      )
    builder.build
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest/KinesisSrcDef.scala
================================================
package com.thatdot.quine.app.model.ingest

import java.time.Instant

import scala.collection.Set
import scala.concurrent.Future
import scala.concurrent.duration.DurationInt
import scala.jdk.CollectionConverters._
import scala.jdk.FutureConverters.CompletionStageOps

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.connectors.kinesis.ShardIterator._
import org.apache.pekko.stream.connectors.kinesis.ShardSettings
import org.apache.pekko.stream.connectors.kinesis.scaladsl.KinesisSource
import org.apache.pekko.stream.scaladsl.{Flow, Source}

import software.amazon.awssdk.awscore.retry.AwsRetryStrategy
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration
import software.amazon.awssdk.http.async.SdkAsyncHttpClient
import
software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient import software.amazon.awssdk.retries.StandardRetryStrategy import software.amazon.awssdk.services.kinesis.model.DescribeStreamRequest import software.amazon.awssdk.services.kinesis.{KinesisAsyncClient, model => kinesisModel} import com.thatdot.common.logging.Log.LogConfig import com.thatdot.quine.app.model.ingest.serialization.{ContentDecoder, ImportFormat} import com.thatdot.quine.app.model.ingest.util.AwsOps import com.thatdot.quine.app.model.ingest.util.AwsOps.AwsBuilderOps import com.thatdot.quine.graph.MasterStream.IngestSrcExecToken import com.thatdot.quine.graph.{CypherOpsGraph, NamespaceId} import com.thatdot.quine.routes.{AwsCredentials, AwsRegion, KinesisIngest} import com.thatdot.quine.util.SwitchMode /** The definition of a source stream from Amazon Kinesis * * @param name The unique, human-facing name of the ingest stream * @param streamName The Kinesis stream name * @param shardIds The Kinesis shard IDs, or Set.empty to use all shards in the stream. 
/** The definition of a source stream from Amazon Kinesis
  *
  * @param name              The unique, human-facing name of the ingest stream
  * @param intoNamespace     The namespace passed through to the parent [[RawValuesIngestSrcDef]]
  * @param streamName        The Kinesis stream name
  * @param shardIds          The Kinesis shard IDs. When this is `None` or an empty set, the shard list is
  *                          looked up with a `DescribeStream` request and every returned shard is read.
  *                          Each ID probably starts with "shardId-". Note that this [[KinesisSrcDef]]
  *                          will be invalidated if the stream rescales
  * @param format            The [[ImportFormat]] to use to ingest bytes from Kinesis
  * @param initialSwitchMode Passed through to the parent [[RawValuesIngestSrcDef]]
  * @param parallelism       How many concurrent writes should be performed on the database
  * @param credentialsOpt    The AWS credentials to access the stream
  * @param regionOpt         The AWS region handed to the Kinesis client builder
  * @param iteratorType      Where in each shard to start reading. The sequence-number variants are only
  *                          accepted when exactly one shard ID is specified
  * @param numRetries        Used as `maxAttempts` of the Kinesis client's retry strategy
  * @param maxPerSecond      Passed through to the parent [[RawValuesIngestSrcDef]]
  * @param decoders          Passed through to the parent [[RawValuesIngestSrcDef]]
  */
final case class KinesisSrcDef(
  override val name: String,
  override val intoNamespace: NamespaceId,
  streamName: String,
  shardIds: Option[Set[String]],
  format: ImportFormat,
  initialSwitchMode: SwitchMode,
  parallelism: Int = 2,
  credentialsOpt: Option[AwsCredentials],
  regionOpt: Option[AwsRegion],
  iteratorType: KinesisIngest.IteratorType,
  numRetries: Int,
  maxPerSecond: Option[Int],
  decoders: Seq[ContentDecoder],
)(implicit val graph: CypherOpsGraph, protected val logConfig: LogConfig)
    extends RawValuesIngestSrcDef(
      format,
      initialSwitchMode,
      parallelism,
      maxPerSecond,
      decoders,
      s"$name (Kinesis ingest)",
      intoNamespace,
    ) {

  type InputType = kinesisModel.Record

  override val ingestToken: IngestSrcExecToken = IngestSrcExecToken(format.label)

  /** Extracts the record payload without copying it (`asByteArrayUnsafe`). */
  def rawBytes(record: kinesisModel.Record): Array[Byte] = record.data().asByteArrayUnsafe()

  /** Builds the Kinesis record source: resolves the shard iterator and shard list, merges the
    * per-shard sources, and applies a byte-rate throttle sized by the number of shards.
    *
    * @throws IllegalArgumentException if a sequence-number iterator is requested without exactly one shard ID
    */
  def source(): Source[kinesisModel.Record, NotUsed] = {
    import KinesisIngest.IteratorType
    // Translate the API iterator type into the connector's ShardIterator. The guarded case must stay
    // ahead of the two unguarded sequence-number cases so the single-shard check runs first.
    val shardIterator = iteratorType match {
      case IteratorType.Latest => Latest
      case IteratorType.TrimHorizon => TrimHorizon
      case IteratorType.AtTimestamp(ms) => AtTimestamp(Instant.ofEpochMilli(ms))
      case IteratorType.AtSequenceNumber(_) | IteratorType.AfterSequenceNumber(_)
          if shardIds.fold(true)(_.size != 1) =>
        throw new IllegalArgumentException(
          "To use AtSequenceNumber or AfterSequenceNumber, exactly 1 shard must be specified",
        ) // will be caught as an "Invalid" (400) below
      case IteratorType.AtSequenceNumber(seqNo) => AtSequenceNumber(seqNo)
      case IteratorType.AfterSequenceNumber(seqNo) => AfterSequenceNumber(seqNo)
    }

    val kinesisClient = KinesisSrcDef.buildAsyncClient(credentialsOpt, regionOpt, numRetries)

    // The client is closed only when the actor system terminates, not when this source completes
    graph.system.registerOnTermination(kinesisClient.close())

    // a Future yielding the shard IDs to read from
    val shardSettingsFut: Future[List[ShardSettings]] =
      (shardIds.getOrElse(Set()) match {
        case noIds if noIds.isEmpty =>
          // No shard IDs supplied: ask Kinesis for the stream's shards
          kinesisClient
            .describeStream(
              DescribeStreamRequest.builder().streamName(streamName).build(),
            )
            .asScala
            .map(response =>
              response
                .streamDescription()
                .shards()
                .asScala
                .map(_.shardId())
                .toSet,
            )(graph.materializer.executionContext)
        case atLeastOneId =>
          Future.successful(atLeastOneId)
      })
        .map(ids =>
          ids
            .map(shardId => ShardSettings(streamName, shardId).withShardIterator(shardIterator))
            .toList,
        )(graph.materializer.executionContext)

    // A Flow that limits the stream to 2MB * (number of shards) per second
    // TODO This is an imperfect heuristic, as the limit imposed is literally 2MB _per shard_,
    //      not 2MB per shard "on average across all shards".
    val kinesisRateLimiter: Flow[kinesisModel.Record, kinesisModel.Record, NotUsed] = Flow
      .futureFlow(
        shardSettingsFut.map { shards =>
          val kinesisShardCount = shards.length
          // there are a maximum of 500 shards per stream
          val throttleBytesPerSecond = kinesisShardCount * 2 * 1024 * 1024
          Flow[kinesisModel.Record]
            .throttle(
              throttleBytesPerSecond,
              1.second,
              rec =>
                // asByteArrayUnsafe avoids extra allocations, to get the length we can't use a readonly bytebuffer
                rec.data().asByteArrayUnsafe().length,
            )
        }(graph.materializer.executionContext),
      )
      .mapMaterializedValue(_ => NotUsed)

    // Wait for the shard settings, merge the per-shard sources, then apply the rate limit
    Source
      .future(shardSettingsFut)
      .flatMapConcat(shardSettings =>
        KinesisSource
          .basicMerge(shardSettings, kinesisClient),
      )
      .via(kinesisRateLimiter)
  }
}

object KinesisSrcDef {

  /** Builds a Netty-based async HTTP client whose max concurrency is `AwsOps.httpConcurrencyPerClient`. */
  def buildAsyncHttpClient: SdkAsyncHttpClient =
    NettyNioAsyncHttpClient.builder.maxConcurrency(AwsOps.httpConcurrencyPerClient).build()

  /** Builds a [[KinesisAsyncClient]] backed by a fresh HTTP client from [[buildAsyncHttpClient]].
    *
    * @param credentialsOpt optional AWS credentials, applied through the `AwsBuilderOps` builder extension
    * @param regionOpt      optional AWS region, applied through the `AwsBuilderOps` builder extension
    * @param numRetries     passed as `maxAttempts` to the standard AWS retry strategy
    * @return the configured Kinesis client
    */
  def buildAsyncClient(
    credentialsOpt: Option[AwsCredentials],
    regionOpt: Option[AwsRegion],
    numRetries: Int,
  ): KinesisAsyncClient = {
    val retryStrategy: StandardRetryStrategy = AwsRetryStrategy
      .standardRetryStrategy()
      .toBuilder
      .maxAttempts(numRetries)
      .build();
    // NOTE(review): the HTTP client is handed over pre-built and no reference to it is kept here;
    // presumably the SDK does not close a caller-supplied HTTP client when the service client is
    // closed -- confirm against the AWS SDK documentation.
    val builder = KinesisAsyncClient
      .builder()
      .credentials(credentialsOpt)
      .region(regionOpt)
      .httpClient(buildAsyncHttpClient)
      .overrideConfiguration(
        ClientOverrideConfiguration
          .builder()
          .retryStrategy(retryStrategy)
          .build(),
      )
    builder.build
  }
}
/** Uses a FileChannel to pull data from a named pipe. Reading from a named pipe is different
  * from reading from a regular file:
  *
  * - [[FileChannel]]#open and #read may block until data is available
  *
  * - Even after reading all the bytes in the file, the reader must tail for more data, because
  *   data may be appended to the named pipe at any time
  *
  * - Named pipes do not support seek, which is used by [[org.apache.pekko.stream.impl.io.FileSource]]
  *
  * @param path named pipe file name
  * @param chunkSize size of memory buffer allocated for this graph stage; must be greater than 0
  * @param pollInterval how long to wait before reading again (on the same channel) after a read
  *                     returned no data, i.e. 0 bytes or end-of-stream
  */
class NamedPipeSource(path: Path, chunkSize: Int, pollInterval: FiniteDuration)
    extends GraphStage[SourceShape[ByteString]] {
  require(chunkSize > 0, "chunkSize must be greater than 0")
  val out: Outlet[ByteString] = Outlet[ByteString]("NamedPipeSource.out")

  override val shape: SourceShape[ByteString] = SourceShape(out)

  override def createLogic(inheritedAttributes: Attributes): GraphStageLogic =
    new TimerGraphStageLogic(shape) with OutHandler {
      // Single reusable read buffer: filled by the read submitted from `onPull`, then flipped,
      // copied into a ByteString and cleared in `readCallback`
      val buffer = ByteBuffer.allocate(chunkSize)

      /** File channel through which data is read. Don't call `open` here
        * because it may be blocking!
        */
      var channel: Option[FileChannel] = None

      /** Handles the outcome of the async `FileChannel#open` triggered in
        * `onPull` (for when there is no open `FileChannel`)
        */
      private val openCallback = getAsyncCallback[Try[FileChannel]] {
        case Success(c) =>
          channel = Some(c)
          // Re-enter onPull now that a channel exists, so the pending demand turns into a read
          onPull()
        case Failure(ex) =>
          failStage(ex)
      }

      /** Handles the outcome of the async `FileChannel#read` triggered in
        * `onPull`
        */
      private val readCallback = getAsyncCallback[Try[Int]] {
        case Success(n) if n > 0 =>
          // Switch the buffer to read mode, copy its contents out, then reset it for the next read
          buffer.flip()
          val byteString = ByteString.fromByteBuffer(buffer)
          buffer.clear()
          emit(out, byteString)
        case Success(_) =>
          // 0 means no bytes read, -1 means end-of-stream. In either case,
          // wait a bit and then try to read again
          scheduleOnce("poll", pollInterval)
        case Failure(ex) =>
          failStage(ex)
      }

      // Execution context for the blocking open/read calls; assigned in `preStart`
      var dispatcher: ExecutionContext = _

      setHandler(out, this)

      /** Validates that `path` exists, is not a directory and is readable, then looks up the
        * blocking dispatcher.
        */
      override def preStart(): Unit = {
        if (!Files.exists(path)) throw new NoSuchFileException(path.toString)
        require(!Files.isDirectory(path), s"Path '$path' is a directory")
        require(Files.isReadable(path), s"Missing read permission for '$path'")
        dispatcher = new QuineDispatchers(materializer.system).blockingDispatcherEC
      }

      /** Submits the potentially blocking open or read to `dispatcher`; the result is handed back
        * to the stage through the matching async callback.
        */
      override def onPull(): Unit = channel match {
        case None =>
          // Open the file (should happen only on the first `onPull`)
          dispatcher.execute { () =>
            openCallback.invoke(Try(FileChannel.open(path, StandardOpenOption.READ)))
          }
        case Some(c) =>
          // Read from the file
          dispatcher.execute { () =>
            readCallback.invoke(Try(c.read(buffer)))
          }
      }

      /** Closes the channel, if one was opened and is still open, and forgets it. */
      override def postStop(): Unit =
        for {
          c <- channel
        } {
          if (c.isOpen()) {
            c.close()
          }
          channel = None
        }

      /** The "poll" timer (scheduled after an empty read) re-runs `onPull`; any other key is an error. */
      override def onTimer(timerKey: Any): Unit = timerKey match {
        case "poll" => onPull()
        case _ => throw new Exception(s"Unhandled timer key $timerKey")
      }
    }

  override def toString: String = s"NamedPipeSource($path, $chunkSize, $pollInterval)"
}
/** The definition of an ingest stream that reads Server-Sent Events from a URL.
  *
  * @param name              the unique, human-facing name of the ingest stream
  * @param intoNamespace     passed through to the parent [[RawValuesIngestSrcDef]]
  * @param url               the URL of the SSE endpoint to connect to
  * @param format            the [[ImportFormat]] used to deserialize each event's data
  * @param initialSwitchMode passed through to the parent [[RawValuesIngestSrcDef]]
  * @param parallelism       passed through to the parent [[RawValuesIngestSrcDef]]
  * @param maxPerSecond      passed through to the parent [[RawValuesIngestSrcDef]]
  * @param decoders          passed through to the parent [[RawValuesIngestSrcDef]]
  */
final case class ServerSentEventsSrcDef(
  override val name: String,
  override val intoNamespace: NamespaceId,
  url: String,
  format: ImportFormat,
  initialSwitchMode: SwitchMode,
  parallelism: Int,
  maxPerSecond: Option[Int],
  decoders: Seq[ContentDecoder],
)(implicit val graph: CypherOpsGraph, protected val logConfig: LogConfig)
    extends RawValuesIngestSrcDef(
      format,
      initialSwitchMode,
      parallelism,
      maxPerSecond,
      decoders,
      s"$name (SSE ingest)",
      intoNamespace,
    ) {

  type InputType = ServerSentEvent

  override val ingestToken: IngestSrcExecToken = IngestSrcExecToken(s"$name: $url")

  /** Connects to `url` and emits each received [[ServerSentEvent]]. */
  def source(): Source[ServerSentEvent, NotUsed] = EventSource(
    uri = Uri(url),
    send = Http().singleRequest(_),
  )

  /** Encodes the event's `data` field as UTF-8 bytes.
    *
    * The charset is explicit so the bytes do not depend on the JVM's default charset:
    * a bare `getBytes` would replace non-ASCII characters on a JVM whose default is not UTF-8.
    */
  def rawBytes(event: ServerSentEvent): Array[Byte] = {
    import java.nio.charset.StandardCharsets
    event.data.getBytes(StandardCharsets.UTF_8)
  }
}
org.apache.pekko.stream.connectors.sqs.{MessageAction, SqsSourceSettings} import org.apache.pekko.stream.scaladsl.{Flow, Sink, Source} import org.apache.pekko.{Done, NotUsed} import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient import software.amazon.awssdk.services.sqs.SqsAsyncClient import software.amazon.awssdk.services.sqs.model.Message import com.thatdot.common.logging.Log.LogConfig import com.thatdot.quine.app.model.ingest.serialization.{ContentDecoder, ImportFormat} import com.thatdot.quine.app.model.ingest.util.AwsOps import com.thatdot.quine.app.model.ingest.util.AwsOps.AwsBuilderOps import com.thatdot.quine.graph.MasterStream.IngestSrcExecToken import com.thatdot.quine.graph.cypher.Value import com.thatdot.quine.graph.{CypherOpsGraph, NamespaceId} import com.thatdot.quine.routes.{AwsCredentials, AwsRegion} import com.thatdot.quine.util.SwitchMode /** The definition of an incoming AWS SQS stream. * * @param name the unique, human-facing name of the ingest stream * @param queueURL the URL of the SQS queue from which to read * @param format the [[ImportFormat]] to use in deserializing and writing records from the queue * @param initialSwitchMode is the ingest stream initially paused or not? 
/** The definition of an incoming AWS SQS stream.
  *
  * @param name               the unique, human-facing name of the ingest stream
  * @param intoNamespace      passed through to the parent [[RawValuesIngestSrcDef]]
  * @param queueURL           the URL of the SQS queue from which to read
  * @param format             the [[ImportFormat]] to use in deserializing and writing records from the queue
  * @param initialSwitchMode  is the ingest stream initially paused or not?
  * @param readParallelism    how many records to pull off the SQS queue at a time
  * @param writeParallelism   how many records to write to the graph at a time
  * @param credentialsOpt     the AWS credentials necessary to access the provided SQS queue
  * @param regionOpt          the AWS region handed to the SQS client builder
  * @param deleteReadMessages if true, issue an acknowledgement for each successfully-deserialized message,
  *                           causing SQS to delete that message from the queue
  * @param maxPerSecond       passed through to the parent [[RawValuesIngestSrcDef]]
  * @param decoders           passed through to the parent [[RawValuesIngestSrcDef]]
  */
final case class SqsStreamSrcDef(
  override val name: String,
  override val intoNamespace: NamespaceId,
  queueURL: String,
  format: ImportFormat,
  initialSwitchMode: SwitchMode,
  readParallelism: Int,
  writeParallelism: Int,
  credentialsOpt: Option[AwsCredentials],
  regionOpt: Option[AwsRegion],
  deleteReadMessages: Boolean,
  maxPerSecond: Option[Int],
  decoders: Seq[ContentDecoder],
)(implicit val graph: CypherOpsGraph, protected val logConfig: LogConfig)
    extends RawValuesIngestSrcDef(
      format,
      initialSwitchMode,
      writeParallelism,
      maxPerSecond,
      decoders,
      s"$name (SQS ingest)",
      intoNamespace,
    ) {

  type InputType = Message

  // Implicit because the SQS source and ack sink below take the client implicitly
  implicit val client: SqsAsyncClient = SqsAsyncClient
    .builder()
    .credentials(credentialsOpt)
    .region(regionOpt)
    .httpClient(
      NettyNioAsyncHttpClient.builder.maxConcurrency(AwsOps.httpConcurrencyPerClient).build(),
    )
    .build()

  // The client is closed only when the actor system terminates
  graph.system.registerOnTermination(client.close())

  override val ingestToken: IngestSrcExecToken = IngestSrcExecToken(s"$name: $queueURL")

  /** Reads messages from `queueURL`, issuing up to `readParallelism` parallel requests. */
  def source(): Source[Message, NotUsed] =
    SqsSource(queueURL, SqsSourceSettings().withParallelRequests(readParallelism))

  /** Encodes the message body as UTF-8 bytes.
    *
    * The charset is explicit so the bytes do not depend on the JVM's default charset:
    * a bare `getBytes` would replace non-ASCII characters on a JVM whose default is not UTF-8.
    */
  def rawBytes(message: Message): Array[Byte] = {
    import java.nio.charset.StandardCharsets
    message.body.getBytes(StandardCharsets.UTF_8)
  }

  /** For each element, executes the MessageAction specified, and if a Deserialized body is present, returns it.
    *
    * This sends an "ignore" message for messages that fail on deserialization. It's not clear if that's the
    * correct thing to do, but leaving it in for now as it's what the pre-existing code did.
    */
  override val ack: Flow[TryDeserialized, Done, NotUsed] = if (deleteReadMessages) {
    val ackSink: Sink[(Try[Value], Message), Future[Done]] = SqsAckSink(queueURL)
      .contramap[TryDeserialized] {
        case (Success(_), msg) => MessageAction.delete(msg)
        case (_, msg) => MessageAction.ignore(msg)
      }
      .named("sqs-ack-sink")
    // `alsoTo` sends each element to the ack sink as a side branch while the main flow emits Done
    Flow[TryDeserialized].alsoTo(ackSink).map(_ => Done.done())
  } else {
    Flow[TryDeserialized].map(_ => Done.done())
  }
}
initMessages: Seq[String], keepaliveProtocol: KeepaliveProtocol, parallelism: Int, encoding: String, initialSwitchMode: SwitchMode, )(implicit val graph: CypherOpsGraph, protected val logConfig: LogConfig) extends RawValuesIngestSrcDef( format, initialSwitchMode, parallelism, None, Seq(), s"$name (WS ingest)", intoNamespace, ) { type InputType = ByteString val (charset, _) = IngestSrcDef.getTranscoder(encoding) val baseHttpClientSettings: ClientConnectionSettings = ClientConnectionSettings(system) override val ingestToken: IngestSrcExecToken = IngestSrcExecToken(s"$name $wsUrl") /** placeholder for compile; unused */ override def rawBytes(value: ByteString): Array[Byte] = value.toArray // Copy (and potentially tweak) baseHttpClientSettings for websockets usage val httpClientSettings: ClientConnectionSettings = keepaliveProtocol match { case WebsocketSimpleStartupIngest.PingPongInterval(intervalMillis) => baseHttpClientSettings.withWebsocketSettings( baseHttpClientSettings.websocketSettings.withPeriodicKeepAliveMaxIdle(intervalMillis.millis), ) case WebsocketSimpleStartupIngest.SendMessageInterval(message, intervalMillis) => baseHttpClientSettings.withWebsocketSettings( baseHttpClientSettings.websocketSettings .withPeriodicKeepAliveMaxIdle(intervalMillis.millis) .withPeriodicKeepAliveData(() => ByteString(message, charset)), ) case WebsocketSimpleStartupIngest.NoKeepalive => baseHttpClientSettings } // NB Instead of killing this source with the downstream KillSwitch, we could switch this Source.never to a // Source.maybe, completing it with None to kill the connection -- this is closer to the docs for // webSocketClientFlow val outboundMessages: Source[TextMessage.Strict, NotUsed] = Source .fromIterator(() => initMessages.iterator) .map(TextMessage(_)) .concat(Source.never) .named("websocket-ingest-outbound-messages") val wsFlow: Flow[Message, Message, Future[WebSocketUpgradeResponse]] = Http() .webSocketClientFlow( WebSocketRequest(wsUrl), settings = 
httpClientSettings, ) .named("websocket-ingest-client") val (websocketUpgraded: Future[WebSocketUpgradeResponse], websocketSource: Source[Message, NotUsed]) = outboundMessages .viaMat(wsFlow)(Keep.right) .preMaterialize() val v: Source[ByteString, NotUsed] = websocketSource.flatMapConcat { case textMessage: TextMessage => textMessage.textStream .fold("")(_ + _) .map(ByteString.fromString(_, charset)) case m: BinaryMessage => m.dataStream.fold(ByteString.empty)(_ concat _) } def source(): Source[ByteString, NotUsed] = Source .futureSource(websocketUpgraded.transform { // if the websocket upgrade fails, return an already-failed Source case Success(InvalidUpgradeResponse(_, cause)) => Failure(new UpgradeFailedException(cause)) case Failure(ex) => Failure(new UpgradeFailedException(ex)) // the websocket upgrade succeeded: proceed with setting up the ingest stream source case Success(ValidUpgrade(_, _)) => Success(v) }(ExecutionContext.parasitic)) .mapMaterializedValue(_ => NotUsed) // TBD .mapMaterializedValue(_.flatten) } ================================================ FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest/serialization/ContentDecoder.scala ================================================ package com.thatdot.quine.app.model.ingest.serialization import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.Base64 import java.util.zip.{GZIPInputStream, GZIPOutputStream, InflaterInputStream, InflaterOutputStream} import org.apache.pekko.NotUsed import org.apache.pekko.http.scaladsl.coding.Coders import org.apache.pekko.stream.scaladsl.Flow import org.apache.pekko.util.ByteString import com.thatdot.quine.routes.RecordDecodingType /** A class that corresponds to a single type of content decoding. * Instances define a flow that performs decoding of the specified type. 
/** A class that corresponds to a single type of content decoding.
  * Instances define a flow that performs decoding of the specified type, plus the
  * matching encoding and in-memory (array-based) variants of both directions.
  */
sealed abstract class ContentDecoder() {

  /** Decodes a complete in-memory payload. */
  def decode(bytes: Array[Byte]): Array[Byte]

  /** Encodes a complete in-memory payload; intended to be the inverse of [[decode]]. */
  def encode(bytes: Array[Byte]): Array[Byte]

  /** Streaming counterpart of [[decode]]. */
  def decoderFlow: Flow[ByteString, ByteString, NotUsed]

  /** Streaming counterpart of [[encode]]. */
  def encoderFlow: Flow[ByteString, ByteString, NotUsed]
}

object ContentDecoder {

  /** Base64 using the JDK's basic (non-URL, non-MIME) codec. */
  case object Base64Decoder extends ContentDecoder {

    private val base64Decoder: Base64.Decoder = Base64.getDecoder
    private val base64Encoder: Base64.Encoder = Base64.getEncoder

    override def decode(bytes: Array[Byte]): Array[Byte] = base64Decoder.decode(bytes)

    override def encode(bytes: Array[Byte]): Array[Byte] = base64Encoder.encode(bytes)

    // Each ByteString element is decoded on its own
    override def decoderFlow: Flow[ByteString, ByteString, NotUsed] =
      Flow[ByteString].map(bs => ByteString(decode(bs.toArrayUnsafe())))

    // Each ByteString element is encoded on its own
    override def encoderFlow: Flow[ByteString, ByteString, NotUsed] =
      Flow[ByteString].map(bs => ByteString(encode(bs.toArrayUnsafe())))
  }

  /** Gzip, via `java.util.zip` for arrays and Pekko's `Coders.Gzip` for streams. */
  case object GzipDecoder extends ContentDecoder {

    override def decode(bytes: Array[Byte]): Array[Byte] = {
      val is = new GZIPInputStream(new ByteArrayInputStream(bytes))
      try is.readAllBytes()
      finally is.close()
    }

    override def encode(bytes: Array[Byte]): Array[Byte] = {
      val out = new ByteArrayOutputStream(bytes.length)
      val gzOut = new GZIPOutputStream(out)
      // close() finishes the gzip stream (writes the trailer) into `out`, even if write fails
      try gzOut.write(bytes)
      finally gzOut.close()
      out.toByteArray
    }

    def decoderFlow: Flow[ByteString, ByteString, NotUsed] = Coders.Gzip.decoderFlow

    def encoderFlow: Flow[ByteString, ByteString, NotUsed] = Coders.Gzip.encoderFlow
  }

  /** Zlib/deflate, via `java.util.zip` for arrays and Pekko's `Coders.Deflate` for streams. */
  case object ZlibDecoder extends ContentDecoder {

    override def decode(bytes: Array[Byte]): Array[Byte] = {
      val is = new InflaterInputStream(new ByteArrayInputStream(bytes))
      try is.readAllBytes()
      finally is.close()
    }

    /** Compresses `bytes` with deflate.
      *
      * This must use a ''deflating'' stream: `InflaterOutputStream` decompresses whatever is
      * written to it, so feeding it plain bytes fails (or yields garbage) instead of
      * compressing. `DeflaterOutputStream` with its default `Deflater` emits the format that
      * [[decode]]'s default `InflaterInputStream` reads back.
      */
    override def encode(bytes: Array[Byte]): Array[Byte] = {
      import java.util.zip.DeflaterOutputStream
      val out = new ByteArrayOutputStream()
      val zOut = new DeflaterOutputStream(out)
      // close() finishes the deflate stream and flushes the remaining compressed data into `out`
      try zOut.write(bytes)
      finally zOut.close()
      out.toByteArray
    }

    def decoderFlow: Flow[ByteString, ByteString, NotUsed] = Coders.Deflate.decoderFlow

    def encoderFlow: Flow[ByteString, ByteString, NotUsed] = Coders.Deflate.encoderFlow
  }

  /** V1 entities. */
  def apply(encodingType: RecordDecodingType): ContentDecoder = encodingType match {
    case RecordDecodingType.Base64 => Base64Decoder
    case RecordDecodingType.Gzip => GzipDecoder
    case RecordDecodingType.Zlib => ZlibDecoder
  }

  /** Applies the encoders in reverse sequence order (last element first), so that
    * `decode(decoders, encode(decoders, x))` undoes the layers in the order they are listed.
    */
  def encode(decoders: Seq[ContentDecoder], bytes: Array[Byte]): Array[Byte] =
    decoders.foldRight(bytes)((d, b) => d.encode(b))

  /** Applies the decoders in sequence order (first element first). */
  def decode(decoders: Seq[ContentDecoder], bytes: Array[Byte]): Array[Byte] =
    decoders.foldLeft(bytes)((b, d) => d.decode(b))

  /** [[ByteString]] variant of `decode`; returns the input untouched when there are no decoders. */
  def decode(decoders: Seq[ContentDecoder], bytes: ByteString): ByteString =
    if (decoders.nonEmpty) ByteString(decode(decoders, bytes.toArrayUnsafe())) else bytes

  /** Chains the decoder flows in sequence order (first element first). */
  def decoderFlow(decoders: Seq[ContentDecoder]): Flow[ByteString, ByteString, NotUsed] =
    decoders.foldLeft(Flow[ByteString])((flow, decoder) => flow.via(decoder.decoderFlow))

  /** Chains the encoder flows in reverse sequence order (last element first), mirroring `encode`. */
  def encoderFlow(decoders: Seq[ContentDecoder]): Flow[ByteString, ByteString, NotUsed] =
    decoders.foldRight(Flow[ByteString])((decoder, flow) => flow.via(decoder.encoderFlow))
}
/** Cypher procedure `parseProtobuf`: turns protobuf-encoded bytes into a Cypher value using a
  * message descriptor obtained from a schema cache.
  * The descriptor lookup returns a Future, which is why this is a procedure and not a function.
  */
class CypherParseProtobuf(private val cache: ProtobufSchemaCache) extends UserDefinedProcedure with LazySafeLogging {
  def name: String = "parseProtobuf"

  def canContainUpdates: Boolean = false

  def isIdempotent: Boolean = true

  def canContainAllNodeScan: Boolean = false

  /** Expects exactly `(bytes, schemaUrl, typeName)`; any other argument shape is a signature error.
    * Emits a single row holding the parsed value, or null when the bytes cannot be parsed as the
    * requested type.
    */
  def call(context: QueryContext, arguments: Seq[Value], location: ProcedureExecutionLocation)(implicit
    parameters: Parameters,
    timeout: Timeout,
    logConfig: LogConfig,
  ): Source[Vector[Value], _] = {
    implicit val prettyId: Pretty[QuineId] = location.idProvider

    val (payload, schemaLocation, messageType): (Array[Byte], URL, String) = arguments match {
      case Seq(Expr.Bytes(rawBytes, representsId), Expr.Str(schemaStr), Expr.Str(requestedType)) =>
        // Bytes that stand for a node ID are still parsed, but the situation is logged
        if (representsId)
          logger.info(
            safe"""Received an ID (${Safe(QuineId(rawBytes).pretty)}) as a source of
                  |bytes to parse a protobuf value of type: ${Safe(requestedType)}.""".cleanLines,
          )
        (rawBytes, filenameOrUrl(schemaStr), requestedType)
      case _ =>
        throw wrongSignature(arguments)
    }

    val descriptorFut = cache.getMessageDescriptor(schemaLocation, messageType, flushOnFail = true)

    Source.future(descriptorFut).map { descriptor =>
      val parser = new ProtobufParser(descriptor)
      // Ideally, recovery here would follow the configuration of the context the query runs in
      // (e.g. error in an ad-hoc query, null in an ingest unless that ingest halts on error).
      // That information is not available at this point, so corrupted records become null.
      val parsed: Value = Try[Value](parser.parseBytes(payload)).recover {
        case e @ (_: ClassCastException | _: InvalidProtocolBufferException) =>
          logger.warn(
            log"${Safe(name)} procedure received corrupted protobuf record -- returning null" withException e,
          )
          Expr.Null
      }.get
      Vector(parsed)
    }
  }

  def signature: UserDefinedProcedureSignature = UserDefinedProcedureSignature(
    arguments = Seq("bytes" -> Type.Bytes, "schemaUrl" -> Type.Str, "typeName" -> Type.Str),
    outputs = Seq("value" -> Type.Map),
    description =
      "Parses a protobuf message into a Cypher map value, or null if the bytes are not parseable as the requested type",
  )
}
String) = arguments match { case Seq(Expr.Map(value), Expr.Str(schemaUrl), Expr.Str(typeName)) => (value.fmap(Expr.toQuineValue(_).getOrThrow), filenameOrUrl(schemaUrl), typeName) case _ => throw wrongSignature(arguments) } Source .future(cache.getMessageDescriptor(schemaUrl, typeName, flushOnFail = true)) .map(new QuineValueToProtobuf(_)) .map { serializer => val result: Value = Try(serializer.toProtobufBytes(value)) .map { case Left(conversionFailures @ _) => Expr.Null case Right(value) => Expr.Bytes(value, representsId = false) } .recover { case _: IllegalArgumentException => Expr.Null } .get Vector(result) } } def signature: UserDefinedProcedureSignature = UserDefinedProcedureSignature( arguments = Seq("value" -> Type.Map, "schemaUrl" -> Type.Str, "typeName" -> Type.Str), outputs = Seq("protoBytes" -> Type.Bytes), description = """Serializes a Cypher value into bytes, according to a protobuf schema. |Returns null if the value is not serializable as the requested type |""".stripMargin.replace('\n', ' ').trim, ) } ================================================ FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest/serialization/ImportFormat.scala ================================================ package com.thatdot.quine.app.model.ingest.serialization import scala.concurrent.{ExecutionContext, Future, Promise} import scala.util.{Failure, Success, Try} import org.apache.pekko.Done import org.apache.pekko.stream.scaladsl.Sink import com.codahale.metrics.Timer import com.typesafe.config.ConfigFactory import io.circe.jawn.CirceSupportParser import com.thatdot.common.logging.Log.{LazySafeLogging, LogConfig, Safe, SafeLoggableInterpolator} import com.thatdot.quine.app.util.AtLeastOnceCypherQuery import com.thatdot.quine.compiler import com.thatdot.quine.graph.cypher.quinepattern.{ CypherAndQuineHelpers, OutputTarget, QueryContext => QPQueryContext, QueryPlanner, RuntimeMode, } import com.thatdot.quine.graph.cypher.{CompiledQuery, Location} import 
/** Describes formats that Quine can import
  * Deserialized type refers to the (nullable) type to be produced by invocations of this [[ImportFormat]]
  */
trait ImportFormat {

  /** Tries to turn one raw record into a [[cypher.Value]]. Invoked once per imported value.
    *
    * @param data the raw data to decode
    * @return a Success exactly when a [[cypher.Value]] could be produced from `data`; otherwise a
    *         Failure describing the deserialization error. Such Failures should never be fatal.
    */
  protected def importBytes(data: Array[Byte]): Try[cypher.Value]

  /** Size-checked, timed wrapper around [[importBytes]]. Unless the cluster is a single host,
    * payloads longer than [[ImportFormat.pekkoMessageSizeLimit]] are rejected without being
    * decoded, because they (probably) could not be sent safely via pekko clustered messaging.
    *
    * @param data                 byte payload
    * @param isSingleHost         is the cluster just one host (in which case there is no risk of oversize payloads)
    * @param deserializationTimer timer recording the duration of successful deserializations
    * @return the decoded value, or a Failure for oversize or undecodable input
    */
  final def importMessageSafeBytes(
    data: Array[Byte],
    isSingleHost: Boolean,
    deserializationTimer: Timer,
  ): Try[cypher.Value] = {
    // `&&` short-circuits, so the size limit is only consulted for multi-host clusters
    val tooLargeForCluster = !isSingleHost && data.length > pekkoMessageSizeLimit
    if (tooLargeForCluster) {
      val rejection = new Exception(
        s"Attempted to decode ${data.length} bytes, but records larger than $pekkoMessageSizeLimit bytes are prohibited.",
      )
      Failure(rejection)
    } else {
      val stopwatch = deserializationTimer.time()
      val outcome = importBytes(data)
      // The timing is recorded for successes only; a failed attempt never stops the stopwatch
      outcome.foreach(_ => stopwatch.stop())
      outcome
    }
  }

  /** A description of the import format.
    */
  def label: String

  /** An estimated limit on record size (based on the pekko remote frame size with 15kb of headspace) */
  lazy val pekkoMessageSizeLimit: Long =
    ConfigFactory.load().getBytes("pekko.remote.artery.advanced.maximum-frame-size") - 15 * 1024

  /** Writes one deserialized value into the given namespace of the graph.
    *
    * @return a Future of [[Done]] for the write
    */
  def writeValueToGraph(
    graph: CypherOpsGraph,
    intoNamespace: NamespaceId,
    deserialized: cypher.Value,
  ): Future[Done]
}
If timeouts occur, query |execution may be retried and duplicate data may be created.""".cleanLines, ) } def writeValueToGraph( graph: CypherOpsGraph, intoNamespace: NamespaceId, deserialized: cypher.Value, ): Future[Done] = atLeastOnceQuery .stream(deserialized, intoNamespace)(graph) .runWith(Sink.ignore)(graph.materializer) } /** An abstract implementation of the `ImportFormat` trait that allows importing * data into Quine graphs, utilizing the Quine Pattern query language. * * @constructor Creates a new instance of `QuinePatternImportFormat`. * @param query the Quine Pattern query that defines how the data should be interpreted. * @param parameter the symbol in the query to be replaced with deserialized data during execution. * * This class processes a defined query using the Quine Pattern query pipeline, * which includes lexing, parsing, symbol analysis, and query planning. The resulting * `QueryPlan` is used for interpreting data and writing it into a Quine graph. * * The class checks the system property `qp.enabled` to ensure the Quine Pattern * functionality is enabled, throwing an error if not configured correctly. * * The `writeValueToGraph` method interprets the compiled query with the provided * deserialized data and writes it to the target namespace in the Quine graph. 
*/
abstract class QuinePatternImportFormat(query: String, parameter: String) extends ImportFormat with LazySafeLogging {

  // Value of the `qp.enabled` system property parsed as a boolean; None when the property is unset
  // or is not a valid boolean literal.
  val maybeIsQPEnabled: Option[Boolean] = for {
    pv <- Option(System.getProperty("qp.enabled"))
    b <- pv.toBooleanOption
  } yield b

  // Refuse construction unless the feature was explicitly switched on.
  maybeIsQPEnabled match {
    case Some(true) => ()
    case _ => sys.error("Quine pattern must be enabled using -Dqp.enabled=true to use this feature.")
  }

  override val label: String = "QuinePattern " + query

  implicit protected def logConfig: LogConfig

  // Planned once at construction; an unplannable query fails construction.
  val planned: QueryPlanner.PlannedQuery = QueryPlanner.planFromString(query) match {
    case Right(p) => p
    case Left(error) => throw new IllegalArgumentException(s"Failed to compile query: $error")
  }

  /** Run the planned query eagerly with `deserialized` bound to `parameter`.
    *
    * @param graph         must also be a [[QuinePatternOpsGraph]] (it is cast to one)
    * @param intoNamespace namespace the query runs against
    * @param deserialized  value to bind to the query parameter
    * @return a Future completing with Done once the query's results have been collected, or a failed
    *         Future when `deserialized` cannot be converted to a pattern value
    */
  def writeValueToGraph(
    graph: CypherOpsGraph,
    intoNamespace: NamespaceId,
    deserialized: cypher.Value,
  ): Future[Done] = {
    implicit val ec: ExecutionContext = graph.system.dispatcher

    // Typecast is required here because `ImportFormat` is hard coded
    // to existing Quine structures
    val patternGraph = graph.asInstanceOf[QuinePatternOpsGraph]

    CypherAndQuineHelpers.cypherValueToPatternValue(graph.idProvider)(deserialized) match {
      case Left(error) =>
        // Report conversion problems through the returned Future instead of throwing synchronously,
        // so that callers composing on the Future observe every failure in one place.
        Future.failed(error)
      case Right(deserializedPatternValue) =>
        // Create a promise that will be completed when the query finishes
        val resultPromise = Promise[Seq[QPQueryContext]]()

        patternGraph.getLoader ! LoadQuery(
          StandingQueryId.fresh(),
          planned.plan,
          RuntimeMode.Eager,
          Map(Symbol(parameter) -> deserializedPatternValue),
          intoNamespace,
          OutputTarget.EagerCollector(resultPromise),
          planned.returnColumns,
          planned.outputNameMapping,
          // `atTime` defaults to `None` (current state) because ingest queries process incoming
          // data against the current graph; historical ingest queries are not currently supported.
        )

        // Convert the promise to Done when complete
        resultPromise.future.map(_ => Done)
    }
  }
}

//"Drop Format" should not run a query but should still read from ...
/** Cypher-query ingest format whose records are JSON documents. */
class CypherJsonInputFormat(query: String, parameter: String)(implicit val logConfig: LogConfig)
    extends CypherImportFormat(query, parameter) {

  override def importBytes(data: Array[Byte]): Try[cypher.Value] =
    // deserialize bytes into JSON without going through string
    new CirceSupportParser(maxValueSize = None, allowDuplicateKeys = false)
      .parseFromByteArray(data)
      .map(cypher.Value.fromJson)
}

/** Quine Pattern ingest format whose records are JSON documents. */
class QuinePatternJsonInputFormat(query: String, parameter: String)(implicit val logConfig: LogConfig)
    extends QuinePatternImportFormat(query, parameter) {

  override def importBytes(data: Array[Byte]): Try[cypher.Value] =
    new CirceSupportParser(maxValueSize = None, allowDuplicateKeys = false)
      .parseFromByteArray(data)
      .map(cypher.Value.fromJson)
}

/** Cypher-query ingest format that decodes each record as text.
  *
  * @param charset name of the charset used to decode the bytes
  */
class CypherStringInputFormat(query: String, parameter: String, charset: String)(implicit val logConfig: LogConfig)
    extends CypherImportFormat(query, parameter) {

  // `new String(bytes, charsetName)` throws UnsupportedEncodingException for an unknown charset name.
  // Capture that in the Try so this method honours the `importBytes` contract of returning a Failure
  // rather than throwing.
  override def importBytes(arr: Array[Byte]): Try[cypher.Value] =
    Try(cypher.Expr.Str(new String(arr, charset)))
}

/** Quine Pattern ingest format that decodes each record as text.
  *
  * @param charset name of the charset used to decode the bytes
  */
class QuinePatternStringInputFormat(query: String, parameter: String, charset: String)(implicit
  val logConfig: LogConfig,
) extends QuinePatternImportFormat(query, parameter) {

  // See CypherStringInputFormat: an unknown charset name must surface as a Failure, not an exception.
  override protected def importBytes(data: Array[Byte]): Try[cypher.Value] =
    Try(cypher.Expr.Str(new String(data, charset)))
}

/** Quine Pattern ingest format that passes each record through as a raw byte value. */
class QuinePatternRawInputFormat(query: String, parameter: String)(implicit val logConfig: LogConfig)
    extends QuinePatternImportFormat(query, parameter) {

  override def importBytes(arr: Array[Byte]): Try[cypher.Value] =
    Success(cypher.Expr.Bytes(arr, representsId = false))
}

/** Cypher-query ingest format that passes each record through as a raw byte value. */
class CypherRawInputFormat(query: String, parameter: String)(implicit val logConfig: LogConfig)
    extends CypherImportFormat(query, parameter) {

  override def importBytes(arr: Array[Byte]): Try[cypher.Value] =
    Success(cypher.Expr.Bytes(arr, representsId = false))
}

class ProtobufInputFormat(query: String, parameter: String, parser: ProtobufParser)(implicit val logConfig: LogConfig)
    extends
CypherImportFormat(query, parameter) {

  // Any exception thrown by the parser is captured as a Failure.
  override protected def importBytes(data: Array[Byte]): Try[cypher.Value] = Try(parser.parseBytes(data))
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest/serialization/ProtobufParser.scala
================================================
package com.thatdot.quine.app.model.ingest.serialization

import com.google.protobuf.Descriptors.Descriptor
import com.google.protobuf.{DynamicMessage, InvalidProtocolBufferException}

import com.thatdot.data.DataFoldableFrom
import com.thatdot.quine.app.data.QuineDataFoldersTo
import com.thatdot.quine.graph.cypher.Value

/** Turns Protobuf-encoded messages into cypher values, using a fixed message schema.
  *
  * @param messageDescriptor descriptor of the message type every payload is parsed as
  */
class ProtobufParser(messageDescriptor: Descriptor) {

  /** Parse `bytes` as a message of the configured type and fold it into a cypher [[Value]]. */
  @throws[InvalidProtocolBufferException]
  @throws[ClassCastException]
  def parseBytes(bytes: Array[Byte]): Value = {
    val message: DynamicMessage = DynamicMessage.parseFrom(messageDescriptor, bytes)
    DataFoldableFrom.protobufDataFoldable.fold(message, QuineDataFoldersTo.cypherValueFolder)
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest/util/AwsOps.scala
================================================
package com.thatdot.quine.app.model.ingest.util

import scala.reflect.{ClassTag, classTag}

import software.amazon.awssdk.auth.credentials.{
  AwsBasicCredentials,
  AwsCredentialsProvider,
  DefaultCredentialsProvider,
  StaticCredentialsProvider,
}
import software.amazon.awssdk.awscore.client.builder.AwsClientBuilder
import software.amazon.awssdk.regions.Region

import com.thatdot.aws.{util => awsutil}
import com.thatdot.common.logging.Log._
import com.thatdot.common.security.Secret
import com.thatdot.quine.{routes => V1}

case object AwsOps extends LazySafeLogging {
  // the maximum number of simultaneous API requests any individual AWS client should make
  // invariant: all AWS clients using HTTP will set this as a maximum concurrency value
  val
  httpConcurrencyPerClient: Int = awsutil.AwsOps.httpConcurrencyPerClient

  /** Build a credentials provider: a static one when credentials are supplied, otherwise AWS's
    * DefaultCredentialsProvider.
    */
  def staticCredentialsProvider(credsOpt: Option[V1.AwsCredentials]): AwsCredentialsProvider =
    credsOpt match {
      case Some(credentials) =>
        import Secret.Unsafe._
        val basicCredentials =
          AwsBasicCredentials.create(credentials.accessKeyId.unsafeValue, credentials.secretAccessKey.unsafeValue)
        StaticCredentialsProvider.create(basicCredentials)
      case None =>
        DefaultCredentialsProvider.builder().build()
    }

  implicit class AwsBuilderOps[Client: ClassTag, Builder <: AwsClientBuilder[Builder, Client]](
    builder: AwsClientBuilder[Builder, Client],
  ) {

    /** Set the credentials for this AWS client. Credentials that are provided are used explicitly.
      * When absent, credentials are inferred from the environment according to AWS's
      * DefaultCredentialsProvider. This may have security implications! Ensure your environment only
      * contains environment variables, java system properties, aws credentials files, and instance
      * profile credentials you trust!
      *
      * @see https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html#credentials-default
      *
      * If you are deploying on EC2 and do NOT wish to use EC2 container metadata/credentials, ensure the
      * java property `aws.disableEc2Metadata` is set to true, or the environment variable
      * AWS_EC2_METADATA_DISABLED is set to true. Note that this will also disable region lookup, and thus
      * require all AWS client constructions to explicitly set credentials.
      *
      * @param credsOpt if set, aws credentials to use explicitly
      * @return the builder, with its credentials provider configured
      */
    def credentials(credsOpt: Option[V1.AwsCredentials]): Builder = {
      if (credsOpt.isEmpty)
        logger.info(
          safe"""No AWS credentials provided while building AWS client of type
                |${Safe(classTag[Client].runtimeClass.getSimpleName)}. Defaulting
                |to environmental credentials.""".cleanLines,
        )
      builder.credentialsProvider(staticCredentialsProvider(credsOpt))
    }

    /** Set the region for this AWS client; when absent, the environment's settings are left in effect. */
    def region(regionOpt: Option[V1.AwsRegion]): Builder =
      regionOpt match {
        case Some(awsRegion) =>
          builder.region(Region.of(awsRegion.region))
        case None =>
          logger.info(
            safe"""No AWS region provided while building AWS client of type:
                  |${Safe(classTag[Client].runtimeClass.getSimpleName)}.
                  |Defaulting to environmental settings.""".cleanLines,
          )
          builder.applyMutation(_ => ()) // return the builder unmodified
      }
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest/util/KafkaSettingsValidator.scala
================================================
package com.thatdot.quine.app.model.ingest.util

import java.lang.reflect.Field
import java.net.{InetSocketAddress, Socket}

import scala.concurrent.duration.FiniteDuration
import scala.concurrent.{ExecutionContext, Future, blocking}
import scala.jdk.CollectionConverters._
import scala.util.{Failure, Success, Try}

import cats.data.NonEmptyList
import com.google.common.net.HostAndPort
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.common.config.SaslConfigs.SASL_JAAS_CONFIG
import org.apache.kafka.common.config.{AbstractConfig, ConfigDef, ConfigValue}

import com.thatdot.common.logging.Log._
import com.thatdot.quine.app.model.ingest.util.KafkaSettingsValidator.ErrorString
import com.thatdot.quine.routes.KafkaIngest.KafkaProperties
import com.thatdot.quine.routes.KafkaOffsetCommitting

object KafkaSettingsValidator extends LazySafeLogging {

  type ErrorString = String

  // Reads the static `CONFIG` field (a ConfigDef) of the given Kafka config class via reflection.
  private def underlyingValidator[C <: AbstractConfig](c: Class[C]): ConfigDef =
    Try {
      val configField: Field = c.getDeclaredField("CONFIG")
      configField.setAccessible(true)
      configField.get(null).asInstanceOf[ConfigDef]
    } match {
      case Failure(e) =>
        // Should be impossible.
logger.error(
          safe"""Expected Kafka settings validator to be available at ${Safe(c.getName)}.CONFIG --
                |did you override your classpath with a custom kafka JAR? Kafka config validation
                |will now fail.""".cleanLines,
        )
        throw e
      case Success(validator) => validator
    }

  /** Will return error strings or None.
    * If [[assumeConfigIsFinal]] is true, the properties will also be checked against kafka's internal property
    * validator (additional checks include things like verifying that values fall within enumerated options and that
    * all required fields to construct a Kafka Consumer are present)
    *
    * @param properties               user-supplied Kafka consumer properties
    * @param explicitGroupId          group id set through the ingest API, if any
    * @param explicitOffsetCommitting offset-committing behaviour set through the ingest API, if any
    * @param assumeConfigIsFinal      whether `properties` is the fully merged, final consumer config
    * @return None when no problems were found, otherwise the non-empty list of error messages
    */
  def validateInput(
    properties: KafkaProperties,
    explicitGroupId: Option[String] = None,
    explicitOffsetCommitting: Option[KafkaOffsetCommitting] = None,
    assumeConfigIsFinal: Boolean = false,
  ): Option[NonEmptyList[String]] = {
    val v = new KafkaSettingsValidator(underlyingValidator(classOf[ConsumerConfig]), properties)

    /*
      these values have no direct analogues in Kafka settings:

      - parallelism: Int
      - ingest.topics
      - ingest.format
     */

    val errors: Seq[String] =
      if (assumeConfigIsFinal) {
        // config is already merged, so we can rely on the kafka-provided validator for any errors
        for {
          validatedConfigEntry <- v.underlyingValues
          configName = validatedConfigEntry.name()
          // TODO why does a finalized config not have key.deserializer set?
          //      Does pekko tack it on in settings.consumerFactory?
          if configName != ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG
          if configName != ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG
          err: ErrorString <- validatedConfigEntry.errorMessages.asScala
        } yield s"Error in Kafka setting $configName: $err"
      } else {
        // config is not yet merged (multiple sources of truth), so we can look for conflicts between the parts of config
        List(
          v.findConflict(Set(CommonClientConfigs.GROUP_ID_CONFIG), explicitGroupId),
          // Pass the Option itself: wrapping it in `Some` made the "explicit setting" always look
          // present, so auto-commit properties were rejected even when no offset committing was
          // configured through the API.
          v.findConflict(
            Set(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG),
            explicitOffsetCommitting,
          ),
          // bootstrap servers is mandatory on ingest. If it is set in properties that's a conflict
          v.disallowField(
            CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG,
            "Please use the Kafka ingest `bootstrapServers` field.",
          ),
          v.disallowField(
            ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
            "Please use one of the `format` field cypher options, which rely on their hard-coded deserializers.",
          ),
          v.disallowField(
            ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
            "Please use one of the `format` field cypher options, which rely on their hard-coded deserializers.",
          ),
          v.disallowField(
            CommonClientConfigs.SECURITY_PROTOCOL_CONFIG,
            "Please use the Kafka ingest `securityProtocol` field.",
          ),
          v.disallowField(
            ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
            "Please use the Kafka ingest `autoOffsetReset` field.",
          ),
          //
          // --- if any of these keys points to something containing "com.sun.security.auth.module.JndiLoginModule"
          //
          // Conservative fix for CVE-2023-25194: disable keys including ${SaslConfigs.SASL_JAAS_CONFIG}
          v.disallowJaasSubstring(SASL_JAAS_CONFIG),
          // these 3 config scopes may allow "overrides" -- the security advisory at https://archive.ph/P6q2A
          // recommends blacklisting the `override` subkey for each scope. These are already considered
          // invalid by `unrecognizedProperties`, but better safe than sorry.
          v.disallowJaasSubstring(s"producer.override.$SASL_JAAS_CONFIG"),
          v.disallowJaasSubstring(s"consumer.override.$SASL_JAAS_CONFIG"),
          v.disallowJaasSubstring(s"admin.override.$SASL_JAAS_CONFIG"),
        ).flatten
      }

    v.withUnrecognizedErrors(errors)
  }

  /** Validate user-supplied Kafka producer properties for a result output.
    *
    * @param properties user-supplied Kafka producer properties
    * @return None when no problems were found, otherwise the non-empty list of error messages
    */
  def validateProperties(properties: KafkaProperties): Option[NonEmptyList[String]] = {
    val v = new KafkaSettingsValidator(underlyingValidator(classOf[ProducerConfig]), properties)

    val errors: Seq[ErrorString] = List(
      // bootstrap servers is mandatory. If it is set in properties that's a conflict
      v.disallowField(
        CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG,
        "Please use the result output `bootstrapServers` field.",
      ),
      v.disallowField(
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
        "Please use one of the `format` field cypher options, which rely on their hard-coded deserializers.",
      ),
      //
      // --- if any of these keys points to something containing "com.sun.security.auth.module.JndiLoginModule"
      //
      // Conservative fix for CVE-2023-25194: disable keys including ${SaslConfigs.SASL_JAAS_CONFIG}
      v.disallowJaasSubstring(SASL_JAAS_CONFIG),
      // these 3 config scopes may allow "overrides" -- the security advisory at https://archive.ph/P6q2A
      // recommends blacklisting the `override` subkey for each scope. These are already considered
      // invalid by `unrecognizedProperties`, but better safe than sorry.
      v.disallowJaasSubstring(s"producer.override.$SASL_JAAS_CONFIG"),
      v.disallowJaasSubstring(s"consumer.override.$SASL_JAAS_CONFIG"),
      v.disallowJaasSubstring(s"admin.override.$SASL_JAAS_CONFIG"),
    ).flatten

    v.withUnrecognizedErrors(errors)
  }

  /** Parse a single bootstrap server string "host:port" into (host, port).
    * Uses Guava's HostAndPort which handles IPv6 addresses correctly.
*/
  private def parseOneServer(server: String): Either[String, (String, Int)] =
    Try(HostAndPort.fromString(server)) match {
      // a host without an explicit port is rejected just like an unparseable string
      case Success(hostAndPort) if hostAndPort.hasPort => Right((hostAndPort.getHost, hostAndPort.getPort))
      case _ => Left(s"Invalid bootstrap server format (expected host:port): $server")
    }

  /** Split a comma-separated Kafka bootstrap servers string into its (host, port) entries.
    *
    * Entries are trimmed and blank entries are dropped before parsing.
    *
    * @return Right with every parsed (host, port) when all entries are well-formed; otherwise Left with
    *         one message per malformed entry, or a single message when there are no entries at all
    */
  def parseBootstrapServers(bootstrapServers: String): Either[NonEmptyList[String], NonEmptyList[(String, Int)]] =
    bootstrapServers.split(",").iterator.map(_.trim).filter(_.nonEmpty).toList match {
      case Nil =>
        Left(NonEmptyList.one("No bootstrap servers specified"))
      case entries =>
        val (malformed, wellFormed) = entries.partitionMap(parseOneServer)
        NonEmptyList.fromList(malformed) match {
          case Some(problems) => Left(problems)
          // `entries` is non-empty and nothing was malformed, so `wellFormed` cannot be empty
          case None => Right(NonEmptyList.fromListUnsafe(wellFormed))
        }
    }

  /** Attempt a TCP connection to one server. Yields None on success, Some(error message) on failure.
    * The socket is always closed afterwards; errors while closing are ignored.
    */
  private def tryConnect(host: String, port: Int, timeoutMs: Int): Option[String] = {
    val probe = new Socket()
    try {
      val address = new InetSocketAddress(host, port)
      probe.connect(address, timeoutMs)
      None
    } catch {
      case e: Exception =>
        val reason = e.getMessage
        Some(s"Cannot connect to $host:$port: $reason")
    } finally
      try probe.close()
      catch { case _: Exception => }
  }

  /** Check TCP connectivity to at least one bootstrap server.
    * Attempts connections in parallel, returning success as soon as any one server is reachable.
    * Logs warnings for any connection failures (useful for users even if some servers succeeded).
    * Returns None if at least one server is reachable, Some(errors) if all fail.
*/
  def checkBootstrapConnectivity(
    bootstrapServers: String,
    timeout: FiniteDuration,
  )(implicit ec: ExecutionContext): Future[Option[NonEmptyList[String]]] =
    parseBootstrapServers(bootstrapServers) match {
      case Left(parseErrors) => Future.successful(Some(parseErrors))
      case Right(servers) =>
        // The socket timeout is an Int of milliseconds. Clamp instead of truncating: a plain `.toInt`
        // wraps around for durations above Int.MaxValue ms, which can produce a negative timeout
        // (rejected by the socket) or zero (wait forever).
        val timeoutMs: Int = math.min(timeout.toMillis, Int.MaxValue.toLong).toInt
        val attempts = servers.toList.map { case (host, port) =>
          Future(blocking(tryConnect(host, port, timeoutMs))).map {
            case None => Right(()) // Success
            case Some(err) =>
              logger.warn(safe"Kafka bootstrap server connectivity warning: ${Safe(err)}")
              Left(err) // Failure
          }
        }
        Future.find(attempts)(_.isRight).flatMap {
          case Some(_) => Future.successful(None) // At least one succeeded
          case None =>
            // All failed - Future.find only returns None after all futures complete
            Future.sequence(attempts).map(results => NonEmptyList.fromList(results.collect { case Left(e) => e }))
        }
    }

  /** Validates Kafka output properties AND checks bootstrap server connectivity.
    * First performs synchronous property validation, then checks connectivity.
    *
    * @param properties       user-supplied Kafka producer properties
    * @param bootstrapServers comma-separated "host:port" entries
    * @param timeout          connection timeout applied to each server
    * @return the property validation errors if there are any (connectivity is then not checked),
    *         otherwise the result of the connectivity check
    */
  def validatePropertiesWithConnectivity(
    properties: KafkaProperties,
    bootstrapServers: String,
    timeout: FiniteDuration,
  )(implicit ec: ExecutionContext): Future[Option[NonEmptyList[String]]] =
    validateProperties(properties) match {
      case Some(syntaxErrors) => Future.successful(Some(syntaxErrors))
      case None => checkBootstrapConnectivity(bootstrapServers, timeout)
    }
}

class KafkaSettingsValidator(
  validator: ConfigDef,
  properties: KafkaProperties,
) extends LazySafeLogging {

  // Names of every config key the wrapped Kafka ConfigDef knows about.
  private val underlyingKnownKeys: Set[String] = validator.configKeys.values.asScala.map(_.name).toSet

  // Result of running the wrapped Kafka ConfigDef's own validation over the properties.
  def underlyingValues: Seq[ConfigValue] = validator.validate(properties.asJava).asScala.toVector

  /** Variables that have analogues in kafka properties. Settings in both properties
    * and the direct setting via the api should generate errors.
Use this when the
    * setting must be provided via EITHER the API or the properties object, but not
    * both
    */
  protected def findConflict(
    keys: Set[String],
    ingestField: Option[_],
  ): Option[ErrorString] =
    if (ingestField.isEmpty) None // nothing was set through the API, so nothing can conflict
    else {
      val usedKeys: Set[ErrorString] = properties.keySet.intersect(keys)
      Option.when(usedKeys.nonEmpty)(s"Property value conflicts with property ${usedKeys.mkString(",")}")
    }

  /** Reject `key` when its value mentions the JndiLoginModule JAAS login module. */
  protected def disallowJaasSubstring(key: String): Option[ErrorString] = {
    val forbiddenJaasModule = "com.sun.security.auth.module.JndiLoginModule"
    properties
      .get(key)
      .filter(_.contains(forbiddenJaasModule))
      .map(userSetValue => s"$key may not be set to: $userSetValue, as it contains: $forbiddenJaasModule")
  }

  /** Reject a property that duplicates a setting which MUST be provided via the API.
    *
    * @param key         the property name that may not appear in the properties map
    * @param errorString guidance appended to the error message
    */
  protected def disallowField(key: String, errorString: String): Option[ErrorString] =
    Option.when(properties.contains(key))(s"$key is not allowed in the kafkaProperties Map. $errorString")

  // Empty when every property name is known to the wrapped ConfigDef; otherwise a single message
  // listing all unknown names.
  val unrecognizedPropertiesError: List[String] = {
    val unknownKeys = properties.keySet.diff(underlyingKnownKeys)
    if (unknownKeys.isEmpty) Nil
    else List(s"Unrecognized properties: ${unknownKeys.mkString(",")}")
  }

  /** Put the unrecognized-property error (if any) ahead of `errors`; None when there are no errors at all. */
  def withUnrecognizedErrors(errors: Seq[String]): Option[NonEmptyList[String]] = {
    val allErrors = unrecognizedPropertiesError ++ errors
    NonEmptyList.fromList(allErrors)
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/V1IngestCodecs.scala
================================================
package com.thatdot.quine.app.model.ingest2

import cats.implicits.catsSyntaxEitherId
import io.circe.Encoder.encodeString
import io.circe.generic.extras.semiauto.{
  deriveConfiguredDecoder,
  deriveConfiguredEncoder,
  deriveEnumerationDecoder,
  deriveEnumerationEncoder,
}
import io.circe.{Decoder, Encoder, Json}

import com.thatdot.api.codec.SecretCodecs
import com.thatdot.api.codec.SecretCodecs._
import com.thatdot.api.v2.TypeDiscriminatorConfig.instances.circeConfig
import com.thatdot.common.security.Secret
import com.thatdot.quine.{routes => V1}

/** Circe encoders and decoders for V1 routes types used by V2 ingest.
  *
  * These types are defined in quine-endpoints which doesn't have Circe dependencies,
  * so codecs are provided here.
  *
  * For Tapir schemas, see [[V1IngestSchemas]].
* * Usage: * {{{ * import com.thatdot.quine.app.model.ingest2.V1IngestCodecs._ * }}} */ object V1IngestCodecs { implicit val csvCharacterEncoder: Encoder[V1.CsvCharacter] = deriveEnumerationEncoder implicit val csvCharacterDecoder: Decoder[V1.CsvCharacter] = deriveEnumerationDecoder implicit val recordDecodingTypeEncoder: Encoder[V1.RecordDecodingType] = deriveEnumerationEncoder implicit val recordDecodingTypeDecoder: Decoder[V1.RecordDecodingType] = deriveEnumerationDecoder implicit val fileIngestModeEncoder: Encoder[V1.FileIngestMode] = deriveEnumerationEncoder implicit val fileIngestModeDecoder: Decoder[V1.FileIngestMode] = deriveEnumerationDecoder implicit val kafkaAutoOffsetResetEncoder: Encoder[V1.KafkaAutoOffsetReset] = deriveEnumerationEncoder implicit val kafkaAutoOffsetResetDecoder: Decoder[V1.KafkaAutoOffsetReset] = deriveEnumerationDecoder implicit val ingestStreamStatusEncoder: Encoder[V1.IngestStreamStatus] = deriveEnumerationEncoder implicit val ingestStreamStatusDecoder: Decoder[V1.IngestStreamStatus] = deriveEnumerationDecoder // KafkaSecurityProtocol uses custom codec for name mapping implicit val kafkaSecurityProtocolEncoder: Encoder[V1.KafkaSecurityProtocol] = encodeString.contramap[V1.KafkaSecurityProtocol](_.name) implicit val kafkaSecurityProtocolDecoder: Decoder[V1.KafkaSecurityProtocol] = Decoder.decodeString.emap { case s if s == V1.KafkaSecurityProtocol.PlainText.name => V1.KafkaSecurityProtocol.PlainText.asRight case s if s == V1.KafkaSecurityProtocol.Ssl.name => V1.KafkaSecurityProtocol.Ssl.asRight case s if s == V1.KafkaSecurityProtocol.Sasl_Ssl.name => V1.KafkaSecurityProtocol.Sasl_Ssl.asRight case s if s == V1.KafkaSecurityProtocol.Sasl_Plaintext.name => V1.KafkaSecurityProtocol.Sasl_Plaintext.asRight case s => Left(s"$s is not a valid KafkaSecurityProtocol") } implicit val kafkaOffsetCommittingEncoder: Encoder[V1.KafkaOffsetCommitting] = deriveConfiguredEncoder implicit val kafkaOffsetCommittingDecoder: 
Decoder[V1.KafkaOffsetCommitting] = deriveConfiguredDecoder implicit val awsCredentialsEncoder: Encoder[V1.AwsCredentials] = deriveConfiguredEncoder implicit val awsCredentialsDecoder: Decoder[V1.AwsCredentials] = deriveConfiguredDecoder implicit val awsRegionEncoder: Encoder[V1.AwsRegion] = deriveConfiguredEncoder implicit val awsRegionDecoder: Decoder[V1.AwsRegion] = deriveConfiguredDecoder implicit val keepaliveProtocolEncoder: Encoder[V1.WebsocketSimpleStartupIngest.KeepaliveProtocol] = deriveConfiguredEncoder implicit val keepaliveProtocolDecoder: Decoder[V1.WebsocketSimpleStartupIngest.KeepaliveProtocol] = deriveConfiguredDecoder implicit val kinesisIteratorTypeEncoder: Encoder[V1.KinesisIngest.IteratorType] = deriveConfiguredEncoder implicit val kinesisIteratorTypeDecoder: Decoder[V1.KinesisIngest.IteratorType] = deriveConfiguredDecoder /** Encoder that preserves credential values for persistence and cluster communication. * Requires witness (`import Secret.Unsafe._`) to call. */ def awsCredentialsPreservingEncoder(implicit ev: Secret.UnsafeAccess): Encoder[V1.AwsCredentials] = { val preservingSecretEnc: Encoder[Secret] = SecretCodecs.preservingEncoder // Defined manually to avoid implicit scope collision of `Encoder[Secret]` Encoder.instance { creds => Json.obj( "accessKeyId" -> preservingSecretEnc(creds.accessKeyId), "secretAccessKey" -> preservingSecretEnc(creds.secretAccessKey), ) } } } ================================================ FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/V1IngestSchemas.scala ================================================ package com.thatdot.quine.app.model.ingest2 import sttp.tapir.Schema import com.thatdot.common.security.Secret import com.thatdot.quine.{routes => V1} /** Tapir schemas for V1 routes types used by V2 ingest. * * These types are defined in quine-endpoints which doesn't have Tapir dependencies, * so schemas are provided here. * * For Circe codecs, see [[V1IngestCodecs]]. 
  *
  * Usage:
  * {{{
  * import com.thatdot.quine.app.model.ingest2.V1IngestSchemas._
  * }}}
  */
object V1IngestSchemas {
  // Schemas derived directly from the V1 type definitions.
  implicit lazy val csvCharacterSchema: Schema[V1.CsvCharacter] = Schema.derived
  implicit lazy val recordDecodingTypeSchema: Schema[V1.RecordDecodingType] = Schema.derived
  implicit lazy val fileIngestModeSchema: Schema[V1.FileIngestMode] = Schema.derived
  implicit lazy val kafkaSecurityProtocolSchema: Schema[V1.KafkaSecurityProtocol] = Schema.derived
  implicit lazy val kafkaAutoOffsetResetSchema: Schema[V1.KafkaAutoOffsetReset] = Schema.derived
  implicit lazy val kafkaOffsetCommittingSchema: Schema[V1.KafkaOffsetCommitting] = Schema.derived
  // A Secret is described as a plain string: built from one with `Secret(s)`, turned back with `toString`.
  // NOTE(review): whether `Secret.toString` yields the raw or a redacted value is not visible here -- confirm.
  implicit lazy val secretSchema: Schema[Secret] = Schema.string.map((s: String) => Some(Secret(s)))(_.toString)
  // Derived after `secretSchema` is declared.
  // NOTE(review): presumably AwsCredentials has Secret-typed fields that rely on it -- confirm.
  implicit lazy val awsCredentialsSchema: Schema[V1.AwsCredentials] = Schema.derived
  implicit lazy val awsRegionSchema: Schema[V1.AwsRegion] = Schema.derived
  implicit lazy val keepaliveProtocolSchema: Schema[V1.WebsocketSimpleStartupIngest.KeepaliveProtocol] = Schema.derived
  implicit lazy val kinesisIteratorTypeSchema: Schema[V1.KinesisIngest.IteratorType] = Schema.derived
  // Seq schema obtained by adapting the array schema of the element type (Array <-> Seq).
  implicit lazy val recordDecoderSeqSchema: Schema[Seq[V1.RecordDecodingType]] =
    Schema.schemaForArray(recordDecodingTypeSchema).map(a => Some(a.toSeq))(s => s.toArray)
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/V1ToV2.scala
================================================
package com.thatdot.quine.app.model.ingest2

import com.thatdot.api.{v2 => api}
import com.thatdot.quine.app.model.ingest2.{V2IngestEntities => V2}
import com.thatdot.quine.{routes => V1}

/** Converts V1 API types to V2 API types.
*/ object V1ToV2 { def apply(config: V1.SaslJaasConfig): api.SaslJaasConfig = config match { case V1.SaslJaasConfig.PlainLogin(username, password) => api.PlainLogin(username, password) case V1.SaslJaasConfig.ScramLogin(username, password) => api.ScramLogin(username, password) case V1.SaslJaasConfig.OAuthBearerLogin(clientId, clientSecret, scope, tokenEndpointUrl) => api.OAuthBearerLogin(clientId, clientSecret, scope, tokenEndpointUrl) } def apply( schedulerSourceSettings: V1.KinesisIngest.KinesisSchedulerSourceSettings, ): KinesisSchedulerSourceSettings = KinesisSchedulerSourceSettings( bufferSize = schedulerSourceSettings.bufferSize, backpressureTimeoutMillis = schedulerSourceSettings.backpressureTimeoutMillis, ) def apply( maybeSchedulerSourceSettings: Option[V1.KinesisIngest.KinesisSchedulerSourceSettings], ): KinesisSchedulerSourceSettings = maybeSchedulerSourceSettings.fold(KinesisSchedulerSourceSettings())(apply) def apply(checkpointSettings: V1.KinesisIngest.KinesisCheckpointSettings): KinesisCheckpointSettings = KinesisCheckpointSettings( disableCheckpointing = checkpointSettings.disableCheckpointing, maxBatchSize = checkpointSettings.maxBatchSize, maxBatchWaitMillis = checkpointSettings.maxBatchWaitMillis, ) def apply(maybeCheckpointSettings: Option[V1.KinesisIngest.KinesisCheckpointSettings]): KinesisCheckpointSettings = maybeCheckpointSettings.fold(KinesisCheckpointSettings())(apply) def apply(configsBuilder: V1.KinesisIngest.ConfigsBuilder): ConfigsBuilder = ConfigsBuilder( tableName = configsBuilder.tableName, workerIdentifier = configsBuilder.workerIdentifier, ) def apply(maybeConfigsBuilder: Option[V1.KinesisIngest.ConfigsBuilder]): ConfigsBuilder = maybeConfigsBuilder.fold(ConfigsBuilder())(apply) def apply(billingMode: V1.KinesisIngest.BillingMode): BillingMode = billingMode match { case V1.KinesisIngest.BillingMode.PROVISIONED => BillingMode.PROVISIONED case V1.KinesisIngest.BillingMode.PAY_PER_REQUEST => BillingMode.PAY_PER_REQUEST case 
V1.KinesisIngest.BillingMode.UNKNOWN_TO_SDK_VERSION => BillingMode.UNKNOWN_TO_SDK_VERSION } def apply(leaseManagementConfig: V1.KinesisIngest.LeaseManagementConfig): LeaseManagementConfig = LeaseManagementConfig( failoverTimeMillis = leaseManagementConfig.failoverTimeMillis, shardSyncIntervalMillis = leaseManagementConfig.shardSyncIntervalMillis, cleanupLeasesUponShardCompletion = leaseManagementConfig.cleanupLeasesUponShardCompletion, ignoreUnexpectedChildShards = leaseManagementConfig.ignoreUnexpectedChildShards, maxLeasesForWorker = leaseManagementConfig.maxLeasesForWorker, maxLeaseRenewalThreads = leaseManagementConfig.maxLeaseRenewalThreads, billingMode = leaseManagementConfig.billingMode.map(apply), initialLeaseTableReadCapacity = leaseManagementConfig.initialLeaseTableReadCapacity, initialLeaseTableWriteCapacity = leaseManagementConfig.initialLeaseTableWriteCapacity, reBalanceThresholdPercentage = leaseManagementConfig.reBalanceThresholdPercentage, dampeningPercentage = leaseManagementConfig.dampeningPercentage, allowThroughputOvershoot = leaseManagementConfig.allowThroughputOvershoot, disableWorkerMetrics = leaseManagementConfig.disableWorkerMetrics, maxThroughputPerHostKBps = leaseManagementConfig.maxThroughputPerHostKBps, isGracefulLeaseHandoffEnabled = leaseManagementConfig.isGracefulLeaseHandoffEnabled, gracefulLeaseHandoffTimeoutMillis = leaseManagementConfig.gracefulLeaseHandoffTimeoutMillis, ) def apply(maybeLeaseManagementConfig: Option[V1.KinesisIngest.LeaseManagementConfig]): LeaseManagementConfig = maybeLeaseManagementConfig.fold(LeaseManagementConfig())(apply) def apply( retrievalSpecificConfig: V1.KinesisIngest.RetrievalSpecificConfig, ): RetrievalSpecificConfig = retrievalSpecificConfig match { case fanOutConfig: V1.KinesisIngest.RetrievalSpecificConfig.FanOutConfig => apply(fanOutConfig) case pollingConfig: V1.KinesisIngest.RetrievalSpecificConfig.PollingConfig => apply(pollingConfig) } def apply( maybeRetrievalSpecificConfig: 
Option[V1.KinesisIngest.RetrievalSpecificConfig], ): Option[RetrievalSpecificConfig] = maybeRetrievalSpecificConfig.map(apply) def apply( fanOutConfig: V1.KinesisIngest.RetrievalSpecificConfig.FanOutConfig, ): RetrievalSpecificConfig.FanOutConfig = RetrievalSpecificConfig.FanOutConfig( consumerArn = fanOutConfig.consumerArn, consumerName = fanOutConfig.consumerName, maxDescribeStreamSummaryRetries = fanOutConfig.maxDescribeStreamSummaryRetries, maxDescribeStreamConsumerRetries = fanOutConfig.maxDescribeStreamConsumerRetries, registerStreamConsumerRetries = fanOutConfig.registerStreamConsumerRetries, retryBackoffMillis = fanOutConfig.retryBackoffMillis, ) def apply( pollingConfig: V1.KinesisIngest.RetrievalSpecificConfig.PollingConfig, ): RetrievalSpecificConfig.PollingConfig = RetrievalSpecificConfig.PollingConfig( maxRecords = pollingConfig.maxRecords, retryGetRecordsInSeconds = pollingConfig.retryGetRecordsInSeconds, maxGetRecordsThreadPool = pollingConfig.maxGetRecordsThreadPool, idleTimeBetweenReadsInMillis = pollingConfig.idleTimeBetweenReadsInMillis, ) def apply(processorConfig: V1.KinesisIngest.ProcessorConfig): ProcessorConfig = ProcessorConfig( callProcessRecordsEvenForEmptyRecordList = processorConfig.callProcessRecordsEvenForEmptyRecordList, ) def apply(maybeProcessorConfig: Option[V1.KinesisIngest.ProcessorConfig]): ProcessorConfig = maybeProcessorConfig.fold(ProcessorConfig())(apply) def apply(shardPrioritization: V1.KinesisIngest.ShardPrioritization): ShardPrioritization = shardPrioritization match { case V1.KinesisIngest.ShardPrioritization.NoOpShardPrioritization => ShardPrioritization.NoOpShardPrioritization case V1.KinesisIngest.ShardPrioritization.ParentsFirstShardPrioritization(maxDepth) => ShardPrioritization.ParentsFirstShardPrioritization(maxDepth) } def apply(clientVersionConfig: V1.KinesisIngest.ClientVersionConfig): ClientVersionConfig = clientVersionConfig match { case 
V1.KinesisIngest.ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X => ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X case V1.KinesisIngest.ClientVersionConfig.CLIENT_VERSION_CONFIG_3X => ClientVersionConfig.CLIENT_VERSION_CONFIG_3X } def apply(coordinatorConfig: V1.KinesisIngest.CoordinatorConfig): CoordinatorConfig = CoordinatorConfig( parentShardPollIntervalMillis = coordinatorConfig.parentShardPollIntervalMillis, skipShardSyncAtWorkerInitializationIfLeasesExist = coordinatorConfig.skipShardSyncAtWorkerInitializationIfLeasesExist, shardPrioritization = coordinatorConfig.shardPrioritization.map(apply), clientVersionConfig = coordinatorConfig.clientVersionConfig.map(apply), ) def apply(maybeCoordinatorConfig: Option[V1.KinesisIngest.CoordinatorConfig]): CoordinatorConfig = maybeCoordinatorConfig.fold(CoordinatorConfig())(apply) def apply(lifecycleConfig: V1.KinesisIngest.LifecycleConfig): LifecycleConfig = LifecycleConfig( taskBackoffTimeMillis = lifecycleConfig.taskBackoffTimeMillis, logWarningForTaskAfterMillis = lifecycleConfig.logWarningForTaskAfterMillis, ) def apply(maybeLifecycleConfig: Option[V1.KinesisIngest.LifecycleConfig]): LifecycleConfig = maybeLifecycleConfig.fold(LifecycleConfig())(apply) def apply(retrievalConfig: V1.KinesisIngest.RetrievalConfig): RetrievalConfig = RetrievalConfig( listShardsBackoffTimeInMillis = retrievalConfig.listShardsBackoffTimeInMillis, maxListShardsRetryAttempts = retrievalConfig.maxListShardsRetryAttempts, ) def apply(maybeRetrievalConfig: Option[V1.KinesisIngest.RetrievalConfig]): RetrievalConfig = maybeRetrievalConfig.fold(RetrievalConfig())(apply) def apply(metricsLevel: V1.KinesisIngest.MetricsLevel): MetricsLevel = metricsLevel match { case V1.KinesisIngest.MetricsLevel.NONE => MetricsLevel.NONE case V1.KinesisIngest.MetricsLevel.SUMMARY => MetricsLevel.SUMMARY case V1.KinesisIngest.MetricsLevel.DETAILED => MetricsLevel.DETAILED } def apply(metricsDimension: V1.KinesisIngest.MetricsDimension): 
MetricsDimension = metricsDimension match { case V1.KinesisIngest.MetricsDimension.OPERATION_DIMENSION_NAME => MetricsDimension.OPERATION_DIMENSION_NAME case V1.KinesisIngest.MetricsDimension.SHARD_ID_DIMENSION_NAME => MetricsDimension.SHARD_ID_DIMENSION_NAME case V1.KinesisIngest.MetricsDimension.STREAM_IDENTIFIER => MetricsDimension.STREAM_IDENTIFIER case V1.KinesisIngest.MetricsDimension.WORKER_IDENTIFIER => MetricsDimension.WORKER_IDENTIFIER } def apply(metricsConfig: V1.KinesisIngest.MetricsConfig): MetricsConfig = MetricsConfig( metricsBufferTimeMillis = metricsConfig.metricsBufferTimeMillis, metricsMaxQueueSize = metricsConfig.metricsMaxQueueSize, metricsLevel = metricsConfig.metricsLevel.map(apply), metricsEnabledDimensions = metricsConfig.metricsEnabledDimensions.map(_.map(apply)), ) def apply(maybeMetricsConfig: Option[V1.KinesisIngest.MetricsConfig]): MetricsConfig = maybeMetricsConfig.fold(MetricsConfig())(apply) def apply(advancedSettings: V1.KinesisIngest.KCLConfiguration): KCLConfiguration = KCLConfiguration( configsBuilder = V1ToV2(advancedSettings.configsBuilder), leaseManagementConfig = V1ToV2(advancedSettings.leaseManagementConfig), retrievalSpecificConfig = V1ToV2(advancedSettings.retrievalSpecificConfig), processorConfig = V1ToV2(advancedSettings.processorConfig), coordinatorConfig = V1ToV2(advancedSettings.coordinatorConfig), lifecycleConfig = V1ToV2(advancedSettings.lifecycleConfig), retrievalConfig = V1ToV2(advancedSettings.retrievalConfig), metricsConfig = V1ToV2(advancedSettings.metricsConfig), ) def apply(advancedSettings: Option[V1.KinesisIngest.KCLConfiguration]): KCLConfiguration = advancedSettings.fold(KCLConfiguration())(apply) def apply(initialPosition: V1.KinesisIngest.InitialPosition): InitialPosition = initialPosition match { case V1.KinesisIngest.InitialPosition.TrimHorizon => InitialPosition.TrimHorizon case V1.KinesisIngest.InitialPosition.Latest => InitialPosition.Latest case V1.KinesisIngest.InitialPosition.AtTimestamp(year, 
month, day, hour, minute, second) => InitialPosition.AtTimestamp(year, month, day, hour, minute, second) } def apply(stats: V1.IngestStreamStats): V2.IngestStreamStats = V2.IngestStreamStats( ingestedCount = stats.ingestedCount, rates = apply(stats.rates), byteRates = apply(stats.byteRates), startTime = stats.startTime, totalRuntime = stats.totalRuntime, ) def apply(summary: V1.RatesSummary): V2.RatesSummary = V2.RatesSummary( count = summary.count, oneMinute = summary.oneMinute, fiveMinute = summary.fiveMinute, fifteenMinute = summary.fifteenMinute, overall = summary.overall, ) def apply(status: V1.IngestStreamStatus): V2.IngestStreamStatus = status match { case V1.IngestStreamStatus.Running => V2.IngestStreamStatus.Running case V1.IngestStreamStatus.Paused => V2.IngestStreamStatus.Paused case V1.IngestStreamStatus.Restored => V2.IngestStreamStatus.Restored case V1.IngestStreamStatus.Completed => V2.IngestStreamStatus.Completed case V1.IngestStreamStatus.Terminated => V2.IngestStreamStatus.Terminated case V1.IngestStreamStatus.Failed => V2.IngestStreamStatus.Failed } } ================================================ FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/V2IngestEntities.scala ================================================ package com.thatdot.quine.app.model.ingest2 import java.time.Instant import scala.util.{Failure, Success, Try} import io.circe.generic.extras.semiauto.{deriveConfiguredDecoder, deriveConfiguredEncoder} import io.circe.{Decoder, Encoder} import sttp.tapir.Schema import sttp.tapir.Schema.annotations.{description, title} import com.thatdot.api.v2.TypeDiscriminatorConfig.instances.circeConfig import com.thatdot.api.v2.codec.ThirdPartyCodecs.jdk.{instantDecoder, instantEncoder} import com.thatdot.common.logging.Log.LazySafeLogging import com.thatdot.common.security.Secret import com.thatdot.quine.app.v2api.definitions.ingest2.ApiIngest.OnRecordErrorHandler import com.thatdot.quine.serialization.EncoderDecoder import 
com.thatdot.quine.{routes => V1}
import com.thatdot.{api => api}

/** Common supertype of every ingest format: a value is either a [[FileFormat]] or a
  * [[StreamingFormat]].
  */
sealed trait IngestFormat

object IngestFormat {

  implicit lazy val schema: Schema[IngestFormat] =
    Schema.derived[IngestFormat].description("Ingest format")

  /** Encodes the union by handing the value to the encoder of the branch it belongs to. */
  implicit lazy val encoder: Encoder[IngestFormat] = Encoder.instance[IngestFormat] { format =>
    format match {
      case fileFormat: FileFormat => FileFormat.encoder(fileFormat)
      case streamingFormat: StreamingFormat => StreamingFormat.encoder(streamingFormat)
    }
  }

  /** Decodes the union by trying the [[FileFormat]] decoder first and falling back to the
    * [[StreamingFormat]] decoder.
    *
    * Both branches define a `JsonFormat` that serializes to `{"type": "JsonFormat"}`, so that
    * payload is ambiguous here; because the file branch is attempted first it always yields
    * `FileFormat.JsonFormat`.
    *
    * This is not a problem in practice because runtime code uses specific types (FileFormat or
    * StreamingFormat) based on the IngestSource subtype, not this union decoder.
    */
  implicit lazy val decoder: Decoder[IngestFormat] =
    FileFormat.decoder
      .map[IngestFormat](identity)
      .or(StreamingFormat.decoder.map[IngestFormat](identity))
}

/** Data format that reads a single value from an externally delimited frame.
*/
sealed trait StreamingFormat extends IngestFormat

object StreamingFormat {

  case object JsonFormat extends StreamingFormat

  case object RawFormat extends StreamingFormat

  final case class ProtobufFormat(schemaUrl: String, typeName: String) extends StreamingFormat

  object ProtobufFormat {
    implicit lazy val schema: Schema[ProtobufFormat] = Schema.derived
  }

  case class AvroFormat(schemaUrl: String) extends StreamingFormat

  case object DropFormat extends StreamingFormat

  /** Translates a V1 streamed-record format into the matching V2 streaming format.
    *
    * The first two fields of the V1 Cypher formats are ignored; only the Protobuf schema URL and
    * type name are carried over. Any V1 format without a case below aborts via `sys.error`.
    */
  def apply(v1Format: V1.StreamedRecordFormat): StreamingFormat = v1Format match {
    case V1.StreamedRecordFormat.CypherJson(_, _) => JsonFormat
    case V1.StreamedRecordFormat.CypherRaw(_, _) => RawFormat
    case V1.StreamedRecordFormat.CypherProtobuf(_, _, protoSchemaUrl, protoTypeName) =>
      ProtobufFormat(protoSchemaUrl, protoTypeName)
    // Avro is not supported in v1, so no case here produces AvroFormat.
    case V1.StreamedRecordFormat.Drop => DropFormat
    case unsupported => sys.error(s"Unsupported version 1 format: $unsupported")
  }

  implicit lazy val schema: Schema[StreamingFormat] = Schema.derived
  implicit lazy val encoder: Encoder[StreamingFormat] = deriveConfiguredEncoder
  implicit lazy val decoder: Decoder[StreamingFormat] = deriveConfiguredDecoder
}

@title("File Ingest Format")
@description("Format for decoding a stream of elements from a file for ingest.")
sealed trait FileFormat extends IngestFormat

object FileFormat {
  import V1IngestSchemas.csvCharacterSchema

  /** Each line becomes one string element. */
  case object LineFormat extends FileFormat

  /** Each line is parsed as one JSON value. */
  case object JsonLinesFormat extends FileFormat

  case object JsonFormat extends FileFormat

  /** Delimiter-separated values: one record per line, fields split on `delimiter`.
    *
    * Construction fails (via `require`) unless `delimiter`, `quoteChar` and `escapeChar` are
    * three distinct characters.
    */
  case class CsvFormat(
    headers: Either[Boolean, List[String]] = Left(false),
    delimiter: V1.CsvCharacter = V1.CsvCharacter.Comma,
    quoteChar: V1.CsvCharacter = V1.CsvCharacter.DoubleQuote,
    escapeChar: V1.CsvCharacter = V1.CsvCharacter.Backslash,
  ) extends FileFormat {
    require(delimiter != quoteChar, "Different characters must be used for `delimiter` and `quoteChar`.")
    require(delimiter != escapeChar, "Different characters must be used for `delimiter` and `escapeChar`.")
    require(quoteChar != escapeChar, "Different characters must be used for `quoteChar` and `escapeChar`.")
  }

  object CsvFormat {
    import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
    import V1IngestCodecs.{csvCharacterEncoder, csvCharacterDecoder}

    // `headers` is written as a bare JSON boolean or a bare JSON array of strings.
    implicit private val headersEncoder: Encoder[Either[Boolean, List[String]]] =
      Encoder.instance { headers =>
        headers.fold(
          flag => io.circe.Json.fromBoolean(flag),
          names => io.circe.Json.arr(names.map(io.circe.Json.fromString): _*),
        )
      }

    // Reads `headers` as a boolean first; only if that fails is a list of strings attempted.
    implicit private val headersDecoder: Decoder[Either[Boolean, List[String]]] =
      Decoder.instance { cursor =>
        cursor.as[Boolean] match {
          case Right(flag) => Right(Left(flag))
          case Left(_) => cursor.as[List[String]].map(Right(_))
        }
      }

    implicit lazy val schema: Schema[CsvFormat] = Schema.derived
    implicit lazy val encoder: Encoder[CsvFormat] = deriveEncoder
    implicit lazy val decoder: Decoder[CsvFormat] = deriveDecoder
  }

  /** Translates a V1 file ingest format into the matching V2 file format.
    *
    * The first two fields of each V1 format are ignored. V1 `CypherJson` maps to
    * [[JsonLinesFormat]]. Any V1 format without a case below aborts via `sys.error`.
    */
  def apply(v1Format: V1.FileIngestFormat): FileFormat = v1Format match {
    case V1.FileIngestFormat.CypherLine(_, _) => LineFormat
    case V1.FileIngestFormat.CypherJson(_, _) => JsonLinesFormat
    case V1.FileIngestFormat.CypherCsv(_, _, csvHeaders, fieldDelimiter, quote, escape) =>
      CsvFormat(headers = csvHeaders, delimiter = fieldDelimiter, quoteChar = quote, escapeChar = escape)
    case unsupported => sys.error(s"Unsupported version 1 format: $unsupported")
  }

  implicit lazy val schema: Schema[FileFormat] = Schema.derived
  implicit lazy val encoder: Encoder[FileFormat] = deriveConfiguredEncoder
  implicit lazy val decoder: Decoder[FileFormat] = deriveConfiguredDecoder
}

object V2IngestEntities {

  /** Ingest definition
and status representation used for persistence */
  final case class QuineIngestStreamWithStatus(
    config: QuineIngestConfiguration,
    status: Option[V1.IngestStreamStatus],
  )

  object QuineIngestStreamWithStatus {
    // `status` is a V1 type, so its codecs come from V1IngestCodecs.
    import V1IngestCodecs.{ingestStreamStatusEncoder, ingestStreamStatusDecoder}
    implicit lazy val encoder: Encoder[QuineIngestStreamWithStatus] = deriveConfiguredEncoder
    implicit lazy val decoder: Decoder[QuineIngestStreamWithStatus] = deriveConfiguredDecoder
    implicit lazy val encoderDecoder: EncoderDecoder[QuineIngestStreamWithStatus] = EncoderDecoder.ofEncodeDecode

    /** Encoder that preserves credential values for persistence.
      * Requires witness (`import Secret.Unsafe._`) to call.
      *
      * Differs from [[encoder]] only in that the nested configuration is encoded with
      * `QuineIngestConfiguration.preservingEncoder`.
      */
    def preservingEncoder(implicit ev: Secret.UnsafeAccess): Encoder[QuineIngestStreamWithStatus] = {
      // Use preserving encoder for configuration that may contain secrets.
      // This local implicit takes the place of the companion's default encoder during derivation.
      implicit val quineIngestConfigurationEncoder: Encoder[QuineIngestConfiguration] =
        QuineIngestConfiguration.preservingEncoder
      deriveConfiguredEncoder
    }
  }

  /** Status, optional message, source settings and statistics of one ingest stream, without its name. */
  case class IngestStreamInfo(
    status: IngestStreamStatus,
    message: Option[String],
    settings: IngestSource,
    stats: IngestStreamStats,
  ) {

    /** Returns the named variant of this record; every other field is copied unchanged. */
    def withName(name: String): IngestStreamInfoWithName =
      IngestStreamInfoWithName(name, status, message, settings, stats)
  }

  object IngestStreamInfo {
    implicit lazy val encoder: Encoder[IngestStreamInfo] = deriveConfiguredEncoder
    implicit lazy val decoder: Decoder[IngestStreamInfo] = deriveConfiguredDecoder
  }

  /** Same fields as [[IngestStreamInfo]] with the stream's `name` added in front. */
  case class IngestStreamInfoWithName(
    name: String,
    status: IngestStreamStatus,
    message: Option[String],
    settings: IngestSource,
    stats: IngestStreamStats,
  )

  object IngestStreamInfoWithName {
    implicit lazy val encoder: Encoder[IngestStreamInfoWithName] = deriveConfiguredEncoder
    implicit lazy val decoder: Decoder[IngestStreamInfoWithName] = deriveConfiguredDecoder
  }

  /** Closed set of states an ingest stream can be reported in. */
  sealed trait IngestStreamStatus

  object IngestStreamStatus {
    case object Running extends IngestStreamStatus
    case object Paused extends IngestStreamStatus
    case object Restored extends IngestStreamStatus
    case object Completed extends IngestStreamStatus
    case object Terminated extends IngestStreamStatus
    case object Failed extends IngestStreamStatus

    implicit val encoder: Encoder[IngestStreamStatus] = deriveConfiguredEncoder
    implicit val decoder: Decoder[IngestStreamStatus] = deriveConfiguredDecoder
  }

  /** Two-state marker (open / closed); no codecs are defined for it here. */
  sealed trait ValvePosition

  object ValvePosition {
    case object Open extends ValvePosition
    case object Closed extends ValvePosition
  }

  /** Counters and rates reported for an ingest stream.
    *
    * NOTE(review): the units of `totalRuntime` are not stated in this file — confirm (presumably milliseconds).
    */
  case class IngestStreamStats(
    ingestedCount: Long,
    rates: RatesSummary,
    byteRates: RatesSummary,
    startTime: Instant,
    totalRuntime: Long,
  )

  object IngestStreamStats {
    implicit val encoder: Encoder[IngestStreamStats] = deriveConfiguredEncoder
    implicit val decoder: Decoder[IngestStreamStats] = deriveConfiguredDecoder
  }

  /** A total count together with one-, five- and fifteen-minute rates and an overall rate. */
  case class RatesSummary(
    count: Long,
    oneMinute: Double,
    fiveMinute: Double,
    fifteenMinute: Double,
    overall: Double,
  )

  object RatesSummary {
    implicit val encoder: Encoder[RatesSummary] = deriveConfiguredEncoder
    implicit val decoder: Decoder[RatesSummary] = deriveConfiguredDecoder
  }

  /** What to do when the stream itself fails: either [[RetryStreamError]] or [[LogStreamError]]. */
  sealed trait OnStreamErrorHandler

  object OnStreamErrorHandler {
    implicit lazy val schema: Schema[OnStreamErrorHandler] = Schema.derived
    implicit val encoder: Encoder[OnStreamErrorHandler] = deriveConfiguredEncoder
    implicit val decoder: Decoder[OnStreamErrorHandler] = deriveConfiguredDecoder
  }

  @title("Retry Stream Error Handler")
  @description("Retry the stream on failure.")
  case class RetryStreamError(retryCount: Int) extends OnStreamErrorHandler

  @title("Log Stream Error Handler")
  @description("If the stream fails log a message but do not retry.")
  case object LogStreamError extends OnStreamErrorHandler

  /** Enforce shared structure between quine and novelty ingest usages.
*/
  trait V2IngestConfiguration {
    val source: IngestSource
    val parallelism: Int
    val maxPerSecond: Option[Int]
    val onRecordError: OnRecordErrorHandler
    val onStreamError: OnStreamErrorHandler
  }

  /** Optional transformation that can be attached to a [[QuineIngestConfiguration]]. */
  sealed trait Transformation

  object Transformation {
    case class JavaScript(
      /* JavaScript source code of the function, must be callable */
      function: String,
    ) extends Transformation

    implicit lazy val schema: Schema[Transformation] = Schema.derived
    implicit val encoder: Encoder[Transformation] = deriveConfiguredEncoder
    implicit val decoder: Decoder[Transformation] = deriveConfiguredDecoder
  }

  /** V2 definition of a Quine ingest stream.
    *
    * @param name           name of the ingest
    * @param source         where the data comes from and how it is framed and decoded
    * @param query          query text; used as the query of the V1 Cypher formats when converting to V1
    * @param parameter      query parameter name (default `"that"`); passed to V1 alongside `query`
    * @param transformation optional transformation; it has no V1 counterpart and is not carried
    *                       into the V1 conversion
    * @param parallelism    write parallelism, defaulting to the V1 default
    * @param maxPerSecond   optional rate limit
    * @param onRecordError  handling of per-record errors
    * @param onStreamError  handling of stream-level errors (default: log, do not retry)
    */
  case class QuineIngestConfiguration(
    name: String,
    source: IngestSource,
    query: String,
    parameter: String = "that",
    transformation: Option[Transformation] = None,
    parallelism: Int = V1.IngestRoutes.defaultWriteParallelism,
    maxPerSecond: Option[Int] = None,
    onRecordError: OnRecordErrorHandler = OnRecordErrorHandler(),
    onStreamError: OnStreamErrorHandler = LogStreamError,
  ) extends V2IngestConfiguration
      with LazySafeLogging {

    /** Renders this configuration in the V1 shape.
      *
      * Sources or formats that V1 cannot express (Avro, KCL Kinesis, Reactive Streams, WebSocket
      * file upload) do not fail: they yield a placeholder standard-input ingest whose query and
      * parameter are both `"Unrenderable"`. An unset `maximumLineSize` becomes `Integer.MAX_VALUE`.
      * The record decoders of file and S3 sources are not carried over.
      */
    def asV1IngestStreamConfiguration: V1.IngestStreamConfiguration = {

      // Streaming format -> V1 streamed-record format; Avro has no V1 equivalent.
      def asV1StreamedRecordFormat(format: StreamingFormat): Try[V1.StreamedRecordFormat] = format match {
        case StreamingFormat.JsonFormat => Success(V1.StreamedRecordFormat.CypherJson(query, parameter))
        case StreamingFormat.RawFormat => Success(V1.StreamedRecordFormat.CypherRaw(query, parameter))
        case StreamingFormat.ProtobufFormat(schemaUrl, typeName) =>
          Success(V1.StreamedRecordFormat.CypherProtobuf(query, parameter, schemaUrl, typeName))
        case _: StreamingFormat.AvroFormat =>
          Failure(
            new UnsupportedOperationException(
              "Avro is not supported in Api V1",
            ),
          )
        case _: StreamingFormat.DropFormat.type => Success(V1.StreamedRecordFormat.Drop)
      }

      // File format -> V1 file format; both V2 JSON file formats collapse to V1 CypherJson.
      def asV1FileIngestFormat(format: FileFormat): Try[V1.FileIngestFormat] = format match {
        case FileFormat.LineFormat => Success(V1.FileIngestFormat.CypherLine(query, parameter))
        case FileFormat.JsonFormat | FileFormat.JsonLinesFormat =>
          Success(V1.FileIngestFormat.CypherJson(query, parameter))
        case FileFormat.CsvFormat(headers, delimiter, quoteChar, escapeChar) =>
          Success(V1.FileIngestFormat.CypherCsv(query, parameter, headers, delimiter, quoteChar, escapeChar))
      }

      val tryConfig: Try[V1.IngestStreamConfiguration] = source match {
        case FileIngest(format, path, fileIngestMode, maximumLineSize, startOffset, limit, charset, _) =>
          asV1FileIngestFormat(format).map { fmt =>
            V1.FileIngest(
              fmt,
              path,
              charset.name(),
              parallelism,
              maximumLineSize.getOrElse(Integer.MAX_VALUE),
              startOffset,
              limit,
              maxPerSecond,
              fileIngestMode,
            )
          }
        case S3Ingest(format, bucket, key, credentials, maximumLineSize, startOffset, limit, charset, _) =>
          // last param recordDecoders unsupported in V1
          asV1FileIngestFormat(format).map { fmt =>
            V1.S3Ingest(
              fmt,
              bucket,
              key,
              charset.name(),
              parallelism,
              credentials,
              maximumLineSize.getOrElse(Integer.MAX_VALUE),
              startOffset,
              limit,
              maxPerSecond,
            )
          }
        case StdInputIngest(format, maximumLineSize, charset) =>
          asV1FileIngestFormat(format).map { fmt =>
            V1.StandardInputIngest(
              fmt,
              charset.name(),
              parallelism,
              maximumLineSize.getOrElse(Integer.MAX_VALUE),
              maxPerSecond,
            )
          }
        case NumberIteratorIngest(_, startOffset, limit) =>
          Success(
            V1.NumberIteratorIngest(V1.IngestRoutes.defaultNumberFormat, startOffset, limit, maxPerSecond, parallelism),
          )
        case WebsocketIngest(format, url, initMessages, keepAlive, charset) =>
          asV1StreamedRecordFormat(format).map { fmt =>
            V1.WebsocketSimpleStartupIngest(
              fmt,
              url,
              initMessages,
              keepAlive,
              parallelism,
              charset.name(),
            )
          }
        case KinesisIngest(
              format,
              streamName,
              shardIds,
              credentials,
              region,
              iteratorType,
              numRetries,
              recordDecoders,
            ) =>
          asV1StreamedRecordFormat(format).map { fmt =>
            V1.KinesisIngest(
              fmt,
              streamName,
              shardIds,
              parallelism,
              credentials,
              region,
              iteratorType,
              numRetries,
              maxPerSecond,
              recordDecoders,
            )
          }
        case ServerSentEventIngest(format, url, recordDecoders) =>
          asV1StreamedRecordFormat(format).map { fmt =>
            V1.ServerSentEventsIngest(fmt, url, parallelism, maxPerSecond, recordDecoders)
          }
        case SQSIngest(format, queueUrl, readParallelism, credentials, region, deleteReadMessages, recordDecoders) =>
          asV1StreamedRecordFormat(format).map { fmt =>
            V1.SQSIngest(
              fmt,
              queueUrl,
              readParallelism,
              parallelism,
              credentials,
              region,
              deleteReadMessages,
              maxPerSecond,
              recordDecoders,
            )
          }
        case KafkaIngest(
              format,
              topics,
              bootstrapServers,
              groupId,
              securityProtocol,
              offsetCommitting,
              autoOffsetReset,
              sslKeystorePassword,
              sslTruststorePassword,
              sslKeyPassword,
              saslJaasConfig,
              kafkaProperties,
              endingOffset,
              recordDecoders,
            ) =>
          asV1StreamedRecordFormat(format).map { fmt =>
            V1.KafkaIngest(
              fmt,
              topics,
              parallelism,
              bootstrapServers,
              groupId,
              securityProtocol,
              offsetCommitting,
              autoOffsetReset,
              kafkaProperties,
              endingOffset,
              maxPerSecond,
              recordDecoders,
              sslKeystorePassword,
              sslTruststorePassword,
              sslKeyPassword,
              saslJaasConfig.map {
                case api.v2.PlainLogin(username, password) => V1.SaslJaasConfig.PlainLogin(username, password)
                case api.v2.ScramLogin(username, password) => V1.SaslJaasConfig.ScramLogin(username, password)
                // Forward all four OAuth fields. The V1 type carries scope and token endpoint too
                // (V1ToV2 reads them back), so dropping them here made the V1 rendering lossy.
                case api.v2.OAuthBearerLogin(clientId, clientSecret, scope, tokenEndpointUrl) =>
                  V1.SaslJaasConfig.OAuthBearerLogin(clientId, clientSecret, scope, tokenEndpointUrl)
              },
            )
          }
        case _: KinesisKclIngest =>
          Failure(new Exception("v2 KCL Kinesis unsupported in v1 ingests"))
        case _: ReactiveStreamIngest =>
          Failure(new Exception("Reactive Streams unsupported in v1 ingests"))
        case _: WebSocketFileUpload =>
          Failure(new Exception("WebSocket File Upload unsupported in v1 ingests"))
      }

      tryConfig match {
        case Success(v1Config) => v1Config
        case Failure(_) =>
          /*
              Note: This value is only here in the case that we're trying to render v2 ingests in
              the v1 api where we need to convert them to the v1 format. In these cases if we've
              created a v2 ingest that's not render-able as a v1 configuration this returns an
              empty placeholder object so that the api doesn't throw a 500.

              Note that creating this situation is only possible by creating an ingest in the v2
              api and then trying to view it via the v1 api.
           */
          V1.StandardInputIngest(
            V1.FileIngestFormat.CypherLine("Unrenderable", "Unrenderable"),
            "UTF-8",
            0,
            0,
            None,
          )
      }
    }
  }

  object QuineIngestConfiguration {
    implicit lazy val encoder: Encoder[QuineIngestConfiguration] = deriveConfiguredEncoder
    implicit lazy val decoder: Decoder[QuineIngestConfiguration] = deriveConfiguredDecoder
    implicit lazy val encoderDecoder: EncoderDecoder[QuineIngestConfiguration] = EncoderDecoder.ofEncodeDecode

    /** Encoder that preserves credential values for persistence.
      * Requires witness (`import Secret.Unsafe._`) to call.
      */
    def preservingEncoder(implicit ev: Secret.UnsafeAccess): Encoder[QuineIngestConfiguration] = {
      // Use preserving encoders for components that contain secrets
      implicit val ingestSourceEncoder: Encoder[IngestSource] = IngestSource.preservingEncoder
      implicit val onRecordErrorEncoder: Encoder[OnRecordErrorHandler] = OnRecordErrorHandler.preservingEncoder
      deriveConfiguredEncoder
    }
  }

  /** WebSocket file upload feedback messages sent from server to client */
  object WebSocketFileUploadFeedback {

    /** Type of JSON message sent back in a WebSocket file upload stream */
    sealed trait FeedbackMessage

    /** Acknowledgement that WebSocket connection is established */
    case object Ack extends FeedbackMessage

    /** Progress update indicating number of records processed */
    final case class Progress(count: Long) extends FeedbackMessage

    /** Error occurred during processing */
    final case class Error(message: String, index: Option[Long], record: Option[String]) extends FeedbackMessage

    /** Upload complete with guaranteed final record count */
    final case class Complete(finalCount: Long) extends FeedbackMessage

    object FeedbackMessage {
      import io.circe.generic.extras.semiauto
      // Encoder only: no decoder is defined for these messages here.
      implicit val feedbackMessageEncoder: Encoder[FeedbackMessage] = semiauto.deriveConfiguredEncoder
    }
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/V2IngestSources.scala
================================================
package com.thatdot.quine.app.model.ingest2

import java.nio.charset.Charset
import java.time.Instant

import io.circe.generic.extras.semiauto.{
  deriveConfiguredDecoder,
  deriveConfiguredEncoder,
  deriveEnumerationDecoder,
  deriveEnumerationEncoder,
}
import io.circe.{Decoder, Encoder}
import sttp.tapir.Schema
import sttp.tapir.Schema.annotations.{description, title}

import com.thatdot.api.codec.SecretCodecs
import com.thatdot.api.v2.SaslJaasConfig
import com.thatdot.api.v2.TypeDiscriminatorConfig.instances.circeConfig
import com.thatdot.common.security.Secret
import com.thatdot.quine.app.routes.UnifiedIngestConfiguration
import com.thatdot.quine.app.util.StringOps.syntax.MultilineTransforms
import com.thatdot.quine.{routes => V1}

/** Mixed into ingest sources that let the caller specify a character set. */
trait IngestCharsetSupport {
  /** Character set used to read the source's text. */
  val characterEncoding: Charset
}

/** Mixed into ingest sources that can be bounded at the start and the end. */
trait IngestBoundingSupport {
  /** Index of the first record to process. */
  val startOffset: Long
  /** Optional cap on the number of records ingested. */
  val limit: Option[Long]
}

/** Ingest supports decompression (e.g.
Base64, gzip, zip) */ trait IngestDecompressionSupport { def recordDecoders: Seq[V1.RecordDecodingType] } @title("Ingest source") sealed trait IngestSource { def format: IngestFormat } sealed trait FileIngestSource extends IngestSource { def format: FileFormat } sealed trait StreamingIngestSource extends IngestSource { def format: StreamingFormat } object IngestSource { import V1IngestCodecs._ import com.thatdot.api.v2.codec.DisjointEither.syntax._ import com.thatdot.api.v2.codec.DisjointEvidence._ import com.thatdot.api.v2.codec.ThirdPartyCodecs.jdk.{charsetDecoder, charsetEncoder} import com.thatdot.api.v2.schema.ThirdPartySchemas.jdk.charsetSchema def apply(config: UnifiedIngestConfiguration): IngestSource = config.config match { case Left(v2) => v2.source case Right(v1) => IngestSource(v1) } def apply(ingest: V1.IngestStreamConfiguration): IngestSource = ingest match { case ingest: V1.KafkaIngest => KafkaIngest( StreamingFormat(ingest.format), ingest.topics, ingest.bootstrapServers, ingest.groupId, ingest.securityProtocol, ingest.offsetCommitting, ingest.autoOffsetReset, sslKeystorePassword = None, // V1 doesn't have typed secret params sslTruststorePassword = None, sslKeyPassword = None, saslJaasConfig = None, ingest.kafkaProperties, ingest.endingOffset, ingest.recordDecoders, ) case ingest: V1.KinesisIngest => KinesisIngest( StreamingFormat(ingest.format), ingest.streamName, ingest.shardIds, ingest.credentials, ingest.region, ingest.iteratorType, ingest.numRetries, ingest.recordDecoders, ) case ingest: V1.ServerSentEventsIngest => ServerSentEventIngest( StreamingFormat(ingest.format), ingest.url, ingest.recordDecoders, ) case ingest: V1.SQSIngest => SQSIngest( StreamingFormat(ingest.format), ingest.queueUrl, ingest.readParallelism, ingest.credentials, ingest.region, ingest.deleteReadMessages, ingest.recordDecoders, ) case ingest: V1.WebsocketSimpleStartupIngest => WebsocketIngest( StreamingFormat(ingest.format), ingest.url, ingest.initMessages, 
ingest.keepAlive, Charset.forName(ingest.encoding), ) case ingest: V1.FileIngest => FileIngest( FileFormat(ingest.format), ingest.path, ingest.fileIngestMode, Some(ingest.maximumLineSize), ingest.startAtOffset, ingest.ingestLimit, Charset.forName(ingest.encoding), ) case ingest: V1.S3Ingest => S3Ingest( FileFormat(ingest.format), ingest.bucket, ingest.key, ingest.credentials, Some(ingest.maximumLineSize), ingest.startAtOffset, ingest.ingestLimit, Charset.forName(ingest.encoding), ) case ingest: V1.StandardInputIngest => StdInputIngest( FileFormat(ingest.format), Some(ingest.maximumLineSize), Charset.forName(ingest.encoding), ) case ingest: V1.NumberIteratorIngest => NumberIteratorIngest( // Can't convert from a FileFormat to a StreamingFormat, // but a format doesn't make sense for NumberIteratorIngest anyway StreamingFormat.RawFormat, ingest.startAtOffset, ingest.ingestLimit, ) case V1.KinesisKCLIngest( format, applicationName, kinesisStreamName, _, credentials, region, initialPosition, numRetries, _, recordDecoders, schedulerSourceSettings, checkpointSettings, advancedSettings, ) => KinesisKclIngest( kinesisStreamName = kinesisStreamName, applicationName = applicationName, format = StreamingFormat(format), credentialsOpt = credentials, regionOpt = region, initialPosition = V1ToV2(initialPosition), numRetries = numRetries, recordDecoders = recordDecoders, schedulerSourceSettings = V1ToV2(schedulerSourceSettings), checkpointSettings = V1ToV2(checkpointSettings), advancedSettings = V1ToV2(advancedSettings), ) } implicit lazy val schema: Schema[IngestSource] = { import V1IngestSchemas._ Schema.derived } implicit lazy val fileIngestEncoder: Encoder[FileIngest] = deriveConfiguredEncoder implicit lazy val fileIngestDecoder: Decoder[FileIngest] = deriveConfiguredDecoder implicit lazy val s3IngestEncoder: Encoder[S3Ingest] = deriveConfiguredEncoder implicit lazy val s3IngestDecoder: Decoder[S3Ingest] = deriveConfiguredDecoder implicit lazy val reactiveStreamIngestEncoder: 
Encoder[ReactiveStreamIngest] = deriveConfiguredEncoder implicit lazy val reactiveStreamIngestDecoder: Decoder[ReactiveStreamIngest] = deriveConfiguredDecoder implicit lazy val webSocketFileUploadEncoder: Encoder[WebSocketFileUpload] = deriveConfiguredEncoder implicit lazy val webSocketFileUploadDecoder: Decoder[WebSocketFileUpload] = deriveConfiguredDecoder implicit lazy val stdInputIngestEncoder: Encoder[StdInputIngest] = deriveConfiguredEncoder implicit lazy val stdInputIngestDecoder: Decoder[StdInputIngest] = deriveConfiguredDecoder implicit lazy val numberIteratorIngestEncoder: Encoder[NumberIteratorIngest] = deriveConfiguredEncoder implicit lazy val numberIteratorIngestDecoder: Decoder[NumberIteratorIngest] = deriveConfiguredDecoder implicit lazy val websocketIngestEncoder: Encoder[WebsocketIngest] = deriveConfiguredEncoder implicit lazy val websocketIngestDecoder: Decoder[WebsocketIngest] = deriveConfiguredDecoder implicit lazy val kinesisIngestEncoder: Encoder[KinesisIngest] = deriveConfiguredEncoder implicit lazy val kinesisIngestDecoder: Decoder[KinesisIngest] = deriveConfiguredDecoder implicit lazy val kinesisKclIngestEncoder: Encoder[KinesisKclIngest] = deriveConfiguredEncoder implicit lazy val kinesisKclIngestDecoder: Decoder[KinesisKclIngest] = deriveConfiguredDecoder implicit lazy val serverSentEventIngestEncoder: Encoder[ServerSentEventIngest] = deriveConfiguredEncoder implicit lazy val serverSentEventIngestDecoder: Decoder[ServerSentEventIngest] = deriveConfiguredDecoder implicit lazy val sqsIngestEncoder: Encoder[SQSIngest] = deriveConfiguredEncoder implicit lazy val sqsIngestDecoder: Decoder[SQSIngest] = deriveConfiguredDecoder implicit lazy val kafkaIngestEncoder: Encoder[KafkaIngest] = { import SecretCodecs.secretEncoder deriveConfiguredEncoder } implicit lazy val kafkaIngestDecoder: Decoder[KafkaIngest] = { import SecretCodecs.secretDecoder deriveConfiguredDecoder } implicit lazy val encoder: Encoder[IngestSource] = deriveConfiguredEncoder 
implicit lazy val decoder: Decoder[IngestSource] = deriveConfiguredDecoder

  /** Encoder that preserves credential values for persistence and cluster communication.
    * Requires witness (`import Secret.Unsafe._`) to call.
    *
    * Delegates to `IngestSourcePreservingCodecs.encoder`, forwarding `ev` implicitly.
    */
  def preservingEncoder(implicit ev: Secret.UnsafeAccess): Encoder[IngestSource] =
    IngestSourcePreservingCodecs.encoder
}

/** Separate object to derive preserving encoders for persistence and cluster communication (without implicit conflicts). */
private object IngestSourcePreservingCodecs {
  import io.circe.generic.extras.semiauto.deriveConfiguredEncoder

  import com.thatdot.api.codec.SecretCodecs
  import com.thatdot.api.v2.TypeDiscriminatorConfig.instances.circeConfig
  import com.thatdot.api.v2.codec.ThirdPartyCodecs.jdk.charsetEncoder
  import com.thatdot.api.v2.codec.DisjointEither.syntax._
  import com.thatdot.api.v2.codec.DisjointEvidence._

  // Encoders for V1 enum-like types that carry no secrets are reused as-is.
  import V1IngestCodecs.{
    csvCharacterEncoder,
    fileIngestModeEncoder,
    kafkaAutoOffsetResetEncoder,
    kafkaOffsetCommittingEncoder,
    kafkaSecurityProtocolEncoder,
    keepaliveProtocolEncoder,
    kinesisIteratorTypeEncoder,
    recordDecodingTypeEncoder,
  }

  /** Builds an `Encoder[IngestSource]` from locally declared implicit encoders.
    *
    * The secret-bearing types (`Secret`, `SaslJaasConfig`, `V1.AwsCredentials`) are bound to their
    * `preserving` encoders first, then one encoder per `IngestSource` subtype is derived, and finally
    * the encoder for the sealed parent is derived and returned.
    *
    * NOTE(review): `ev` is not referenced by name in the body; presumably the preserving encoders
    * consume it implicitly -- confirm.
    *
    * @param ev witness that the caller is allowed to emit unredacted secrets
    */
  def encoder(implicit ev: Secret.UnsafeAccess): Encoder[IngestSource] = {
    // Shadow the redacting encoders with preserving versions
    implicit val secretEncoder: Encoder[Secret] = SecretCodecs.preservingEncoder
    implicit val saslJaasConfigEncoder: Encoder[SaslJaasConfig] = SaslJaasConfig.preservingEncoder
    implicit val awsCredentialsEncoder: Encoder[V1.AwsCredentials] = V1IngestCodecs.awsCredentialsPreservingEncoder
    implicit val awsRegionEncoder: Encoder[V1.AwsRegion] = V1IngestCodecs.awsRegionEncoder
    // Derive encoders for subtypes that contain secrets
    // (every subtype is derived here, not only the ones with secret fields)
    implicit val sqsIngestEncoder: Encoder[SQSIngest] = deriveConfiguredEncoder
    implicit val kinesisIngestEncoder: Encoder[KinesisIngest] = deriveConfiguredEncoder
    implicit val kinesisKclIngestEncoder: Encoder[KinesisKclIngest] = deriveConfiguredEncoder
    implicit val s3IngestEncoder: Encoder[S3Ingest] = deriveConfiguredEncoder
    implicit val fileIngestEncoder: Encoder[FileIngest] = deriveConfiguredEncoder
    implicit val stdInputIngestEncoder: Encoder[StdInputIngest] = deriveConfiguredEncoder
    implicit val numberIteratorIngestEncoder: Encoder[NumberIteratorIngest] = deriveConfiguredEncoder
    implicit val websocketIngestEncoder: Encoder[WebsocketIngest] = deriveConfiguredEncoder
    implicit val serverSentEventIngestEncoder: Encoder[ServerSentEventIngest] = deriveConfiguredEncoder
    implicit val kafkaIngestEncoder: Encoder[KafkaIngest] = deriveConfiguredEncoder
    implicit val reactiveStreamIngestEncoder: Encoder[ReactiveStreamIngest] = deriveConfiguredEncoder
    implicit val webSocketFileUploadEncoder: Encoder[WebSocketFileUpload] = deriveConfiguredEncoder
    deriveConfiguredEncoder[IngestSource]
  }
}

/** Ingest source definition for a file on the local host.
  *
  * Field-level documentation is carried by the `@description` annotations.
  *
  * NOTE(review): the `characterEncoding` description reads "will transcoded" (likely "will be
  * transcoded"); the string is user-facing and is left untouched here.
  * NOTE(review): the `|` inside the triple-quoted description looks like a margin marker that
  * would normally start a line -- confirm against the upstream file layout.
  */
@title("File Ingest")
@description("An active stream of data being ingested from a file on this Quine host.")
case class FileIngest(
  @description("Format used to decode each incoming line from a file.")
  format: FileFormat,
  @description("Local file path.")
  path: String,
  fileIngestMode: Option[V1.FileIngestMode],
  @description("Maximum size (in bytes) of any line in the file.")
  maximumLineSize: Option[Int] = None,
  @description(
    s"""Begin processing at the record with the given index. Useful for skipping some number of lines (e.g. CSV headers) or |resuming ingest from a partially consumed file.""".asOneLine,
  )
  startOffset: Long,
  @description(s"Optionally limit how many records are ingested from this file.")
  limit: Option[Long],
  @description(
    "The text encoding scheme for the file. UTF-8, US-ASCII and ISO-8859-1 are " +
    "supported -- other encodings will transcoded to UTF-8 on the fly (and ingest may be slower).",
  )
  characterEncoding: Charset,
  @description(
    "List of decodings to be applied to each input. The specified decodings are applied in declared array order.",
  )
  recordDecoders: Seq[V1.RecordDecodingType] = Seq(),
) extends FileIngestSource
    with IngestCharsetSupport
    with IngestBoundingSupport
    with IngestDecompressionSupport

/** Ingest source definition for a newline-delimited object stored in S3.
  *
  * Mirrors `FileIngest`, replacing `path`/`fileIngestMode` with `bucket`, `key` and optional credentials.
  *
  * NOTE(review): `bucket` is the only field without a `@description`; the `characterEncoding`
  * description shares the "will transcoded" wording noted on `FileIngest`.
  */
@title("S3 File ingest")
@description(
  """An ingest stream from a file in S3, newline delimited. This ingest source is |experimental and is subject to change without warning. In particular, there are |known issues with durability when the stream is inactive for at least 1 minute.""".asOneLine,
)
case class S3Ingest(
  @description("Format used to decode each incoming line from a file in S3.")
  format: FileFormat,
  bucket: String,
  @description("S3 file name.")
  key: String,
  @description("AWS credentials to apply to this request.")
  credentials: Option[V1.AwsCredentials],
  @description("Maximum size (in bytes) of any line in the file.")
  maximumLineSize: Option[Int] = None,
  @description(
    s"""Begin processing at the record with the given index. Useful for skipping some number of lines (e.g. CSV headers) or |resuming ingest from a partially consumed file.""".asOneLine,
  )
  startOffset: Long,
  @description(s"Optionally limit how many records are ingested from this file.")
  limit: Option[Long],
  @description(
    "text encoding used to read the file. Only UTF-8, US-ASCII and ISO-8859-1 are directly " +
    "supported -- other encodings will transcoded to UTF-8 on the fly (and ingest may be slower).",
  )
  characterEncoding: Charset,
  @description(
    "List of decodings to be applied to each input. The specified decodings are applied in declared array order.",
  )
  recordDecoders: Seq[V1.RecordDecodingType] = Seq(),
) extends FileIngestSource
    with IngestCharsetSupport
    with IngestBoundingSupport
    with IngestDecompressionSupport

/** Ingest source identified by a streaming format plus a `url` and `port`.
  *
  * NOTE(review): unlike its siblings this class has no `@title`/`@description` annotations and
  * extends `IngestSource` directly -- confirm that is intentional.
  */
case class ReactiveStreamIngest(
  format: StreamingFormat,
  url: String,
  port: Int,
) extends IngestSource

/** Ingest source for a file streamed over a WebSocket; only the file format is configurable. */
@title("WebSocket File Upload")
@description("Streamed file upload via WebSocket protocol.")
case class WebSocketFileUpload(
  @description("File format")
  format: FileFormat,
) extends FileIngestSource

/** Ingest source definition for the process's standard input.
  *
  * NOTE(review): the `characterEncoding` description contains "will transcoded" (likely "will be
  * transcoded"); left unchanged because it is a user-facing string.
  */
@title("Standard Input Ingest Stream")
@description("An active stream of data being ingested from standard input to this Quine process.")
case class StdInputIngest(
  @description("Format used to decode each incoming line from stdIn.")
  format: FileFormat,
  @description("Maximum size (in bytes) of any line in the file.")
  maximumLineSize: Option[Int] = None,
  @description(
    "text encoding used to read the file. Only UTF-8, US-ASCII and ISO-8859-1 are directly " +
    "supported -- other encodings will transcoded to UTF-8 on the fly (and ingest may be slower).",
  )
  characterEncoding: Charset,
) extends FileIngestSource with IngestCharsetSupport

/** Ingest source definition that needs no external data: it is configured only by a start number
  * (default `0L`) and an optional item limit.
  *
  * NOTE(review): the class description contains "`Long`s`" with an unbalanced backtick; left
  * unchanged because it is a user-facing string.
  */
@title("Number Iterator Ingest")
@description(
  "An infinite ingest stream which requires no data source and just produces new sequential numbers" +
  " every time the stream is (re)started. The numbers are Java `Long`s` and will wrap at their max value.",
)
case class NumberIteratorIngest(
  format: StreamingFormat,
  @description("Begin the stream with this number.")
  startOffset: Long = 0L,
  @description("Optionally end the stream after consuming this many items.")
  limit: Option[Long],
) extends StreamingIngestSource with IngestBoundingSupport

/** Ingest source definition for a WebSocket connection.
  *
  * `keepAlive` defaults to `V1.WebsocketSimpleStartupIngest.PingPongInterval()`.
  */
@title("Websockets Ingest Stream (Simple Startup)")
@description("A websocket stream started after a sequence of text messages.")
case class WebsocketIngest(
  @description("Format used to decode each incoming message.")
  format: StreamingFormat,
  @description("Websocket (ws: or wss:) url to connect to.")
  url: String,
  @description("Initial messages to send to the server on connecting.")
  initMessages: Seq[String],
  @description("Strategy to use for sending keepalive messages, if any.")
  keepAlive: V1.WebsocketSimpleStartupIngest.KeepaliveProtocol = V1.WebsocketSimpleStartupIngest.PingPongInterval(),
  characterEncoding: Charset,
) extends StreamingIngestSource with IngestCharsetSupport

/** Ingest source definition for a Kinesis data stream.
  *
  * Defaults visible here: `iteratorType` is `Latest`, `numRetries` is 3, `recordDecoders` is empty.
  *
  * NOTE(review): the descriptions contain "Shards IDs" and "Kineses error" (likely "Shard IDs" and
  * "Kinesis error"); left unchanged because they are user-facing strings.
  */
@title("Kinesis Data Stream")
@description("A stream of data being ingested from Kinesis.")
case class KinesisIngest(
  @description("The format used to decode each Kinesis record.")
  format: StreamingFormat,
  @description("Name of the Kinesis stream to ingest.")
  streamName: String,
  @description(
    "Shards IDs within the named kinesis stream to ingest; if empty or excluded, all shards on the stream are processed.",
  )
  shardIds: Option[Set[String]],
  @description("AWS credentials for this Kinesis stream.")
  credentials: Option[V1.AwsCredentials],
  @description("AWS region for this Kinesis stream.")
  region: Option[V1.AwsRegion],
  @description("Shard iterator type.")
  iteratorType: V1.KinesisIngest.IteratorType = V1.KinesisIngest.IteratorType.Latest,
  @description("Number of retries to attempt on Kineses error.")
  numRetries: Int = 3,
  @description(
    "List of decodings to be applied to each input, where specified decodings are applied in declared array order.",
) recordDecoders: Seq[V1.RecordDecodingType] = Seq(),
) extends StreamingIngestSource with IngestDecompressionSupport

/** Ingest source definition for a Kinesis stream consumed through the Kinesis Client Library.
  *
  * NOTE(review): parameters here are documented with scaladoc comments rather than the
  * `@description` annotations used by the sibling ingest classes; presumably these comments do not
  * reach generated API documentation -- confirm.
  */
@title("Kinesis Data Stream Using Kcl lib")
@description("A stream of data being ingested from Kinesis.")
case class KinesisKclIngest(
  /** The name of the stream that this application processes records from. */
  kinesisStreamName: String,
  /** Overrides the table name used for the Amazon DynamoDB lease table, the default CloudWatch namespace, and consumer name. */
  applicationName: String,
  format: StreamingFormat,
  credentialsOpt: Option[V1.AwsCredentials],
  regionOpt: Option[V1.AwsRegion],
  initialPosition: InitialPosition,
  numRetries: Int,
  recordDecoders: Seq[V1.RecordDecodingType] = Seq(),
  /** Additional settings for the Kinesis Scheduler. */
  schedulerSourceSettings: KinesisSchedulerSourceSettings,
  /** Optional stream checkpoint settings. If present, checkpointing will manage `iteratorType` and `shardIds`,
    * ignoring those fields in the API request.
    *
    * NOTE(review): the parameter type is `KinesisCheckpointSettings`, not an `Option`, and this class
    * declares no `iteratorType` or `shardIds` fields -- the wording above may be stale; confirm.
    */
  checkpointSettings: KinesisCheckpointSettings,
  /** Optional advanced configuration, derived from the KCL 3.x documented configuration table
    * (https://docs.aws.amazon.com/streams/latest/dev/kcl-configuration.html), but without fields that are available
    * elsewhere in this API object schema.
    *
    * NOTE(review): the parameter type is `KCLConfiguration` with no default, so a value is required
    * at this constructor despite "Optional" above -- confirm intended wording.
    */
  advancedSettings: KCLConfiguration,
) extends StreamingIngestSource with IngestDecompressionSupport

/** Ingest source definition for a server-sent-events endpoint. */
@title("Server Sent Events Stream")
@description(
  "A server-issued event stream, as might be handled by the EventSource JavaScript API. Only consumes the `data` portion of an event.",
)
case class ServerSentEventIngest(
  @description("Format used to decode each event's `data`.")
  format: StreamingFormat,
  @description("URL of the server sent event stream.")
  url: String,
  @description(
    "List of decodings to be applied to each input, where specified decodings are applied in declared array order.",
  )
  recordDecoders: Seq[V1.RecordDecodingType] = Seq(),
) extends StreamingIngestSource with IngestDecompressionSupport

/** Ingest source definition for an AWS SQS queue.
  *
  * Defaults visible here: `readParallelism` is 1, `deleteReadMessages` is `true`,
  * `recordDecoders` is empty. `format`, `credentials` and `region` carry no `@description`.
  */
@title("Simple Queue Service Queue")
@description("An active stream of data being ingested from AWS SQS.")
case class SQSIngest(
  format: StreamingFormat,
  @description("URL of the queue to ingest.")
  queueUrl: String,
  @description("Maximum number of records to read from the queue simultaneously.")
  readParallelism: Int = 1,
  credentials: Option[V1.AwsCredentials],
  region: Option[V1.AwsRegion],
  @description("Whether the queue consumer should acknowledge receipt of in-flight messages.")
  deleteReadMessages: Boolean = true,
  @description(
    "List of decodings to be applied to each input, where specified decodings are applied in declared array order.",
  )
  recordDecoders: Seq[V1.RecordDecodingType] = Seq(),
) extends StreamingIngestSource with IngestDecompressionSupport

/** Ingest source definition for Kafka.
  *
  * `topics` is an `Either`: left is a list of topic names, right is an explicit partition
  * assignment (per the `@description`). `securityProtocol` defaults to `PlainText`.
  *
  * NOTE(review): the `|` inside the triple-quoted `topics` description looks like a margin marker
  * that would normally start a line -- confirm against the upstream file layout.
  */
@title("Kafka Ingest Stream")
@description("A stream of data being ingested from Kafka.")
case class KafkaIngest(
  format: StreamingFormat,
  @description(
    """Kafka topics from which to ingest: Either an array of topic names, or an object whose keys are topic names and |whose values are partition indices.""".asOneLine,
  )
  topics: Either[V1.KafkaIngest.Topics, V1.KafkaIngest.PartitionAssignments],
  @description("A comma-separated list of Kafka broker servers.")
  bootstrapServers: String,
  @description(
    "Consumer group ID that this ingest stream should report belonging to; defaults to the name of the ingest stream.",
  )
  groupId: Option[String],
  securityProtocol: V1.KafkaSecurityProtocol = V1.KafkaSecurityProtocol.PlainText,
  offsetCommitting:
Option[V1.KafkaOffsetCommitting],
  autoOffsetReset: V1.KafkaAutoOffsetReset = V1.KafkaAutoOffsetReset.Latest,
  // The three SSL passwords and the SASL/JAAS config are secret-bearing; all default to None.
  @description("Password for the SSL keystore. Redacted in API responses.")
  sslKeystorePassword: Option[Secret] = None,
  @description("Password for the SSL truststore. Redacted in API responses.")
  sslTruststorePassword: Option[Secret] = None,
  @description("Password for the SSL key. Redacted in API responses.")
  sslKeyPassword: Option[Secret] = None,
  @description("SASL/JAAS configuration for Kafka authentication. Secrets are redacted in API responses.")
  saslJaasConfig: Option[SaslJaasConfig] = None,
  // NOTE(review): this description ends after "See " with no link target -- confirm whether a URL
  // was lost from the string.
  @description(
    "Map of Kafka client properties. See ",
  )
  kafkaProperties: V1.KafkaIngest.KafkaProperties = Map.empty[String, String],
  @description(
    "The offset at which this stream should complete; offsets are sequential integers starting at 0.",
  )
  endingOffset: Option[Long],
  @description(
    "List of decodings to be applied to each input, where specified decodings are applied in declared array order.",
  )
  recordDecoders: Seq[V1.RecordDecodingType] = Seq(),
) extends StreamingIngestSource with IngestDecompressionSupport

/** Scheduler Checkpoint Settings
  *
  * @param disableCheckpointing Disable checkpointing to the DynamoDB table. Defaults to `false`.
  * @param maxBatchSize Maximum checkpoint batch size.
  * @param maxBatchWaitMillis Maximum checkpoint batch wait time in milliseconds.
  */
case class KinesisCheckpointSettings(
  disableCheckpointing: Boolean = false,
  maxBatchSize: Option[Int] = None,
  maxBatchWaitMillis: Option[Long] = None,
)

/** Tapir schema and circe codecs for [[KinesisCheckpointSettings]]. */
object KinesisCheckpointSettings {
  implicit lazy val schema: Schema[KinesisCheckpointSettings] = Schema.derived
  implicit val encoder: Encoder[KinesisCheckpointSettings] = deriveConfiguredEncoder
  implicit val decoder: Decoder[KinesisCheckpointSettings] = deriveConfiguredDecoder
}

/** Settings used when materialising a `KinesisSchedulerSource`.
  *
  * @param bufferSize Sets the buffer size. Buffer size must be greater than 0; use size `1` to disable
  * stage buffering.
  * @param backpressureTimeoutMillis Sets the back-pressure timeout in milliseconds.
  */
case class KinesisSchedulerSourceSettings(
  bufferSize: Option[Int] = None,
  backpressureTimeoutMillis: Option[Long] = None,
)

/** Tapir schema and circe codecs for [[KinesisSchedulerSourceSettings]]. */
object KinesisSchedulerSourceSettings {
  implicit lazy val schema: Schema[KinesisSchedulerSourceSettings] = Schema.derived
  implicit val encoder: Encoder[KinesisSchedulerSourceSettings] = deriveConfiguredEncoder
  implicit val decoder: Decoder[KinesisSchedulerSourceSettings] = deriveConfiguredDecoder
}

/** A complex object comprising abbreviated configuration objects used by the
  * Kinesis Client Library (KCL).
  *
  * Every field has a default, so `KCLConfiguration()` is a valid value.
  *
  * @param configsBuilder Abbreviated `ConfigsBuilder` settings (lease table name, worker identifier).
  * @param leaseManagementConfig Lease-management configuration.
  * @param retrievalSpecificConfig Configuration for fan out or shared polling.
  * @param processorConfig Configuration for the record-processor.
  * @param coordinatorConfig Configuration for the shard-coordinator.
  * @param lifecycleConfig Configuration for lifecycle behaviour.
  * @param retrievalConfig Configuration for record retrieval.
  * @param metricsConfig Configuration for CloudWatch metrics.
  */
case class KCLConfiguration(
  configsBuilder: ConfigsBuilder = ConfigsBuilder(),
  leaseManagementConfig: LeaseManagementConfig = LeaseManagementConfig(),
  retrievalSpecificConfig: Option[RetrievalSpecificConfig] = None,
  processorConfig: ProcessorConfig = ProcessorConfig(),
  coordinatorConfig: CoordinatorConfig = CoordinatorConfig(),
  lifecycleConfig: LifecycleConfig = LifecycleConfig(),
  retrievalConfig: RetrievalConfig = RetrievalConfig(),
  metricsConfig: MetricsConfig = MetricsConfig(),
)

/** Tapir schema and circe codecs for [[KCLConfiguration]] (all three are `lazy` here). */
object KCLConfiguration {
  implicit lazy val schema: Schema[KCLConfiguration] = Schema.derived
  implicit lazy val encoder: Encoder[KCLConfiguration] = deriveConfiguredEncoder
  implicit lazy val decoder: Decoder[KCLConfiguration] = deriveConfiguredDecoder
}

/** Abbreviated configuration for the KCL `ConfigsBuilder`. */
case class ConfigsBuilder(
  /** Allows overriding the table name used for the Amazon DynamoDB lease table. */
  tableName: Option[String] = None,
  /** A unique identifier that represents this instantiation of the application processor. */
  workerIdentifier: Option[String] = None,
)

/** Tapir schema and circe codecs for [[ConfigsBuilder]]. */
object ConfigsBuilder {
  implicit lazy val schema: Schema[ConfigsBuilder] = Schema.derived
  implicit val encoder: Encoder[ConfigsBuilder] = deriveConfiguredEncoder
  implicit val decoder: Decoder[ConfigsBuilder] = deriveConfiguredDecoder
}

/** Billing mode choices; each case exposes its name as `value`. */
sealed trait BillingMode {
  def value: String
}

// NOTE(review): codecs here use deriveConfigured*, while `ClientVersionConfig` and `MetricsLevel`
// in this file use deriveEnumeration* for traits that also contain only case objects; presumably
// the JSON representations differ -- confirm that is intended.
object BillingMode {

  /** Provisioned billing. */
  case object PROVISIONED extends BillingMode { val value = "PROVISIONED" }

  /** Pay-per-request billing. */
  case object PAY_PER_REQUEST extends BillingMode { val value = "PAY_PER_REQUEST" }

  /** The billing mode is not one of the provided options. */
  case object UNKNOWN_TO_SDK_VERSION extends BillingMode { val value = "UNKNOWN_TO_SDK_VERSION" }

  implicit lazy val schema: Schema[BillingMode] = Schema.derived
  implicit val encoder: Encoder[BillingMode] = deriveConfiguredEncoder
  implicit val decoder: Decoder[BillingMode] = deriveConfiguredDecoder
}

/** Initial position in the shard from which the KCL should start consuming. */
sealed trait InitialPosition

object InitialPosition {

  /** All records added to the shard since subscribing. */
  case object Latest extends InitialPosition

  /** All records in the shard. */
  case object TrimHorizon extends InitialPosition

  /** All records starting from the provided date/time. */
  final case class AtTimestamp(year: Int, month: Int, date: Int, hourOfDay: Int, minute: Int, second: Int)
      extends InitialPosition {

    /** Convenience conversion to `java.time.Instant`.
      *
      * Formats the fields as zero-padded `yyyy-MM-ddTHH:mm:ssZ` text and hands it to `Instant.parse`,
      * so the result is interpreted as UTC and `month` is written into the text as-is (1 = January).
      * Field values that do not form a parseable instant make `Instant.parse` throw.
      *
      * NOTE(review): the field names resemble `java.util.Calendar` fields, whose month is 0-based --
      * confirm callers supply a 1-based month.
      */
    def toInstant: Instant =
      Instant.parse(f"$year%04d-$month%02d-$date%02dT$hourOfDay%02d:$minute%02d:$second%02dZ")
  }

  implicit lazy val schema: Schema[InitialPosition] = Schema.derived
  implicit val encoder: Encoder[InitialPosition] = deriveConfiguredEncoder
  implicit val decoder: Decoder[InitialPosition] = deriveConfiguredDecoder
}

/** Lease-management configuration.
*/ case class LeaseManagementConfig(
  /** Milliseconds that must pass before a lease owner is considered to have failed. */
  failoverTimeMillis: Option[Long] = None,
  /** Time between shard-sync calls (milliseconds, per the field name). */
  shardSyncIntervalMillis: Option[Long] = None,
  /** Remove leases as soon as child leases have started processing. */
  cleanupLeasesUponShardCompletion: Option[Boolean] = None,
  /** Ignore child shards that have an open shard (primarily for DynamoDB Streams). */
  ignoreUnexpectedChildShards: Option[Boolean] = None,
  /** Maximum number of leases a single worker should accept. */
  maxLeasesForWorker: Option[Int] = None,
  /** Size of the lease-renewer thread-pool. */
  maxLeaseRenewalThreads: Option[Int] = None,
  /** Capacity mode of the lease table created in DynamoDB. */
  billingMode: Option[BillingMode] = None,
  /** DynamoDB read capacity when creating a new lease table (provisioned mode). */
  initialLeaseTableReadCapacity: Option[Int] = None,
  /** DynamoDB write capacity when creating a new lease table (provisioned mode). */
  initialLeaseTableWriteCapacity: Option[Int] = None,
  /** Percentage threshold at which the load-balancing algorithm considers reassigning shards. */
  reBalanceThresholdPercentage: Option[Int] = None,
  /** Dampening percentage used to limit load moved from an overloaded worker during rebalance. */
  dampeningPercentage: Option[Int] = None,
  /** Allow throughput overshoot when taking additional leases from an overloaded worker. */
  allowThroughputOvershoot: Option[Boolean] = None,
  /** Ignore worker resource metrics (such as CPU) when reassigning leases. */
  disableWorkerMetrics: Option[Boolean] = None,
  /** Maximum throughput (KB/s) to assign to a worker during lease assignment. */
  maxThroughputPerHostKBps: Option[Double] = None,
  /** Enable graceful lease hand-off between workers. */
  isGracefulLeaseHandoffEnabled: Option[Boolean] = None,
  /** Minimum time to wait (ms) for the current shard's processor to shut down gracefully before forcing hand-off. */
  gracefulLeaseHandoffTimeoutMillis: Option[Long] = None,
)

/** Tapir schema and circe codecs for [[LeaseManagementConfig]]. */
object LeaseManagementConfig {
  implicit lazy val schema: Schema[LeaseManagementConfig] = Schema.derived
  implicit val encoder: Encoder[LeaseManagementConfig] = deriveConfiguredEncoder
  implicit val decoder: Decoder[LeaseManagementConfig] = deriveConfiguredDecoder
}

/** Retrieval-mode-specific settings: either [[RetrievalSpecificConfig.FanOutConfig]] or
  * [[RetrievalSpecificConfig.PollingConfig]].
  */
sealed trait RetrievalSpecificConfig

object RetrievalSpecificConfig {

  /** Fan-out-specific configuration (consumer identity and retry settings).
    *
    * Unlike `PollingConfig`, none of these `Option` fields declares a default, so every one must be
    * passed explicitly when constructing this class directly.
    */
  case class FanOutConfig(
    /** The ARN of an already created consumer, if this is set no automatic consumer creation will be attempted. */
    consumerArn: Option[String],
    /** The name of the consumer to create. If this isn't set the `applicationName` will be used. */
    consumerName: Option[String],
    /** The maximum number of retries for calling DescribeStreamSummary.
      * Once exhausted the consumer creation/retrieval will fail.
      */
    maxDescribeStreamSummaryRetries: Option[Int],
    /** The maximum number of retries for calling DescribeStreamConsumer.
      * Once exhausted the consumer creation/retrieval will fail.
      */
    maxDescribeStreamConsumerRetries: Option[Int],
    /** The maximum number of retries for calling RegisterStreamConsumer.
      * Once exhausted the consumer creation/retrieval will fail.
      */
    registerStreamConsumerRetries: Option[Int],
    /** The maximum amount of time to wait between failed calls (milliseconds, per the field name). */
    retryBackoffMillis: Option[Long],
  ) extends RetrievalSpecificConfig

  /** Polling-specific configuration. */
  case class PollingConfig(
    /** Maximum number of records that Kinesis returns. */
    maxRecords: Option[Int] = None,
    /** Delay between `GetRecords` attempts for failures (seconds). */
    retryGetRecordsInSeconds: Option[Int] = None,
    /** Thread-pool size used for `GetRecords`. */
    maxGetRecordsThreadPool: Option[Int] = None,
    /** How long KCL waits between `GetRecords` calls (milliseconds). */
    idleTimeBetweenReadsInMillis: Option[Long] = None,
  ) extends RetrievalSpecificConfig

  implicit lazy val schema: Schema[RetrievalSpecificConfig] = Schema.derived
  implicit val encoder: Encoder[RetrievalSpecificConfig] = deriveConfiguredEncoder
  implicit val decoder: Decoder[RetrievalSpecificConfig] = deriveConfiguredDecoder
}

/** Record-processor configuration. */
case class ProcessorConfig(
  /** Invoke the record processor even when Kinesis returns an empty record list. */
  callProcessRecordsEvenForEmptyRecordList: Option[Boolean] = None,
)

/** Tapir schema and circe codecs for [[ProcessorConfig]]. */
object ProcessorConfig {
  implicit lazy val schema: Schema[ProcessorConfig] = Schema.derived
  implicit val encoder: Encoder[ProcessorConfig] = deriveConfiguredEncoder
  implicit val decoder: Decoder[ProcessorConfig] = deriveConfiguredDecoder
}

/** Marker trait for shard-prioritisation strategies. */
sealed trait ShardPrioritization

object ShardPrioritization {

  /** No-op prioritisation. */
  case object NoOpShardPrioritization extends ShardPrioritization

  /** Process shard parents first, limited by a `maxDepth` argument. */
  case class ParentsFirstShardPrioritization(maxDepth: Int) extends ShardPrioritization

  implicit lazy val schema: Schema[ShardPrioritization] = Schema.derived
  implicit val encoder: Encoder[ShardPrioritization] = deriveConfiguredEncoder
  implicit val decoder: Decoder[ShardPrioritization] = deriveConfiguredDecoder
}

/** Compatibility mode for the KCL client version. */
sealed trait ClientVersionConfig

// Codecs for this trait are derived with the enumeration derivation helpers, unlike the
// deriveConfigured* helpers used by most companions in this file.
object ClientVersionConfig {

  /** Mode named as compatible with KCL 2.x. */
  case object CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X extends ClientVersionConfig

  /** Mode named for KCL 3.x. */
  case object CLIENT_VERSION_CONFIG_3X extends ClientVersionConfig

  implicit lazy val schema: Schema[ClientVersionConfig] = Schema.derived
  implicit val encoder: Encoder[ClientVersionConfig] = deriveEnumerationEncoder
  implicit val decoder: Decoder[ClientVersionConfig] = deriveEnumerationDecoder
}

/** Coordinator (shard-coordinator) configuration.
*/ case class CoordinatorConfig(
  /** Interval between polling to see if the parent shard has completed (ms). */
  parentShardPollIntervalMillis: Option[Long] = None,
  /** Skip shard-sync on worker initialisation if leases already exist. */
  skipShardSyncAtWorkerInitializationIfLeasesExist: Option[Boolean] = None,
  /** Shard prioritisation strategy. */
  shardPrioritization: Option[ShardPrioritization] = None,
  /** KCL version compatibility mode (used during migration). */
  clientVersionConfig: Option[ClientVersionConfig] = None,
)

/** Tapir schema and circe codecs for [[CoordinatorConfig]]. */
object CoordinatorConfig {
  implicit lazy val schema: Schema[CoordinatorConfig] = Schema.derived
  implicit val encoder: Encoder[CoordinatorConfig] = deriveConfiguredEncoder
  implicit val decoder: Decoder[CoordinatorConfig] = deriveConfiguredDecoder
}

/** Lifecycle configuration. */
case class LifecycleConfig(
  /** Time to wait before retrying failed KCL tasks (ms). */
  taskBackoffTimeMillis: Option[Long] = None,
  /** Time before logging a warning if a task hasn't completed (ms). */
  logWarningForTaskAfterMillis: Option[Long] = None,
)

/** Tapir schema and circe codecs for [[LifecycleConfig]]. */
object LifecycleConfig {
  implicit lazy val schema: Schema[LifecycleConfig] = Schema.derived
  implicit val encoder: Encoder[LifecycleConfig] = deriveConfiguredEncoder
  implicit val decoder: Decoder[LifecycleConfig] = deriveConfiguredDecoder
}

/** Record-retrieval configuration. */
case class RetrievalConfig(
  /** Milliseconds to wait between `ListShards` calls when failures occur. */
  listShardsBackoffTimeInMillis: Option[Long] = None,
  /** Maximum number of retry attempts for `ListShards` before giving up. */
  maxListShardsRetryAttempts: Option[Int] = None,
)

/** Tapir schema and circe codecs for [[RetrievalConfig]]. */
object RetrievalConfig {
  implicit lazy val schema: Schema[RetrievalConfig] = Schema.derived
  implicit val encoder: Encoder[RetrievalConfig] = deriveConfiguredEncoder
  implicit val decoder: Decoder[RetrievalConfig] = deriveConfiguredDecoder
}

/** CloudWatch metrics granularity level. */
sealed trait MetricsLevel

// Codecs for this trait are derived with the enumeration derivation helpers.
object MetricsLevel {

  /** Metrics disabled. */
  case object NONE extends MetricsLevel

  /** Emit only the most significant metrics. */
  case object SUMMARY extends MetricsLevel

  /** Emit all available metrics. */
  case object DETAILED extends MetricsLevel

  implicit lazy val schema: Schema[MetricsLevel] = Schema.derived
  implicit val encoder: Encoder[MetricsLevel] = deriveEnumerationEncoder
  implicit val decoder: Decoder[MetricsLevel] = deriveEnumerationDecoder
}

/** Dimensions that may be attached to CloudWatch metrics. */
sealed trait MetricsDimension {
  def value: String
}

// Each case pairs an upper-case identifier with the dimension name held in `value`.
// NOTE(review): codecs use deriveConfigured* here but deriveEnumeration* on `MetricsLevel` above;
// presumably the JSON representations differ -- confirm that is intended.
object MetricsDimension {
  case object OPERATION_DIMENSION_NAME extends MetricsDimension { val value = "Operation" }
  case object SHARD_ID_DIMENSION_NAME extends MetricsDimension { val value = "ShardId" }
  case object STREAM_IDENTIFIER extends MetricsDimension { val value = "StreamId" }
  case object WORKER_IDENTIFIER extends MetricsDimension { val value = "WorkerIdentifier" }

  implicit lazy val schema: Schema[MetricsDimension] = Schema.derived
  implicit val encoder: Encoder[MetricsDimension] = deriveConfiguredEncoder
  implicit val decoder: Decoder[MetricsDimension] = deriveConfiguredDecoder
}

/** CloudWatch metrics configuration. */
case class MetricsConfig(
  /** Maximum duration (ms) to buffer metrics before publishing to CloudWatch. */
  metricsBufferTimeMillis: Option[Long] = None,
  /** Maximum number of metrics to buffer before publishing to CloudWatch. */
  metricsMaxQueueSize: Option[Int] = None,
  /** Granularity level of CloudWatch metrics to enable and publish. */
  metricsLevel: Option[MetricsLevel] = None,
  /** Allowed dimensions for CloudWatch metrics. */
  metricsEnabledDimensions: Option[Set[MetricsDimension]] = None,
)

/** Tapir schema and circe codecs for [[MetricsConfig]]. */
object MetricsConfig {
  implicit lazy val schema: Schema[MetricsConfig] = Schema.derived
  implicit val encoder: Encoder[MetricsConfig] = deriveConfiguredEncoder
  implicit val decoder: Decoder[MetricsConfig] = deriveConfiguredDecoder
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/V2ToV1.scala
================================================
package com.thatdot.quine.app.model.ingest2

import com.thatdot.quine.app.model.ingest2.{V2IngestEntities => V2}
import com.thatdot.quine.{routes => V1}

/** Conversions from V2 ingest model values to their V1 (`com.thatdot.quine.routes`) counterparts. */
object V2ToV1 {

  /** Maps a V2 ingest stream status to the V1 status of the same name.
    *
    * @param status the V2 status to convert
    * @return the identically named `V1.IngestStreamStatus` case
    */
  def apply(status: V2.IngestStreamStatus): V1.IngestStreamStatus = status match {
    case V2.IngestStreamStatus.Running => V1.IngestStreamStatus.Running
    case V2.IngestStreamStatus.Paused => V1.IngestStreamStatus.Paused
    case V2.IngestStreamStatus.Restored => V1.IngestStreamStatus.Restored
    case V2.IngestStreamStatus.Completed => V1.IngestStreamStatus.Completed
    case V2.IngestStreamStatus.Terminated => V1.IngestStreamStatus.Terminated
    case V2.IngestStreamStatus.Failed => V1.IngestStreamStatus.Failed
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/codec/FrameDecoder.scala
================================================
package com.thatdot.quine.app.model.ingest2.codec

import java.io.StringReader
import java.nio.charset.{Charset, StandardCharsets}

import scala.concurrent.Await
import scala.concurrent.duration.Duration
import scala.jdk.CollectionConverters._
import scala.util.{Success, Try}

import com.google.protobuf.{Descriptors, DynamicMessage}
import io.circe.{Json, parser}
import org.apache.avro.Schema
import org.apache.avro.file.SeekableByteArrayInput
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.avro.io.DecoderFactory
import org.apache.commons.csv.CSVFormat

import com.thatdot.data.{DataFoldableFrom, DataFolderTo}
import
com.thatdot.quine.app.data.QuineDataFoldablesFrom
import com.thatdot.quine.app.model.ingest2.sources.DEFAULT_CHARSET
import com.thatdot.quine.app.model.ingest2.{FileFormat, IngestFormat => V2IngestFormat, StreamingFormat}
import com.thatdot.quine.graph.cypher
import com.thatdot.quine.graph.cypher.Value
import com.thatdot.quine.routes._
import com.thatdot.quine.serialization.{AvroSchemaCache, ProtobufSchemaCache}
import com.thatdot.quine.util.StringInput.filenameOrUrl

/** Decodes a single frame of raw bytes into an `A` and carries the `DataFoldableFrom` instance for `A`. */
trait FrameDecoder[A] {
  val foldable: DataFoldableFrom[A]

  /** Attempts to decode `bytes`; failures are reported through the returned `Try`. */
  def decode(bytes: Array[Byte]): Try[A]
}

/** Reads the frame as UTF-8 text and wraps it in a Cypher string value; always succeeds. */
object CypherStringDecoder extends FrameDecoder[cypher.Value] {
  val foldable: DataFoldableFrom[Value] = QuineDataFoldablesFrom.cypherValueDataFoldable

  def decode(bytes: Array[Byte]): Try[cypher.Value] = {
    val text = new String(bytes, StandardCharsets.UTF_8)
    Success(cypher.Expr.Str(text))
  }
}

/** Reads the frame as a UTF-8 `String`; always succeeds. */
object StringDecoder extends FrameDecoder[String] {
  val foldable: DataFoldableFrom[String] = DataFoldableFrom.stringDataFoldable

  def decode(bytes: Array[Byte]): Try[String] = {
    val text = new String(bytes, StandardCharsets.UTF_8)
    Success(text)
  }
}

/** Wraps the frame, untouched, in a Cypher bytes value; always succeeds. */
object CypherRawDecoder extends FrameDecoder[cypher.Value] {
  val foldable: DataFoldableFrom[Value] = QuineDataFoldablesFrom.cypherValueDataFoldable

  def decode(bytes: Array[Byte]): Try[cypher.Value] = {
    val raw = cypher.Expr.Bytes(bytes)
    Success(raw)
  }
}

/** Parses the frame, read as UTF-8 text, into circe `Json`; a parse error becomes a failed `Try`. */
object JsonDecoder extends FrameDecoder[Json] {
  val foldable: DataFoldableFrom[Json] = DataFoldableFrom.jsonDataFoldable

  def decode(bytes: Array[Byte]): Try[Json] =
    parser.parse(new String(bytes, StandardCharsets.UTF_8)).toTry
}

/** Ignores the frame content: decoding always yields unit, and folding always yields the folder's null value. */
object DropDecoder extends FrameDecoder[Any] {
  val foldable: DataFoldableFrom[Any] = new DataFoldableFrom[Any] {
    def fold[B](value: Any, folder: DataFolderTo[B]): B = folder.nullValue
  }

  def decode(bytes: Array[Byte]): Success[Any] = Success(())
}

/** Decodes frames as protobuf messages of type `typeName` from the schema at `schemaUrl`. */
case class ProtobufDecoder(schemaUrl: String, typeName: String)(implicit
  protobufSchemaCache: ProtobufSchemaCache,
) extends FrameDecoder[DynamicMessage] {

  // The descriptor is resolved synchronously at construction time. The wait is unbounded, but it is
  // only expected to actually block until the first time a type is successfully loaded.
  //
  // Blocking was kept because making this effect asynchronous would mean either:
  // - making ingest startup async, which would require extensive changes to QuineApp, startup, and
  //   potentially clustering protocols, OR
  // - making the decode-bytes step of ingest async, which violates the Kafka API's expectation that
  //   a `org.apache.kafka.common.serialization.Deserializer` is synchronous.
  val messageDescriptor: Descriptors.Descriptor = {
    val pendingDescriptor =
      protobufSchemaCache.getMessageDescriptor(filenameOrUrl(schemaUrl), typeName, flushOnFail = true)
    Await.result(pendingDescriptor, Duration.Inf)
  }

  val foldable: DataFoldableFrom[DynamicMessage] = DataFoldableFrom.protobufDataFoldable

  def decode(bytes: Array[Byte]): Try[DynamicMessage] = Try {
    DynamicMessage.parseFrom(messageDescriptor, bytes)
  }
}

/** Decodes frames as binary-encoded Avro records using the schema at `schemaUrl`. */
case class AvroDecoder(schemaUrl: String)(implicit schemaCache: AvroSchemaCache) extends FrameDecoder[GenericRecord] {

  // The schema is resolved synchronously at construction time. The wait is unbounded, but it is
  // only expected to actually block until the first time a type is successfully loaded.
  //
  // Blocking was kept because making this effect asynchronous would mean either:
  // - making ingest startup async, which would require extensive changes to QuineApp, startup, and
  //   potentially clustering protocols, OR
  // - making the decode-bytes step of ingest async, which violates the Kafka API's expectation that
  //   a `org.apache.kafka.common.serialization.Deserializer` is synchronous.
  val schema: Schema = {
    val pendingSchema = schemaCache.getSchema(filenameOrUrl(schemaUrl))
    Await.result(pendingSchema, Duration.Inf)
  }

  val foldable: DataFoldableFrom[GenericRecord] = DataFoldableFrom.avroDataFoldable

  def decode(bytes: Array[Byte]): Try[GenericRecord] = Try {
    // A fresh reader and binary decoder are built for every frame; nothing is reused between calls.
    val reader = new GenericDatumReader[GenericRecord](schema)
    val binary = DecoderFactory.get.binaryDecoder(new SeekableByteArrayInput(bytes), null)
    reader.read(null, binary)
  }
}

/** Decodes one CSV record into its field values, in order.
  *
  * The format is configured to read the first record as the header row, and `decode` returns that
  * row's header names, so the single record contained in a frame comes back as the field list.
  */
case class CsvVecDecoder(delimiterChar: Char, quoteChar: Char, escapeChar: Char, charset: Charset = DEFAULT_CHARSET)
    extends FrameDecoder[Iterable[String]] {

  val csvFormat: CSVFormat =
    CSVFormat.Builder
      .create()
      .setQuote(quoteChar)
      .setDelimiter(delimiterChar)
      .setEscape(escapeChar)
      .setHeader()
      .get()

  override val foldable: DataFoldableFrom[Iterable[String]] = DataFoldableFrom.stringIterableDataFoldable

  override def decode(bytes: Array[Byte]): Try[Iterable[String]] = Try {
    val text = new String(bytes, charset)
    val parsed = csvFormat.parse(new StringReader(text))
    parsed.getHeaderNames.asScala
  }
}

/** Decodes one CSV record into a map from header name to field value.
  *
  * Fields are paired positionally with `headers`; pairing stops at the shorter of the two.
  */
case class CsvMapDecoder(
  keys: Option[Iterable[String]],
  delimiterChar: Char,
  quoteChar: Char,
  escapeChar: Char,
  charset: Charset = DEFAULT_CHARSET,
) extends FrameDecoder[Map[String, String]] {

  // Header names used for pairing. Starts as `keys`; this class never reassigns it itself, so when
  // `keys` is None decoding fails until something else assigns `headers`.
  var headers: Option[Iterable[String]] = keys

  val vecDecoder: CsvVecDecoder = CsvVecDecoder(delimiterChar, quoteChar, escapeChar, charset)

  override val foldable: DataFoldableFrom[Map[String, String]] = DataFoldableFrom.stringMapDataFoldable

  override def decode(bytes: Array[Byte]): Try[Map[String, String]] =
    for {
      row <- vecDecoder.decode(bytes)
      // `headers` is consulted only after the row itself decoded successfully
      names <- Try(headers.getOrElse(throw new Exception("Headers are empty")))
    } yield names.zip(row).toMap
}

object FrameDecoder {

  /** Chooses the CSV decoder for a header setting; shared by the V2 and V1 file-format entry points.
    *
    * @param headers `Left(false)` for positional rows, `Left(true)` for "first line is the header",
    *                `Right(names)` for caller-supplied header names
    */
  private def csvDecoder(
    headers: Either[Boolean, Iterable[String]],
    delimiter: Char,
    quote: Char,
    escape: Char,
  ): FrameDecoder[_] = headers match {
    case Left(false) => CsvVecDecoder(delimiter, quote, escape) // no headers
    case Left(true) => CsvMapDecoder(None, delimiter, quote, escape) // first line as header
    case Right(values) => CsvMapDecoder(Some(values), delimiter, quote, escape) // map values provided
  }

  /** Selects the decoder for a V2 ingest format. */
  def apply(
    format: V2IngestFormat,
  )(implicit protobufCache: ProtobufSchemaCache, avroSchemaCache: AvroSchemaCache): FrameDecoder[_] =
    format match {
      case FileFormat.LineFormat => CypherStringDecoder
      case FileFormat.JsonLinesFormat | FileFormat.JsonFormat | StreamingFormat.JsonFormat => JsonDecoder
      case FileFormat.CsvFormat(headers, delimiter, quoteChar, escapeChar) =>
        csvDecoder(headers, delimiter.byte.toChar, quoteChar.byte.toChar, escapeChar.byte.toChar)
      case StreamingFormat.RawFormat => CypherRawDecoder
      case StreamingFormat.ProtobufFormat(schemaUrl, typeName) => ProtobufDecoder(schemaUrl, typeName)
      case StreamingFormat.AvroFormat(schemaUrl) => AvroDecoder(schemaUrl)
      case StreamingFormat.DropFormat => DropDecoder
    }

  /** Selects the decoder for a V1 streamed-record format; any other format raises via `sys.error`. */
  def apply(v1Format: StreamedRecordFormat)(implicit protobufCache: ProtobufSchemaCache): FrameDecoder[_] =
    v1Format match {
      case StreamedRecordFormat.CypherJson(_, _) => JsonDecoder
      case StreamedRecordFormat.CypherRaw(_, _) => CypherRawDecoder
      case StreamedRecordFormat.CypherProtobuf(_, _, schemaUrl, typeName) =>
        ProtobufDecoder(schemaUrl, typeName)
      case StreamedRecordFormat.Drop => DropDecoder
      //note: V1 format does not support avro
      case _ => sys.error(s"Unsupported format: $v1Format")
    }

  /** Selects the decoder for a V1 file ingest format; any other format raises via `sys.error`. */
  def apply(v1Format: FileIngestFormat): FrameDecoder[_] =
    v1Format match {
      case FileIngestFormat.CypherLine(_, _) => CypherStringDecoder
      case FileIngestFormat.CypherJson(_, _) => JsonDecoder
      case FileIngestFormat.CypherCsv(_, _, headers, delimiter, quote, escape) =>
        csvDecoder(headers, delimiter.byte.toChar, quote.byte.toChar, escape.byte.toChar)
      case _ => sys.error(s"Unsupported format: $v1Format")
    }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/source/DecodedSource.scala
================================================
package com.thatdot.quine.app.model.ingest2.source

import java.nio.charset.Charset

import scala.concurrent.duration.DurationInt
import scala.concurrent.{Await, ExecutionContext, Future}
import scala.util.{Failure, Success, Try}

import org.apache.pekko.actor.ActorSystem
import org.apache.pekko.stream.Materializer
import org.apache.pekko.stream.scaladsl.{
  Flow,
  Keep,
  MergeHub,
  RestartSource,
  RetryFlow,
  Sink,
  Source,
  SourceWithContext,
}
import org.apache.pekko.util.ByteString
import org.apache.pekko.{Done, NotUsed, stream}

import cats.data.{Validated, ValidatedNel}
import cats.implicits.catsSyntaxValidatedId

import com.thatdot.common.logging.Log.{LazySafeLogging, LogConfig, Safe, SafeLoggableInterpolator}
import com.thatdot.convert.{Api2ToAws, Api2ToOutputs2}
import com.thatdot.data.{DataFoldableFrom, DataFolderTo}
import com.thatdot.outputs2.FoldableDestinationSteps.{WithByteEncoding, WithDataFoldable}
import com.thatdot.outputs2.NonFoldableDestinationSteps.WithRawBytes
import com.thatdot.outputs2.OutputEncoder.{JSON, Protobuf}
import com.thatdot.outputs2.destination.HttpEndpoint
import com.thatdot.outputs2.{
  BytesOutputEncoder,
  DestinationSteps,
  FoldableDestinationSteps,
  NonFoldableDestinationSteps,
  ResultDestination,
  destination,
}
import com.thatdot.quine.app.config.FileAccessPolicy
import com.thatdot.quine.app.data.QuineDataFoldersTo
import com.thatdot.quine.app.model.ingest.QuineIngestSource
import com.thatdot.quine.app.model.ingest.serialization.ContentDecoder
import com.thatdot.quine.app.model.ingest2.V2IngestEntities._
import com.thatdot.quine.app.model.ingest2._
import com.thatdot.quine.app.model.ingest2.codec.FrameDecoder
import com.thatdot.quine.app.model.ingest2.sources.S3Source.s3Source
import
com.thatdot.quine.app.model.ingest2.sources.StandardInputSource.stdInSource
import com.thatdot.quine.app.model.ingest2.sources._
import com.thatdot.quine.app.model.transformation.polyglot.{
  PolyglotValueDataFoldableFrom,
  PolyglotValueDataFolderTo,
  Transformation,
}
import com.thatdot.quine.app.routes.IngestMeter
import com.thatdot.quine.app.v2api.definitions.ingest2.ApiIngest.RecordRetrySettings
import com.thatdot.quine.app.v2api.definitions.ingest2.{DeadLetterQueueOutput, DeadLetterQueueSettings, OutputFormat}
import com.thatdot.quine.app.{ControlSwitches, ShutdownSwitch}
import com.thatdot.quine.graph.MasterStream.IngestSrcExecToken
import com.thatdot.quine.graph.metrics.implicits.TimeFuture
import com.thatdot.quine.graph.{CypherOpsGraph, NamespaceId, cypher}
import com.thatdot.quine.serialization.{AvroSchemaCache, ProtobufSchemaCache}
import com.thatdot.quine.util.StringInput.filenameOrUrl
import com.thatdot.quine.util.{BaseError, SwitchMode, Valve, ValveSwitch}
import com.thatdot.quine.{routes => V1}

/** Record sent to dead-letter-queue destinations when a frame fails to decode or to be
  * transformed into a Cypher value.
  */
final case class DlqEnvelope[Frame, Decoded](
  /** The original input data type. */
  frame: Frame,
  /** The type of decoded data to be forwarded to the dlq. */
  decoded: Option[Decoded] = None,
  /** An optional message describing the error that occurred. */
  message: String,
)

/** A decoded source represents a source of interpreted values, that is, values that have
  * been translated from raw formats as supplied by their ingest source.
  */
// Note: The only reason the meter needs to be included here is to enable the creation of
// the quineIngestSource for v1 compatibility. If the meter is not used downstream from
// that it may not be needed here.
abstract class DecodedSource(val meter: IngestMeter) {
  // Type of the values produced by decoding a frame.
  type Decoded
  // Type of the raw frames emitted by the underlying source.
  type Frame

  // Folds raw frames into generic data; used below when writing frames to dead-letter destinations.
  val foldableFrame: DataFoldableFrom[Frame]

  // Folds decoded values into generic data (Cypher values, polyglot values, dead-letter output).
  val foldable: DataFoldableFrom[Decoded]

  /** Bytes of a frame; used below to build the `BytesOutputEncoder` for dead-letter output. */
  def content(input: Frame): Array[Byte]

  /** Stream of decoded values. This stream must already be metered.
    */
  def stream: Source[(() => Try[Decoded], Frame), ShutdownSwitch]

  /** Acknowledgement flow applied to every frame after processing; the default maps each frame to `Done`. */
  def ack: Flow[Frame, Done, NotUsed] = Flow.fromFunction(_ => Done)

  /** Hook run when the stream built by [[toQuineIngestSource]] terminates; the default does nothing. */
  def onTermination(): Unit = ()

  /** Converts the raw decoded value into the Cypher value that the ingest query expects */
  private def preprocessToCypherValue(
    decoded: Decoded,
    transformationOpt: Option[Transformation],
  ): Either[BaseError, cypher.Value] =
    transformationOpt match {
      // Just produce a cypher value if no transform.
      case None => Right(foldable.fold(decoded, QuineDataFoldersTo.cypherValueFolder))
      // Transform the input using provided transformation
      case Some(transformation) =>
        val polyglotInput = foldable.fold(decoded, PolyglotValueDataFolderTo)
        transformation(polyglotInput).map { polyglotOutput =>
          PolyglotValueDataFoldableFrom.fold(polyglotOutput, QuineDataFoldersTo.cypherValueFolder)
        }
    }

  /** Generate an [[QuineIngestSource]] from this decoded stream Source[(() => Try[A], Frame), ShutdownSwitch]
    * into a Source[IngestSrcExecToken,NotUsed]
    * applying
    * RestartSettings | switch | valve | throttle | writeToGraph | Error Handler | Ack | Termination Hooks |
    *
    * return this source as an instance of a source that can be ingested into a Quine graph.
    */
  def toQuineIngestSource(
    ingestName: String,
    /* A step ingesting cypher (query,parameters) => graph.*/
    ingestQuery: QuineIngestQuery,
    transformation: Option[Transformation],
    cypherGraph: CypherOpsGraph,
    initialSwitchMode: SwitchMode = SwitchMode.Open,
    parallelism: Int = 1,
    maxPerSecond: Option[Int] = None,
    onDecodeError: List[(DestinationSteps, Boolean)] = Nil,
    retrySettings: Option[RecordRetrySettings] = None,
    logRecordError: Boolean = false,
    onStreamErrorHandler: OnStreamErrorHandler = LogStreamError,
  )(implicit logConfig: LogConfig): QuineIngestSource =
    new QuineIngestSource {

      val name: String = ingestName
      implicit val graph: CypherOpsGraph = cypherGraph

      override val meter: IngestMeter = DecodedSource.this.meter

      /** Fully assembled stream with the following operations applied:
        *
        * - restart settings
        * - shutdown switch
        * - valve
        * - throttle
        * - write to graph
        * - ack
        * - termination hook
        */
      override def stream(
        intoNamespace: NamespaceId,
        registerTerminationHooks: Future[Done] => Unit,
      ): Source[IngestSrcExecToken, NotUsed] = {

        val token = IngestSrcExecToken(name)
        // TODO error handler should be settable from a config, e.g. DeadLetterErrorHandler
        val ingestStream =
          DecodedSource.this.stream
            .viaMat(Valve(initialSwitchMode))(Keep.both)
            .via(throttle(graph, maxPerSecond))

        implicit val ex: ExecutionContext = ExecutionContext.parasitic
        implicit val toBytesFrame: BytesOutputEncoder[Frame] = BytesOutputEncoder(content)

        // One sink per configured dead-letter destination; each receives the Left (failed) records below.
        val dlqSinks = DecodedSource.getDlqSinks(name, intoNamespace, onDecodeError)(
          toBytesFrame,
          foldableFrame = foldableFrame,
          foldable = foldable,
          logConfig = logConfig,
        )

        val src: Source[IngestSrcExecToken, Unit] =
          SourceWithContext
            .fromTuples(ingestStream)
            .asSource
            .via(DecodedSource.optionallyRetryDecodeStep[Frame, Decoded](logRecordError, retrySettings))
            // TODO this is slower than mapAsyncUnordered and is only necessary for Kafka acking case
            .mapAsync(parallelism) {
              case Right((t, frame)) =>
                preprocessToCypherValue(t, transformation) match {
                  // A failed transformation becomes a dead-letter envelope that keeps the decoded value.
                  case Left(value) => Future.successful(Left(DlqEnvelope(frame, Some(t), value.getMessage)))
                  case Right(cypherInput) =>
                    graph.metrics
                      .ingestQueryTimer(intoNamespace, name)
                      .time(ingestQuery.apply(cypherInput))
                      .map(_ => Right((t, frame)))
                }
              // Decode failures (Left) pass through unchanged.
              case other => Future.successful(other)
            }
            .alsoToAll(
              dlqSinks.map { sink =>
                Flow[Either[DlqEnvelope[Frame, Decoded], (Decoded, Frame)]]
                  .collect { case Left(env) => env }
                  .to {
                    sink
                  }
              }: _*,
            )
            // Successful and failed records alike continue to the ack flow as their original frame.
            .map {
              case Right((_, frame)) => frame
              case Left(env) => env.frame
            }
            .via(ack)
            .map(_ => token)
            .watchTermination() { case ((a: ShutdownSwitch, b: Future[ValveSwitch]), c: Future[Done]) =>
              c.onComplete(_ => onTermination())
              b.map(v => ControlSwitches(a, v, c))
            }
            .mapMaterializedValue(c => setControl(c, initialSwitchMode, registerTerminationHooks))
            .named(name)

        onStreamErrorHandler match {
          case RetryStreamError(retryCount) =>
            RestartSource.onFailuresWithBackoff(
              // TODO: Actually lift these
              // described in IngestSrcDef or expose these settings at the api level.
              restartSettings.withMaxRestarts(retryCount, restartSettings.maxRestartsWithin),
            ) { () =>
              src.mapMaterializedValue(_ => NotUsed)
            }
          case V2IngestEntities.LogStreamError =>
            src.mapMaterializedValue(_ => NotUsed)
        }
      }
    }

  /** Wraps a bytes destination in the steps required by `outputFormat`.
    *
    * The returned Boolean is the format's `withMetaData` flag (`false` for raw bytes).
    * For Protobuf, this blocks for up to 10 seconds while the message descriptor is resolved.
    */
  private def outputFormatToDestinationBytes(outputFormat: OutputFormat, bytesDestination: ResultDestination.Bytes)(
    implicit protobufSchemaCache: ProtobufSchemaCache,
  ): (DestinationSteps, Boolean) =
    outputFormat match {
      case OutputFormat.Bytes =>
        (WithRawBytes(bytesDestination), false)
      case OutputFormat.JSON(withMetaData) =>
        (WithByteEncoding(JSON(), bytesDestination), withMetaData)
      case OutputFormat.Protobuf(schemaUrl, typeName, withMetaData) =>
        val messageDescriptor = Await.result(
          protobufSchemaCache.getMessageDescriptor(filenameOrUrl(schemaUrl), typeName, flushOnFail = true),
          10.seconds,
        )
        (
          WithByteEncoding(Protobuf(schemaUrl, typeName, messageDescriptor), bytesDestination),
          withMetaData,
        )
    }

  /** Translates dead-letter-queue settings into `(destination steps, withMetaData)` pairs,
    * one per configured destination, in the shape accepted by `toQuineIngestSource`'s `onDecodeError`.
    *
    * File and StandardOut destinations always use JSON encoding with `withMetaData = false`.
    */
  def getDeadLetterQueues(
    dlq: DeadLetterQueueSettings,
  )(implicit protobufSchemaCache: ProtobufSchemaCache, system: ActorSystem): List[(DestinationSteps, Boolean)] =
    dlq.destinations.map {

      case DeadLetterQueueOutput.HttpEndpoint(url, parallelism, headers, OutputFormat.JSON(withMetaData)) =>
        (WithDataFoldable(HttpEndpoint(url, parallelism, headers)), withMetaData)

      case DeadLetterQueueOutput.File(path) =>
        // Update this when non-JSON outputs are supported for File (or to support including the info envelope)
        (WithByteEncoding(JSON(), destination.File(path)), false)

      case DeadLetterQueueOutput.Kafka(
            topic,
            bootstrapServers,
            sslKeystorePassword,
            sslTruststorePassword,
            sslKeyPassword,
            saslJaasConfig,
            kafkaProperties,
            outputFormat,
          ) =>
        val kafkaDestination = destination.Kafka(
          topic = topic,
          bootstrapServers = bootstrapServers,
          sslKeystorePassword = sslKeystorePassword,
          sslTruststorePassword = sslTruststorePassword,
          sslKeyPassword = sslKeyPassword,
          saslJaasConfig = saslJaasConfig.map(Api2ToOutputs2.apply),
          kafkaProperties = kafkaProperties,
        )
        outputFormatToDestinationBytes(outputFormat = outputFormat, bytesDestination = kafkaDestination)

      case DeadLetterQueueOutput.Kinesis(
            credentials,
            region,
            streamName,
            kinesisParallelism,
            kinesisMaxBatchSize,
            kinesisMaxRecordsPerSecond,
            kinesisMaxBytesPerSecond,
            outputFormat,
          ) =>
        val kinesisDestination = destination.Kinesis(
          credentials = credentials.map(Api2ToAws.apply),
          region = region.map(Api2ToAws.apply),
          streamName = streamName,
          kinesisParallelism = kinesisParallelism,
          kinesisMaxBatchSize = kinesisMaxBatchSize,
          kinesisMaxRecordsPerSecond = kinesisMaxRecordsPerSecond,
          kinesisMaxBytesPerSecond = kinesisMaxBytesPerSecond,
        )
        outputFormatToDestinationBytes(outputFormat = outputFormat, bytesDestination = kinesisDestination)

      case DeadLetterQueueOutput.ReactiveStream(address, port, outputFormat) =>
        val bytesDestination = destination.ReactiveStream(address, port)
        outputFormatToDestinationBytes(outputFormat = outputFormat, bytesDestination = bytesDestination)

      case DeadLetterQueueOutput.SNS(credentials, region, topic, outputFormat) =>
        val bytesDestination = destination.SNS(
          credentials = credentials.map(Api2ToAws.apply),
          region = region.map(Api2ToAws.apply),
          topic = topic,
        )
        outputFormatToDestinationBytes(outputFormat = outputFormat, bytesDestination = bytesDestination)

      case DeadLetterQueueOutput.StandardOut =>
        // Update this when non-JSON outputs are supported for StandardOut (or to support including the info envelope)
        (WithByteEncoding(JSON(), destination.StandardOut), false)
    }
}

/** Helpers for the decode/dead-letter steps, plus factories building a [[DecodedSource]] from
  * V1 or V2 ingest configuration.
  */
object DecodedSource extends LazySafeLogging {

  /** Foldable for a [[DlqEnvelope]]: a map with keys "frame", "decoded" (only when a decoded
    * value is present) and "message".
    */
  def dlqFold[Frame, Decoded](implicit
    foldableFrame: DataFoldableFrom[Frame],
    foldable: DataFoldableFrom[Decoded],
  ): DataFoldableFrom[DlqEnvelope[Frame, Decoded]] = new DataFoldableFrom[DlqEnvelope[Frame, Decoded]] {
    def fold[B](value: DlqEnvelope[Frame, Decoded], folder: DataFolderTo[B]): B = {
      val builder = folder.mapBuilder()
      builder.add("frame", foldableFrame.fold(value.frame, folder))
      value.decoded.foreach(decoded => builder.add("decoded", foldable.fold(decoded, folder)))
      builder.add("message", folder.string(value.message))
      builder.finish()
    }
  }

  /** Builds one sink per dead-letter destination, each named `"<name>-errors"`.
    *
    * The Boolean paired with each destination selects what is written for foldable destinations:
    * `true` writes the whole envelope (via [[dlqFold]]), `false` writes only the original frame.
    * Non-foldable destinations always receive only the original frame.
    */
  def getDlqSinks[Frame: BytesOutputEncoder, Decoded](
    name: String,
    intoNamespace: NamespaceId,
    onDecodeError: List[(DestinationSteps, Boolean)],
  )(implicit
    foldableFrame: DataFoldableFrom[Frame],
    foldable: DataFoldableFrom[Decoded],
    logConfig: LogConfig,
  ): List[Sink[DlqEnvelope[Frame, Decoded], NotUsed]] =
    onDecodeError.map {
      case (steps: FoldableDestinationSteps, true) =>
        Flow[DlqEnvelope[Frame, Decoded]]
          .to(
            steps.sink(
              s"$name-errors",
              intoNamespace,
            )(DecodedSource.dlqFold(foldableFrame, foldable), logConfig),
          )

      case (steps: FoldableDestinationSteps, false) =>
        Flow[DlqEnvelope[Frame, Decoded]]
          .map(_.frame)
          .to(
            steps.sink(
              s"$name-errors",
              intoNamespace,
            )(foldableFrame, logConfig),
          )

      case (sink: NonFoldableDestinationSteps, _) =>
        Flow[DlqEnvelope[Frame, Decoded]]
          .map(_.frame)
          .to(
            sink.sink(
              s"$name-errors",
              intoNamespace,
            ),
          )
    }

  /** Runs the decode thunk of each element.
    *
    * A successful decode is emitted as `Right((decoded, frame))`; a failure is emitted as a `Left`
    * envelope carrying the frame and the exception message, and is logged when `logRecord` is set.
    */
  private def decodedFlow[Frame, Decoded](
    logRecord: Boolean,
  ): Flow[(() => Try[Decoded], Frame), Either[DlqEnvelope[Frame, Decoded], (Decoded, Frame)], NotUsed] =
    Flow[(() => Try[Decoded], Frame)].map { case (decoded, frame) =>
      decoded() match {
        case Success(d) => Right((d, frame))
        case Failure(ex) =>
          if (logRecord) {
            logger.warn(safe"error decoding: ${Safe(ex.getMessage)}")
          }
          Left(DlqEnvelope.apply[Frame, Decoded](frame, None, ex.getMessage))
      }
    }

  /** The decode step, wrapped in a `RetryFlow` with backoff when `retrySettings` is defined.
    *
    * With retries enabled, an element whose decode produced a `Left` is re-submitted unchanged;
    * any other result is emitted. `minBackoff` is read as milliseconds and `maxBackoff` as seconds.
    */
  def optionallyRetryDecodeStep[Frame, Decoded](
    logRecord: Boolean,
    retrySettings: Option[RecordRetrySettings],
  ): Flow[(() => Try[Decoded], Frame), Either[DlqEnvelope[Frame, Decoded], (Decoded, Frame)], NotUsed] =
    retrySettings match {
      case Some(settings) =>
        RetryFlow
          .withBackoff(
            minBackoff = settings.minBackoff.millis,
            maxBackoff = settings.maxBackoff.seconds,
            randomFactor = settings.randomFactor,
            maxRetries = settings.maxRetries,
            decodedFlow[Frame, Decoded](logRecord),
          ) {
            case (in @ (_, _), Left(_)) => Some(in)
            case _ => None
          }
      case None => decodedFlow[Frame, Decoded](logRecord)
    }

  /** Convenience to extract parallelism from v1 configuration types w/o altering v1 configurations */
  // Throws NoSuchElementException for configuration types not listed below.
  def parallelism(config: V1.IngestStreamConfiguration): Int = config match {
    case k: V1.KafkaIngest => k.parallelism
    case k: V1.KinesisIngest => k.parallelism
    case s: V1.ServerSentEventsIngest => s.parallelism
    case s: V1.SQSIngest => s.writeParallelism
    case w: V1.WebsocketSimpleStartupIngest => w.parallelism
    case f: V1.FileIngest => f.parallelism
    case s: V1.S3Ingest => s.parallelism
    case s: V1.StandardInputIngest => s.parallelism
    case n: V1.NumberIteratorIngest => n.parallelism
    case other => throw new NoSuchElementException(s"Ingest type $other not supported")
  }

  // build from v1 configuration
  // The Kafka consumer group id falls back to the ingest `name` when the configuration has none.
  def apply(
    name: String,
    config: V1.IngestStreamConfiguration,
    meter: IngestMeter,
    system: ActorSystem,
    fileAccessPolicy: com.thatdot.quine.app.config.FileAccessPolicy,
  )(implicit
    protobufCache: ProtobufSchemaCache,
    logConfig: LogConfig,
  ): ValidatedNel[BaseError, DecodedSource] = {
    config match {
      case V1.KafkaIngest(
            format,
            topics,
            _,
            bootstrapServers,
            groupId,
            securityProtocol,
            maybeExplicitCommit,
            autoOffsetReset,
            kafkaProperties,
            endingOffset,
            _,
            recordDecoders,
            sslKeystorePassword,
            sslTruststorePassword,
            sslKeyPassword,
            saslJaasConfig,
          ) =>
        KafkaSource(
          topics,
          bootstrapServers,
          groupId.getOrElse(name),
          securityProtocol,
          maybeExplicitCommit,
          autoOffsetReset,
          kafkaProperties,
          endingOffset,
          recordDecoders.map(ContentDecoder(_)),
          meter,
          system,
          sslKeystorePassword,
          sslTruststorePassword,
          sslKeyPassword,
          saslJaasConfig.map(V1ToV2(_)),
        ).framedSource.map(_.toDecoded(FrameDecoder(format)))

      case V1.FileIngest(
            format,
            path,
            encoding,
            _,
            maximumLineSize,
            startAtOffset,
            ingestLimit,
            _,
            fileIngestMode,
          ) =>
        FileSource
          .srcFromIngest(path, fileIngestMode, fileAccessPolicy)
          .andThen { validatedSource =>
            FileSource.decodedSourceFromFileStream(
              validatedSource,
              FileFormat(format),
              Charset.forName(encoding),
              maximumLineSize,
              IngestBounds(startAtOffset, ingestLimit),
              meter,
              Seq(), // V1 file ingest does not define recordDecoders
            )
          }

      case V1.S3Ingest(
            format,
            bucketName,
            key,
            encoding,
            _,
            credsOpt,
            maxLineSize,
            startAtOffset,
            ingestLimit,
            _,
          ) =>
        S3Source(
          FileFormat(format),
          bucketName,
          key,
          credsOpt,
          maxLineSize,
          Charset.forName(encoding),
          IngestBounds(startAtOffset, ingestLimit),
          meter,
          Seq(), // There is no compression support in the v1 configuration object.
        )(system).decodedSource

      case V1.StandardInputIngest(
            format,
            encoding,
            _,
            maximumLineSize,
            _,
          ) =>
        StandardInputSource(
          FileFormat(format),
          maximumLineSize,
          Charset.forName(encoding),
          meter,
          Seq(),
        ).decodedSource

      case V1.KinesisIngest(
            streamedRecordFormat,
            streamName,
            shardIds,
            _,
            creds,
            region,
            iteratorType,
            numRetries,
            _,
            recordEncodings,
          ) =>
        KinesisSource(
          streamName,
          shardIds,
          creds,
          region,
          iteratorType,
          numRetries, // TODO not currently supported
          meter,
          recordEncodings.map(ContentDecoder(_)),
        )(system.getDispatcher).framedSource.map(_.toDecoded(FrameDecoder(streamedRecordFormat)))

      case V1.KinesisKCLIngest(
            format,
            applicationName,
            kinesisStreamName,
            _,
            credentials,
            region,
            initialPosition,
            numRetries,
            _,
            recordDecoders,
            schedulerSourceSettings,
            checkpointSettings,
            advancedSettings,
          ) =>
        KinesisKclSrc(
          kinesisStreamName = kinesisStreamName,
          applicationName = applicationName,
          meter = meter,
          credentialsOpt = credentials,
          regionOpt = region,
          initialPosition = V1ToV2(initialPosition),
          numRetries = numRetries,
          decoders = recordDecoders.map(ContentDecoder(_)),
          schedulerSettings = V1ToV2(schedulerSourceSettings),
          checkpointSettings = V1ToV2(checkpointSettings),
          advancedSettings = V1ToV2(advancedSettings),
        )(ExecutionContext.parasitic).framedSource.map(_.toDecoded(FrameDecoder(format)))

      case V1.NumberIteratorIngest(_, startAtOffset, ingestLimit, _, _) =>
        Validated.valid(NumberIteratorSource(IngestBounds(startAtOffset, ingestLimit), meter).decodedSource)

      case V1.SQSIngest(
            format,
            queueURL,
            readParallelism,
            _,
            credentialsOpt,
            regionOpt,
            deleteReadMessages,
            _,
            recordEncodings,
          ) =>
        SqsSource(
          queueURL,
          readParallelism,
          credentialsOpt,
          regionOpt,
          deleteReadMessages,
          meter,
          recordEncodings.map(ContentDecoder(_)),
        ).framedSource
          .map(_.toDecoded(FrameDecoder(format)))

      case V1.ServerSentEventsIngest(
            format,
            url,
            _,
            _,
            recordEncodings,
          ) =>
        ServerSentEventSource(url, meter, recordEncodings.map(ContentDecoder(_)))(system).framedSource
          .map(_.toDecoded(FrameDecoder(format)))

      case V1.WebsocketSimpleStartupIngest(
            format,
            wsUrl,
            initMessages,
            keepAliveProtocol,
            _,
            encoding,
          ) =>
        WebSocketClientSource(wsUrl, initMessages, keepAliveProtocol, Charset.forName(encoding), meter)(
          system,
        ).framedSource
          .map(_.toDecoded(FrameDecoder(format)))
    }
  }

  //V2 configuration
  // Pairs an already-built framed source with the decoder matching `format`.
  def apply(src: FramedSource, format: IngestFormat)(implicit
    protobufCache: ProtobufSchemaCache,
    avroCache: AvroSchemaCache,
  ): DecodedSource =
    src.toDecoded(FrameDecoder(format))

  // build from v2 configuration
  // File-like sources fall back to a maximum line size of 1000000 when the configuration has none.
  def apply(
    name: String,
    config: V2IngestConfiguration,
    meter: IngestMeter,
    system: ActorSystem,
    fileAccessPolicy: FileAccessPolicy,
  )(implicit
    protobufCache: ProtobufSchemaCache,
    avroCache: AvroSchemaCache,
    logConfig: LogConfig,
  ): ValidatedNel[BaseError, DecodedSource] =
    config.source match {
      case FileIngest(format, path, mode, maximumLineSize, startOffset, limit, charset, recordDecoders) =>
        FileSource
          .srcFromIngest(path, mode, fileAccessPolicy)
          .andThen { validatedSource =>
            FileSource.decodedSourceFromFileStream(
              validatedSource,
              format,
              charset,
              maximumLineSize.getOrElse(1000000), //TODO - To optional
              IngestBounds(startOffset, limit),
              meter,
              recordDecoders.map(ContentDecoder(_)),
            )
          }

      case StdInputIngest(format, maximumLineSize, charset) =>
        FileSource.decodedSourceFromFileStream(
          stdInSource,
          format,
          charset,
          maximumLineSize.getOrElse(1000000), //TODO
          IngestBounds(),
          meter,
          Seq(),
        )

      case S3Ingest(format, bucketName, key, creds, maximumLineSize, startOffset, limit, charset, recordDecoders) =>
        FileSource.decodedSourceFromFileStream(
          s3Source(bucketName, key, creds)(system),
          format,
          charset,
          maximumLineSize.getOrElse(1000000), //TODO
          IngestBounds(startOffset, limit),
          meter,
          recordDecoders.map(ContentDecoder(_)),
        )

      case NumberIteratorIngest(_, startAtOffset, ingestLimit) =>
        NumberIteratorSource(IngestBounds(startAtOffset, ingestLimit), meter).decodedSource.valid

      case WebsocketIngest(format, wsUrl, initMessages, keepAliveProtocol, charset) =>
        WebSocketClientSource(wsUrl, initMessages, keepAliveProtocol, charset, meter)(system).framedSource
          .map(_.toDecoded(FrameDecoder(format)))

      case KinesisIngest(format, streamName, shardIds, creds, region, iteratorType, numRetries, recordDecoders) =>
        KinesisSource(
          streamName,
          shardIds,
          creds,
          region,
          iteratorType,
          numRetries, //TODO not currently supported
          meter,
          recordDecoders.map(ContentDecoder(_)),
        )(ExecutionContext.parasitic).framedSource.map(_.toDecoded(FrameDecoder(format)))

      case KinesisKclIngest(
            kinesisStreamName,
            applicationName,
            format,
            credentialsOpt,
            regionOpt,
            iteratorType,
            numRetries,
            recordDecoders,
            schedulerSourceSettings,
            checkpointSettings,
            advancedSettings,
          ) =>
        KinesisKclSrc(
          kinesisStreamName = kinesisStreamName,
          applicationName = applicationName,
          meter = meter,
          credentialsOpt = credentialsOpt,
          regionOpt = regionOpt,
          initialPosition = iteratorType,
          numRetries = numRetries,
          decoders = recordDecoders.map(ContentDecoder(_)),
          schedulerSettings = schedulerSourceSettings,
          checkpointSettings = checkpointSettings,
          advancedSettings = advancedSettings,
        )(ExecutionContext.parasitic).framedSource.map(_.toDecoded(FrameDecoder(format)))

      case ServerSentEventIngest(format, url, recordDecoders) =>
        ServerSentEventSource(url, meter, recordDecoders.map(ContentDecoder(_)))(system).framedSource
          .map(_.toDecoded(FrameDecoder(format)))

      case SQSIngest(
            format,
            queueUrl,
            readParallelism,
            credentialsOpt,
            regionOpt,
            deleteReadMessages,
            recordDecoders,
          ) =>
        SqsSource(
          queueUrl,
          readParallelism,
          credentialsOpt,
          regionOpt,
          deleteReadMessages,
          meter,
          recordDecoders.map(ContentDecoder(_)),
        ).framedSource.map(_.toDecoded(FrameDecoder(format)))

      case KafkaIngest(
            format,
            topics,
            bootstrapServers,
            groupId,
            securityProtocol,
            maybeExplicitCommit,
            autoOffsetReset,
            sslKeystorePassword,
            sslTruststorePassword,
            sslKeyPassword,
            saslJaasConfig,
            kafkaProperties,
            endingOffset,
            recordDecoders,
          ) =>
        KafkaSource(
          topics,
          bootstrapServers,
          groupId.getOrElse(name),
          securityProtocol,
          maybeExplicitCommit,
          autoOffsetReset,
          kafkaProperties,
          endingOffset,
          recordDecoders.map(ContentDecoder(_)),
          meter,
          system,
          sslKeystorePassword,
          sslTruststorePassword,
          sslKeyPassword,
          saslJaasConfig,
        ).framedSource.map(_.toDecoded(FrameDecoder(format)))

      case ReactiveStreamIngest(format, url, port) =>
        ReactiveSource(url, port, meter)(system).framedSource.map(_.toDecoded(FrameDecoder(format)))

      case WebSocketFileUpload(format) =>
        val decoding = FileSource.decodingFoldableFrom(format, meter, Int.MaxValue)
        implicit val mat: Materializer = stream.Materializer(system)
        // The MergeHub is materialized here; its sink side is exposed through `decodingHub.sink`
        // and its output is re-exposed as a Source through a non-fanout publisher.
        val (hubSink, hubSource) = MergeHub
          .source[decoding.Element](perProducerBufferSize = 16)
          .toMat(Sink.asPublisher(fanout = false))(Keep.both)
          .run()
        val sourceFromPublisher = Source.fromPublisher(hubSource).mapMaterializedValue(_ => NotUsed)
        val decodingHub = new DecodingHub {
          override type Element = decoding.Element
          override val source: Source[decoding.Element, NotUsed] = sourceFromPublisher
          override val dataFoldableFrom: DataFoldableFrom[decoding.Element] = decoding.dataFoldableFrom
          def decodingFlow: Flow[ByteString, Element, NotUsed] = decoding.decodingFlow
          def sink: Sink[Element, NotUsed] = hubSink
        }
        new com.thatdot.quine.app.model.ingest2.sources.WebSocketFileUploadSource(meter, decodingHub).valid
    }

}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/source/FramedSource.scala
================================================
package com.thatdot.quine.app.model.ingest2.source

import scala.util.Try

import org.apache.pekko.stream.scaladsl.{Flow, Source}
import
org.apache.pekko.{Done, NotUsed}

import com.thatdot.data.DataFoldableFrom
import com.thatdot.quine.app.ShutdownSwitch
import com.thatdot.quine.app.model.ingest2.codec.FrameDecoder
import com.thatdot.quine.app.routes.IngestMeter

/** Define a source in terms of Frames it can return.
  *
  * A Frame is a chunk of an original data source that contains
  * an Array of bytes representing a single element.
  * To retrieve decoded values a FramedSource must be paired with a
  * [[com.thatdot.quine.app.model.ingest2.codec.FrameDecoder]].
  *
  * Frames are defined by the ingest type, e.g.
  * - Kafka: Content delimited
  * - SQS:message
  * - Kinesis: Record
  *
  * Decoded Formats
  * - CSV with header (Map[String,String])
  * - CSV rows (Iterable[String])
  * - Json
  * - String
  * - Drop (ignoring)
  * - Array[Byte]
  * - Protobuf Dynamic Message
  *
  * The stream defined as a part of a framed source must have metering
  * as well as any stream features already applied.
  */
trait FramedSource {
  /** Type of the raw frames emitted by [[stream]]. */
  type SrcFrame

  val stream: Source[SrcFrame, ShutdownSwitch]
  val meter: IngestMeter

  /** Bytes of a frame; this is what gets handed to the decoder in [[toDecoded]]. */
  def content(input: SrcFrame): Array[Byte]

  def foldableFrame: DataFoldableFrom[SrcFrame]

  /** Note that the ack flow is only applied at the usage site (e.g. directly
    * in quine/novelty). This is because the ack is applied after the platform
    * specific use (e.g. insert into graph).
    */
  val ack: Flow[SrcFrame, Done, NotUsed] = Flow.fromFunction(_ => Done)

  /** Close any associated resources after terminating the stream. */
  def onTermination(): Unit = ()

  /** Pair a framed source with a decoder in order to interpret the raw
    * frame data.
    *
    * Any type for which there is a decoder is foldable into
    * common types.
    *
    * Each stream element is a `(thunk, frame)` pair. Decoding happens only when the thunk is
    * invoked, and every invocation decodes the frame bytes again (which is what allows a
    * downstream retry step to re-attempt a failed decode).
    */
  def toDecoded[DecodedA](decoder: FrameDecoder[DecodedA]): DecodedSource =
    new DecodedSource(meter) {
      type Decoded = DecodedA
      type Frame = SrcFrame

      val foldableFrame: DataFoldableFrom[SrcFrame] = FramedSource.this.foldableFrame

      val foldable: DataFoldableFrom[Decoded] = decoder.foldable

      override def content(input: SrcFrame): Array[Byte] = FramedSource.this.content(input)

      private val deserializationTimer = this.meter.unmanagedDeserializationTimer

      def stream: Source[(() => Try[Decoded], Frame), ShutdownSwitch] =
        FramedSource.this.stream.map { envelope =>
          // Timing lives inside the thunk so that a frame is decoded once per invocation. Decoding
          // eagerly here just to feed the timer would make every frame pay for deserialization twice
          // (once for the measurement and once more when the consumer runs the thunk).
          val decoded = () => {
            val timer = deserializationTimer.time()
            val result = decoder.decode(content(envelope))
            result.foreach(_ => timer.stop()) // only time successful deserializations
            result
          }
          decoded -> envelope
        }

      override def ack: Flow[SrcFrame, Done, NotUsed] = FramedSource.this.ack

      override def onTermination(): Unit = FramedSource.this.onTermination()

    }
}

object FramedSource {

  /** Construct a framed source from a raw stream of frames along with a definition of how to extract
    * bytes from the frame.
    *
    * Any features this source supports must be applied before calling this method.
    *
    * @param source           stream of raw frames
    * @param ingestMeter      meter exposed as the source's `meter`
    * @param decodeFrame      extracts the bytes of a frame (becomes `content`)
    * @param foldableFrameInp foldable for raw frames
    * @param ackFlow          acknowledgement flow; by default maps every frame to `Done`
    * @param terminationHook  invoked from `onTermination`; by default does nothing
    */
  def apply[Frame](
    source: Source[Frame, ShutdownSwitch],
    ingestMeter: IngestMeter,
    decodeFrame: Frame => Array[Byte],
    foldableFrameInp: DataFoldableFrom[Frame],
    ackFlow: Flow[Frame, Done, NotUsed] = Flow.fromFunction[Frame, Done](_ => Done),
    terminationHook: () => Unit = () => (),
  ): FramedSource =
    new FramedSource {
      type SrcFrame = Frame
      val stream: Source[Frame, ShutdownSwitch] = source
      val meter: IngestMeter = ingestMeter

      override def content(input: Frame): Array[Byte] = decodeFrame(input)
      override val foldableFrame: DataFoldableFrom[SrcFrame] = foldableFrameInp

      override def onTermination(): Unit = terminationHook()
      override val ack = ackFlow
    }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/source/IngestBounds.scala
================================================
package com.thatdot.quine.app.model.ingest2.source

/** Bounds for an ingest: the offset to start at (default 0) and an optional limit (default none). */
case class IngestBounds(startAtOffset: Long = 0L, ingestLimit: Option[Long] = None)

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/source/QuineIngestQuery.scala
================================================
package com.thatdot.quine.app.model.ingest2.source

import scala.concurrent.{ExecutionContext, Future}
import scala.util.Try

import com.typesafe.scalalogging.LazyLogging

import com.thatdot.common.logging.Log.LogConfig
import com.thatdot.quine.app.model.ingest2.StreamingFormat.DropFormat
import com.thatdot.quine.app.model.ingest2.V2IngestEntities.QuineIngestConfiguration
import com.thatdot.quine.app.util.AtLeastOnceCypherQuery
import com.thatdot.quine.compiler
import com.thatdot.quine.graph.cypher.{CompiledQuery, Location}
import com.thatdot.quine.graph.{CypherOpsGraph, NamespaceId, cypher}
import com.thatdot.quine.routes._

/** The step that consumes one deserialized Cypher value; the Future completes when the step is done. */
trait QuineIngestQuery {
  def apply(
    deserialized: cypher.Value,
  ): Future[Unit]
}

/** Ingest query that runs a compiled Cypher query against `graph` for every value, through an
  * [[AtLeastOnceCypherQuery]] built from `query` and `parameter`.
  */
case class QuineValueIngestQuery(
  graph: CypherOpsGraph,
  query: CompiledQuery[Location.Anywhere],
  parameter: String,
  namespaceId: NamespaceId,
)(implicit logConfig: LogConfig)
    extends (cypher.Value => Future[Unit])
    with QuineIngestQuery {

  lazy val atLeastOnceQuery: AtLeastOnceCypherQuery =
    AtLeastOnceCypherQuery(query, parameter, "ingest-query")

  /** Runs the query stream for `deserialized` and discards its results. */
  def apply(
    deserialized: cypher.Value,
  ): Future[Unit] =
    atLeastOnceQuery
      .stream(deserialized, namespaceId)(graph)
      .run()(graph.materializer)
      .map(_ => ())(ExecutionContext.parasitic)
}

/** Ingest query that ignores its input and completes immediately. */
case object QuineDropIngestQuery extends QuineIngestQuery {
  def apply(
    deserialized: cypher.Value,
  ): Future[Unit] = Future.successful(())
}

object QuineValueIngestQuery extends LazyLogging {

  /** Builds the ingest query for a V2 configuration: `DropFormat` sources get
    * [[QuineDropIngestQuery]], everything else a compiled query.
    *
    * Rethrows the compilation failure if the configured query does not compile (`Try.get`).
    */
  def apply(config: QuineIngestConfiguration, graph: CypherOpsGraph, namespaceId: NamespaceId)(implicit
    logConfig: LogConfig,
  ): QuineIngestQuery =
    config.source.format match {
      case DropFormat => QuineDropIngestQuery
      case _ => QuineValueIngestQuery.build(graph, config.query, config.parameter, namespaceId).get
    }

  /** Compiles `query` and returns warnings about possible full node scans and unverified
    * idempotency. Returns an empty set when the query does not compile.
    */
  def getQueryWarnings(query: String, parameter: String): Set[String] =
    Try(compiler.cypher.compile(query, unfixedParameters = Seq(parameter)))
      .map { compiled: CompiledQuery[Location.Anywhere] =>
        var warnings: Set[String] = Set()
        if (compiled.query.canContainAllNodeScan) {
          warnings = warnings ++ Set(
            "Cypher query may contain full node scan; for improved performance, re-write without full node scan. " + (compiled.queryText match {
              case Some(text) => "The provided query was: " + text
              case None => ""
            }),
          )
        }
        if (!compiled.query.isIdempotent) {
          warnings = warnings ++ Set(
            """Could not verify that the provided ingest query is idempotent. If timeouts occur, query
              |execution may be retried and duplicate data may be created.""".stripMargin.replace(
              '\n',
              ' ',
            ),
          )
        }
        warnings
      }
      .getOrElse(Set())

  /** Builds the ingest query for a V1 configuration.
    *
    * Throws `UnsupportedOperationException` for configuration or format types that carry no query,
    * and rethrows the compilation failure if the query does not compile (`Try.get`).
    */
  def apply(
    config: IngestStreamConfiguration, //v1
    graph: CypherOpsGraph,
    namespaceId: NamespaceId,
  )(implicit logConfig: LogConfig): QuineIngestQuery = {

    def fromStreamedRecordFormat(f: StreamedRecordFormat): QuineIngestQuery = f match {
      case StreamedRecordFormat.Drop => QuineDropIngestQuery
      case s: IngestQuery => QuineValueIngestQuery.build(graph, s.query, s.parameter, namespaceId).get
      case _ => throw new UnsupportedOperationException(s"Can't extract query and parameters from $f")
    }

    def fromFileIngestFormat(f: FileIngestFormat): QuineIngestQuery =
      QuineValueIngestQuery.build(graph, f.query, f.parameter, namespaceId).get

    config match {
      case k: KafkaIngest => fromStreamedRecordFormat(k.format)
      case k: KinesisIngest => fromStreamedRecordFormat(k.format)
      case s: ServerSentEventsIngest => fromStreamedRecordFormat(s.format)
      case s: SQSIngest => fromStreamedRecordFormat(s.format)
      case s: WebsocketSimpleStartupIngest => fromStreamedRecordFormat(s.format)
      case s: FileIngest => fromFileIngestFormat(s.format)
      case s: S3Ingest => fromFileIngestFormat(s.format)
      case s: StandardInputIngest => fromFileIngestFormat(s.format)
      case s: NumberIteratorIngest => fromFileIngestFormat(s.format)
      case _ => throw new UnsupportedOperationException(s"Can't extract ingest query from $config")
    }
  }

  /** Compiles `query` with `parameter` as its unfixed parameter and wraps the result in a
    * [[QuineValueIngestQuery]]. Compilation errors are returned as a `Failure`.
    *
    * Logs a warning when the compiled query may contain a full node scan or cannot be verified
    * to be idempotent.
    */
  def build(
    graph: CypherOpsGraph,
    query: String,
    parameter: String,
    namespaceId: NamespaceId,
  )(implicit logConfig: LogConfig): Try[QuineValueIngestQuery] =
    Try(compiler.cypher.compile(query, unfixedParameters = Seq(parameter))).map {
      compiled: CompiledQuery[Location.Anywhere] =>
        if (compiled.query.canContainAllNodeScan) {
          // TODO this should be lifted to an (overridable, see allowAllNodeScan in SQ outputs) API error
          // `queryText` is an Option: render its contents (as getQueryWarnings does) instead of
          // concatenating the Option itself, which would log "Some(...)" or "None".
          logger.warn(
            "Cypher query may contain full node scan; for improved performance, re-write without full node scan. " +
            compiled.queryText.fold("")("The provided query was: " + _),
          )
        }
        if (!compiled.query.isIdempotent) {
          // TODO allow user to override this (see: allowAllNodeScan) and only retry when idempotency is asserted
          logger.warn(
            """Could not verify that the provided ingest query is idempotent. If timeouts occur, query
              |execution may be retried and duplicate data may be created.""".stripMargin.replace('\n', ' '),
          )
        }
        QuineValueIngestQuery(graph, compiled, parameter, namespaceId)
    }

}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/sources/CsvFileSource.scala
================================================
package com.thatdot.quine.app.model.ingest2.sources

import java.nio.charset.{Charset, StandardCharsets}

import scala.util.{Success, Try}

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.connectors.csv.scaladsl.{CsvParsing, CsvToMap}
import org.apache.pekko.stream.scaladsl.{Flow, Keep, Source}
import org.apache.pekko.util.ByteString

import com.thatdot.data.DataFoldableFrom
import com.thatdot.data.DataFoldableFrom._
import com.thatdot.quine.app.ShutdownSwitch
import com.thatdot.quine.app.model.ingest.serialization.ContentDecoder
import com.thatdot.quine.app.model.ingest2.source.{DecodedSource, IngestBounds}
import com.thatdot.quine.app.model.ingest2.sources
import com.thatdot.quine.app.routes.IngestMeter

case class CsvFileSource(
  src: Source[ByteString, NotUsed],
  ingestBounds: IngestBounds,
  ingestMeter: IngestMeter,
  headers: Either[Boolean, List[String]],
  charset: Charset,
  delimiterChar: Byte,
  quoteChar: Byte,
  escapeChar: Byte,
  maximumLineSize: Int,
  decoders: Seq[ContentDecoder] = Seq(),
) {

  private val csvLineParser: Flow[ByteString, List[ByteString], NotUsed] = {
    val lineScanner = CsvParsing.lineScanner(delimiterChar, quoteChar, escapeChar, maximumLineSize)
    charset match {
      case StandardCharsets.UTF_8 | StandardCharsets.ISO_8859_1 | StandardCharsets.US_ASCII => lineScanner
      case _ =>
        sources
.transcodingFlow(charset) .via(lineScanner) .map(_.map(bs => ByteString(bs.decodeString(StandardCharsets.UTF_8), charset))) } } def decodedSource: DecodedSource = headers match { case Right(h) => toDecodedSource(CsvToMap.withHeadersAsStrings(charset, h: _*), stringMapDataFoldable) case Left(true) => toDecodedSource(CsvToMap.toMapAsStrings(charset), stringMapDataFoldable) case Left(false) => toDecodedSource( Flow[List[ByteString]] .map(l => l.map(bs => bs.decodeString(charset))), stringIterableDataFoldable, ) } private def toDecodedSource[T](parsingFlow: Flow[List[ByteString], T, NotUsed], foldableFrom: DataFoldableFrom[T]) = new DecodedSource(ingestMeter) { type Decoded = T type Frame = ByteString override val foldableFrame: DataFoldableFrom[ByteString] = byteStringDataFoldable override def content(input: ByteString): Array[Byte] = input.toArrayUnsafe() def stream: Source[(() => Try[T], Frame), ShutdownSwitch] = { val csvStream: Source[() => Success[T], NotUsed] = src .via(decompressingFlow(decoders)) .via(csvLineParser) .via(boundingFlow(ingestBounds)) .wireTap(bs => meter.mark(bs.map(_.length).sum)) .via(parsingFlow) .map(value => () => scala.util.Success(value)) //TODO meaningfully extract errors withKillSwitches(csvStream.zipWith(src)(Keep.both)) } val foldable: DataFoldableFrom[T] = foldableFrom } } ================================================ FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/sources/FileSource.scala ================================================ package com.thatdot.quine.app.model.ingest2.sources import java.nio.charset.{Charset, StandardCharsets} import org.apache.pekko.NotUsed import org.apache.pekko.http.scaladsl.common.EntityStreamingSupport import org.apache.pekko.stream.connectors.csv.scaladsl.{CsvParsing, CsvToMap} import org.apache.pekko.stream.scaladsl.{Flow, Framing, JsonFraming, Source} import org.apache.pekko.util.ByteString import cats.data.ValidatedNel import cats.implicits.catsSyntaxValidatedId import 
cats.syntax.either._
import com.typesafe.scalalogging.LazyLogging
import io.circe.parser

import com.thatdot.common.logging.Log.LogConfig
import com.thatdot.data.DataFoldableFrom
import com.thatdot.quine.app.ShutdownSwitch
import com.thatdot.quine.app.config.FileAccessPolicy
import com.thatdot.quine.app.model.ingest.NamedPipeSource
import com.thatdot.quine.app.model.ingest.serialization.ContentDecoder
import com.thatdot.quine.app.model.ingest2.FileFormat
import com.thatdot.quine.app.model.ingest2.codec.{CypherStringDecoder, FrameDecoder, JsonDecoder}
import com.thatdot.quine.app.model.ingest2.source.{DecodedSource, FramedSource, IngestBounds}
import com.thatdot.quine.app.routes.IngestMeter
import com.thatdot.quine.routes.FileIngestMode
import com.thatdot.quine.util.BaseError

/** Build a framed source from a file-like stream of ByteStrings. In practice this
  * means a finite, non-streaming source: File sources, S3 file sources, and std ingest.
  *
  * This framing provides
  * - ingest bounds
  * - char encoding
  * - compression
  * - record delimit sizing
  * - metering
  *
  * so these capabilities should not be applied to the provided src stream.
  *
  * @param src           the raw byte stream
  * @param charset       passed to `transcodingFlow`
  * @param delimiterFlow splits the byte stream into one ByteString per record
  * @param ingestBounds  passed to `boundingFlow`, applied to delimited records
  * @param decoders      passed to `decompressingFlow`, applied first
  * @param ingestMeter   marked with the size of every emitted record
  */
case class FramedFileSource(
  src: Source[ByteString, NotUsed],
  charset: Charset = DEFAULT_CHARSET,
  delimiterFlow: Flow[ByteString, ByteString, NotUsed],
  ingestBounds: IngestBounds = IngestBounds(),
  decoders: Seq[ContentDecoder] = Seq(),
  ingestMeter: IngestMeter,
) {

  /** The processed record stream: decompress, transcode, delimit, bound, meter -- in that order. */
  val source: Source[ByteString, NotUsed] =
    src
      .via(decompressingFlow(decoders))
      .via(transcodingFlow(charset))
      .via(delimiterFlow) // TODO note this will not properly delimit streaming binary formats (e.g. protobuf)
      //Note: bounding is applied _after_ delimiter.
      .via(boundingFlow(ingestBounds))
      .via(metered(ingestMeter, _.size))

  /** Wraps `source` (with kill switches attached) as a [[FramedSource]] whose frames are ByteStrings. */
  private def framedSource: FramedSource =
    new FramedSource {

      type SrcFrame = ByteString

      val stream: Source[SrcFrame, ShutdownSwitch] = withKillSwitches(source)

      val meter: IngestMeter = ingestMeter

      def content(input: SrcFrame): Array[Byte] = input.toArrayUnsafe()

      val foldableFrame: DataFoldableFrom[SrcFrame] = DataFoldableFrom.byteStringDataFoldable
    }

  /** Combines the framed source with `decoder` to obtain a [[DecodedSource]]. */
  def decodedSource[A](decoder: FrameDecoder[A]): DecodedSource = framedSource.toDecoded(decoder)

}

object FileSource extends LazyLogging {

  /** Framing decoder taken from `EntityStreamingSupport.json(maximumLineSize)`. */
  private def jsonDelimitingFlow(maximumLineSize: Int): Flow[ByteString, ByteString, NotUsed] =
    EntityStreamingSupport.json(maximumLineSize).framingDecoder

  /** Splits on `\n` (a final unterminated line is allowed) and drops one trailing `\r` per line. */
  private def lineDelimitingFlow(maximumLineSize: Int): Flow[ByteString, ByteString, NotUsed] = Framing
    .delimiter(ByteString("\n"), maximumLineSize, allowTruncation = true)
    .map(line => if (!line.isEmpty && line.last == '\r') line.dropRight(1) else line)

  /** Validates `path` against `fileAccessPolicy` and, when valid, opens it as a file or named
    * pipe source.
    *
    * @return the validation errors, or the byte source
    */
  def srcFromIngest(
    path: String,
    fileIngestMode: Option[FileIngestMode],
    fileAccessPolicy: FileAccessPolicy,
  )(implicit
    logConfig: LogConfig,
  ): ValidatedNel[BaseError, Source[ByteString, NotUsed]] =
    FileAccessPolicy.validatePath(path, fileAccessPolicy).map { validatedPath =>
      NamedPipeSource.fileOrNamedPipeSource(validatedPath, fileIngestMode)
    }

  /** Builds a [[DecodedSource]] for `format` on top of `fileSource`.
    *
    * Line, JSON-lines and JSON formats go through [[FramedFileSource]] with the matching
    * delimiter flow and decoder; CSV goes through [[CsvFileSource]]. Every branch shown here
    * returns a valid result.
    */
  def decodedSourceFromFileStream(
    fileSource: Source[ByteString, NotUsed],
    format: FileFormat,
    charset: Charset,
    maximumLineSize: Int,
    bounds: IngestBounds = IngestBounds(),
    meter: IngestMeter,
    decoders: Seq[ContentDecoder] = Seq(),
  ): ValidatedNel[BaseError, DecodedSource] =
    format match {
      case FileFormat.LineFormat =>
        FramedFileSource(
          fileSource,
          charset,
          lineDelimitingFlow(maximumLineSize),
          bounds,
          decoders,
          meter,
        ).decodedSource(CypherStringDecoder).valid
      case FileFormat.JsonLinesFormat =>
        FramedFileSource(
          fileSource,
          charset,
          lineDelimitingFlow(maximumLineSize),
          bounds,
          decoders,
          meter,
        ).decodedSource(JsonDecoder).valid
      case FileFormat.JsonFormat =>
        FramedFileSource(
          fileSource,
          charset,
          jsonDelimitingFlow(maximumLineSize),
          bounds,
          decoders,
          meter,
        ).decodedSource(JsonDecoder).valid
      case FileFormat.CsvFormat(headers, delimiter, quoteChar, escapeChar) =>
        CsvFileSource(
          fileSource,
          bounds,
          meter,
          headers,
          charset,
          delimiter.byte,
          quoteChar.byte,
          escapeChar.byte,
          maximumLineSize,
          decoders,
        ).decodedSource.valid
    }

  /** Builds, for `fileFormat`, a flow from raw bytes to decoded elements together with the
    * matching [[DataFoldableFrom]]. Every branch marks `meter` with the byte length of each
    * record (for CSV: the summed field lengths).
    *
    * NOTE(review): `maximumLineSize` is only used by the `LineFormat` branch. The JSON branches
    * frame with `Int.MaxValue` and the CSV branches call `lineScanner` without a maximum line
    * length -- confirm this is intended.
    */
  def decodingFoldableFrom(fileFormat: FileFormat, meter: IngestMeter, maximumLineSize: Int): DecodingFoldableFrom =
    fileFormat match {
      case FileFormat.LineFormat =>
        new DecodingFoldableFrom {
          override type Element = String
          override def decodingFlow: Flow[ByteString, Element, NotUsed] =
            lineDelimitingFlow(maximumLineSize).map { byteString =>
              val bytes = byteString.toArray
              meter.mark(bytes.length)
              new String(bytes, StandardCharsets.UTF_8)
            }
          override val dataFoldableFrom: DataFoldableFrom[String] = DataFoldableFrom.stringDataFoldable
        }
      case FileFormat.JsonLinesFormat =>
        new DecodingFoldableFrom {
          override type Element = io.circe.Json
          override def decodingFlow: Flow[ByteString, Element, NotUsed] = Framing
            .delimiter(ByteString("\n"), maximumFrameLength = Int.MaxValue, allowTruncation = true)
            .wireTap(line => meter.mark(line.length))
            // A line that fails to parse throws the parse failure.
            .map((bs: ByteString) => parser.parse(bs.utf8String).valueOr(throw _))
          override val dataFoldableFrom: DataFoldableFrom[Element] = DataFoldableFrom.jsonDataFoldable
        }
      case FileFormat.JsonFormat =>
        new DecodingFoldableFrom {
          override type Element = io.circe.Json
          override def decodingFlow: Flow[ByteString, Element, NotUsed] = JsonFraming
            .objectScanner(maximumObjectLength = Int.MaxValue)
            .wireTap(obj => meter.mark(obj.length))
            // An object that fails to parse throws the parse failure.
            .map((bs: ByteString) => parser.parse(bs.utf8String).valueOr(throw _))
          override val dataFoldableFrom: DataFoldableFrom[Element] = DataFoldableFrom.jsonDataFoldable
        }
      case FileFormat.CsvFormat(headers, delimiter, quoteChar, escapeChar) =>
        // Total number of bytes across all fields of one CSV row.
        def lineBytes(line: List[ByteString]): Int = line.foldLeft(0)((size, field) => size + field.length)
        def meterLineBytes: List[ByteString] => Unit = { line =>
          meter.mark(lineBytes(line))
        }
        headers match {
          case Left(firstLineIsHeader) =>
            if (firstLineIsHeader) {
              // Rows become maps via `CsvToMap.toMapAsStrings()`.
              new DecodingFoldableFrom {
                override type Element = Map[String, String]
                override val dataFoldableFrom: DataFoldableFrom[Element] = DataFoldableFrom.stringMapDataFoldable
                override def decodingFlow: Flow[ByteString, Element, NotUsed] = CsvParsing
                  .lineScanner(
                    delimiter = delimiter.byte,
                    quoteChar = quoteChar.byte,
                    escapeChar = escapeChar.byte,
                  )
                  .wireTap(meterLineBytes)
                  .via(CsvToMap.toMapAsStrings())
              }
            } else {
              // No header information: every row becomes a vector of UTF-8 decoded fields.
              new DecodingFoldableFrom {
                override type Element = Vector[String]
                override val dataFoldableFrom: DataFoldableFrom[Element] = DataFoldableFrom.stringVectorDataFoldable
                override def decodingFlow: Flow[ByteString, Element, NotUsed] = CsvParsing
                  .lineScanner(
                    delimiter = delimiter.byte,
                    quoteChar = quoteChar.byte,
                    escapeChar = escapeChar.byte,
                  )
                  .wireTap(meterLineBytes)
                  .map(byteStringList => byteStringList.map(_.utf8String).toVector)
              }
            }
          case Right(staticFieldNames) =>
            // Caller-supplied column names; field values are decoded as UTF-8.
            new DecodingFoldableFrom {
              override type Element = Map[String, String]
              override val dataFoldableFrom: DataFoldableFrom[Element] = DataFoldableFrom.stringMapDataFoldable
              override def decodingFlow: Flow[ByteString, Element, NotUsed] = CsvParsing
                .lineScanner(
                  delimiter = delimiter.byte,
                  quoteChar = quoteChar.byte,
                  escapeChar = escapeChar.byte,
                )
                .wireTap(meterLineBytes)
                .via(CsvToMap.withHeaders(staticFieldNames: _*).map(_.view.mapValues(_.utf8String).toMap))
            }
        }
    }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/sources/FramedSourceProvider.scala
================================================
package com.thatdot.quine.app.model.ingest2.sources

import cats.data.ValidatedNel

import com.thatdot.quine.app.model.ingest2.source.FramedSource
import com.thatdot.quine.util.BaseError

/** Base class for anything that can produce a [[FramedSource]]. */
abstract class FramedSourceProvider[T] {

  // Empty by default.
  // NOTE(review): nothing visible here consumes `validators` -- check subclasses and callers.
  val validators: List[PartialFunction[T, String]] = List()

  /** Attempt to build a framed source. Validation failures
    * are returned as part of the ValidatedNel failures.
    */
  def framedSource: ValidatedNel[BaseError, FramedSource]

}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/sources/KafkaSource.scala
================================================
package com.thatdot.quine.app.model.ingest2.sources

import java.util.UUID

import scala.concurrent.duration.{Duration, FiniteDuration, MILLISECONDS}
import scala.jdk.OptionConverters.RichOptional
import scala.util.{Failure, Success, Try}

import org.apache.pekko.actor.ActorSystem
import org.apache.pekko.kafka.scaladsl.{Committer, Consumer}
import org.apache.pekko.kafka.{
  CommitDelivery,
  CommitterSettings,
  ConsumerMessage,
  ConsumerSettings,
  Subscription,
  Subscriptions => KafkaSubscriptions,
}
import org.apache.pekko.stream.scaladsl.{Flow, Source}
import org.apache.pekko.{Done, NotUsed}

import cats.data.ValidatedNel
import cats.implicits.catsSyntaxOption
import cats.syntax.functor._
import cats.syntax.validated._
import org.apache.kafka.clients.CommonClientConfigs.SECURITY_PROTOCOL_CONFIG
import org.apache.kafka.clients.consumer.ConsumerConfig.AUTO_OFFSET_RESET_CONFIG
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.config.ConfigException
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, Deserializer}

import com.thatdot.api.v2.SaslJaasConfig
import com.thatdot.common.logging.Log._
import com.thatdot.common.security.Secret
import com.thatdot.data.{DataFoldableFrom, DataFolderTo}
import com.thatdot.quine.app.KafkaKillSwitch
import com.thatdot.quine.app.model.ingest.serialization.ContentDecoder
import com.thatdot.quine.app.model.ingest.util.KafkaSettingsValidator
import com.thatdot.quine.app.model.ingest2.source.FramedSource
import com.thatdot.quine.app.model.ingest2.sources.KafkaSource._
import com.thatdot.quine.app.routes.IngestMeter
import
com.thatdot.quine.exceptions.KafkaValidationException
import com.thatdot.quine.routes.KafkaOffsetCommitting.ExplicitCommit
import com.thatdot.quine.routes._
import com.thatdot.quine.util.BaseError

object KafkaSource {

  /** Stream values where we won't need to retain committable offset information */
  type NoOffset = ConsumerRecord[Array[Byte], Array[Byte]]

  /** Stream values where we'll retain committable offset information */
  type WithOffset = ConsumerMessage.CommittableMessage[Array[Byte], Array[Byte]]

  //See [[KafkaSrcDef]], same sans decoder
  /** Builds the consumer settings for a Kafka ingest.
    *
    * Record values are deserialized by running `decoders` over the raw bytes; keys are passed
    * through unchanged. `autoOffsetReset` and `securityProtocol` are written last, so they
    * override same-named entries in `kafkaProperties`.
    */
  def buildConsumerSettings(
    bootstrapServers: String,
    groupId: String,
    autoOffsetReset: KafkaAutoOffsetReset,
    kafkaProperties: KafkaIngest.KafkaProperties,
    securityProtocol: KafkaSecurityProtocol,
    decoders: Seq[ContentDecoder],
    system: ActorSystem,
  ): ConsumerSettings[Array[Byte], Array[Byte]] = {

    val deserializer: Deserializer[Array[Byte]] =
      (_: String, data: Array[Byte]) => ContentDecoder.decode(decoders, data)

    val keyDeserializer: ByteArrayDeserializer = new ByteArrayDeserializer() //NO-OP

    // Create Map of kafka properties: combination of user passed properties from `kafkaProperties`
    // as well as those templated by `KafkaAutoOffsetReset` and `KafkaSecurityProtocol`
    // NOTE: This divergence between how kafka properties are set should be resolved, most likely by removing
    // `KafkaAutoOffsetReset`, `KafkaSecurityProtocol`, and `KafkaOffsetCommitting.AutoCommit`
    // in favor of `KafkaIngest.KafkaProperties`. Additionally, the current "template" properties override those in kafkaProperties
    val properties = kafkaProperties ++ Map(
      AUTO_OFFSET_RESET_CONFIG -> autoOffsetReset.name,
      SECURITY_PROTOCOL_CONFIG -> securityProtocol.name,
    )

    ConsumerSettings(system, keyDeserializer, deserializer)
      .withBootstrapServers(bootstrapServers)
      .withGroupId(groupId)
      // Note: The ConsumerSettings stop-timeout delays stopping the Kafka Consumer
      // and the stream, but when using drainAndShutdown that delay is not required and can be set to zero (as below).
      // https://doc.akka.io/docs/alpakka-kafka/current/consumer.html#draining-control
      // We're calling .drainAndShutdown on the Kafka [[Consumer.Control]]
      .withStopTimeout(Duration.Zero)
      .withProperties(properties)
  }

  /** Turns either a set of topics or explicit per-topic partition assignments into a subscription. */
  def subscription(topics: Either[KafkaIngest.Topics, KafkaIngest.PartitionAssignments]): Subscription =
    topics.fold(
      KafkaSubscriptions.topics,
      assignments =>
        KafkaSubscriptions.assignment(
          (
            for {
              (topic, partitions) <- assignments
              partition <- partitions
            } yield new TopicPartition(topic, partition)
          ).toSet,
        ),
    )

  /** Flow that commits the offset of every message in batches, configured from `koc`, and emits
    * `Done` for each committed batch.
    */
  def ackFlow(
    koc: KafkaOffsetCommitting.ExplicitCommit,
    system: ActorSystem,
  ): Flow[WithOffset, Done, NotUsed] = {
    val committer: Flow[ConsumerMessage.Committable, ConsumerMessage.CommittableOffsetBatch, NotUsed] =
      Committer
        .batchFlow(
          CommitterSettings(system)
            .withMaxBatch(koc.maxBatch)
            .withMaxInterval(FiniteDuration(koc.maxIntervalMillis.toLong, MILLISECONDS))
            .withParallelism(koc.parallelism)
            .withDelivery(
              if (koc.waitForCommitConfirmation) CommitDelivery.WaitForAck else CommitDelivery.SendAndForget,
            ),
        )

    // Note - In cases where we are in ExplicitCommit mode with CommitDelivery.WaitForAck _and_ there is an
    // endingOffset set , we will get a akka.kafka.CommitTimeoutException here, since the commit delivery is
    // batched and it's possible to have remaining commit offsets remaining that don't get sent.
    //
    // e.g. partition holds 1000 values, we set koc.maxBatch=100, and endingOffset to 150. Last ack sent will
    // be 100, last 50 will not be sent.
    Flow[WithOffset]
      .map(_.committableOffset)
      .via(committer)
      .map(_ => Done)
  }

  /** Folds a raw consumer record into a map. Shared by [[withOffsetFoldable]] and
    * [[noOffsetFoldable]] so both produce exactly the same record shape.
    *
    * Always present: `value`, `topic`, `partition`, `offset`, `timestamp`, `timestampType`,
    * `serializedKeySize`, `serializedValueSize`. Present only when available: `key`,
    * `leaderEpoch`, and `headers` (omitted when the record has no headers).
    */
  private def foldRecord[B](record: NoOffset, folder: DataFolderTo[B]): B = {
    val builder = folder.mapBuilder()
    builder.add("value", folder.bytes(record.value()))
    // Key can be null if not specified per Kafka API
    Option(record.key()).foreach(k => builder.add("key", folder.bytes(k)))
    builder.add("topic", folder.string(record.topic()))
    builder.add("partition", folder.integer(record.partition().toLong))
    builder.add("offset", folder.integer(record.offset()))
    builder.add("timestamp", folder.integer(record.timestamp()))
    builder.add("timestampType", folder.string(record.timestampType().name()))
    record.leaderEpoch().toScala.foreach { epoch =>
      builder.add("leaderEpoch", folder.integer(epoch.toLong))
    }
    builder.add("serializedKeySize", folder.integer(record.serializedKeySize().toLong))
    builder.add("serializedValueSize", folder.integer(record.serializedValueSize().toLong))
    if (record.headers() != null && record.headers().iterator().hasNext) {
      val headersBuilder = folder.mapBuilder()
      val it = record.headers().iterator()
      while (it.hasNext) {
        val h = it.next()
        headersBuilder.add(h.key(), folder.bytes(h.value()))
      }
      builder.add("headers", headersBuilder.finish())
    }
    builder.finish()
  }

  /** Folds a committable message into `{record: ..., committableOffset: {partitionOffset: ..., metadata: ...}}`. */
  val withOffsetFoldable: DataFoldableFrom[WithOffset] = new DataFoldableFrom[WithOffset] {
    def fold[B](value: WithOffset, folder: DataFolderTo[B]): B = {
      val foldedRecord = foldRecord(value.record, folder)

      val partitionBuilder = folder.mapBuilder()
      val committableOffset = value.committableOffset
      val partitionOffset = committableOffset.partitionOffset
      partitionBuilder.add("topic", folder.string(partitionOffset.key.topic))
      partitionBuilder.add("partition", folder.integer(partitionOffset.key.partition.toLong))
      partitionBuilder.add("offset", folder.integer(partitionOffset.offset))

      val committableOffsetBuilder = folder.mapBuilder()
      committableOffsetBuilder.add("partitionOffset", partitionBuilder.finish())
      committableOffset match {
        case metadata: ConsumerMessage.CommittableOffsetMetadata =>
          committableOffsetBuilder.add("metadata", folder.string(metadata.metadata))
      }

      val committableMessageBuilder = folder.mapBuilder()
      committableMessageBuilder.add("record", foldedRecord)
      committableMessageBuilder.add("committableOffset", committableOffsetBuilder.finish())
      committableMessageBuilder.finish()
    }
  }

  /** Folds a plain consumer record into a flat map of its fields. */
  val noOffsetFoldable: DataFoldableFrom[NoOffset] = new DataFoldableFrom[NoOffset] {
    def fold[B](value: NoOffset, folder: DataFolderTo[B]): B = foldRecord(value, folder)
  }
}

/** A Kafka-backed [[FramedSourceProvider]].
  *
  * With `maybeExplicitCommit` set, the source is a committable source and acks commit offsets;
  * without it, the source is a plain source. When `endingOffset` is set, the stream is cut off
  * at the first record whose offset exceeds it. The typed secret parameters, when present, take
  * precedence over same-named entries in `kafkaProperties`.
  */
case class KafkaSource(
  topics: Either[KafkaIngest.Topics, KafkaIngest.PartitionAssignments],
  bootstrapServers: String,
  groupId: String,
  securityProtocol: KafkaSecurityProtocol,
  maybeExplicitCommit: Option[KafkaOffsetCommitting],
  autoOffsetReset: KafkaAutoOffsetReset,
  kafkaProperties: KafkaIngest.KafkaProperties,
  endingOffset: Option[Long],
  decoders: Seq[ContentDecoder],
  meter: IngestMeter,
  system: ActorSystem,
  sslKeystorePassword: Option[Secret] = None,
  sslTruststorePassword: Option[Secret] = None,
  sslKeyPassword: Option[Secret] = None,
  saslJaasConfig: Option[SaslJaasConfig] = None,
) extends FramedSourceProvider
    with LazySafeLogging {

  /** Log warnings for any kafkaProperties keys that will be overridden by typed Secret params. */
  private def warnOnOverriddenProperties(): Unit = {
    val typedSecretKeys: Set[String] = Set.empty ++
      sslKeystorePassword.map(_ => "ssl.keystore.password") ++
      sslTruststorePassword.map(_ => "ssl.truststore.password") ++
      sslKeyPassword.map(_ => "ssl.key.password") ++
      saslJaasConfig.map(_ => "sasl.jaas.config")

    val overriddenKeys = kafkaProperties.keySet.intersect(typedSecretKeys)
    overriddenKeys.foreach { key =>
      logger.warn(
        safe"Kafka property '${Safe(key)}' in kafkaProperties will be overridden by typed Secret parameter. " +
        safe"Remove '${Safe(key)}' from kafkaProperties to suppress this warning.",
      )
    }
  }

  /** Merge typed secret params into Kafka properties. Typed params take precedence.
    *
    * Visible within `ingest2` for testing.
    */
  private[ingest2] def effectiveProperties: Map[String, String] = {
    import Secret.Unsafe._
    val secretProps: Map[String, String] = Map.empty ++
      sslKeystorePassword.map("ssl.keystore.password" -> _.unsafeValue) ++
      sslTruststorePassword.map("ssl.truststore.password" -> _.unsafeValue) ++
      sslKeyPassword.map("ssl.key.password" -> _.unsafeValue) ++
      saslJaasConfig.map("sasl.jaas.config" -> SaslJaasConfig.toJaasConfigString(_))

    kafkaProperties ++ secretProps
  }

  /** Builds the framed Kafka source.
    *
    * Complaints from the settings validator are returned as the invalid side. Any exception
    * thrown while building is logged under a fresh correlation id and reported as a
    * [[KafkaValidationException]] that only references that id.
    */
  def framedSource: ValidatedNel[BaseError, FramedSource] = Try {
    warnOnOverriddenProperties()
    saslJaasConfig.foreach(config => logger.info(safe"Kafka SASL config: $config"))
    val subs = subscription(topics)
    val consumerSettings: ConsumerSettings[Array[Byte], Array[Byte]] =
      buildConsumerSettings(
        bootstrapServers,
        groupId,
        autoOffsetReset,
        effectiveProperties,
        securityProtocol,
        decoders,
        system,
      )

    val complaintsFromValidator: ValidatedNel[BaseError, Unit] =
      KafkaSettingsValidator
        .validateInput(consumerSettings.properties, assumeConfigIsFinal = true)
        .map(_.map(KafkaValidationException.apply))
        .toInvalid(())

    maybeExplicitCommit match {
      case Some(explicitCommit: ExplicitCommit) => // Committing source
        complaintsFromValidator.as {
          val consumer: Source[WithOffset, Consumer.Control] =
            Consumer.committableSource(consumerSettings, subs)

          val source: Source[WithOffset, KafkaKillSwitch] = endingOffset
            .fold(consumer)(o => consumer.takeWhile(r => r.record.offset() <= o))
            .via(metered[WithOffset](meter, o => o.record.serializedValueSize()))
            .mapMaterializedValue(KafkaKillSwitch)

          FramedSource[WithOffset](
            source,
            meter,
            input => input.record.value(),
            withOffsetFoldable,
            ackFlow(explicitCommit, system),
          )
        }
      case None => // Non-committing source
        complaintsFromValidator.as {
          val consumer: Source[NoOffset, Consumer.Control] = Consumer.plainSource(consumerSettings, subs)
          val source = endingOffset
            .fold(consumer)(o => consumer.takeWhile(r => r.offset() <= o))
            .via(metered[NoOffset](meter, o => o.serializedValueSize()))
            .mapMaterializedValue(KafkaKillSwitch)
          FramedSource[NoOffset](source, meter, noOffset => noOffset.value(), noOffsetFoldable)
        }
    }
  } match {
    case Success(result) => result
    case Failure(configEx: ConfigException) =>
      val correlationId = UUID.randomUUID()
      logger.error(
        safe"Kafka ConfigException during source creation [correlationId: ${Safe(correlationId.toString)}]: ${Safe(configEx.getMessage)}",
      )
      KafkaValidationException(
        s"Kafka configuration error check logs for [correlationId: ${correlationId.toString}]",
      ).invalidNel
    case Failure(exception) =>
      val correlationId = UUID.randomUUID()
      // Only the message is logged, so fall back to the exception's string form when it has no
      // message; otherwise the log line would just say "null".
      val description = Option(exception.getMessage).getOrElse(exception.toString)
      logger.error(
        safe"Error during source creation [correlationId: ${Safe(correlationId.toString)}]: ${Safe(description)}",
      )
      KafkaValidationException(
        s"A configuration error occurred check logs for [correlationId: ${correlationId.toString}]",
      ).invalidNel
  }

}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/sources/KinesisKclSrc.scala
================================================
package com.thatdot.quine.app.model.ingest2.sources

import java.net.InetAddress
import java.nio.ByteBuffer
import java.util.{Calendar, Optional, UUID}

import scala.concurrent.ExecutionContext
import scala.concurrent.duration._
import scala.jdk.CollectionConverters._
import scala.jdk.DurationConverters.ScalaDurationOps

import org.apache.pekko.stream.connectors.kinesis.scaladsl.KinesisSchedulerSource
import org.apache.pekko.stream.connectors.kinesis.{
  CommittableRecord,
  KinesisSchedulerCheckpointSettings,
  KinesisSchedulerSourceSettings => PekkoKinesisSchedulerSourceSettings,
}
import org.apache.pekko.stream.scaladsl.{Flow, Source}
import org.apache.pekko.{Done, NotUsed}

import cats.data.Validated.Valid
import cats.data.ValidatedNel
import com.typesafe.scalalogging.LazyLogging
import software.amazon.awssdk.awscore.retry.AwsRetryStrategy
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration
import
software.amazon.awssdk.http.async.SdkAsyncHttpClient import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient import software.amazon.awssdk.retries.StandardRetryStrategy import software.amazon.awssdk.services.cloudwatch.CloudWatchAsyncClient import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient import software.amazon.awssdk.services.dynamodb.model.{BillingMode => AwsBillingMode} import software.amazon.awssdk.services.kinesis.KinesisAsyncClient import software.amazon.awssdk.services.kinesis.model.EncryptionType import software.amazon.kinesis.common.{ConfigsBuilder, InitialPositionInStream, InitialPositionInStreamExtended} import software.amazon.kinesis.coordinator.CoordinatorConfig.{ClientVersionConfig => AwsClientVersionConfig} import software.amazon.kinesis.coordinator.Scheduler import software.amazon.kinesis.leases.{NoOpShardPrioritization, ParentsFirstShardPrioritization} import software.amazon.kinesis.metrics.{MetricsLevel => AwsMetricsLevel} import software.amazon.kinesis.processor.{ShardRecordProcessorFactory, SingleStreamTracker} import software.amazon.kinesis.retrieval.fanout.FanOutConfig import software.amazon.kinesis.retrieval.polling.PollingConfig import com.thatdot.data.{DataFoldableFrom, DataFolderTo} import com.thatdot.quine.app.model.ingest.serialization.ContentDecoder import com.thatdot.quine.app.model.ingest.util.AwsOps import com.thatdot.quine.app.model.ingest.util.AwsOps.AwsBuilderOps import com.thatdot.quine.app.model.ingest2._ import com.thatdot.quine.app.model.ingest2.source.FramedSource import com.thatdot.quine.app.routes.IngestMeter import com.thatdot.quine.util.BaseError import com.thatdot.quine.{routes => V1} /** The definition of a source stream from Amazon Kinesis using KCL, * now translated to expose a framedSource. 
* * @param kinesisStreamName The name of the kinesis stream to start ingesting from * @param applicationName The name of the dynamo db table and cloud watch metrics, unless overridden * @param meter An instance of [[IngestMeter]] for metering the ingest flow * @param credentialsOpt The AWS credentials to access the stream (optional) * @param regionOpt The AWS region in which Kinesis resides (optional) * @param initialPosition The KCL initial position in stream describing where to begin reading records * @param numRetries The maximum number of retry attempts for AWS client calls * @param decoders A sequence of [[ContentDecoder]] for handling inbound Kinesis records * @param schedulerSettings Pekko Connectors scheduler settings * @param checkpointSettings Pekko Connectors checkpointing configuration * @param advancedSettings All additional configuration settings for KCL */ final case class KinesisKclSrc( kinesisStreamName: String, applicationName: String, meter: IngestMeter, credentialsOpt: Option[V1.AwsCredentials], regionOpt: Option[V1.AwsRegion], initialPosition: InitialPosition, numRetries: Int, decoders: Seq[ContentDecoder], schedulerSettings: KinesisSchedulerSourceSettings, checkpointSettings: KinesisCheckpointSettings, advancedSettings: KCLConfiguration, )(implicit val ec: ExecutionContext) extends FramedSourceProvider with LazyLogging { import KinesisKclSrc._ /** Builds and returns a `FramedSource`, wrapped in a `ValidatedNel` for error handling. * This method instantiates Kinesis, DynamoDB, and CloudWatch async clients, * configures a KCL scheduler, and returns a framed Akka Stream source that * emits byte representation of [[CommittableRecord]] instances. * * @return A [[ValidatedNel]] of [[BaseError]] or a [[FramedSource]]. 
*/ override def framedSource: ValidatedNel[BaseError, FramedSource] = { val httpClient = buildAsyncHttpClient val kinesisClient = buildAsyncClient(httpClient, credentialsOpt, regionOpt, numRetries) val dynamoClient: DynamoDbAsyncClient = DynamoDbAsyncClient.builder .credentials(credentialsOpt) .httpClient(httpClient) .region(regionOpt) .build val cloudWatchClient: CloudWatchAsyncClient = CloudWatchAsyncClient.builder .credentials(credentialsOpt) .httpClient(httpClient) .region(regionOpt) .build val schedulerSourceSettings: PekkoKinesisSchedulerSourceSettings = { val base = PekkoKinesisSchedulerSourceSettings.defaults val withSize = schedulerSettings.bufferSize.fold(base)(base.withBufferSize) val withSizeAndTimeout = schedulerSettings.backpressureTimeoutMillis.fold(withSize) { t => withSize.withBackpressureTimeout(java.time.Duration.ofMillis(t)) } withSizeAndTimeout } val builder: ShardRecordProcessorFactory => Scheduler = { recordProcessorFactory => val initialPositionInStream: InitialPositionInStreamExtended = initialPosition match { case InitialPosition.Latest => InitialPositionInStreamExtended.newInitialPosition(InitialPositionInStream.LATEST) case InitialPosition.TrimHorizon => InitialPositionInStreamExtended.newInitialPosition(InitialPositionInStream.TRIM_HORIZON) case InitialPosition.AtTimestamp(year, month, date, hourOfDay, minute, second) => val time = Calendar.getInstance() // Minus one because Calendar Month is 0 indexed time.set(year, month - 1, date, hourOfDay, minute, second) InitialPositionInStreamExtended.newInitialPositionAtTimestamp(time.getTime) } val streamTracker = new SingleStreamTracker(kinesisStreamName, initialPositionInStream) val workerId = advancedSettings.configsBuilder.workerIdentifier .getOrElse(s"${InetAddress.getLocalHost.getHostName}:${UUID.randomUUID()}") val configsBuilder = new ConfigsBuilder( streamTracker, applicationName, kinesisClient, dynamoClient, cloudWatchClient, workerId, recordProcessorFactory, ) 
advancedSettings.configsBuilder.tableName.foreach(configsBuilder.tableName) val leaseManagementConfig = configsBuilder.leaseManagementConfig // This should be covered by `streamTracker`, but this is to be safe since we're // not providing an override in the abbreviated `LeaseManagementConfig` API schema .initialPositionInStream(initialPositionInStream) val processorConfig = configsBuilder.processorConfig val coordinatorConfig = configsBuilder.coordinatorConfig val lifecycleConfig = configsBuilder.lifecycleConfig val retrievalConfig = configsBuilder.retrievalConfig val metricsConfig = configsBuilder.metricsConfig advancedSettings.leaseManagementConfig.failoverTimeMillis.foreach(leaseManagementConfig.failoverTimeMillis) advancedSettings.leaseManagementConfig.shardSyncIntervalMillis.foreach( leaseManagementConfig.shardSyncIntervalMillis, ) advancedSettings.leaseManagementConfig.cleanupLeasesUponShardCompletion.foreach( leaseManagementConfig.cleanupLeasesUponShardCompletion, ) advancedSettings.leaseManagementConfig.ignoreUnexpectedChildShards.foreach( leaseManagementConfig.ignoreUnexpectedChildShards, ) advancedSettings.leaseManagementConfig.maxLeasesForWorker.foreach(leaseManagementConfig.maxLeasesForWorker) advancedSettings.leaseManagementConfig.maxLeaseRenewalThreads.foreach(value => leaseManagementConfig.maxLeaseRenewalThreads(value), ) advancedSettings.leaseManagementConfig.billingMode.foreach { case BillingMode.PROVISIONED => leaseManagementConfig.billingMode(AwsBillingMode.PROVISIONED) case BillingMode.PAY_PER_REQUEST => leaseManagementConfig.billingMode(AwsBillingMode.PAY_PER_REQUEST) case BillingMode.UNKNOWN_TO_SDK_VERSION => leaseManagementConfig.billingMode(AwsBillingMode.UNKNOWN_TO_SDK_VERSION) } advancedSettings.leaseManagementConfig.initialLeaseTableReadCapacity.foreach( leaseManagementConfig.initialLeaseTableReadCapacity, ) advancedSettings.leaseManagementConfig.initialLeaseTableWriteCapacity.foreach( leaseManagementConfig.initialLeaseTableWriteCapacity, 
) // Begin setting workerUtilizationAwareAssignmentConfig val workerUtilizationAwareAssignmentConfig = leaseManagementConfig.workerUtilizationAwareAssignmentConfig() advancedSettings.leaseManagementConfig.reBalanceThresholdPercentage.foreach( workerUtilizationAwareAssignmentConfig.reBalanceThresholdPercentage, ) advancedSettings.leaseManagementConfig.dampeningPercentage.foreach( workerUtilizationAwareAssignmentConfig.dampeningPercentage, ) advancedSettings.leaseManagementConfig.allowThroughputOvershoot.foreach( workerUtilizationAwareAssignmentConfig.allowThroughputOvershoot, ) advancedSettings.leaseManagementConfig.disableWorkerMetrics.foreach( workerUtilizationAwareAssignmentConfig.disableWorkerMetrics, ) advancedSettings.leaseManagementConfig.maxThroughputPerHostKBps.foreach( workerUtilizationAwareAssignmentConfig.maxThroughputPerHostKBps, ) // Finalize setting workerUtilizationAwareAssignmentConfig by updating its value in the leaseManagementConfig leaseManagementConfig.workerUtilizationAwareAssignmentConfig(workerUtilizationAwareAssignmentConfig) val gracefulLeaseHandoffConfig = leaseManagementConfig.gracefulLeaseHandoffConfig() advancedSettings.leaseManagementConfig.isGracefulLeaseHandoffEnabled.foreach( gracefulLeaseHandoffConfig.isGracefulLeaseHandoffEnabled, ) advancedSettings.leaseManagementConfig.gracefulLeaseHandoffTimeoutMillis.foreach( gracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis, ) leaseManagementConfig.gracefulLeaseHandoffConfig(gracefulLeaseHandoffConfig) advancedSettings.retrievalSpecificConfig .map { case RetrievalSpecificConfig.FanOutConfig( consumerArn, consumerName, maxDescribeStreamSummaryRetries, maxDescribeStreamConsumerRetries, registerStreamConsumerRetries, retryBackoffMillis, ) => val fanOutConfig = new FanOutConfig(kinesisClient) fanOutConfig.streamName(kinesisStreamName) consumerArn.foreach(fanOutConfig.consumerArn) consumerName.foreach(fanOutConfig.consumerName) 
maxDescribeStreamSummaryRetries.foreach(fanOutConfig.maxDescribeStreamSummaryRetries) maxDescribeStreamConsumerRetries.foreach(fanOutConfig.maxDescribeStreamConsumerRetries) registerStreamConsumerRetries.foreach(fanOutConfig.registerStreamConsumerRetries) retryBackoffMillis.foreach(fanOutConfig.retryBackoffMillis) fanOutConfig case RetrievalSpecificConfig.PollingConfig( maxRecords, retryGetRecordsInSeconds, maxGetRecordsThreadPool, idleTimeBetweenReadsInMillis, ) => val pollingConfig = new PollingConfig(kinesisStreamName, kinesisClient) maxRecords.foreach(pollingConfig.maxRecords) // It's tempting to always set the config value for Optional types, using RichOption or some such, // but we really only want to set something other than the library default if one is provided via the API maxGetRecordsThreadPool.foreach(value => pollingConfig.maxGetRecordsThreadPool(Optional.of(value))) retryGetRecordsInSeconds.foreach(value => pollingConfig.retryGetRecordsInSeconds(Optional.of(value))) idleTimeBetweenReadsInMillis.foreach(pollingConfig.idleTimeBetweenReadsInMillis) pollingConfig } .foreach(retrievalConfig.retrievalSpecificConfig) advancedSettings.processorConfig.callProcessRecordsEvenForEmptyRecordList.foreach( processorConfig.callProcessRecordsEvenForEmptyRecordList, ) advancedSettings.coordinatorConfig.parentShardPollIntervalMillis.foreach( coordinatorConfig.parentShardPollIntervalMillis, ) advancedSettings.coordinatorConfig.skipShardSyncAtWorkerInitializationIfLeasesExist.foreach( coordinatorConfig.skipShardSyncAtWorkerInitializationIfLeasesExist, ) advancedSettings.coordinatorConfig.shardPrioritization.foreach { case ShardPrioritization.ParentsFirstShardPrioritization(maxDepth) => coordinatorConfig.shardPrioritization(new ParentsFirstShardPrioritization(maxDepth)) case ShardPrioritization.NoOpShardPrioritization => coordinatorConfig.shardPrioritization(new NoOpShardPrioritization()) } advancedSettings.coordinatorConfig.clientVersionConfig.foreach { case 
ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X => coordinatorConfig.clientVersionConfig(AwsClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X) case ClientVersionConfig.CLIENT_VERSION_CONFIG_3X => coordinatorConfig.clientVersionConfig(AwsClientVersionConfig.CLIENT_VERSION_CONFIG_3X) } advancedSettings.lifecycleConfig.taskBackoffTimeMillis.foreach(lifecycleConfig.taskBackoffTimeMillis) // It's tempting to always set the config value for Optional types, using RichOption or some such, // but we really only want to set something other than the library default if one is provided via the API advancedSettings.lifecycleConfig.logWarningForTaskAfterMillis.foreach(value => lifecycleConfig.logWarningForTaskAfterMillis(Optional.of(value)), ) advancedSettings.retrievalConfig.listShardsBackoffTimeInMillis.foreach( retrievalConfig.listShardsBackoffTimeInMillis, ) advancedSettings.retrievalConfig.maxListShardsRetryAttempts.foreach(retrievalConfig.maxListShardsRetryAttempts) advancedSettings.metricsConfig.metricsBufferTimeMillis.foreach(metricsConfig.metricsBufferTimeMillis) advancedSettings.metricsConfig.metricsMaxQueueSize.foreach(metricsConfig.metricsMaxQueueSize) advancedSettings.metricsConfig.metricsLevel.foreach { case MetricsLevel.NONE => metricsConfig.metricsLevel(AwsMetricsLevel.NONE) case MetricsLevel.SUMMARY => metricsConfig.metricsLevel(AwsMetricsLevel.SUMMARY) case MetricsLevel.DETAILED => metricsConfig.metricsLevel(AwsMetricsLevel.DETAILED) } advancedSettings.metricsConfig.metricsEnabledDimensions.foreach(values => metricsConfig.metricsEnabledDimensions(new java.util.HashSet(values.map(_.value).asJava)), ) // Note: Currently, this config is the only one built within the configs builder // that is not affected by the `advancedSettings` traversal above. That makes // sense because we also have `checkpointSettings` at the same level, but the // reasons that we don't build a `checkpointConfig` from that parameter are: // 1. 
Those settings are used for `KinesisSchedulerCheckpointSettings` in the // `ack` flow, and that purpose is distinct from this checkpoint config's // purpose, so we probably don't want to re-use those values for discrete // things. // 2. At a glance, the only way to build a checkpoint config other than the // parameterless default one built within the configs builder at this // accessor is to build a `DynamoDBCheckpointer` via its factory, and that // is no small task. val checkpointConfig = configsBuilder.checkpointConfig new Scheduler( checkpointConfig, coordinatorConfig, leaseManagementConfig, lifecycleConfig, metricsConfig, processorConfig, retrievalConfig, ) } val source: Source[CommittableRecord, NotUsed] = KinesisSchedulerSource(builder, schedulerSourceSettings) .mapMaterializedValue(_ => NotUsed) .via(metered[CommittableRecord](meter, r => recordBufferToArray(r.record.data()).length)) val framed = FramedSource[CommittableRecord]( withKillSwitches(source), meter, record => ContentDecoder.decode(decoders, recordBufferToArray(record.record.data())), committableRecordFolder, terminationHook = () => { Seq(kinesisClient, dynamoClient, cloudWatchClient).foreach { client => client.close() } }, // Performs Checkpointing logic, defined below ackFlow = ack, ) Valid(framed) } val ack: Flow[CommittableRecord, Done, NotUsed] = { if (checkpointSettings.disableCheckpointing) { Flow.fromFunction[CommittableRecord, Done](_ => Done) } else { val settings: KinesisSchedulerCheckpointSettings = { val base = KinesisSchedulerCheckpointSettings.defaults val withBatchSize = checkpointSettings.maxBatchSize.fold(base)(base.withMaxBatchSize) val withBatchAndWait = checkpointSettings.maxBatchWaitMillis.fold(withBatchSize) { wait => withBatchSize.withMaxBatchWait(wait.millis.toJava) } withBatchAndWait } KinesisSchedulerSource .checkpointRecordsFlow(settings) .map(_ => Done) } } } object KinesisKclSrc { /** Converts the supplied [[ByteBuffer]] to an `Array[Byte]`. 
* A new byte array is allocated and populated by reading from a duplication of the buffer.
    *
    * @param data The [[ByteBuffer]] to convert
    * @return A corresponding array of bytes
    */
  private def recordBufferToArray(data: ByteBuffer): Array[Byte] = {
    // Duplicate in case something else was using the position information
    val duplicateBuffer = data.duplicate()
    val bytes = new Array[Byte](duplicateBuffer.remaining())
    duplicateBuffer.get(bytes)
    bytes
  }

  /** Builds a new Netty-based async HTTP client whose maximum concurrency is
    * `AwsOps.httpConcurrencyPerClient`. Each invocation builds a fresh client.
    */
  def buildAsyncHttpClient: SdkAsyncHttpClient =
    NettyNioAsyncHttpClient.builder.maxConcurrency(AwsOps.httpConcurrencyPerClient).build()

  /** Builds a [[KinesisAsyncClient]] that sends its requests through the supplied HTTP client.
    *
    * @param httpClient     The async HTTP client the Kinesis client should use
    * @param credentialsOpt Optional AWS credentials applied to the client builder
    * @param regionOpt      Optional AWS region applied to the client builder
    * @param numRetries     Passed as `maxAttempts` to the standard AWS retry strategy
    * @return The configured Kinesis client
    */
  def buildAsyncClient(
    httpClient: SdkAsyncHttpClient,
    credentialsOpt: Option[V1.AwsCredentials],
    regionOpt: Option[V1.AwsRegion],
    numRetries: Int,
  ): KinesisAsyncClient = {
    val retryStrategy: StandardRetryStrategy = AwsRetryStrategy
      .standardRetryStrategy()
      .toBuilder
      .maxAttempts(numRetries)
      .build()
    KinesisAsyncClient
      .builder()
      .credentials(credentialsOpt)
      .region(regionOpt)
      .httpClient(httpClient)
      .overrideConfiguration(
        ClientOverrideConfiguration
          .builder()
          .retryStrategy(retryStrategy)
          .build(),
      )
      .build
  }

  /** Folds a [[CommittableRecord]] into a map containing the record payload and its Kinesis metadata.
    *
    * The `schema` entry is a nested map when the record carries a schema, and a null value otherwise.
    * The `encryptionType` entry is a null value when the type is missing or unknown to the SDK.
    */
  protected val committableRecordFolder: DataFoldableFrom[CommittableRecord] = new DataFoldableFrom[CommittableRecord] {
    def fold[B](value: CommittableRecord, folder: DataFolderTo[B]): B = {
      val record = value.record
      val builder = folder.mapBuilder()
      builder.add("data", folder.bytes(recordBufferToArray(record.data())))
      builder.add("sequenceNumber", folder.string(record.sequenceNumber()))
      builder.add("approximateArrivalTimestamp", folder.string(record.approximateArrivalTimestamp().toString))
      builder.add("partitionKey", folder.string(record.partitionKey()))
      builder.add(
        "encryptionType",
        record.encryptionType() match {
          case EncryptionType.NONE => folder.string(EncryptionType.NONE.toString)
          case EncryptionType.KMS => folder.string(EncryptionType.KMS.toString)
          // A null encryption type would otherwise escape the match as a MatchError
          case EncryptionType.UNKNOWN_TO_SDK_VERSION | null => folder.nullValue
        },
      )
      builder.add("subSequenceNumber", folder.integer(record.subSequenceNumber()))
      // NOTE(review): explicitHashKey may be null for some records; this relies on `folder.string`
      // tolerating null -- confirm against the DataFolderTo implementations
      builder.add("explicitHashKey", folder.string(record.explicitHashKey()))
      builder.add("aggregated", if (record.aggregated()) folder.trueValue else folder.falseValue)
      // Guard against a null schema instead of dereferencing it unconditionally.
      // NOTE(review): presumably the schema is only present when a schema-registry deserializer
      // is configured for retrieval -- confirm against the KCL documentation
      val schemaValue = Option(record.schema()).fold(folder.nullValue) { schema =>
        val schemaBuilder = folder.mapBuilder()
        schemaBuilder.add("schemaName", folder.string(schema.getSchemaName))
        schemaBuilder.add("schemaDefinition", folder.string(schema.getSchemaDefinition))
        schemaBuilder.add("dataFormat", folder.string(schema.getDataFormat))
        schemaBuilder.finish()
      }
      builder.add("schema", schemaValue)
      builder.finish()
    }
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/sources/KinesisSource.scala
================================================
package com.thatdot.quine.app.model.ingest2.sources

import java.time.Instant

import scala.collection.Set
import scala.compat.java8.FutureConverters.CompletionStageOps
import scala.concurrent.duration.DurationInt
import scala.concurrent.{ExecutionContext, Future}
import scala.jdk.CollectionConverters._
import scala.util.Try

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.connectors.kinesis.ShardIterator._
import org.apache.pekko.stream.connectors.kinesis.scaladsl.{KinesisSource => PekkoKinesisSource}
import org.apache.pekko.stream.connectors.kinesis.{ShardIterator, ShardSettings}
import org.apache.pekko.stream.scaladsl.{Flow, Source}

import cats.data.Validated.{Valid, invalidNel}
import cats.data.ValidatedNel
import cats.syntax.apply._
import software.amazon.awssdk.awscore.retry.AwsRetryStrategy
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration
import software.amazon.awssdk.http.async.SdkAsyncHttpClient
import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient
import software.amazon.awssdk.regions.providers.DefaultAwsRegionProviderChain
import software.amazon.awssdk.retries.StandardRetryStrategy
import
software.amazon.awssdk.services.kinesis.model.DescribeStreamRequest
import software.amazon.awssdk.services.kinesis.{KinesisAsyncClient, model => kinesisModel}

import com.thatdot.data.{DataFoldableFrom, DataFolderTo}
import com.thatdot.quine.app.model.ingest.serialization.ContentDecoder
import com.thatdot.quine.app.model.ingest.util.AwsOps
import com.thatdot.quine.app.model.ingest.util.AwsOps.AwsBuilderOps
import com.thatdot.quine.app.model.ingest2.source.FramedSource
import com.thatdot.quine.app.model.ingest2.sources.KinesisSource.buildAsyncClient
import com.thatdot.quine.app.routes.IngestMeter
import com.thatdot.quine.exceptions.{KinesisConfigurationError, ShardIterationException}
import com.thatdot.quine.routes.{AwsCredentials, AwsRegion, KinesisIngest}
import com.thatdot.quine.util.BaseError

object KinesisSource {

  /** Builds a new Netty-based async HTTP client whose maximum concurrency is
    * `AwsOps.httpConcurrencyPerClient`. Each invocation builds a fresh client.
    */
  def buildAsyncHttpClient: SdkAsyncHttpClient =
    NettyNioAsyncHttpClient.builder.maxConcurrency(AwsOps.httpConcurrencyPerClient).build()

  /** Accepts an explicitly supplied region as-is. When none is supplied, checks that the
    * default AWS region provider chain can resolve one and reports a configuration error otherwise.
    */
  private def validateRegion(regionOpt: Option[AwsRegion]): ValidatedNel[BaseError, Option[AwsRegion]] =
    regionOpt match {
      case some @ Some(_) => Valid(some)
      case None =>
        // This has the potential to error in other ways unless
        // we validate all of the logic the `DefaultAwsRegionProviderChain`
        // implements. But this should take care of the failing test
        // due to the Kinesis Client reading from the environment.
        Try(new DefaultAwsRegionProviderChain().getRegion).fold(
          _ =>
            invalidNel(
              KinesisConfigurationError(
                "No AWS region was provided and no default could be determined from the environment. " +
                "Provide an explicit region or set AWS_REGION.",
              ),
            ),
          _ => Valid(None),
        )
    }

  /** Requires a strictly positive retry count. */
  private def validateRetries(numRetries: Int): ValidatedNel[BaseError, Int] =
    if (numRetries > 0) Valid(numRetries)
    else invalidNel(KinesisConfigurationError(s"numRetries must be > 0, but was $numRetries"))

  /** Validates the retry count and region, accumulating any errors, and builds a
    * [[KinesisAsyncClient]] when both are valid.
    *
    * @param credentialsOpt Optional AWS credentials applied to the client builder
    * @param regionOpt      Optional AWS region; when absent one must be resolvable from the environment
    * @param numRetries     Passed as `maxAttempts` to the standard AWS retry strategy; must be > 0
    * @return The configured client, or the accumulated validation errors
    */
  def buildAsyncClient(
    credentialsOpt: Option[AwsCredentials],
    regionOpt: Option[AwsRegion],
    numRetries: Int,
  ): ValidatedNel[BaseError, KinesisAsyncClient] =
    (validateRetries(numRetries), validateRegion(regionOpt)).mapN { (retries, region) =>
      val retryStrategy: StandardRetryStrategy = AwsRetryStrategy
        .standardRetryStrategy()
        .toBuilder
        .maxAttempts(retries)
        .build()
      KinesisAsyncClient
        .builder()
        .credentials(credentialsOpt)
        .region(region)
        // Hand the SDK a builder rather than a built client: an HTTP client created from a builder
        // is owned by the service client and closed together with it, whereas a pre-built client
        // passed via `httpClient` is left open when the service client is closed.
        .httpClientBuilder(NettyNioAsyncHttpClient.builder.maxConcurrency(AwsOps.httpConcurrencyPerClient))
        .overrideConfiguration(
          ClientOverrideConfiguration
            .builder()
            .retryStrategy(retryStrategy)
            .build(),
        )
        .build
    }
}

/** Provides a framed source of Kinesis records read from the shards of `streamName`.
  *
  * @param streamName     Name of the Kinesis stream to read
  * @param shardIds       Shards to read; when absent or empty the shard list is looked up from the stream
  * @param credentialsOpt Optional AWS credentials
  * @param regionOpt      Optional AWS region
  * @param iteratorType   Where in each shard reading starts
  * @param numRetries     Maximum attempts for AWS client calls
  * @param meter          Meter fed with the byte size of every record
  * @param decoders       Content decoders applied to each record payload
  */
case class KinesisSource(
  streamName: String,
  shardIds: Option[Set[String]],
  credentialsOpt: Option[AwsCredentials],
  regionOpt: Option[AwsRegion],
  iteratorType: KinesisIngest.IteratorType,
  numRetries: Int,
  meter: IngestMeter,
  decoders: Seq[ContentDecoder] = Seq(),
)(implicit val ec: ExecutionContext)
    extends FramedSourceProvider {

  val kinesisClient: ValidatedNel[BaseError, KinesisAsyncClient] =
    buildAsyncClient(credentialsOpt, regionOpt, numRetries)

  import KinesisIngest.IteratorType

  // Sequence-number based iterators are only accepted when exactly one shard is specified
  private val shardIterator: ValidatedNel[BaseError, ShardIterator] = iteratorType match {
    case IteratorType.Latest => Valid(Latest)
    case IteratorType.TrimHorizon => Valid(TrimHorizon)
    case IteratorType.AtTimestamp(ms) => Valid(AtTimestamp(Instant.ofEpochMilli(ms)))
    case IteratorType.AtSequenceNumber(_) | IteratorType.AfterSequenceNumber(_) if shardIds.fold(true)(_.size != 1) =>
      invalidNel[BaseError, ShardIterator](
        ShardIterationException("To use AtSequenceNumber or AfterSequenceNumber, exactly 1 shard must be specified"),
      ) // will be caught as an "Invalid" (400) below
    case IteratorType.AtSequenceNumber(seqNo) => Valid(AtSequenceNumber(seqNo))
    case IteratorType.AfterSequenceNumber(seqNo) => Valid(AfterSequenceNumber(seqNo))
  }

  /** Builds the merged, rate-limited and metered stream of records for the configured shards. */
  private def kinesisStream(
    shardIterator: ShardIterator,
    client: KinesisAsyncClient,
  ): Source[kinesisModel.Record, NotUsed] = {
    // a Future yielding the shard IDs to read from
    // NOTE(review): only the shards returned by a single DescribeStream response are used here;
    // confirm whether streams with more shards than one response holds need pagination
    val shardSettingsFut: Future[List[ShardSettings]] =
      (shardIds.getOrElse(Set()) match {
        case noIds if noIds.isEmpty =>
          client
            .describeStream(
              DescribeStreamRequest.builder().streamName(streamName).build(),
            )
            .toScala
            .map(response =>
              response
                .streamDescription()
                .shards()
                .asScala
                .map(_.shardId())
                .toSet,
            )(ec)
        case atLeastOneId => Future.successful(atLeastOneId)
      })
        .map(ids =>
          ids
            .map(shardId => ShardSettings(streamName, shardId).withShardIterator(shardIterator))
            .toList,
        )

    // A Flow that limits the stream to 2MB * (number of shards) per second
    // TODO This is an imperfect heuristic, as the limit imposed is literally 2MB _per shard_,
    //  not 2MB per shard "on average across all shards".
    val kinesisRateLimiter: Flow[kinesisModel.Record, kinesisModel.Record, NotUsed] = Flow
      .futureFlow(
        shardSettingsFut.map { shards =>
          val kinesisShardCount = shards.length
          // Compute in Long and clamp: with 1024 or more shards the Int product would overflow
          // and hand `throttle` a non-positive cost budget
          val bytesPerShardPerSecond = 2L * 1024 * 1024
          val throttleBytesPerSecond =
            math.min(kinesisShardCount * bytesPerShardPerSecond, Int.MaxValue.toLong).toInt
          Flow[kinesisModel.Record]
            .throttle(
              throttleBytesPerSecond,
              1.second,
              rec =>
                // asByteArrayUnsafe avoids extra allocations, to get the length we can't use a readonly bytebuffer
                rec.data().asByteArrayUnsafe().length,
            )
            .via(metered[kinesisModel.Record](meter, r => r.data().asByteArrayUnsafe().length))
        }(ec),
      )
      .mapMaterializedValue(_ => NotUsed)

    Source
      .future(shardSettingsFut)
      .flatMapConcat(shardSettings => PekkoKinesisSource.basicMerge(shardSettings, client))
      .via(kinesisRateLimiter)
  }

  private val recordFolder: DataFoldableFrom[kinesisModel.Record] = new DataFoldableFrom[kinesisModel.Record] {
    def fold[B](value: kinesisModel.Record, folder: DataFolderTo[B]): B = {
      val builder = folder.mapBuilder()
builder.add("data", folder.bytes(value.data().asByteArrayUnsafe()))
      builder.add("sequenceNumber", folder.string(value.sequenceNumber()))
      builder.add("partitionKey", folder.string(value.partitionKey()))
      builder.finish()
    }
  }

  def framedSource: ValidatedNel[BaseError, FramedSource] =
    (shardIterator, kinesisClient).mapN { (si, client) =>
      FramedSource[kinesisModel.Record](
        withKillSwitches(kinesisStream(si, client)),
        meter,
        record => ContentDecoder.decode(decoders, record.data().asByteArrayUnsafe()),
        recordFolder,
        terminationHook = () => client.close(),
      )
    }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/sources/NumberIteratorSource.scala
================================================
package com.thatdot.quine.app.model.ingest2.sources

import java.nio.ByteBuffer

import scala.util.{Success, Try}

import org.apache.pekko.stream.scaladsl.Source

import com.thatdot.data.{DataFoldableFrom, DataFolderTo}
import com.thatdot.quine.app.ShutdownSwitch
import com.thatdot.quine.app.model.ingest2.source.{DecodedSource, IngestBounds}
import com.thatdot.quine.app.routes.IngestMeter
import com.thatdot.quine.graph.cypher.Expr

/** Source of consecutive integers, counting up from `bounds.startAtOffset` and
  * truncated to `bounds.ingestLimit` elements when a limit is set.
  *
  * @param bounds      Starting value and optional element limit
  * @param ingestMeter Meter handed to the resulting [[DecodedSource]]
  */
case class NumberIteratorSource(
  bounds: IngestBounds = IngestBounds(),
  ingestMeter: IngestMeter,
) {

  /** Builds the [[DecodedSource]]; both the frame and the decoded value are the integer itself. */
  def decodedSource: DecodedSource = new DecodedSource(ingestMeter) {
    type Decoded = Expr.Integer
    type Frame = Expr.Integer

    // A single folder serves both the decoded value and the frame
    private val longFolder: DataFoldableFrom[Expr.Integer] = new DataFoldableFrom[Expr.Integer] {
      def fold[B](value: Expr.Integer, folder: DataFolderTo[B]): B = folder.integer(value.long)
    }

    override val foldable: DataFoldableFrom[Expr.Integer] = longFolder
    override val foldableFrame: DataFoldableFrom[Expr.Integer] = longFolder

    /** Writes the integer into a freshly allocated 8-byte buffer and returns its backing array. */
    override def content(input: Expr.Integer): Array[Byte] = {
      val buffer = ByteBuffer.allocate(8)
      buffer.putLong(input.long).array()
    }

    def stream: Source[(() => Try[Expr.Integer], Expr.Integer), ShutdownSwitch] = {
      // Emit the current value and carry its successor forward as the next state
      val counting = Source.unfold(bounds.startAtOffset)(current => Some((current + 1, Expr.Integer(current))))
      val limited = bounds.ingestLimit match {
        case Some(limit) => counting.take(limit)
        case None => counting
      }

      withKillSwitches(
        limited
          .via(metered[Expr.Integer](meter, _ => 1)) //TODO this counts values not bytes
          .map(number => (() => Success(number), number)),
      )
    }
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/sources/ReactiveSource.scala
================================================
package com.thatdot.quine.app.model.ingest2.sources

import org.apache.pekko.NotUsed
import org.apache.pekko.actor.ActorSystem
import org.apache.pekko.stream.scaladsl.{Framing, Tcp}

import cats.data.{Validated, ValidatedNel}

import com.thatdot.data.DataFoldableFrom
import com.thatdot.quine.app.model.ingest2.source.FramedSource
import com.thatdot.quine.app.routes.IngestMeter
import com.thatdot.quine.util.BaseError

/** Reads length-prefixed frames pushed by a server over an outgoing TCP connection.
  *
  * @param url                Host to connect to
  * @param port               Port to connect to
  * @param meter              Meter fed with the length of every frame (after the prefix is removed)
  * @param maximumFrameLength Maximum frame length accepted by the length-field framing
  */
case class ReactiveSource(
  url: String,
  port: Int,
  meter: IngestMeter,
  maximumFrameLength: Int = 10 * 1024 * 1024, // 10MB default max frame size
)(implicit system: ActorSystem)
    extends FramedSourceProvider[Array[Byte]] {

  /** Builds the framed source. This implementation performs no validation of its
    * own and always yields a valid result.
    */
  override def framedSource: ValidatedNel[BaseError, FramedSource] = {
    import org.apache.pekko.stream.scaladsl.Source

    val tcpFlow = Tcp().outgoingConnection(url, port)

    // Frames are delimited by a 4-byte big-endian length field at offset 0
    val lengthPrefixed = Framing.lengthField(
      fieldLength = 4,
      fieldOffset = 0,
      maximumFrameLength = maximumFrameLength,
      byteOrder = java.nio.ByteOrder.BIG_ENDIAN,
    )

    // The client side never sends anything; the server pushes data to us.
    // Source.maybe keeps the connection open, whereas Source.empty would terminate it immediately.
    // https://stackoverflow.com/questions/35398852/reading-tcp-as-client-via-akka-stream
    val silentClient = Source.maybe[org.apache.pekko.util.ByteString]

    val frames: Source[Array[Byte], NotUsed] = silentClient
      .via(tcpFlow)
      .via(lengthPrefixed)
      .map(frame => frame.drop(4)) // strip the 4-byte length prefix
      .via(metered(meter, payload => payload.length)) // report metrics
      .map(payload => payload.toArray)
      .mapMaterializedValue(_ => NotUsed) // nothing is ever sent to the server

    Validated.valid(
      FramedSource[Array[Byte]](
        withKillSwitches(frames),
        meter,
        bytes => bytes,
        DataFoldableFrom.bytesDataFoldable,
      ),
    )
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/sources/S3Source.scala
================================================
package com.thatdot.quine.app.model.ingest2.sources

import java.nio.charset.Charset

import org.apache.pekko.NotUsed
import org.apache.pekko.actor.ActorSystem
import org.apache.pekko.stream.connectors.s3.scaladsl.S3
import org.apache.pekko.stream.connectors.s3.{S3Attributes, S3Ext, S3Settings}
import org.apache.pekko.stream.scaladsl.Source
import org.apache.pekko.util.ByteString

import cats.data.ValidatedNel

import com.thatdot.quine.app.model.ingest.serialization.ContentDecoder
import com.thatdot.quine.app.model.ingest.util.AwsOps
import com.thatdot.quine.app.model.ingest2.FileFormat
import
com.thatdot.quine.app.model.ingest2.source._
import com.thatdot.quine.app.model.ingest2.sources.FileSource.decodedSourceFromFileStream
import com.thatdot.quine.app.routes.IngestMeter
import com.thatdot.quine.routes._
import com.thatdot.quine.util.BaseError

/** Ingest source that reads a single S3 object and decodes it as a file in the given format.
  *
  * @param format          File format used to interpret the object
  * @param bucket          S3 bucket name
  * @param key             S3 object key
  * @param credentials     Optional AWS credentials; when absent the connector's default settings apply
  * @param maximumLineSize Passed through to the file-stream decoder
  * @param charset         Character set passed through to the file-stream decoder
  * @param ingestBounds    Bounds passed through to the file-stream decoder
  * @param meter           Ingest meter passed through to the file-stream decoder
  * @param decoders        Content decoders passed through to the file-stream decoder
  */
case class S3Source(
  format: FileFormat,
  bucket: String,
  key: String,
  credentials: Option[AwsCredentials],
  maximumLineSize: Int,
  charset: Charset = DEFAULT_CHARSET,
  ingestBounds: IngestBounds = IngestBounds(),
  meter: IngestMeter,
  decoders: Seq[ContentDecoder] = Seq(),
)(implicit system: ActorSystem) {

  /** Builds the decoded source on top of the object's byte stream. */
  def decodedSource: ValidatedNel[BaseError, DecodedSource] = {
    val objectBytes = S3Source.s3Source(bucket, key, credentials)
    decodedSourceFromFileStream(objectBytes, format, charset, maximumLineSize, ingestBounds, meter, decoders)
  }
}

object S3Source {

  /** Streams the bytes of `bucket`/`key`. When credentials are supplied, the request carries
    * S3 settings with a static credentials provider built from them.
    */
  def s3Source(bucket: String, key: String, credentials: Option[AwsCredentials])(implicit
    system: ActorSystem,
  ): Source[ByteString, NotUsed] = {
    val download = S3.getObject(bucket, key)
    val configured = credentials match {
      case creds @ Some(_) =>
        // TODO: See example: https://stackoverflow.com/questions/61938052/alpakka-s3-connection-issue
        val explicitSettings: S3Settings =
          S3Ext(system).settings.withCredentialsProvider(AwsOps.staticCredentialsProvider(creds))
        download.withAttributes(S3Attributes.settings(explicitSettings))
      case None =>
        download
    }
    configured.mapMaterializedValue(_ => NotUsed)
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/sources/ServerSentEventSource.scala
================================================
package com.thatdot.quine.app.model.ingest2.sources

import org.apache.pekko.actor.ActorSystem
import org.apache.pekko.http.scaladsl.Http
import org.apache.pekko.http.scaladsl.model.Uri
import org.apache.pekko.http.scaladsl.model.sse.ServerSentEvent
import org.apache.pekko.stream.connectors.sse.scaladsl.EventSource
import org.apache.pekko.stream.scaladsl.Source

import cats.data.ValidatedNel
import
cats.implicits.catsSyntaxValidatedId

import com.thatdot.data.{DataFoldableFrom, DataFolderTo}
import com.thatdot.quine.app.ShutdownSwitch
import com.thatdot.quine.app.model.ingest.serialization.ContentDecoder
import com.thatdot.quine.app.model.ingest2.source.FramedSource
import com.thatdot.quine.app.routes.IngestMeter
import com.thatdot.quine.util.BaseError

/** Ingest source that subscribes to a server-sent-event endpoint.
  *
  * @param url      Address of the event stream
  * @param meter    Meter fed with the character length of every event's `data`
  * @param decoders Content decoders applied to the bytes of every event's `data`
  */
case class ServerSentEventSource(url: String, meter: IngestMeter, decoders: Seq[ContentDecoder] = Seq())(implicit
  val system: ActorSystem,
) extends FramedSourceProvider {

  /** The metered event stream, wrapped with kill switches. */
  def stream: Source[ServerSentEvent, ShutdownSwitch] =
    withKillSwitches(
      EventSource(uri = Uri(url), send = Http().singleRequest(_))
        .via(metered[ServerSentEvent](meter, e => e.data.length)),
    )

  // Folds an event into a map; `id`, `retry` and `eventType` are only added when present
  private val serverSentEventFolder: DataFoldableFrom[ServerSentEvent] = new DataFoldableFrom[ServerSentEvent] {
    def fold[B](value: ServerSentEvent, folder: DataFolderTo[B]): B = {
      val builder = folder.mapBuilder()
      builder.add("data", folder.string(value.data))
      value.id.foreach(id => builder.add("id", folder.string(id)))
      value.retry.foreach(retry => builder.add("retry", folder.integer(retry.toLong)))
      value.eventType.foreach(eventType => builder.add("eventType", folder.string(eventType)))
      builder.finish()
    }
  }

  /** Builds the framed source; this implementation always yields a valid result. */
  def framedSource: ValidatedNel[BaseError, FramedSource] =
    FramedSource[ServerSentEvent](
      stream,
      meter,
      // Encode explicitly as UTF-8 instead of the JVM's platform-default charset, so the bytes
      // handed to the decoders do not vary with the host configuration
      ssEvent => ContentDecoder.decode(decoders, ssEvent.data.getBytes(java.nio.charset.StandardCharsets.UTF_8)),
      serverSentEventFolder,
    ).valid
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/sources/SqsSource.scala
================================================
package com.thatdot.quine.app.model.ingest2.sources

import scala.jdk.CollectionConverters.MapHasAsScala

import org.apache.pekko.stream.connectors.sqs.scaladsl.{SqsAckFlow, SqsSource => PekkoSqsSource}
import org.apache.pekko.stream.connectors.sqs.{MessageAction, SqsSourceSettings}
import org.apache.pekko.stream.scaladsl.{Flow, Source}
import org.apache.pekko.{Done,
NotUsed}

import cats.data.ValidatedNel
import cats.implicits.catsSyntaxValidatedId
import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient
import software.amazon.awssdk.services.sqs.SqsAsyncClient
import software.amazon.awssdk.services.sqs.model.{Message, MessageAttributeValue}

import com.thatdot.data.{DataFoldableFrom, DataFolderTo}
import com.thatdot.quine.app.ShutdownSwitch
import com.thatdot.quine.app.model.ingest.serialization.ContentDecoder
import com.thatdot.quine.app.model.ingest.util.AwsOps
import com.thatdot.quine.app.model.ingest.util.AwsOps.AwsBuilderOps
import com.thatdot.quine.app.model.ingest2.source.FramedSource
import com.thatdot.quine.app.routes.IngestMeter
import com.thatdot.quine.routes.{AwsCredentials, AwsRegion}
import com.thatdot.quine.util.BaseError

/** Ingest source that reads messages from an SQS queue.
  *
  * @param queueURL           URL of the queue to read from
  * @param readParallelism    Number of parallel requests configured on the SQS source settings
  * @param credentialsOpt     Optional AWS credentials
  * @param regionOpt          Optional AWS region
  * @param deleteReadMessages When true, acknowledged messages are deleted from the queue
  * @param meter              Meter fed with the character length of every message body
  * @param decoders           Content decoders applied to the bytes of every message body
  */
case class SqsSource(
  queueURL: String,
  readParallelism: Int,
  credentialsOpt: Option[AwsCredentials],
  regionOpt: Option[AwsRegion],
  deleteReadMessages: Boolean,
  meter: IngestMeter,
  decoders: Seq[ContentDecoder] = Seq(),
) extends FramedSourceProvider {
  // Available settings: see https://pekko.apache.org/docs/pekko-connectors/current/sqs.html
  implicit val client: SqsAsyncClient = SqsAsyncClient
    .builder()
    .credentials(credentialsOpt)
    .region(regionOpt)
    // Hand the SDK a builder rather than a built client: an HTTP client created from a builder
    // is owned by the service client and closed together with it, whereas a pre-built client
    // passed via `httpClient` is left open when `client.close()` runs on termination.
    .httpClientBuilder(
      NettyNioAsyncHttpClient.builder.maxConcurrency(AwsOps.httpConcurrencyPerClient),
    )
    .build()

  val src: Source[Message, ShutdownSwitch] = withKillSwitches(
    PekkoSqsSource(
      queueURL,
      SqsSourceSettings()
        .withParallelRequests(readParallelism),
    ).via(metered[Message](meter, m => m.body().length)),
  )

  /** Folds one message attribute into a map. The string and binary values are added only
    * when non-null, and the list values only when non-empty.
    */
  private def foldAttr[B](mav: MessageAttributeValue, folder: DataFolderTo[B]): B = {
    val builder = folder.mapBuilder()
    builder.add("dataType", folder.string(mav.dataType()))
    Option(mav.stringValue()).foreach(s => builder.add("stringValue", folder.string(s)))
    Option(mav.binaryValue()).foreach { binaryValue =>
      builder.add("binaryValue", folder.bytes(binaryValue.asByteArray()))
    }
    if (!mav.stringListValues().isEmpty) {
      val vecBuilder = folder.vectorBuilder()
      mav.stringListValues().forEach(s => vecBuilder.add(folder.string(s)))
      builder.add("stringListValues", vecBuilder.finish())
    }
    if (!mav.binaryListValues().isEmpty) {
      val vecBuilder = folder.vectorBuilder()
      mav.binaryListValues().forEach(bb => vecBuilder.add(folder.bytes(bb.asByteArray())))
      builder.add("binaryListValues", vecBuilder.finish())
    }
    builder.finish()
  }

  // Folds a message into a map of its identifiers, body, system attributes and message attributes
  private val messageFolder: DataFoldableFrom[Message] = new DataFoldableFrom[Message] {
    def fold[B](value: Message, folder: DataFolderTo[B]): B = {
      val builder = folder.mapBuilder()
      builder.add("messageId", folder.string(value.messageId()))
      builder.add("receiptHandle", folder.string(value.receiptHandle()))
      builder.add("md5OfBody", folder.string(value.md5OfBody()))
      builder.add("body", folder.string(value.body()))
      // NOTE(review): md5OfMessageAttributes is presumably null for messages without attributes;
      // this relies on `folder.string` tolerating null -- confirm
      builder.add("md5OfMessageAttributes", folder.string(value.md5OfMessageAttributes()))
      val attrsBuilder = folder.mapBuilder()
      value.attributes().asScala.foreach { case (k, v) =>
        attrsBuilder.add(k.name(), folder.string(v))
      }
      builder.add("attributes", attrsBuilder.finish())
      val msgAttrsBuilder = folder.mapBuilder()
      value.messageAttributes().asScala.foreach { case (name, mav) =>
        msgAttrsBuilder.add(name, foldAttr(mav, folder))
      }
      builder.add("messageAttributes", msgAttrsBuilder.finish())
      builder.finish()
    }
  }

  /** Builds the framed source; this implementation always yields a valid result. The
    * termination hook closes the SQS client.
    */
  def framedSource: ValidatedNel[BaseError, FramedSource] = {

    // Deletes each acknowledged message when `deleteReadMessages` is set; otherwise a no-op
    def ack: Flow[Message, Done, NotUsed] = if (deleteReadMessages)
      Flow[Message].map(MessageAction.delete).via(SqsAckFlow.apply(queueURL)).map {
        //TODO MAP Result result: SqsAckResult => result.getResult.
        _ =>
          Done
      }
    else Flow.fromFunction(_ => Done)

    def onTermination(): Unit = client.close()
    FramedSource[Message](
      src,
      meter,
      // Encode explicitly as UTF-8 instead of the JVM's platform-default charset, so the bytes
      // handed to the decoders do not vary with the host configuration
      message => ContentDecoder.decode(decoders, message.body().getBytes(java.nio.charset.StandardCharsets.UTF_8)),
      messageFolder,
      ack,
      () => onTermination(),
    ).valid
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/sources/StandardInputSource.scala
================================================
package com.thatdot.quine.app.model.ingest2.sources

import java.nio.charset.Charset

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.scaladsl.{Source, StreamConverters}
import org.apache.pekko.util.ByteString

import cats.data.ValidatedNel

import com.thatdot.quine.app.model.ingest.serialization.ContentDecoder
import com.thatdot.quine.app.model.ingest2.FileFormat
import com.thatdot.quine.app.model.ingest2.source._
import com.thatdot.quine.app.model.ingest2.sources.FileSource.decodedSourceFromFileStream
import com.thatdot.quine.app.model.ingest2.sources.StandardInputSource.stdInSource
import com.thatdot.quine.app.routes.IngestMeter
import com.thatdot.quine.util.BaseError

/** Ingest source that decodes standard input as a file in the given format.
  *
  * @param format          File format used to interpret the input
  * @param maximumLineSize Passed through to the file-stream decoder
  * @param charset         Character set passed through to the file-stream decoder
  * @param meter           Ingest meter passed through to the file-stream decoder
  * @param decoders        Content decoders passed through to the file-stream decoder
  */
case class StandardInputSource(
  format: FileFormat,
  maximumLineSize: Int,
  charset: Charset = DEFAULT_CHARSET,
  meter: IngestMeter,
  decoders: Seq[ContentDecoder] = Seq(),
) {

  /** Builds the decoded source on top of the standard-input byte stream, using default ingest bounds. */
  def decodedSource: ValidatedNel[BaseError, DecodedSource] =
    decodedSourceFromFileStream(
      stdInSource,
      format,
      charset,
      maximumLineSize,
      IngestBounds(),
      meter,
      decoders,
    )
}

object StandardInputSource {

  /** A byte source reading from `System.in`, with its materialized value discarded. */
  def stdInSource: Source[ByteString, NotUsed] =
    StreamConverters
      .fromInputStream(() => System.in)
      .mapMaterializedValue(_ => NotUsed)
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/sources/WebSocketClientSource.scala
================================================
package com.thatdot.quine.app.model.ingest2.sources

import java.nio.charset.Charset

import scala.concurrent.duration.DurationInt
import
scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success}

import org.apache.pekko.NotUsed
import org.apache.pekko.actor.ActorSystem
import org.apache.pekko.http.scaladsl.Http
import org.apache.pekko.http.scaladsl.model.ws._
import org.apache.pekko.http.scaladsl.settings.ClientConnectionSettings
import org.apache.pekko.stream.scaladsl.{Flow, Keep, Source}
import org.apache.pekko.util.ByteString

import cats.data.ValidatedNel
import cats.implicits.catsSyntaxValidatedId

import com.thatdot.data.DataFoldableFrom
import com.thatdot.quine.app.model.ingest.WebsocketSimpleStartupSrcDef.UpgradeFailedException
import com.thatdot.quine.app.model.ingest2.source.FramedSource
import com.thatdot.quine.app.routes.IngestMeter
import com.thatdot.quine.routes.WebsocketSimpleStartupIngest
import com.thatdot.quine.routes.WebsocketSimpleStartupIngest.KeepaliveProtocol
import com.thatdot.quine.util.BaseError

/** Framed ingest source that connects to a websocket server as a client.
  *
  * @param wsUrl             URL of the websocket endpoint
  * @param initMessages      text messages sent to the server, in order, once connected
  * @param keepaliveProtocol how (or whether) to keep an idle connection alive
  * @param charset           charset used to encode text frames (and keepalive messages) to bytes
  * @param meter             ingest meter; marked with the byte length of each complete message
  */
case class WebSocketClientSource(
  wsUrl: String,
  initMessages: Seq[String],
  keepaliveProtocol: KeepaliveProtocol,
  charset: Charset = DEFAULT_CHARSET,
  meter: IngestMeter,
)(implicit system: ActorSystem)
    extends FramedSourceProvider {

  val baseHttpClientSettings: ClientConnectionSettings = ClientConnectionSettings(system)

  /** Builds the framed source. Note that the websocket flow is pre-materialized inside this method. */
  def framedSource: ValidatedNel[BaseError, FramedSource] = {

    // Derive the websocket-specific client settings from baseHttpClientSettings per the keepalive protocol
    val baseWebsocketSettings = baseHttpClientSettings.websocketSettings
    val httpClientSettings: ClientConnectionSettings = keepaliveProtocol match {
      case WebsocketSimpleStartupIngest.PingPongInterval(intervalMillis) =>
        val pingPongSettings = baseWebsocketSettings.withPeriodicKeepAliveMaxIdle(intervalMillis.millis)
        baseHttpClientSettings.withWebsocketSettings(pingPongSettings)
      case WebsocketSimpleStartupIngest.SendMessageInterval(message, intervalMillis) =>
        val sendMessageSettings = baseWebsocketSettings
          .withPeriodicKeepAliveMaxIdle(intervalMillis.millis)
          .withPeriodicKeepAliveData(() => ByteString(message, charset))
        baseHttpClientSettings.withWebsocketSettings(sendMessageSettings)
      case WebsocketSimpleStartupIngest.NoKeepalive =>
        baseHttpClientSettings
    }

    // NB Instead of killing this source with the downstream KillSwitch, we could switch this Source.never to a
    // Source.maybe, completing it with None to kill the connection -- this is closer to the docs for
    // webSocketClientFlow
    val outboundMessages: Source[TextMessage.Strict, NotUsed] =
      Source
        .fromIterator(() => initMessages.iterator)
        .map(text => TextMessage(text))
        .concat(Source.never)
        .named("websocket-ingest-outbound-messages")

    val wsFlow: Flow[Message, Message, Future[WebSocketUpgradeResponse]] =
      Http()
        .webSocketClientFlow(WebSocketRequest(wsUrl), settings = httpClientSettings)
        .named("websocket-ingest-client")

    // Pre-materialize so the upgrade response future is available alongside the inbound message source
    val (websocketUpgraded: Future[WebSocketUpgradeResponse], websocketSource: Source[Message, NotUsed]) =
      outboundMessages
        .viaMat(wsFlow)(Keep.right)
        .preMaterialize()

    // Collapse each (possibly streamed) websocket message into one ByteString
    val inboundBytes: Source[ByteString, NotUsed] = websocketSource.flatMapConcat {
      case textMessage: TextMessage =>
        textMessage.textStream
          .fold("")((soFar, chunk) => soFar + chunk)
          .map(wholeText => ByteString.fromString(wholeText, charset))
      case binaryMessage: BinaryMessage =>
        binaryMessage.dataStream.fold(ByteString.empty)((soFar, chunk) => soFar concat chunk)
    }

    val upgradeOutcome: Future[Source[ByteString, NotUsed]] = websocketUpgraded.transform {
      // if the websocket upgrade fails, return an already-failed Source
      case Success(InvalidUpgradeResponse(_, cause)) => Failure(new UpgradeFailedException(cause))
      case Failure(ex) => Failure(new UpgradeFailedException(ex))
      // the websocket upgrade succeeded: proceed with setting up the ingest stream source
      case Success(ValidUpgrade(_, _)) => Success(inboundBytes)
    }(ExecutionContext.parasitic)

    val source: Source[ByteString, NotUsed] =
      Source
        .futureSource(upgradeOutcome)
        .mapMaterializedValue(_ => NotUsed) // TBD .mapMaterializedValue(_.flatten)
        .via(metered[ByteString](meter, _.length))

    val killableSource = withKillSwitches(source.via(transcodingFlow(charset)))

    FramedSource[ByteString](
      killableSource,
      meter,
      _.toArrayUnsafe(),
      DataFoldableFrom.byteStringDataFoldable,
    ).valid
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/sources/WebSocketFileUploadSource.scala
================================================
package com.thatdot.quine.app.model.ingest2.sources

import scala.util.{Success, Try}

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.scaladsl.{Flow, Sink, Source}
import org.apache.pekko.util.ByteString

import com.thatdot.data.DataFoldableFrom
import com.thatdot.quine.app.ShutdownSwitch
import com.thatdot.quine.app.model.ingest2.source.DecodedSource
import com.thatdot.quine.app.model.ingest2.sources
import com.thatdot.quine.app.routes.IngestMeter

/** A hub exposing a sink to push elements into and a source to read them back out of. */
trait PushHub {
  type Element
  def sink: Sink[Element, NotUsed]
  val source: Source[Element, NotUsed]
}

/** Pairs a byte-decoding flow with the foldable instance for the decoded element type. */
trait DecodingFoldableFrom {
  type Element
  def decodingFlow: Flow[ByteString, Element, NotUsed]
  val dataFoldableFrom: DataFoldableFrom[Element]
}

trait DecodingHub extends PushHub with DecodingFoldableFrom

/** Decoded source whose elements come, already decoded, from a [[DecodingHub]].
  *
  * Frames and decoded values are the same type here: the hub's `Element`.
  */
class WebSocketFileUploadSource(
  meter: IngestMeter,
  val decodingHub: DecodingHub,
) extends DecodedSource(meter) {
  override type Decoded = decodingHub.Element
  override type Frame = decodingHub.Element
  override val foldableFrame: DataFoldableFrom[Frame] = decodingHub.dataFoldableFrom
  override val foldable: DataFoldableFrom[Decoded] = decodingHub.dataFoldableFrom

  // We can't meaningfully pass along frames we fail to decode, since we only get the element if decoding is successful
  override def content(input: Frame): Array[Byte] = Array.emptyByteArray

  /** Stream of decoded values. This stream must already be metered. */
  override def stream: Source[(() => Try[Decoded], Frame), ShutdownSwitch] = {
    // Every element is already decoded, so its "decode" thunk always succeeds with the element itself
    val alreadyDecoded: Source[(() => Try[Decoded], Frame), NotUsed] =
      decodingHub.source.map(decoded => (() => Success(decoded), decoded))
    sources.withKillSwitches(alreadyDecoded)
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/ingest2/sources/package.scala
================================================
package com.thatdot.quine.app.model.ingest2

import java.nio.charset.{Charset, StandardCharsets}

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.connectors.text.scaladsl.TextFlow
import org.apache.pekko.stream.scaladsl.{Flow, Keep, Source}
import org.apache.pekko.stream.{KillSwitches, UniqueKillSwitch}
import org.apache.pekko.util.ByteString

import com.typesafe.scalalogging.LazyLogging

import com.thatdot.quine.app.model.ingest.serialization.ContentDecoder
import com.thatdot.quine.app.model.ingest2.source.IngestBounds
import com.thatdot.quine.app.routes.IngestMeter
import com.thatdot.quine.app.{PekkoKillSwitch, ShutdownSwitch}

/** Shared stream-building helpers for ingest sources. */
package object sources extends LazyLogging {

  /** Appends a single kill switch to `src` and exposes it as the materialized [[ShutdownSwitch]]. */
  def withKillSwitches[A](src: Source[A, NotUsed]): Source[A, ShutdownSwitch] = {
    val killable = src.viaMat(KillSwitches.single)(Keep.right)
    killable.mapMaterializedValue((switch: UniqueKillSwitch) => PekkoKillSwitch(switch))
  }

  val DEFAULT_CHARSET: Charset = StandardCharsets.UTF_8
  val DEFAULT_MAXIMUM_LINE_SIZE: Int = 1000

  /** Flow applying the given content decoders to each chunk of bytes. */
  def decompressingFlow(decoders: Seq[ContentDecoder]): Flow[ByteString, ByteString, NotUsed] =
    ContentDecoder.decoderFlow(decoders)

  /** Pass-through flow that marks `meter` with `sizeOf(element)` for every element, via a wire tap. */
  def metered[A](meter: IngestMeter, sizeOf: A => Int): Flow[A, A, NotUsed] =
    Flow[A].wireTap(element => meter.mark(sizeOf(element)))

  /** Flow that leaves UTF-8, ISO-8859-1 and US-ASCII input untouched and transcodes anything else to UTF-8
    * (logging a warning when it does so).
    */
  def transcodingFlow(charset: Charset): Flow[ByteString, ByteString, NotUsed] = {
    val passesThrough =
      charset == StandardCharsets.UTF_8 ||
      charset == StandardCharsets.ISO_8859_1 ||
      charset == StandardCharsets.US_ASCII
    if (passesThrough) Flow[ByteString]
    else {
      logger.warn(
        s"Charset-sensitive ingest does not directly support $charset - transcoding through UTF-8 first",
      )
      TextFlow.transcoding(charset, StandardCharsets.UTF_8)
    }
  }

  /** Flow that skips the first `startAtOffset` elements and, when a limit is set, takes at most that many. */
  def boundingFlow[A](ingestBounds: IngestBounds): Flow[A, A, NotUsed] =
    ingestBounds.ingestLimit match {
      case Some(limit) => Flow[A].drop(ingestBounds.startAtOffset).take(limit)
      case None => Flow[A].drop(ingestBounds.startAtOffset)
    }

}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs/ConsoleLoggingOutput.scala
================================================
package com.thatdot.quine.app.model.outputs

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.scaladsl.Flow

import com.thatdot.common.logging.Log.{
  LazySafeLogging,
  LogConfig,
  Safe,
  SafeInterpolator,
  SafeLoggableInterpolator,
  SafeLogger,
}
import com.thatdot.quine.graph.{CypherOpsGraph, MasterStream, NamespaceId, StandingQueryResult}
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef.PrintToStandardOut

import ConsoleLoggingOutput.{printLogger, printLoggerNonBlocking}

/** Standing query output that logs each result as compact JSON at the configured level and log mode. */
class ConsoleLoggingOutput(val config: PrintToStandardOut)(implicit
  private val logConfig: LogConfig,
) extends OutputRuntime
    with LazySafeLogging {

  def flow(
    name: String,
    inNamespace: NamespaceId,
    output: StandingQueryResultOutputUserDef,
    graph: CypherOpsGraph,
  ): Flow[StandingQueryResult, MasterStream.SqResultsExecToken, NotUsed] = {
    val PrintToStandardOut(logLevel, logMode, structure) = config
    val token = execToken(name, inNamespace)

    import PrintToStandardOut._

    // The log mode picks which logger receives the results
    val resultLogger: SafeLogger = logMode match {
      case LogMode.Complete => printLogger
      case LogMode.FastSampling => printLoggerNonBlocking
    }
    // The log level picks which method on that logger is used
    val logFn: SafeInterpolator => Unit = logLevel match {
      case LogLevel.Trace => resultLogger.trace(_)
      case LogLevel.Debug => resultLogger.debug(_)
      case LogLevel.Info => resultLogger.info(_)
      case LogLevel.Warn => resultLogger.warn(_)
      case LogLevel.Error => resultLogger.error(_)
    }

    Flow[StandingQueryResult].map { result =>
      // NB we are using `Safe` here despite `result` potentially containing PII because the entire purpose of this
      // output is to log SQ results. If the user has configured this output, they have accepted the risk of PII
      // in their logs.
      logFn(
        log"Standing query `${Safe(name)}` match: ${Safe(result.toJson(structure)(graph.idProvider, logConfig).noSpaces)}",
      )
      token
    }
  }
}

object ConsoleLoggingOutput {
  // Invariant: these keys must be fixed to the names of the loggers in Quine App's application.conf
  private val printLogger = SafeLogger("thatdot.StandingQueryResults")
  private val printLoggerNonBlocking = SafeLogger("thatdot.StandingQueryResultsSampled")
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs/CypherQueryOutput.scala
================================================
package com.thatdot.quine.app.model.outputs

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.scaladsl.Flow

import com.thatdot.common.logging.Log.{LazySafeLogging, LogConfig, Safe, SafeLoggableInterpolator}
import com.thatdot.quine.app.util.AtLeastOnceCypherQuery
import com.thatdot.quine.compiler
import com.thatdot.quine.graph.MasterStream.SqResultsExecToken
import com.thatdot.quine.graph.cypher.QueryContext
import com.thatdot.quine.graph.{CypherOpsGraph, MasterStream, NamespaceId, StandingQueryResult, cypher}
import com.thatdot.quine.model.QuineValue
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef.CypherQuery
import com.thatdot.quine.serialization.ProtobufSchemaCache
import com.thatdot.quine.util.Log.implicits._
import com.thatdot.quine.util.PekkoStreams.wireTapFirst

/** Standing query output that runs a Cypher query for every result and optionally forwards the query's rows
  * to a further ("andThen") output.
  *
  * @param config                the user-supplied Cypher output definition
  * @param createRecursiveOutput factory used to build the flow for the `andThen` output, when one is set
  */
class CypherQueryOutput(
  val config: CypherQuery,
  val createRecursiveOutput: (
    String,
    NamespaceId,
    StandingQueryResultOutputUserDef,
    CypherOpsGraph,
    ProtobufSchemaCache,
    LogConfig,
  ) => Flow[StandingQueryResult, SqResultsExecToken, NotUsed],
)(implicit
  private val logConfig: LogConfig,
  private val protobufSchemaCache: ProtobufSchemaCache,
) extends OutputRuntime
    with LazySafeLogging {

  def flow(
    name: String,
    inNamespace: NamespaceId,
    output: StandingQueryResultOutputUserDef,
    graph: CypherOpsGraph,
  ): Flow[StandingQueryResult, MasterStream.SqResultsExecToken, NotUsed] = {
    val token = execToken(name, inNamespace)
    val CypherQuery(query, parameter, parallelism, andThen, allowAllNodeScan, shouldRetry, structure) = config

    val compiledQuery @ cypher.CompiledQuery(_, queryAst, _, _, _) =
      compiler.cypher.compile(query, unfixedParameters = Seq(parameter))

    // TODO: When in the initial set of SQ outputs, these should be tested before the SQ is registered!
    if (queryAst.canContainAllNodeScan && !allowAllNodeScan) {
      throw new RuntimeException(
        "Cypher query may contain full node scan; re-write without possible full node scan, or pass allowAllNodeScan true. " +
        s"The provided query was: $query",
      )
    }
    if (!queryAst.isIdempotent && shouldRetry) {
      logger.warn(
        safe"""Could not verify that the provided Cypher query is idempotent. If timeouts or external system errors
              |occur, query execution may be retried and duplicate data may be created. To avoid this
              |set shouldRetry = false in the Standing Query output""".cleanLines,
      )
    }

    // Used when there is no `andThen`: warn once (on the first row), then emit only tokens
    def terminalFlow: Flow[(StandingQueryResult.Meta, cypher.QueryContext), SqResultsExecToken, NotUsed] =
      wireTapFirst[(StandingQueryResult.Meta, cypher.QueryContext)](tup =>
        logger.warn(
          safe"""Unused Cypher Standing Query output for Standing Query output:
                |${Safe(name)} with: ${Safe(tup._2.environment.size)} columns.
                |Did you mean to specify `andThen`?""".cleanLines,
        ),
      ).map(_ => token)

    // Used when `andThen` is set: repackage each row as a StandingQueryResult and hand it to the next output
    def chainedFlow(
      thenOutput: StandingQueryResultOutputUserDef,
    ): Flow[(StandingQueryResult.Meta, cypher.QueryContext), SqResultsExecToken, NotUsed] =
      Flow[(StandingQueryResult.Meta, cypher.QueryContext)]
        .map { case (meta, qc) =>
          val newData = qc.environment.map { case (keySym, cypherVal) =>
            keySym.name -> cypher.Expr.toQuineValue(cypherVal).getOrElse {
              logger.warn(
                log"""Cypher Value: ${cypherVal} could not be represented as a Quine value in Standing
                     |Query output: ${Safe(name)}. Using `null` instead.""".cleanLines,
              )
              QuineValue.Null
            }
          }
          StandingQueryResult(meta, newData)
        }
        .via(createRecursiveOutput(name, inNamespace, thenOutput, graph, protobufSchemaCache, logConfig))

    val andThenFlow: Flow[(StandingQueryResult.Meta, cypher.QueryContext), SqResultsExecToken, NotUsed] =
      (andThen match {
        case None => terminalFlow
        case Some(thenOutput) => chainedFlow(thenOutput)
      }).named(s"sq-output-andthen-for-$name")

    lazy val atLeastOnceCypherQuery =
      AtLeastOnceCypherQuery(compiledQuery, parameter, s"sq-output-action-query-for-$name")

    Flow[StandingQueryResult]
      .flatMapMerge(
        breadth = parallelism,
        result => {
          val value: cypher.Value = cypher.Expr.fromQuineValue(result.toQuineValueMap(structure))

          val cypherResultRows =
            if (shouldRetry) atLeastOnceCypherQuery.stream(value, inNamespace)(graph)
            else
              graph.cypherOps
                .query(
                  compiledQuery,
                  namespace = inNamespace,
                  // `atTime` is `None` because we only want current time here—this is where we would
                  // pass in `atTime` for historically aware output queries (if we chose to do that)
                  atTime = None,
                  parameters = Map(parameter -> value),
                )
                .results

          // Pair each row (keyed by the query's column names) with the metadata of the triggering result
          cypherResultRows.map { resultRow =>
            val rowContext = QueryContext(compiledQuery.columns.zip(resultRow).toMap)
            (result.meta, rowContext)
          }
        },
      )
      .via(andThenFlow)
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs/DropOutput.scala
================================================
package com.thatdot.quine.app.model.outputs

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.scaladsl.Flow

import com.thatdot.quine.graph.{CypherOpsGraph, MasterStream, NamespaceId, StandingQueryResult}
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef

/** Standing query output that discards every result, emitting only the execution token. */
object DropOutput extends OutputRuntime {

  def flow(
    name: String,
    inNamespace: NamespaceId,
    output: StandingQueryResultOutputUserDef,
    graph: CypherOpsGraph,
  ): Flow[StandingQueryResult, MasterStream.SqResultsExecToken, NotUsed] = {
    val token = execToken(name, inNamespace)
    Flow[StandingQueryResult].map(_ => token)
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs/FileOutput.scala
================================================
package com.thatdot.quine.app.model.outputs

import java.nio.file.{Paths, StandardOpenOption}

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.scaladsl.{FileIO, Flow}
import org.apache.pekko.util.ByteString

import com.thatdot.common.logging.Log.LogConfig
import com.thatdot.quine.graph.{CypherOpsGraph, MasterStream, NamespaceId, StandingQueryResult}
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef.WriteToFile

/** Standing query output that appends each result to a file as one line of compact JSON. */
class FileOutput(val config: WriteToFile)(implicit private val logConfig: LogConfig) extends OutputRuntime {

  def flow(
    name: String,
    inNamespace: NamespaceId,
    output: StandingQueryResultOutputUserDef,
    graph: CypherOpsGraph,
  ): Flow[StandingQueryResult, MasterStream.SqResultsExecToken, NotUsed] = {
    val WriteToFile(path, structure) = config
    val token = execToken(name, inNamespace)

    // Opens the target for writing, creating it if absent and appending if present
    val fileWriter = FileIO
      .toPath(
        Paths.get(path),
        Set(StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.APPEND),
      )
      .named(s"sq-output-file-writer-for-$name")

    Flow[StandingQueryResult]
      .map { result =>
        val jsonLine = result.toJson(structure)(graph.idProvider, logConfig).noSpaces + "\n"
        ByteString(jsonLine)
      }
      .alsoTo(fileWriter)
      .map(_ => token)
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs/KafkaOutput.scala
================================================
package com.thatdot.quine.app.model.outputs

import org.apache.pekko.NotUsed
import org.apache.pekko.kafka.scaladsl.{Producer => KafkaProducer}
import org.apache.pekko.kafka.{ProducerMessage, ProducerSettings}
import org.apache.pekko.stream.scaladsl.Flow

import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.ByteArraySerializer

import com.thatdot.common.logging.Log.{LazySafeLogging, LogConfig, Safe, SafeLoggableInterpolator}
import com.thatdot.common.security.Secret
import com.thatdot.quine.app.StandingQueryResultOutput.serialized
import com.thatdot.quine.graph.MasterStream.SqResultsExecToken
import com.thatdot.quine.graph.{CypherOpsGraph, NamespaceId, StandingQueryResult}
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef.WriteToKafka
import com.thatdot.quine.routes.{SaslJaasConfig, StandingQueryResultOutputUserDef}
import com.thatdot.quine.serialization.ProtobufSchemaCache
import com.thatdot.quine.util.Log.implicits._

/** Standing query output that serializes each result and publishes it to a Kafka topic. */
class KafkaOutput(val config: WriteToKafka)(implicit
  private val logConfig: LogConfig,
  private val protobufSchemaCache: ProtobufSchemaCache,
) extends OutputRuntime
    with LazySafeLogging {

  import Secret.Unsafe._

  /** Log warnings for any kafkaProperties keys that will be overridden by typed Secret params. */
  private def warnOnOverriddenProperties(): Unit = {
    // Property names for which a typed secret parameter has actually been supplied
    val typedSecretKeys: Set[String] = List(
      config.sslKeystorePassword.map(_ => "ssl.keystore.password"),
      config.sslTruststorePassword.map(_ => "ssl.truststore.password"),
      config.sslKeyPassword.map(_ => "ssl.key.password"),
      config.saslJaasConfig.map(_ => "sasl.jaas.config"),
    ).flatten.toSet

    for (key <- config.kafkaProperties.keySet.intersect(typedSecretKeys))
      logger.warn(
        safe"Kafka property '${Safe(key)}' in kafkaProperties will be overridden by typed Secret parameter. " +
        safe"Remove '${Safe(key)}' from kafkaProperties to suppress this warning.",
      )
  }

  /** Merge typed secret params into Kafka properties. Typed params take precedence. */
  private def effectiveProperties: Map[String, String] = {
    val secretProps: Map[String, String] = List(
      config.sslKeystorePassword.map(password => "ssl.keystore.password" -> password.unsafeValue),
      config.sslTruststorePassword.map(password => "ssl.truststore.password" -> password.unsafeValue),
      config.sslKeyPassword.map(password => "ssl.key.password" -> password.unsafeValue),
      config.saslJaasConfig.map(sasl => "sasl.jaas.config" -> SaslJaasConfig.toJaasConfigString(sasl)),
    ).flatten.toMap

    // Right-hand operand wins on key collisions, so the typed secrets override kafkaProperties
    config.kafkaProperties ++ secretProps
  }

  override def flow(
    name: String,
    inNamespace: NamespaceId,
    output: StandingQueryResultOutputUserDef,
    graph: CypherOpsGraph,
  ): Flow[StandingQueryResult, SqResultsExecToken, NotUsed] = {
    val WriteToKafka(
      topic,
      bootstrapServers,
      format,
      kafkaProperties,
      _,
      _,
      _,
      _,
      structure,
    ) = config

    warnOnOverriddenProperties()

    val token = execToken(name, inNamespace)

    val baseSettings = ProducerSettings(
      graph.system,
      new ByteArraySerializer,
      new ByteArraySerializer,
    )
    val settings = baseSettings
      .withBootstrapServers(bootstrapServers)
      .withProperties(effectiveProperties)

    // Log only non-secret kafkaProperties, not effectiveProperties
    config.saslJaasConfig.foreach { sasl =>
      logger.info(safe"Kafka SASL config: ${Safe(SaslJaasConfig.toRedactedString(sasl))}")
    }
    logger.info(safe"Writing to kafka with properties ${Safe(kafkaProperties)}")

    val producerFlow = KafkaProducer.flexiFlow(settings).named(s"sq-output-kafka-producer-for-$name")

    serialized(name, format, graph, structure)
      .map { bytes =>
        // Records carry a value only; no key is set
        val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, bytes)
        ProducerMessage.single(record)
      }
      .via(producerFlow)
      .map(_ => token)
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs/KinesisOutput.scala
================================================
package com.thatdot.quine.app.model.outputs

import scala.util.Random

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.connectors.kinesis.KinesisFlowSettings
import org.apache.pekko.stream.connectors.kinesis.scaladsl.KinesisFlow
import org.apache.pekko.stream.scaladsl.Flow

import
software.amazon.awssdk.core.SdkBytes
import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient
import software.amazon.awssdk.services.kinesis.model.PutRecordsRequestEntry

import com.thatdot.common.logging.Log.{LazySafeLogging, LogConfig}
import com.thatdot.quine.app.StandingQueryResultOutput.serialized
import com.thatdot.quine.app.model.ingest.util.AwsOps
import com.thatdot.quine.app.model.ingest.util.AwsOps.AwsBuilderOps
import com.thatdot.quine.graph.{CypherOpsGraph, MasterStream, NamespaceId, StandingQueryResult}
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef.WriteToKinesis
import com.thatdot.quine.serialization.ProtobufSchemaCache

/** Standing query output that serializes each result and puts it onto a Kinesis stream. */
class KinesisOutput(val config: WriteToKinesis)(implicit
  private val logConfig: LogConfig,
  private val protobufSchemaCache: ProtobufSchemaCache,
) extends OutputRuntime
    with LazySafeLogging {

  def flow(
    name: String,
    inNamespace: NamespaceId,
    output: StandingQueryResultOutputUserDef,
    graph: CypherOpsGraph,
  ): Flow[StandingQueryResult, MasterStream.SqResultsExecToken, NotUsed] = {
    val WriteToKinesis(
      credentialsOpt,
      regionOpt,
      streamName,
      format,
      kinesisParallelism,
      kinesisMaxBatchSize,
      kinesisMaxRecordsPerSecond,
      kinesisMaxBytesPerSecond,
      structure,
    ) = config
    val token = execToken(name, inNamespace)

    val kinesisAsyncClient: KinesisAsyncClient = KinesisAsyncClient
      .builder()
      .credentials(credentialsOpt)
      .region(regionOpt)
      .httpClient(NettyNioAsyncHttpClient.builder.maxConcurrency(AwsOps.httpConcurrencyPerClient).build())
      .build()

    // Close the client when the actor system terminates
    graph.system.registerOnTermination(kinesisAsyncClient.close())

    // Start from the connector defaults and apply each override only when it was configured
    val settings = {
      val defaults = KinesisFlowSettings.create()
      val withParallelism =
        kinesisParallelism.foldLeft(defaults)((acc, parallelism) => acc.withParallelism(parallelism))
      val withBatchSize =
        kinesisMaxBatchSize.foldLeft(withParallelism)((acc, batchSize) => acc.withMaxBatchSize(batchSize))
      val withRecordRate =
        kinesisMaxRecordsPerSecond.foldLeft(withBatchSize)((acc, perSecond) => acc.withMaxRecordsPerSecond(perSecond))
      kinesisMaxBytesPerSecond.foldLeft(withRecordRate)((acc, perSecond) => acc.withMaxBytesPerSecond(perSecond))
    }

    val kinesisFlow = KinesisFlow(
      streamName,
      settings,
    )(kinesisAsyncClient).named(s"sq-output-kinesis-producer-for-$name")

    serialized(name, format, graph, structure)
      .map { bytes =>
        // The partition key is a fixed placeholder; a random 128-bit explicit hash key is set on every record
        val entry = PutRecordsRequestEntry.builder()
        entry.data(SdkBytes.fromByteArray(bytes))
        entry.partitionKey("undefined")
        entry.explicitHashKey(BigInt(128, Random).toString)
        entry.build()
      }
      .via(kinesisFlow)
      .map(_ => token)
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs/OutputRuntime.scala
================================================
package com.thatdot.quine.app.model.outputs

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.scaladsl.Flow

import com.thatdot.quine.graph.MasterStream.SqResultsExecToken
import com.thatdot.quine.graph.{
  CypherOpsGraph,
  NamespaceId,
  StandingQueryResult,
  StandingQueryResultStructure,
  namespaceToString,
}
import com.thatdot.quine.routes.{StandingQueryOutputStructure, StandingQueryResultOutputUserDef}

/** Common interface for standing query result outputs: each one turns results into execution tokens. */
trait OutputRuntime {

  import scala.language.implicitConversions

  /** Implicitly converts the API-level output structure into the graph-level result structure. */
  implicit def sqResultOutputStructureConversion(
    structure: StandingQueryOutputStructure,
  ): StandingQueryResultStructure = structure match {
    case StandingQueryOutputStructure.Bare() => StandingQueryResultStructure.Bare()
    case StandingQueryOutputStructure.WithMetadata() => StandingQueryResultStructure.WithMetaData()
  }

  /** Token labelled with the standing query output's name and namespace. */
  final def execToken(name: String, namespaceId: NamespaceId): SqResultsExecToken = {
    val label = s"SQ: $name in: ${namespaceToString(namespaceId)}"
    SqResultsExecToken(label)
  }

  /** Builds the flow that consumes standing query results for the output called `name` in `inNamespace`. */
  def flow(
    name: String,
    inNamespace: NamespaceId,
    output: StandingQueryResultOutputUserDef,
    graph: CypherOpsGraph,
  ): Flow[StandingQueryResult, SqResultsExecToken, NotUsed]
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs/PostToEndpointOutput.scala
================================================
package com.thatdot.quine.app.model.outputs
import scala.util.{Failure, Success}

import org.apache.pekko.NotUsed
import org.apache.pekko.actor.ActorSystem
import org.apache.pekko.http.scaladsl.Http
import org.apache.pekko.http.scaladsl.model.MediaTypes.`application/json`
import org.apache.pekko.http.scaladsl.model.headers.RawHeader
import org.apache.pekko.http.scaladsl.model.{HttpEntity, HttpMethods, HttpRequest}
import org.apache.pekko.http.scaladsl.unmarshalling.Unmarshal
import org.apache.pekko.stream.scaladsl.Flow

import com.thatdot.common.logging.Log.{LazySafeLogging, LogConfig, Safe, SafeLoggableInterpolator}
import com.thatdot.common.security.Secret
import com.thatdot.quine.app.util.QuineLoggables._
import com.thatdot.quine.graph.{CypherOpsGraph, MasterStream, NamespaceId, StandingQueryResult}
import com.thatdot.quine.model.{QuineIdProvider, QuineValue}
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef.PostToEndpoint
import com.thatdot.quine.util.Log.implicits._

/** Standing query output that POSTs each result, as a JSON body, to the URL in `config`.
  *
  * Failures (non-success status codes and failed requests) are logged; the execution token is
  * still emitted, so the stream keeps running.
  */
class PostToEndpointOutput(val config: PostToEndpoint)(implicit private val logConfig: LogConfig)
    extends OutputRuntime
    with LazySafeLogging {

  /** Builds the flow issuing one HTTP POST per standing query result, with up to the configured
    * `parallelism` requests in flight.
    *
    * @param name        name of the standing query output (used for the execution token)
    * @param inNamespace namespace the standing query runs in
    * @param output      user-supplied output definition (not read here; `config` is used instead)
    * @param graph       supplies the actor system and the id provider
    */
  def flow(
    name: String,
    inNamespace: NamespaceId,
    output: StandingQueryResultOutputUserDef,
    graph: CypherOpsGraph,
  ): Flow[StandingQueryResult, MasterStream.SqResultsExecToken, NotUsed] = {
    val PostToEndpoint(url, parallelism, onlyPositiveMatchData, headers, structure) = config
    val token = execToken(name, inNamespace)

    // TODO: use a host connection pool

    implicit val system: ActorSystem = graph.system
    implicit val idProvider: QuineIdProvider = graph.idProvider
    val http = Http()
    // Every future callback below runs on the actor system's dispatcher.
    val dispatcher = system.dispatcher

    import Secret.Unsafe._
    val customHeaders: List[RawHeader] =
      headers.map { case (headerName, secret) => RawHeader(headerName, secret.unsafeValue) }.toList

    Flow[StandingQueryResult]
      .mapAsync(parallelism) { (result: StandingQueryResult) =>
        // Either just the matched data, or the whole result rendered with the configured structure.
        val payload =
          if (onlyPositiveMatchData) QuineValue.toJson(QuineValue.Map(result.data)).noSpaces
          else result.toJson(structure).noSpaces

        val request = HttpRequest(
          method = HttpMethods.POST,
          uri = url,
          headers = customHeaders,
          entity = HttpEntity(contentType = `application/json`, payload),
        )

        val posted = http
          .singleRequest(request)
          .flatMap { response =>
            if (!response.status.isSuccess()) {
              // Read the error body so it can be logged alongside the status.
              Unmarshal(response)
                .to[String]
                .andThen {
                  case Success(responseBody) =>
                    logger.error(
                      log"""Failed to POST $result to ${Safe(url)}.
                           |Response was ${response.status}
                           |""".cleanLines + log": ${Safe(responseBody)}",
                    )
                  case Failure(err) =>
                    logger.error(
                      log"""Failed to deserialize error response from POST $result to ${Safe(url)}.
                           |Response status was ${response.status}""".cleanLines withException err,
                    )
                }(dispatcher)
            } else {
              // The response body is not needed on success; drain it.
              response.entity
                .discardBytes()
                .future()
            }
          }(dispatcher)
          .map(_ => token)(dispatcher)

        // TODO: principled error handling
        posted.recover { case err =>
          logger.error(log"Failed to POST standing query result" withException err)
          token
        }(dispatcher)
      }
  }
}
================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs/QuinePatternOutput.scala
================================================
package com.thatdot.quine.app.model.outputs

import scala.collection.immutable.SortedMap
import scala.concurrent.Promise

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.scaladsl.{Flow, Source}

import com.thatdot.common.logging.Log.{LazySafeLogging, LogConfig, Safe, SafeLoggableInterpolator}
import com.thatdot.quine.graph.MasterStream.SqResultsExecToken
import com.thatdot.quine.graph.cypher.quinepattern.CypherAndQuineHelpers.quineValueToPatternValue
import com.thatdot.quine.graph.cypher.quinepattern.{
  OutputTarget,
  QueryContext => QPQueryContext,
  QueryPlanner,
  RuntimeMode,
}
import com.thatdot.quine.graph.cypher.{Expr, QueryContext}
import com.thatdot.quine.graph.quinepattern.{LoadQuery, QuinePatternOpsGraph}
import com.thatdot.quine.graph.{CypherOpsGraph, MasterStream, NamespaceId, StandingQueryId, StandingQueryResult}
import
com.thatdot.quine.language.{ast => Pattern}
import com.thatdot.quine.model.QuineValue
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef.QuinePatternQuery
import com.thatdot.quine.serialization.ProtobufSchemaCache
import com.thatdot.quine.util.Log.implicits._

/** Standing query output that runs a QuinePattern query for every standing query result and
  * optionally forwards the query's rows to a follow-up output (`config.andThen`).
  *
  * Constructing an instance fails unless the JVM system property `qp.enabled` parses to `true`.
  *
  * @param config                the query text, parallelism and optional follow-up output
  * @param createRecursiveOutput factory used to build the flow for the follow-up output
  */
class QuinePatternOutput(
  config: QuinePatternQuery,
  createRecursiveOutput: (
    String,
    NamespaceId,
    StandingQueryResultOutputUserDef,
    CypherOpsGraph,
    ProtobufSchemaCache,
    LogConfig,
  ) => Flow[StandingQueryResult, SqResultsExecToken, NotUsed],
)(implicit
  private val logConfig: LogConfig,
  private val protobufSchemaCache: ProtobufSchemaCache,
) extends OutputRuntime
    with LazySafeLogging {

  /** `Some(flag)` when the `qp.enabled` system property is set and parses as a boolean, else `None`. */
  val maybeIsQPEnabled: Option[Boolean] = for {
    pv <- Option(System.getProperty("qp.enabled"))
    b <- pv.toBooleanOption
  } yield b

  // Constructor-time guard: anything other than an explicit `true` aborts construction.
  maybeIsQPEnabled match {
    case Some(true) => ()
    case _ => sys.error("Quine pattern must be enabled using -Dqp.enabled=true to use this feature.")
  }

  /** Builds the flow that plans `config.query` once and then, per incoming result, submits the
    * plan to the graph's loader and streams the collected rows into the follow-up flow.
    *
    * Throws `IllegalArgumentException` when the query cannot be planned.
    */
  override def flow(
    name: String,
    inNamespace: NamespaceId,
    output: StandingQueryResultOutputUserDef,
    graph: CypherOpsGraph,
  ): Flow[StandingQueryResult, MasterStream.SqResultsExecToken, NotUsed] = {
    val token = execToken(name, inNamespace)

    // Planning happens once, when the flow is built, not per element.
    val planned = QueryPlanner.planFromString(config.query) match {
      case Right(p) => p
      case Left(error) => throw new IllegalArgumentException(s"Failed to compile query: $error")
    }

    // What to do with each row the query returns: hand it to the follow-up output when one is
    // configured, otherwise just emit the execution token.
    val andThenFlow: Flow[(StandingQueryResult.Meta, QueryContext), SqResultsExecToken, NotUsed] =
      (config.andThen match {
        case Some(thenOutput) =>
          Flow[(StandingQueryResult.Meta, QueryContext)]
            .map { case (meta: StandingQueryResult.Meta, qc: QueryContext) =>
              // Rebuild a StandingQueryResult from the row; values with no Quine representation
              // are logged and replaced by `QuineValue.Null`.
              val newData = qc.environment.map { case (keySym, cypherVal) =>
                keySym.name -> Expr.toQuineValue(cypherVal).getOrElse {
                  logger.warn(
                    log"""Cypher Value: ${cypherVal} could not be represented as a Quine value in Standing
                         |Query output: ${Safe(name)}. Using `null` instead.""".cleanLines,
                  )
                  QuineValue.Null
                }
              }
              StandingQueryResult(meta, newData)
            }
            .via(createRecursiveOutput(name, inNamespace, thenOutput, graph, protobufSchemaCache, logConfig))
        case None =>
          Flow[(StandingQueryResult.Meta, QueryContext)].map(_ => token)
      }).named(s"sq-output-andthen-for-$name")

    Flow[StandingQueryResult]
      .flatMapMerge(
        breadth = config.parallelism,
        result => {
          // The incoming result is exposed to the query as the single parameter `that`, a map
          // with `meta.isPositiveMatch` and the result's `data`.
          val params = Map(
            Symbol("that") -> Pattern.Value.Map(
              SortedMap(
                Symbol("meta") -> Pattern.Value.Map(
                  SortedMap(
                    Symbol("isPositiveMatch") -> (if (result.meta.isPositiveMatch) Pattern.Value.True
                                                  else Pattern.Value.False),
                  ),
                ),
                Symbol("data") -> Pattern.Value.Map(
                  SortedMap.from(result.data.map(p => Symbol(p._1) -> quineValueToPatternValue(p._2))),
                ),
              ),
            ),
          )

          // NOTE(review): unchecked cast — this output assumes the graph is a QuinePatternOpsGraph.
          val hack = graph.asInstanceOf[QuinePatternOpsGraph]
          implicit val ec = hack.system.dispatcher

          // Use promise-based EagerCollector
          val promise = Promise[Seq[QPQueryContext]]()
          hack.getLoader ! LoadQuery(
            StandingQueryId.fresh(),
            planned.plan,
            RuntimeMode.Eager,
            params,
            inNamespace,
            OutputTarget.EagerCollector(promise),
            planned.returnColumns,
            planned.outputNameMapping,
            queryName = Some(name),
            // `atTime` is `None` by default (current time)—this is where we would
            // pass in `atTime` for historically aware output queries (if we chose to do that)
          )

          // Emit the collected rows once the promise completes.
          Source
            .futureSource(promise.future.map(results => Source(results)))
            .mapMaterializedValue(_ => NotUsed)
            .via(Flow[QPQueryContext].map { qpCtx =>
              // Convert QPQueryContext (pattern values) to QueryContext (cypher values)
              import com.thatdot.quine.graph.cypher.quinepattern.QuinePatternHelpers.patternValueToCypherValue
              val cypherEnv: Map[Symbol, com.thatdot.quine.graph.cypher.Value] = qpCtx.bindings.map { case (k, v) =>
                // Every binding must have a planner-assigned output name; a missing one throws.
                val name = planned.outputNameMapping.getOrElse(
                  k,
                  throw new IllegalStateException(
                    s"BindingId(${k.id}) has no entry in outputNameMapping — " +
                    "this indicates a bug in the query planner",
                  ),
                )
                name -> patternValueToCypherValue(v)
              }
              val qc = QueryContext(cypherEnv)
StandingQueryResult.Meta(isPositiveMatch = true) -> qc
            })
        },
      )
      .via(andThenFlow)
  }
}
================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs/SlackOutput.scala
================================================
package com.thatdot.quine.app.model.outputs

import scala.concurrent.duration.DurationInt
import scala.util.{Failure, Success}

import org.apache.pekko.NotUsed
import org.apache.pekko.actor.ActorSystem
import org.apache.pekko.http.scaladsl.Http
import org.apache.pekko.http.scaladsl.model.MediaTypes.`application/json`
import org.apache.pekko.http.scaladsl.model.{HttpEntity, HttpMethods, HttpRequest}
import org.apache.pekko.http.scaladsl.unmarshalling.Unmarshal
import org.apache.pekko.stream.scaladsl.Flow

import com.thatdot.common.logging.Log.{LazySafeLogging, LogConfig, Safe, SafeLoggableInterpolator}
import com.thatdot.quine.app.StandingQueryResultOutput.SlackSerializable
import com.thatdot.quine.app.util.QuineLoggables._
import com.thatdot.quine.graph.{CypherOpsGraph, MasterStream, NamespaceId, StandingQueryResult}
import com.thatdot.quine.model.QuineIdProvider
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef.PostToSlack

/** Standing query output that batches results and POSTs them to a Slack webhook, at most one
  * message per configured interval (never more often than once per second).
  */
class SlackOutput(val config: PostToSlack)(implicit private val logConfig: LogConfig)
    extends OutputRuntime
    with LazySafeLogging {

  /** Builds the flow that accumulates results between sends and posts each batch to the hook URL.
    *
    * Request failures and non-success responses are logged; the execution token is emitted
    * either way.
    *
    * @param name        name of the standing query output (used for the execution token)
    * @param inNamespace namespace the standing query runs in
    * @param output      user-supplied output definition (not read here; `config` is used instead)
    * @param graph       supplies the actor system and the id provider
    */
  def flow(
    name: String,
    inNamespace: NamespaceId,
    output: StandingQueryResultOutputUserDef,
    graph: CypherOpsGraph,
  ): Flow[StandingQueryResult, MasterStream.SqResultsExecToken, NotUsed] = {
    val token = execToken(name, inNamespace)
    val PostToSlack(hookUrl, onlyPositiveMatchData, intervalSeconds) = config
    implicit val system: ActorSystem = graph.system
    implicit val idProvider: QuineIdProvider = graph.idProvider
    val http = Http(graph.system)
    // Every future callback below runs on the actor system's dispatcher.
    val dispatcher = system.dispatcher

    // how often to send notifications (notifications will be batched by [[PostToSlack.SlackSerializable.apply]])
    val rate = math.max(1, intervalSeconds).seconds

    Flow[StandingQueryResult]
      // While the throttle holds elements back, keep prepending new results to the pending batch.
      .conflateWithSeed(first => List(first))((batch, next) => next :: batch)
      .throttle(1, rate) // Slack webhooks have a 1 message per second rate limit
      .map(batch => SlackSerializable(onlyPositiveMatchData, batch))
      .collect { case Some(slackMessage) => slackMessage }
      .mapAsync(1) { message =>
        val request = HttpRequest(
          method = HttpMethods.POST,
          uri = hookUrl,
          entity = HttpEntity.apply(contentType = `application/json`, message.slackJson),
        )

        val posted = http
          .singleRequest(request)
          .flatMap { response =>
            if (!response.status.isSuccess()) {
              // Read the error body so it can be logged alongside the status.
              Unmarshal(response)
                .to[String]
                .andThen {
                  case Success(responseBody) =>
                    logger.error(
                      log"""Failed to POST ${message.slackJson} to slack webhook.
                           |Response status was ${response.status}
                           |""".cleanLines + log": ${Safe(responseBody)}",
                    )
                  case Failure(err) =>
                    logger.error(
                      log"""Failed to deserialize error response from POST ${message.slackJson} to slack webhook.
                           |Response status was ${response.status}
                           |""".cleanLines withException err,
                    )
                }(dispatcher)
            } else {
              // The response body is not needed on success; drain it.
              response.entity
                .discardBytes()
                .future()
            }
          }(dispatcher)
          .map(_ => token)(dispatcher)

        posted.recover { case err =>
          logger.error(log"Failed to POST standing query result" withException err)
          token
        }(dispatcher)
      }
  }
}
================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs/SnsOutput.scala
================================================
package com.thatdot.quine.app.model.outputs

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.connectors.sns.scaladsl.SnsPublisher
import org.apache.pekko.stream.scaladsl.{Flow, Keep}

import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient
import software.amazon.awssdk.services.sns.SnsAsyncClient

import com.thatdot.common.logging.Log.LogConfig
import com.thatdot.quine.app.model.ingest.util.AwsOps
import com.thatdot.quine.app.model.ingest.util.AwsOps.AwsBuilderOps
import
com.thatdot.quine.graph.{CypherOpsGraph, MasterStream, NamespaceId, StandingQueryResult}
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef
import com.thatdot.quine.routes.StandingQueryResultOutputUserDef.WriteToSNS

/** Standing query output that publishes each result, rendered as one line of JSON, to an SNS topic. */
class SnsOutput(val config: WriteToSNS)(implicit private val logConfig: LogConfig) extends OutputRuntime {

  /** Builds the flow that serializes results and publishes them through a dedicated SNS client.
    *
    * The client is created when the flow is built and closed when the actor system terminates.
    *
    * @param name        name of the standing query output (used in the stage name and token)
    * @param inNamespace namespace the standing query runs in
    * @param output      user-supplied output definition (not read here; `config` is used instead)
    * @param graph       supplies the actor system and the id provider
    */
  def flow(
    name: String,
    inNamespace: NamespaceId,
    output: StandingQueryResultOutputUserDef,
    graph: CypherOpsGraph,
  ): Flow[StandingQueryResult, MasterStream.SqResultsExecToken, NotUsed] = {
    val token = execToken(name, inNamespace)
    val WriteToSNS(credentialsOpt, regionOpt, topic, structure) = config

    val clientBuilder = SnsAsyncClient
      .builder()
      .credentials(credentialsOpt)
      .region(regionOpt)
    val nettyClient =
      NettyNioAsyncHttpClient.builder.maxConcurrency(AwsOps.httpConcurrencyPerClient).build()
    val snsClient = clientBuilder.httpClient(nettyClient).build()

    // NOTE pekko-connectors requires we close the SNS client
    graph.system.registerOnTermination(snsClient.close())

    // NB: by default, this will make 10 parallel requests [configurable via parameter to SnsPublisher.flow]
    // TODO if any request to SNS errors, that thread (of the aforementioned 10) will retry its request
    // indefinitely. If all worker threads block, the SnsPublisher.flow will backpressure indefinitely.
    val publisher = SnsPublisher.flow(topic)(snsClient).named(s"sq-output-sns-producer-for-$name")

    Flow[StandingQueryResult]
      .map { result =>
        val json = result.toJson(structure)(graph.idProvider, logConfig)
        json.noSpaces + "\n"
      }
      .viaMat(publisher)(Keep.right)
      .map(_ => token)
  }
}
================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs2/QuineDestinationSteps.scala
================================================
package com.thatdot.quine.app.model.outputs2

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.scaladsl.Sink

import com.thatdot.common.logging.Log.LogConfig
import com.thatdot.data.DataFoldableFrom
import com.thatdot.outputs2.DataFoldableSink
import com.thatdot.quine.graph.NamespaceId

/** Destination steps for the Quine-specific result destinations. */
sealed trait QuineDestinationSteps extends DataFoldableSink {
  // def transform: Option[Core.PostEnrichmentTransform]

  /** The destination that results are ultimately written to. */
  def destination: QuineResultDestination
}

object QuineDestinationSteps {

  /** Steps that pass foldable data straight through to `destination`'s own sink. */
  case class WithDataFoldable(destination: QuineResultDestination.FoldableData) extends QuineDestinationSteps {

    /** Delegates to the wrapped destination's sink for the given output name and namespace. */
    override def sink[In: DataFoldableFrom](outputName: String, namespaceId: NamespaceId)(implicit
      logConfig: LogConfig,
    ): Sink[In, NotUsed] =
      destination.sink(outputName, namespaceId)
  }
}
================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs2/QuineResultDestination.scala
================================================
package com.thatdot.quine.app.model.outputs2

import com.thatdot.outputs2.{DataFoldableSink, SinkName}

/** Result destinations that are specific to Quine. */
sealed trait QuineResultDestination extends DataFoldableSink with SinkName

object QuineResultDestination {

  /** Destinations that accept any data with a `DataFoldableFrom` instance. */
  sealed trait FoldableData extends QuineResultDestination

  object FoldableData {

    /** Settings a Slack webhook destination exposes. */
    trait Slack extends FoldableData {
      def hookUrl: String
      def onlyPositiveMatchData: Boolean
      def intervalSeconds: Int
    }

    /** Settings a Cypher query destination exposes. */
    trait CypherQuery extends FoldableData {
      def queryText: String
      def parameter: String
      def parallelism: Int
      def allowAllNodeScan: Boolean
      def shouldRetry: Boolean
    }
  }
}
================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs2/destination/CypherQueryDestination.scala
================================================
package com.thatdot.quine.app.model.outputs2.destination

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.scaladsl.{Flow, Sink}

import com.thatdot.common.logging.Log
import com.thatdot.common.logging.Log.LazySafeLogging
import com.thatdot.data.DataFoldableFrom
import com.thatdot.quine.app.data.QuineDataFoldersTo
import com.thatdot.quine.app.model.outputs2
import com.thatdot.quine.app.model.outputs2.QuineResultDestination
import com.thatdot.quine.graph.{CypherOpsGraph, NamespaceId, cypher}

/** Destination that runs a Cypher query for each incoming value and discards the query's rows.
  *
  * @param queryText        Cypher query to run
  * @param parameter        name under which the incoming value is passed to the query
  * @param parallelism      passed through to the underlying query flow
  * @param allowAllNodeScan passed through to the underlying query flow
  * @param shouldRetry      passed through to the underlying query flow
  */
case class CypherQueryDestination(
  queryText: String,
  parameter: String = "that",
  parallelism: Int,
  allowAllNodeScan: Boolean,
  shouldRetry: Boolean,
)(implicit graph: CypherOpsGraph)
    extends QuineResultDestination.FoldableData.CypherQuery
    with LazySafeLogging {

  override def slug: String = "cypher"

  // The query flow is provided by the shared CypherQuery model; this class only adapts it to a sink.
  private val underlyingCypherQuery = outputs2.query.CypherQuery(
    queryText = queryText,
    parameter = parameter,
    parallelism = parallelism,
    allowAllNodeScan = allowAllNodeScan,
    shouldRetry = shouldRetry,
  )

  /** Sink that folds each element into a Cypher value, runs the query with it and ignores the output. */
  override def sink[A: DataFoldableFrom](name: String, inNamespace: NamespaceId)(implicit
    logConfig: Log.LogConfig,
  ): Sink[A, NotUsed] = {
    import QuineDataFoldersTo.cypherValueFolder

    val asCypherValue = DataFoldableFrom[A].to[cypher.Value]
    val queryFlow = underlyingCypherQuery.flow(name, inNamespace)

    Flow[A]
      .map(asCypherValue)
      .via(queryFlow)
      .to(Sink.ignore)
      .named(sinkName(name))
  }
}
================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs2/destination/Slack.scala
================================================
package com.thatdot.quine.app.model.outputs2.destination

import scala.concurrent.ExecutionContext
import scala.concurrent.duration.{DurationInt, FiniteDuration}
import scala.util.{Failure, Success}

import org.apache.pekko.NotUsed
import org.apache.pekko.actor.ActorSystem
import
org.apache.pekko.http.scaladsl.model.MediaTypes.`application/json`
import org.apache.pekko.http.scaladsl.model.{HttpEntity, HttpMethods, HttpRequest}
import org.apache.pekko.http.scaladsl.unmarshalling.Unmarshal
import org.apache.pekko.http.scaladsl.{Http, HttpExt}
import org.apache.pekko.stream.scaladsl.{Flow, Sink}

import io.circe.Json

import com.thatdot.common.logging.Log._
import com.thatdot.data.DataFoldableFrom
import com.thatdot.quine.app.StandingQueryResultOutput.SlackSerializable
import com.thatdot.quine.app.model.outputs2.QuineResultDestination
import com.thatdot.quine.app.util.QuineLoggables.logStatusCode
import com.thatdot.quine.graph.NamespaceId

/** Destination that batches incoming values as JSON and POSTs them to a Slack webhook.
  *
  * @param hookUrl               webhook URL to POST to
  * @param onlyPositiveMatchData declared by the destination trait; not read by `sink`
  * @param intervalSeconds       minimum seconds between messages (values below 1 are treated as 1)
  */
final case class Slack(
  hookUrl: String,
  onlyPositiveMatchData: Boolean = false,
  intervalSeconds: Int = 20,
)(implicit system: ActorSystem)
    extends QuineResultDestination.FoldableData.Slack
    with LazySafeLogging {

  override def slug: String = "slack"

  /** Sink that folds elements to JSON, accumulates them between sends and posts each batch.
    *
    * Request failures and non-success responses are logged and otherwise ignored.
    */
  override def sink[A: DataFoldableFrom](name: String, inNamespace: NamespaceId)(implicit
    logConfig: LogConfig,
  ): Sink[A, NotUsed] = {
    val http: HttpExt = Http(system)
    // Callbacks that do real work run on the actor system's dispatcher.
    val dispatcher = system.dispatcher

    // Slack webhooks have a 1 message per second rate limit
    val rate: FiniteDuration = math.max(1, intervalSeconds).seconds

    val asJson = DataFoldableFrom[A].to[Json]

    Flow[A]
      .map(asJson)
      // While the throttle holds elements back, keep prepending new values to the pending batch.
      .conflateWithSeed(first => List(first))((batch, next) => next :: batch)
      .throttle(1, rate)
      .map(batch => SlackSerializable(batch))
      .collect { case Some(slackMessage) => slackMessage }
      .mapAsync(1) { message =>
        val request = HttpRequest(
          method = HttpMethods.POST,
          uri = hookUrl,
          entity = HttpEntity.apply(contentType = `application/json`, message.slackJson),
        )

        val posted = http
          .singleRequest(request)
          .flatMap { response =>
            if (!response.status.isSuccess()) {
              // Read the error body so it can be logged alongside the status.
              Unmarshal(response)
                .to[String]
                .andThen {
                  case Success(responseBody) =>
                    logger.error(
                      log"""Failed to POST ${message.slackJson} to Slack webhook.
                           |Response status was ${response.status}
                           |""".cleanLines + log": ${Safe(responseBody)}",
                    )
                  case Failure(err) =>
                    // FIXME Not importing/mixing in right logging stuff
                    logger.error(
                      log"""Failed to deserialize error response from POST ${message.slackJson} to Slack webhook.
                           |Response status was ${response.status}
                           |""".cleanLines withException err,
                    )
                }(dispatcher)
                .map(_ => ())(ExecutionContext.parasitic)
            } else {
              // The response body is not needed on success; drain it.
              response.entity
                .discardBytes()
                .future()
                .map(_ => ())(ExecutionContext.parasitic)
            }
          }(dispatcher)

        posted.recover { case err =>
          logger.error(log"Failed to POST result" withException err)
        }(dispatcher)
      }
      .to(Sink.ignore)
      .named(sinkName(name))
  }
}
================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs2/package.scala
================================================
package com.thatdot.quine.app.model

/** This package comprises Quine's extensions to the Outputs V2 project for items that only Quine and its dependents
  * (i.e. Quine Enterprise) require or support.
  */
package object outputs2
================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs2/query/CypherQuery.scala
================================================
package com.thatdot.quine.app.model.outputs2.query

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.scaladsl.Flow

import com.thatdot.common.logging.Log.{LazySafeLogging, LogConfig, SafeLoggableInterpolator}
import com.thatdot.quine.app.util.AtLeastOnceCypherQuery
import com.thatdot.quine.compiler
import com.thatdot.quine.graph.cypher.{QueryContext, Value}
import com.thatdot.quine.graph.{CypherOpsGraph, NamespaceId}

/** A Cypher query run once per incoming value, with that value bound to `parameter`.
  *
  * @param queryText        Cypher query to compile and run
  * @param parameter        name of the query parameter that receives the incoming value
  * @param parallelism      maximum number of per-value result streams merged concurrently
  * @param allowAllNodeScan whether a query that may scan all nodes is accepted
  * @param shouldRetry      whether the query is run through [[AtLeastOnceCypherQuery]]
  */
case class CypherQuery(
  queryText: String,
  parameter: String = "that",
  parallelism: Int,
  allowAllNodeScan: Boolean,
  shouldRetry: Boolean,
) extends LazySafeLogging {

  /** Compiles the query and builds a flow that emits one [[QueryContext]] per result row.
    *
    * Throws `RuntimeException` when the query may contain an all-node scan and
    * `allowAllNodeScan` is false. Logs a warning when retries are enabled but the query is not
    * known to be idempotent.
    *
    * @param name        output name, used to label the retrying query
    * @param inNamespace namespace the query runs in
    */
  def flow(name: String, inNamespace: NamespaceId)(implicit
    graph: CypherOpsGraph,
    logConfig: LogConfig,
  ): Flow[Value, QueryContext, NotUsed] = {
    val compiledQuery = compiler.cypher.compile(queryText, Seq(parameter))

    if (compiledQuery.canContainAllNodeScan && !allowAllNodeScan) {
      throw new RuntimeException(
        "Cypher query may contain full node scan; re-write without possible full node scan, or pass allowAllNodeScan true. " +
        s"The provided query was: $queryText",
      )
    }

    if (!compiledQuery.query.isIdempotent && shouldRetry) {
      logger.warn(
        safe"""Could not verify that the provided Cypher query is idempotent. If timeouts or external system errors
              |occur, query execution may be retried and duplicate data may be created. To avoid this
              |set shouldRetry = false in the Standing Query output""".cleanLines,
      )
    }

    // Only constructed if the retrying path is actually taken.
    lazy val atLeastOnceCypherQuery =
      AtLeastOnceCypherQuery(compiledQuery, parameter, s"cypher-query-for--$name")

    Flow[Value]
      .flatMapMerge(
        breadth = parallelism,
        value => {
          val cypherResultRows =
            if (!shouldRetry)
              graph.cypherOps
                .query(
                  query = compiledQuery,
                  namespace = inNamespace,
                  // `atTime` is `None` because we only want current time here—this is where we would
                  // pass in `atTime` for historically aware output queries (if we chose to do that)
                  atTime = None,
                  parameters = Map(parameter -> value),
                )
                .results
            else atLeastOnceCypherQuery.stream(value, inNamespace)(graph)

          // Pair each row's values with the query's column names.
          cypherResultRows.map(row => QueryContext(compiledQuery.columns.zip(row).toMap))
        },
      )
  }
}
================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs2/query/standing/Predicate.scala
================================================
package com.thatdot.quine.app.model.outputs2.query.standing

import com.thatdot.quine.graph.StandingQueryResult

/** A yes/no test applied to a standing query result. */
sealed trait Predicate {
  def apply(standingQueryResult: StandingQueryResult): Boolean
}

object Predicate {

  /** Accepts exactly the results whose metadata marks them as positive matches. */
  case object OnlyPositiveMatch extends Predicate {
    override def apply(standingQueryResult: StandingQueryResult): Boolean =
      standingQueryResult.meta.isPositiveMatch
  }
}
================================================
FILE:
quine/src/main/scala/com/thatdot/quine/app/model/outputs2/query/standing/StandingQuery.scala
================================================
package com.thatdot.quine.app.model.outputs2.query.standing

import java.util.UUID

/** Models describing a standing query, both as defined and as registered. */
object StandingQuery {

  /** A standing query as specified before registration.
    *
    * @param pattern                       the pattern to match
    * @param outputs                       workflows that receive the query's results
    * @param includeCancellations          defaults to `false`
    * @param inputBufferSize               defaults to 32
    * @param shouldCalculateResultHashCode defaults to `false`
    */
  final case class StandingQueryDefinition(
    pattern: StandingQueryPattern,
    outputs: Seq[StandingQueryResultWorkflow],
    includeCancellations: Boolean = false,
    inputBufferSize: Int = 32, // should match [[StandingQuery.DefaultQueueBackpressureThreshold]]
    shouldCalculateResultHashCode: Boolean = false,
  )

  /** A standing query together with its name, internal id and per-key statistics.
    *
    * NOTE(review): the meaning of the `stats` map keys is not visible here — confirm against the
    * code that populates it.
    */
  final case class RegisteredStandingQuery(
    name: String,
    internalId: UUID,
    pattern: Option[StandingQueryPattern], // TODO: remove Option once we remove DGB SQs
    outputs: Seq[StandingQueryResultWorkflow],
    includeCancellations: Boolean,
    inputBufferSize: Int,
    stats: Map[String, StandingQueryStats],
  )
}
================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs2/query/standing/StandingQueryPattern.scala
================================================
package com.thatdot.quine.app.model.outputs2.query.standing

/** The pattern a standing query matches; `Cypher` is the only variant defined here. */
sealed abstract class StandingQueryPattern extends Product with Serializable

object StandingQueryPattern {

  /** A pattern given as Cypher text, evaluated in the given mode (`DistinctId` by default). */
  final case class Cypher(
    query: String,
    mode: StandingQueryMode = StandingQueryMode.DistinctId,
  ) extends StandingQueryPattern

  /** Selects which interpreter evaluates a Cypher standing query pattern. */
  sealed abstract class StandingQueryMode extends Product with Serializable

  object StandingQueryMode {
    // DomainGraphBranch interpreter
    case object DistinctId extends StandingQueryMode
    // SQv4/Cypher interpreter
    case object MultipleValues extends StandingQueryMode

    case object QuinePattern extends StandingQueryMode

    /** All modes, in declaration order. */
    val values: Seq[StandingQueryMode] = Seq(DistinctId, MultipleValues, QuinePattern)
  }
}
================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs2/query/standing/StandingQueryResultTransformation.scala
================================================
package
com.thatdot.quine.app.model.outputs2.query.standing

import com.thatdot.data.DataFoldableFrom
import com.thatdot.quine.app.data.QuineDataFoldablesFrom
import com.thatdot.quine.graph.StandingQueryResult
import com.thatdot.quine.model.{QuineIdProvider, QuineValue}

/** A transformation applied to a standing query result before enrichment.
  *
  * `Out` is the type produced, and `dataFoldableFrom` describes how to fold that type into
  * other data representations.
  */
sealed trait StandingQueryResultTransformation {
  type Out
  def dataFoldableFrom: DataFoldableFrom[Out]
  def apply(standingQueryResult: StandingQueryResult): Out
}

object StandingQueryResultTransformation {

  /** Drops the result's metadata and keeps only its `data`, wrapped as a [[QuineValue]]. */
  case class InlineData()(implicit idProvider: QuineIdProvider) extends StandingQueryResultTransformation {
    override type Out = QuineValue
    override def dataFoldableFrom: DataFoldableFrom[Out] = QuineDataFoldablesFrom.quineValueDataFoldable
    override def apply(standingQueryResult: StandingQueryResult): Out = QuineValue(standingQueryResult.data)
  }
}
================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs2/query/standing/StandingQueryResultWorkflow.scala
================================================
package com.thatdot.quine.app.model.outputs2.query.standing

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.scaladsl.Flow

import cats.data.NonEmptyList

import com.thatdot.common.logging.Log.LogConfig
import com.thatdot.data.{DataFoldableFrom, DataFolderTo}
import com.thatdot.outputs2.DataFoldableSink
import com.thatdot.quine.app.model.outputs2.query.CypherQuery
import com.thatdot.quine.graph.cypher.QueryContext
import com.thatdot.quine.graph.{CypherOpsGraph, NamespaceId, StandingQueryResult, cypher}
import com.thatdot.quine.model.QuineIdProvider

/** The optional processing steps applied to standing query results before they are broadcast to
  * destinations: filter, then pre-enrichment transformation, then enrichment query.
  */
case class Workflow(
  filter: Option[Predicate],
  /*
    {"meta": {"isPositiveMatch": true}, "data": {"emailAddress": "i.am.a.user@gmail.com"}}
      => {"username": "i.am.a.user", "removeDownstream": !meta.isPositiveMatch}
        => Value: Map("username" -> String, "removeDownstream" -> Boolean)
      => [1, 2, 3, 4]
        => Value: [1, 2, 3, 4]
   */
  preEnrichmentTransformation: Option[StandingQueryResultTransformation],
  /*
    MATCH (u:User) WHERE id(u) = idFrom(that.username) RETURNING (, that.removeDownstream)
   */
  enrichmentQuery: Option[CypherQuery],
) {
  import StandingQueryResultWorkflow._
  import Workflow._

  /** Composes the configured steps into a single flow from [[StandingQueryResult]] to the
    * workflow's output type; steps that are `None` are skipped.
    *
    * @param outputName  passed to the enrichment query's flow
    * @param namespaceId namespace the enrichment query runs in
    */
  def flow(outputName: String, namespaceId: NamespaceId)(implicit
    graph: CypherOpsGraph,
    logConfig: LogConfig,
  ): BroadcastableFlow = {
    implicit val idProvider: QuineIdProvider = graph.idProvider
    import com.thatdot.quine.app.data.QuineDataFoldersTo.cypherValueFolder

    // Starting point: results pass through unchanged (the trait's default `flow`).
    val sqOrigin: StandingQueryResultFlow = new StandingQueryResultFlow {
      override def foldableFrom: DataFoldableFrom[StandingQueryResult] = implicitly
    }

    // Step 1: keep only results accepted by the predicate, when one is configured.
    val maybeThenFilter = filter.fold(identity[StandingQueryResultFlow] _) {
      predicate => (sqFlow: StandingQueryResultFlow) =>
        new StandingQueryResultFlow {
          override def foldableFrom: DataFoldableFrom[StandingQueryResult] = sqFlow.foldableFrom
          override def flow: Flow[StandingQueryResult, StandingQueryResult, NotUsed] =
            sqFlow.flow.filter(predicate.apply)
        }
    }

    // Step 2: map each result through the transformation; the output type becomes `transformation.Out`.
    val maybeThenPreEnrich = preEnrichmentTransformation.fold((x: StandingQueryResultFlow) => x: BroadcastableFlow) {
      // Right now, `preEnrichmentTransformation` only supports built-in offerings, but this will need to change when
      //  we want to support JS transformations here, too.
      transformation => (priorFlow: StandingQueryResultFlow) =>
        new BroadcastableFlow {
          override type Out = transformation.Out
          override def foldableFrom: DataFoldableFrom[Out] = transformation.dataFoldableFrom
          override def flow: Flow[StandingQueryResult, Out, NotUsed] = priorFlow.flow.map(transformation.apply)
        }
    }

    // Step 3: fold the prior output into a Cypher value and run the enrichment query with it.
    val maybeThenEnrich = enrichmentQuery.fold(identity[BroadcastableFlow] _) {
      enrichQuery => (priorFlow: BroadcastableFlow) =>
        new BroadcastableFlow {
          override type Out = cypher.QueryContext
          override def foldableFrom: DataFoldableFrom[Out] = implicitly
          override def flow: Flow[StandingQueryResult, Out, NotUsed] = {
            val dataFold = priorFlow.foldableFrom.to[cypher.Value]
            priorFlow.flow.map(dataFold).via(enrichQuery.flow(outputName, namespaceId))
          }
        }
    }

    val steps = maybeThenFilter
      .andThen(maybeThenPreEnrich)
      .andThen(maybeThenEnrich)

    steps(sqOrigin)
  }
}

object Workflow {

  /** A flow from standing query results to some `Out`, together with the instance needed to
    * fold `Out` into other data representations.
    */
  trait BroadcastableFlow {
    type Out
    def foldableFrom: DataFoldableFrom[Out]
    def flow: Flow[StandingQueryResult, Out, NotUsed]
  }

  /** A [[BroadcastableFlow]] whose output is still a [[StandingQueryResult]]; the default `flow`
    * passes results through unchanged.
    */
  trait StandingQueryResultFlow extends BroadcastableFlow {
    type Out = StandingQueryResult
    def foldableFrom: DataFoldableFrom[StandingQueryResult]
    def flow: Flow[StandingQueryResult, StandingQueryResult, NotUsed] = Flow[StandingQueryResult]
  }
}

/** A named workflow plus the non-empty list of destinations its output is sent to. */
case class StandingQueryResultWorkflow(
  outputName: String,
  namespaceId: NamespaceId,
  workflow: Workflow,
  destinationStepsList: NonEmptyList[DataFoldableSink],
) {

  /** Builds the workflow's flow, attaches every destination's sink to it via `alsoToAll`, and
    * emits `()` for each element that passes through.
    */
  def flow(graph: CypherOpsGraph)(implicit logConfig: LogConfig): Flow[StandingQueryResult, Unit, NotUsed] = {
    val preBroadcastFlow = workflow.flow(outputName, namespaceId)(graph, logConfig)
    val sinks = destinationStepsList
      .map(_.sink(outputName, namespaceId)(preBroadcastFlow.foldableFrom, logConfig))
      .toList
    preBroadcastFlow.flow.alsoToAll(sinks: _*).map(_ => ())
  }
}

object StandingQueryResultWorkflow {

  val title = "Standing Query Result Workflow"

  /** Folds a [[StandingQueryResult]] as a map with two entries, `"meta"` and `"data"`, each of
    * which is itself a map of folded Quine values.
    */
  implicit def sqDataFoldableFrom(implicit quineIdProvider: QuineIdProvider): DataFoldableFrom[StandingQueryResult] = {
    import com.thatdot.quine.serialization.data.QuineSerializationFoldablesFrom.quineValueDataFoldableFrom

    new DataFoldableFrom[StandingQueryResult] {
      override def fold[B](value: StandingQueryResult, folder: DataFolderTo[B]): B = {
        val outerMap = folder.mapBuilder()

        val targetMetaBuilder = folder.mapBuilder()
        value.meta.toMap.foreach { case (k, v) =>
          targetMetaBuilder.add(k, quineValueDataFoldableFrom.fold(v, folder))
        }
        outerMap.add("meta", targetMetaBuilder.finish())

        val targetDataBuilder = folder.mapBuilder()
        value.data.foreach { case (k, v) =>
          targetDataBuilder.add(k, quineValueDataFoldableFrom.fold(v, folder))
        }
        outerMap.add("data", targetDataBuilder.finish())

        outerMap.finish()
      }
    }
  }

  /** Folds a [[QueryContext]] as a map from each binding's name to its folded Cypher value. */
  implicit val queryContextFoldableFrom: DataFoldableFrom[QueryContext] = new DataFoldableFrom[QueryContext] {
    import com.thatdot.quine.app.data.QuineDataFoldablesFrom.cypherValueDataFoldable

    override def fold[B](value: QueryContext, folder: DataFolderTo[B]): B = {
      val builder = folder.mapBuilder()
      value.environment.foreach { case (k, v) =>
        builder.add(k.name, cypherValueDataFoldable.fold(v, folder))
      }
      builder.finish()
    }
  }
}
================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs2/query/standing/StandingQueryStats.scala
================================================
package com.thatdot.quine.app.model.outputs2.query.standing

import java.time.Instant

import com.thatdot.api.v2.RatesSummary

/** Statistics reported for a running standing query.
  *
  * NOTE(review): the unit of `totalRuntime` is not visible here — confirm against the producer.
  */
final case class StandingQueryStats(
  rates: RatesSummary,
  startTime: Instant,
  totalRuntime: Long,
  bufferSize: Int,
  outputHashCode: Long,
)

object StandingQueryStats {
  val title: String = "Statistics About a Running Standing Query"
}
================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/outputs2/query/standing/package.scala
================================================
package com.thatdot.quine.app.model.outputs2.query

/** This package comprises Standing Query utilization of Outputs V2 types (see
[[com.thatdot.quine.app.model.outputs2]]
  * and [[com.thatdot.outputs2]]).
  */
package object standing

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/transformation/polyglot/Polyglot.scala
================================================
package com.thatdot.quine.app.model.transformation.polyglot

object Polyglot {

  /** Value compatible with the org.graalvm.polyglot.Context.asValue parameter.
    * This can be passed to a GraalVM hosted language as a parameter value.
    */
  type HostValue = AnyRef
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/transformation/polyglot/PolyglotValueDataFoldableFrom.scala
================================================
package com.thatdot.quine.app.model.transformation.polyglot

import java.time._

import scala.jdk.CollectionConverters.CollectionHasAsScala

import org.graalvm.polyglot

import com.thatdot.data.{DataFoldableFrom, DataFolderTo}

/** Implementation of [[DataFoldableFrom]] for GraalVM's [[org.graalvm.polyglot.Value]].
  *
  * The goal is to walk a guest-language value and rebuild it in the target produced
  * by [[DataFolderTo]]. The program is a big pattern-match ordered from most to least
  * specific. Order does matter.
  */
object PolyglotValueDataFoldableFrom extends DataFoldableFrom[polyglot.Value] {
  // Regex used to detect whether a numeric literal *looks* like a floating-point.
  // If the string contains "." or an exponent (e/E) we keep it as a Double; otherwise
  // - provided it fits in Long - we map it to an integral value.
  // This is necessary for numbers like: 1.9365476157539434e17 as this fits in a long and will take precedence but
  // the host value intended this as a double
  private val hasDecimalOrExponent = "[.eE]".r

  /** Resolve a zone to a concrete offset for a time that carries no date.
    *
    * A fixed-offset zone yields its own offset. A region zone has no single offset without a date, so the offset
    * in effect at the current instant is used.
    */
  private def offsetOf(zone: ZoneId): ZoneOffset = zone.normalized() match {
    case fixed: ZoneOffset => fixed
    case region => region.getRules.getOffset(Instant.now())
  }

  /** Fold a Polyglot value into the caller-supplied folder.
    *
    * @param value  Graal VM guest value to inspect
    * @param folder type-class instance that knows how to build `B`
    * @return the value rebuilt through `folder`
    * @throws java.lang.Exception if the value (or a host / proxy object it wraps) has no supported representation
    */
  def fold[B](value: polyglot.Value, folder: DataFolderTo[B]): B = {
    value match {
      case _ if value.isNull => folder.nullValue
      case _ if value.isBoolean => if (value.asBoolean()) folder.trueValue else folder.falseValue
      // integral number: fits in Long and literal has no decimal/exponent part
      case _ if value.isNumber && value.fitsInLong && hasDecimalOrExponent.findFirstIn(value.toString).isEmpty =>
        folder.integer(value.asLong())
      case _ if value.isNumber && value.fitsInDouble => folder.floating(value.asDouble())
      case _ if value.isString => folder.string(value.asString())

      case _ if value.hasBufferElements =>
        val count = value.getBufferSize.toInt
        val bytes = Array.ofDim[Byte](count)
        // This is really inefficient. Later versions of graalvm add a bulk readBuffer operation that
        // fills a byte array. When we switch to only supporting Java 17+, which is required Graal 23.0+
        // this can be improved.
        for (i <- 0 until count) bytes(i) = value.readBufferByte(i.toLong)
        folder.bytes(bytes)

      case _ if value.isDate && value.isTime && value.isTimeZone =>
        folder.zonedDateTime(ZonedDateTime.ofInstant(value.asInstant, value.asTimeZone))
      case _ if value.isDate && value.isTime => folder.localDateTime(LocalDateTime.of(value.asDate, value.asTime))
      case _ if value.isDate => folder.date(value.asDate)
      case _ if value.isTime && value.isTimeZone =>
        // This case is only reached when `isDate` is false (every date case above failed), so the value is not
        // an instant and `asInstant` cannot be used. Combine the time and the zone's offset directly instead.
        folder.time(OffsetTime.of(value.asTime, offsetOf(value.asTimeZone)))
      case _ if value.isTime => folder.localTime(value.asTime)
      case _ if value.isDuration => folder.duration(value.asDuration)

      // Any input that is produced by the [[PolyglotValueDataFolderTo]] will be a host object and not match the
      // above checks
      case _ if value.isHostObject =>
        value.asHostObject[Object]() match {
          case time: ZonedDateTime => folder.zonedDateTime(time)
          case time: LocalDateTime => folder.localDateTime(time)
          case time: LocalDate => folder.date(time)
          case time: OffsetTime => folder.time(time)
          case time: LocalTime => folder.localTime(time)
          case duration: Duration => folder.duration(duration)
          case bytes: Array[Byte] => folder.bytes(bytes)
          // Report the class of the wrapped host object; `value.getClass` is always the polyglot wrapper class.
          case unsupported =>
            throw new Exception(s"host value $value of class ${unsupported.getClass} not supported")
        }

      case _ if value.hasHashEntries =>
        val it = value.getHashEntriesIterator
        val builder = folder.mapBuilder()
        while (it.hasIteratorNextElement) {
          val entry = it.getIteratorNextElement
          val k = entry.getArrayElement(0)
          val v = entry.getArrayElement(1)
          builder.add(k.asString, fold(v, folder))
        }
        builder.finish()

      case _ if value.hasArrayElements =>
        val size = value.getArraySize
        val builder = folder.vectorBuilder()
        var i = 0L
        while (i < size) {
          val elem = value.getArrayElement(i)
          builder.add(fold(elem, folder))
          i += 1
        }
        builder.finish()

      case _ if value.hasIterator =>
        val it = value.getIterator
        val builder = folder.vectorBuilder()
        while (it.hasIteratorNextElement) {
          val elem = it.getIteratorNextElement
          builder.add(fold(elem, folder))
        }
        builder.finish()

      case _ if value.hasMembers =>
        val builder = folder.mapBuilder()
        for (key <- value.getMemberKeys.asScala) {
          val v = value.getMember(key)
          builder.add(key, fold(v, folder))
        }
        builder.finish()

      // Any input that is produced by the [[PolyglotValueDataFolderTo]] will have certain proxy objects that are
      // handled by the below, as well as any polyglot language that could produce a proxy object.
      case _ if value.isProxyObject =>
        value.asProxyObject[polyglot.proxy.Proxy]() match {
          case array: polyglot.proxy.ProxyArray =>
            val size = array.getSize
            val builder = folder.vectorBuilder()
            var i = 0L
            while (i < size) {
              val elem = polyglot.Value.asValue(array.get(i))
              builder.add(fold(elem, folder))
              i += 1
            }
            builder.finish()

          case obj: polyglot.proxy.ProxyObject =>
            val builder = folder.mapBuilder()

            // Look up one member by key and add its folded value to the map under that key.
            def addMember(key: String): Unit =
              builder.add(key, fold(polyglot.Value.asValue(obj.getMember(key)), folder))

            // The below cases come from the definition of getMemberKeys as to what it's type could be.
            obj.getMemberKeys match {
              case null => () // Do nothing in the case that there are no members
              case arr: polyglot.proxy.ProxyArray =>
                val size = arr.getSize
                for (i <- 0L until size) addMember(arr.get(i).toString)
              case keys: List[_] =>
                keys.foreach { key =>
                  // If this case matches graal vm asserts this is true. If it's not there is probably a bug so fail.
                  // This has been tested
                  assert(key.isInstanceOf[String])
                  addMember(key.toString)
                }
              // The getMemberKeys contract also allows a java.util.List of strings, which a Scala `List`
              // pattern does not match.
              case keys: java.util.List[_] =>
                val keyIterator = keys.iterator()
                while (keyIterator.hasNext) {
                  val key = keyIterator.next()
                  assert(key.isInstanceOf[String])
                  addMember(key.toString)
                }
              case keys: Array[String] =>
                keys.foreach(addMember)
              case otherKeys =>
                throw new Exception(
                  s"member keys $otherKeys of class ${otherKeys.getClass} for value $value not supported",
                )
            }
            builder.finish()

          // Report the class of the unwrapped proxy rather than the polyglot wrapper class.
          case otherProxy =>
            throw new Exception(s"value $value of class ${otherProxy.getClass} not supported")
        }

      case other => throw new Exception(s"value $other of class ${other.getClass} not supported")
    }
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/transformation/polyglot/PolyglotValueDataFolderTo.scala
================================================
package com.thatdot.quine.app.model.transformation.polyglot

import java.time._

import scala.collection.immutable.SortedMap

import org.graalvm.polyglot
import org.graalvm.polyglot.proxy.{ProxyArray, ProxyObject}

import com.thatdot.data.DataFolderTo

/** [[DataFolderTo]] that builds [[Polyglot.HostValue]]s: scalars are boxed or passed through unchanged, while
  * vectors and maps are wrapped in read-only [[ProxyArray]] / [[ProxyObject]] implementations.
  */
object PolyglotValueDataFolderTo extends DataFolderTo[Polyglot.HostValue] {
  def nullValue: Polyglot.HostValue = null

  val trueValue: Polyglot.HostValue = Boolean.box(true)

  val falseValue: Polyglot.HostValue = Boolean.box(false)

  def integer(l: Long): Polyglot.HostValue = Long.box(l)

  def string(s: String): Polyglot.HostValue = s

  def bytes(b: Array[Byte]): Polyglot.HostValue = b

  def floating(d: Double): Polyglot.HostValue = Double.box(d)

  def date(d: LocalDate): Polyglot.HostValue = d

  override def time(t: OffsetTime): Polyglot.HostValue = t

  def localTime(t: LocalTime): Polyglot.HostValue = t

  def localDateTime(ldt: LocalDateTime): Polyglot.HostValue = ldt

  def zonedDateTime(zdt: ZonedDateTime): Polyglot.HostValue = zdt

  def duration(d: Duration): Polyglot.HostValue = d

  def vectorBuilder(): DataFolderTo.CollectionBuilder[Polyglot.HostValue] =
    new DataFolderTo.CollectionBuilder[Polyglot.HostValue] {
      private val elements = Vector.newBuilder[Polyglot.HostValue]
      def add(a: Polyglot.HostValue): Unit = elements += a

      def finish(): Polyglot.HostValue = VectorProxy(elements.result())
    }

  def mapBuilder(): DataFolderTo.MapBuilder[Polyglot.HostValue] = new DataFolderTo.MapBuilder[Polyglot.HostValue] {
    // Entries are accumulated into a SortedMap, so the finished map iterates in key order.
    private val kvs = SortedMap.newBuilder[String, Polyglot.HostValue]

    def add(key: String, value: Polyglot.HostValue): Unit = kvs += (key -> value)

    def finish(): Polyglot.HostValue = MapProxy(kvs.result())
  }

  /** Read-only [[ProxyArray]] over a Scala `Vector`; `set` always throws. */
  final case class VectorProxy(underlying: Vector[Polyglot.HostValue]) extends ProxyArray {
    def get(index: Long): Polyglot.HostValue = underlying(index.toInt)

    def set(index: Long, value: polyglot.Value): Unit = throw new UnsupportedOperationException

    def getSize: Long = underlying.size.toLong
  }

  /** Read-only [[ProxyObject]] over a Scala `Map`; `putMember` always throws. */
  final private case class MapProxy(underlying: Map[String, Polyglot.HostValue]) extends ProxyObject {

    def getMember(key: String): Polyglot.HostValue = underlying(key)

    def getMemberKeys: Polyglot.HostValue = VectorProxy(underlying.keys.toVector) // Could also just be List

    def hasMember(key: String): Boolean = underlying.contains(key)

    def putMember(key: String, value: polyglot.Value): Unit = throw new UnsupportedOperationException
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/transformation/polyglot/Transformation.scala
================================================
package com.thatdot.quine.app.model.transformation.polyglot

import org.graalvm.polyglot

import com.thatdot.quine.util.BaseError

/** A transformation from a host value to a polyglot value, or a [[BaseError]] on failure. */
trait Transformation {
  def apply(input: Polyglot.HostValue): Either[BaseError, polyglot.Value]
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/model/transformation/polyglot/langauges/QuineJavaScript.scala
================================================
package com.thatdot.quine.app.model.transformation.polyglot.langauges

import java.io.ByteArrayInputStream

import scala.collection.SeqView

import
cats.syntax.all._
import org.graalvm.polyglot
import org.graalvm.polyglot._
import org.graalvm.polyglot.io.IOAccess

import com.thatdot.quine.app.model.transformation.polyglot.{Polyglot, Transformation}
import com.thatdot.quine.exceptions.JavaScriptException
import com.thatdot.quine.util.BaseError

/** Shared GraalVM JavaScript engine plus a per-thread context used to evaluate JavaScript sources. */
object JavascriptRuntime {

  /** Apply the same value to a series of repetitive "js.foo" options on the language runtime.
    *
    * @param builder engine builder to configure
    * @param value   the value to assign to every flag passed
    * @param flags   the flag names to set (sans "js." prefix)
    * @return the builder after every option has been applied
    */
  private def setAll(builder: Engine#Builder, value: String, flags: Seq[String]): Engine#Builder =
    flags.foldLeft(builder) { (configured, flagName) =>
      configured.option("js." + flagName, value)
    }

  implicit private class EngineBuilderOps(private val builder: Engine#Builder) extends AnyVal {
    def enableAll(flags: String*): Engine#Builder = setAll(builder, "true", flags)
    def disableAll(flags: String*): Engine#Builder = setAll(builder, "false", flags)
  }

  private val engine = {
    val baseBuilder = Engine
      .newBuilder()
      .in(new ByteArrayInputStream(Array.emptyByteArray))
      .allowExperimentalOptions(true)
      .option("engine.WarnInterpreterOnly", "false")
    baseBuilder
      // Enable strict mode, set Array.prototype as the prototype of arrays passed-in from Java, and disable eval()
      // I can't think of anything unsafe they could do with `eval` (or `Graal`), but I removed them anyways.
      .enableAll("strict", "foreign-object-prototype", "disable-eval")
      // remove load, loadWithNewGlobal, print, console, and Graal globals
      // load / loadWithGlobal just return "PolyglotException: Error: Operation is not allowed for: foo.js"
      // ( because we set allowIO to false ), but go ahead and remove them anyways.
      .disableAll("load", "print", "console", "graal-builtin")
      .build
  }

  // Make the global context (and the objects it contains) immutable to prevent setting / changing global vars.
  // NB - do we want to recurse all the way down making everything immutable?
  private val freezeGlobals = polyglot.Source.create(
    "js",
    """
        Object.freeze(globalThis);
        Object.getOwnPropertyNames(globalThis).forEach(k => Object.freeze(globalThis[k]));
      """,
  )

  /** Create a JS context on the shared engine with every access permission switched off, then freeze its
    * globals.
    */
  private def mkContext: Context = {
    val lockedDownBuilder = Context
      .newBuilder("js")
      .engine(engine)
      .allowAllAccess(false)
      .allowCreateProcess(false)
      .allowCreateThread(false)
      .allowEnvironmentAccess(EnvironmentAccess.NONE)
      .allowExperimentalOptions(false)
      .allowHostAccess(HostAccess.NONE)
      .allowHostClassLoading(false)
      .allowIO(IOAccess.NONE)
      .allowNativeAccess(false)
      .allowPolyglotAccess(PolyglotAccess.NONE)
    val freshContext = lockedDownBuilder.build()
    freshContext.eval(freezeGlobals)
    freshContext
  }

  // One context per thread, created the first time that thread asks for it.
  private val currentJsContext: ThreadLocal[Context] = ThreadLocal.withInitial(() => mkContext)

  /** Evaluate `source` in the calling thread's context. */
  def eval(source: polyglot.Source): polyglot.Value = currentJsContext.get.eval(source)

  /** Run `a`, turning a thrown [[PolyglotException]] into a `Left` holding its message. */
  def catchPolyglotException(a: => polyglot.Value): Either[String, polyglot.Value] =
    // Syntax errors are caught here
    Either
      .catchOnly[PolyglotException](a)
      .leftMap(failure => failure.getMessage)

  /** View over the array elements of `value`, indexed from 0 up to (excluding) its array size. */
  def asSeqView(value: polyglot.Value): SeqView[polyglot.Value] =
    (0L until value.getArraySize).view.map(index => value.getArrayElement(index))

  /** The array elements of `value` as a list, or a `Left` message when `value` has no array elements. */
  def asList(value: polyglot.Value): Either[String, List[polyglot.Value]] =
    if (value.hasArrayElements) Right(asSeqView(value).toList)
    else Left(s"'$value' should be an array")
}

object JavaScriptTransformation {
  import JavascriptRuntime.{catchPolyglotException, eval}

  /** Validate the supplied JavaScript text and return a ready-to-run instance.
    *
    * @param jsText the user-supplied source (function literal or `function (…) { … }`)
    * @return the transformation, or a [[JavaScriptException]] when the text fails to evaluate or does not
    *         evaluate to something executable
    */
  def makeInstance(
    jsText: String,
  ): Either[BaseError, JavaScriptTransformation] = {
    // Wrap in parentheses so both fat-arrow and classic functions parse the same way
    val wrappedSource = polyglot.Source.create("js", s"($jsText)")
    val evaluated = catchPolyglotException(eval(wrappedSource)).left.map(JavaScriptException.apply)
    evaluated.flatMap { candidate =>
      if (candidate.canExecute) Right(new JavaScriptTransformation(candidate))
      else Left(JavaScriptException(s"'$jsText' must be a JavaScript function"))
    }
  }
}

/** [[Transformation]] backed by an executable JavaScript value. */
final class JavaScriptTransformation(
  transformationFunction: polyglot.Value,
) extends Transformation {

  /** Execute the wrapped function on `input`; a [[PolyglotException]] or `IllegalArgumentException` raised by
    * the call is returned as a [[JavaScriptException]] carrying the original message.
    */
  def apply(input: Polyglot.HostValue): Either[BaseError, polyglot.Value] =
    try Right(transformationFunction.execute(input))
    catch {
      case failure @ (_: PolyglotException | _: IllegalArgumentException) =>
        Left(JavaScriptException(failure.getMessage))
    }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/AdministrationRoutesImpl.scala
================================================
package com.thatdot.quine.app.routes

import java.time.Instant

import scala.concurrent.{ExecutionContext, Future}

import org.apache.pekko.http.scaladsl.server.Directives._
import org.apache.pekko.http.scaladsl.server.Route
import org.apache.pekko.util.{ByteString, Timeout}

import cats.implicits._
import io.circe.Json

import com.thatdot.common.logging.Log.LazySafeLogging
import com.thatdot.quine.app.config.{BaseConfig, QuineConfig}
import com.thatdot.quine.graph.{BaseGraph, InMemoryNodeLimit}
import com.thatdot.quine.model.Milliseconds
import com.thatdot.quine.persistor.PersistenceAgent
import com.thatdot.quine.routes._
import com.thatdot.quine.{BuildInfo => QuineBuildInfo}

trait AdministrationRoutesState {
  def
shutdown()(implicit ec: ExecutionContext): Future[Unit]
}

/** Builds [[MetricsReport]]s from the metric registry of a graph. */
object GenerateMetrics {

  /** Snapshot every counter, timer and numerically-valued gauge registered on `graph`.
    *
    * Gauges whose current value cannot be read as a number are left out of the report.
    *
    * @param graph graph whose `metrics.metricRegistry` is read
    * @return a report stamped with the time at which it was assembled
    */
  def metricsReport(graph: BaseGraph): MetricsReport = {
    import scala.jdk.CollectionConverters._

    val registry = graph.metrics.metricRegistry

    // Divisor applied to every timer snapshot value below (presumably nanoseconds to milliseconds).
    val nanosPerMilli = 1e6
    def scaled(raw: Double): Double = raw / nanosPerMilli

    val counters = registry.getCounters.asScala.map { case (counterName, counter) =>
      Counter(counterName, counter.getCount)
    }

    val timers = registry.getTimers.asScala.map { case (timerName, timer) =>
      val snapshot = timer.getSnapshot
      TimerSummary(
        timerName,
        min = scaled(snapshot.getMin.toDouble),
        max = scaled(snapshot.getMax.toDouble),
        median = scaled(snapshot.getMedian),
        mean = scaled(snapshot.getMean),
        q1 = scaled(snapshot.getValue(0.25)),
        q3 = scaled(snapshot.getValue(0.75)),
        oneMinuteRate = timer.getOneMinuteRate,
        `90` = scaled(snapshot.getValue(0.90)),
        `99` = scaled(snapshot.get99thPercentile()),
        `80` = scaled(snapshot.getValue(0.80)),
        `20` = scaled(snapshot.getValue(0.20)),
        `10` = scaled(snapshot.getValue(0.10)),
      )
    }

    val gauges: Seq[NumericGauge] = {
      // Read a gauged value as a Double when it is one of the numeric types listed; otherwise None.
      def coerceDouble[T](value: T): Option[Double] = value match {
        case x: Double => Some(x)
        case x: Float => Some(x.toDouble)
        case x: Long => Some(x.toDouble)
        case x: Int => Some(x.toDouble)
        case x: java.lang.Number => Some(x.doubleValue)
        case _ =>
          //          logger.warn("uh oh",
          //            new ClassCastException(
          //              s"Unable to coerce gauged value $value of type ${value.getClass.getSimpleName} to a numeric type"
          //            )
          //          )
          None
      }

      registry.getGauges.asScala.flatMap { case (gaugeName, gauge) =>
        coerceDouble(gauge.getValue).map(numeric => NumericGauge(gaugeName, numeric))
      }.toSeq
    }

    MetricsReport(
      Instant.now(),
      counters.toSeq,
      timers.toSeq,
      gauges,
    )
  }
}

/** The Pekko HTTP implementation of [[AdministrationRoutes]] */
trait AdministrationRoutesImpl
    extends AdministrationRoutes
    with com.thatdot.quine.app.routes.exts.circe.JsonEntitiesFromSchemas
    with com.thatdot.quine.app.routes.exts.PekkoQuineEndpoints { self: LazySafeLogging =>

  def graph: BaseGraph

  implicit def timeout: Timeout

  /** Current product version */
  val version: String

  /** Current config */
  def currentConfig: Json

  /** State in the application */
  val quineApp: AdministrationRoutesState

  /** A sample configuration that will be used for documenting the admin/config route. */
  def sampleConfig: BaseConfig = QuineConfig()

  // Reports version, git commit (suffixed "-DIRTY" when there were uncommitted changes), JVM and persistence version.
  private val buildInfoRoute = buildInfo.implementedBy { _ =>
    val dirtySuffix = if (QuineBuildInfo.gitUncommittedChanges) "-DIRTY" else ""
    val gitCommit: Option[String] = QuineBuildInfo.gitHeadCommit.map(commit => s"$commit$dirtySuffix")
    val jvmDescription =
      s"${QuineBuildInfo.javaVmName} ${QuineBuildInfo.javaVersion} (${QuineBuildInfo.javaVendor})"
    QuineInfo(
      version,
      gitCommit,
      QuineBuildInfo.gitHeadCommitDate,
      jvmDescription,
      PersistenceAgent.CurrentVersion.shortString,
    )
  }

  private val configRoute = config(sampleConfig.loadedConfigJson).implementedBy(_ => currentConfig)

  private val livenessProbeRoute = livenessProbe.implementedBy(_ => ())

  private val readinessProbeRoute = readinessProbe.implementedBy(_ => graph.isReady)

  private val metricsRoute = metrics.implementedBy(_ => GenerateMetrics.metricsReport(graph))

  /** Terminate the graph's actor system; the returned future completes once termination has finished. */
  protected def performShutdown(): Future[Unit] = graph.system.terminate().map(_ => ())(ExecutionContext.parasitic)

  // Deliberately not using `implementedByAsync`. The API will confirm receipt of the request, but not wait for completion.
  private def shutdownRoute = shutdown.implementedBy { _ =>
    performShutdown()
    ()
  }

  private val metaDataRoute = metaData.implementedByAsync { _ =>
    val allMetaData = graph.namespacePersistor.getAllMetaData()
    allMetaData.map(entries => entries.fmap(bytes => ByteString(bytes)))(graph.shardDispatcherEC)
  }

  // Applies the requested per-shard limits and reports back the shards for which a limit is returned.
  private val shardSizesRoute = shardSizes.implementedByAsync { resizes =>
    val requestedLimits = resizes.fmap(limit => InMemoryNodeLimit(limit.softLimit, limit.hardLimit))
    graph
      .shardInMemoryLimits(requestedLimits)
      .map { limitsByShard =>
        limitsByShard.collect { case (shardIdx, Some(InMemoryNodeLimit(soft, hard))) =>
          shardIdx -> ShardInMemoryLimit(soft, hard)
        }
      }(ExecutionContext.parasitic)
  }

  private val requestSleepNodeRoute = requestNodeSleep.implementedByAsync { case (quineId, namespaceParam) =>
    graph.requiredGraphIsReadyFuture(
      graph.requestNodeSleep(namespaceFromParam(namespaceParam), quineId),
    )
  }

  // Hashes the graph at the requested time, defaulting to the current time when none was supplied.
  private val graphHashCodeRoute = graphHashCode.implementedByAsync { case (atTime, namespaceParam) =>
    graph.requiredGraphIsReadyFuture {
      val at = atTime.getOrElse(Milliseconds.currentTime())
      graph
        .getGraphHashCode(namespaceFromParam(namespaceParam), Some(at))
        .map(hash => GraphHashCode(hash.toString, at.millis))(ExecutionContext.parasitic)
    }
  }

  final val administrationRoutes: Route =
    buildInfoRoute ~
    configRoute ~
    readinessProbeRoute ~
    livenessProbeRoute ~
    metricsRoute ~
    shutdownRoute ~
    metaDataRoute ~
    shardSizesRoute ~
    requestSleepNodeRoute ~
    graphHashCodeRoute
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/AlgorithmRoutesImpl.scala
================================================
package com.thatdot.quine.app.routes

import java.nio.file.{FileAlreadyExistsException, FileSystemException, Files, InvalidPathException, Paths}

import scala.concurrent.{ExecutionContext, Future}
import scala.util.Try
import scala.util.control.NonFatal

import org.apache.pekko.http.scaladsl.server.Directives._
import org.apache.pekko.http.scaladsl.server.Route
import org.apache.pekko.stream.connectors.s3.scaladsl.S3
import org.apache.pekko.stream.scaladsl.FileIO
import org.apache.pekko.util.Timeout

import endpoints4s.Invalid

import com.thatdot.quine.app.routes.exts.circe.JsonEntitiesFromSchemas
import com.thatdot.quine.compiler.cypher
import com.thatdot.quine.graph.cypher.{CompiledQuery, CypherException, Location}
import com.thatdot.quine.graph.{AlgorithmGraph, NamespaceId}
import com.thatdot.quine.model.Milliseconds
import com.thatdot.quine.routes.AlgorithmRoutes

/** Helpers shared by the random-walk endpoints. */
trait AlgorithmMethods {

  /** Compile the query run during a walk.
    *
    * @param queryOpt query suffix appended to the default walk prefix; the default walk query is used when absent
    * @return the compiled query
    * @throws java.lang.IllegalArgumentException if the compiled query is not read-only or may scan all nodes
    */
  def compileWalkQuery(queryOpt: Option[String]): CompiledQuery[Location.OnNode] = {
    val queryText = queryOpt.fold(AlgorithmGraph.defaults.walkQuery)(AlgorithmGraph.defaults.walkPrefix + _)
    val compiledQuery = cypher.compile(queryText, unfixedParameters = List("n"))
    require(compiledQuery.isReadOnly, s"Query must conclusively be a read-only query. Provided: $queryText")
    require(!compiledQuery.canContainAllNodeScan, s"Query must not scan all nodes. Provided: $queryText")
    compiledQuery
  }

  /** Build the default output file name from the walk parameters, substituting the defaults from
    * `AlgorithmGraph.defaults` (or a placeholder) for any parameter that was not supplied.
    */
  def generateDefaultFileName(
    atTime: Option[Milliseconds],
    lengthOpt: Option[Int],
    countOpt: Option[Int],
    queryOpt: Option[String],
    returnParamOpt: Option[Double],
    inOutParamOpt: Option[Double],
    seedOpt: Option[String],
  ): String =
    s"""graph-walk-
       |${atTime.map(_.millis).getOrElse(s"${System.currentTimeMillis}_T")}-
       |${lengthOpt.getOrElse(AlgorithmGraph.defaults.walkLength)}x
       |${countOpt.getOrElse(AlgorithmGraph.defaults.walkCount)}-q
       |${queryOpt.map(_.length).getOrElse("0")}-
       |${returnParamOpt.getOrElse(AlgorithmGraph.defaults.returnParam)}x
       |${inOutParamOpt.getOrElse(AlgorithmGraph.defaults.inOutParam)}-
       |${seedOpt.getOrElse("_")}.csv""".stripMargin.replace("\n", "")
}

/** Server-side implementation of the random-walk endpoints declared in [[AlgorithmRoutes]]. */
trait AlgorithmRoutesImpl
    extends AlgorithmRoutes
    with exts.PekkoQuineEndpoints
    with AlgorithmMethods
    with JsonEntitiesFromSchemas {

  implicit def graph: AlgorithmGraph

  implicit def timeout: Timeout

  /** Translate a failure from walk-parameter validation or query compilation into a client error.
    * Anything that is not a query or argument problem is rethrown.
    */
  private def walkErrorToInvalid(error: Throwable): Invalid = error match {
    case e: CypherException => Invalid(s"Invalid query: ${e.getMessage}")
    case e: IllegalArgumentException => Invalid(e.getMessage)
    case NonFatal(e) => throw e // Return an Internal Server Error
    case other => throw other // this might expose more than we want
  }

  private val algorithmSaveRandomWalksRoute = algorithmSaveRandomWalks.implementedBy {
    case (
          lengthOpt,
          countOpt,
          queryOpt,
          returnParamOpt,
          inOutParamOpt,
          seedOpt,
          namespaceParam,
          atTime: Option[Milliseconds],
          parallelism,
          saveLocation,
        ) =>
      graph.requiredGraphIsReady()
      val namespaceId = namespaceFromParam(namespaceParam)
      if (!graph.getNamespaces.contains(namespaceId)) Right(None)
      else {
        val defaultFileName =
          generateDefaultFileName(atTime, lengthOpt, countOpt, queryOpt, returnParamOpt, inOutParamOpt, seedOpt)
        val fileName = saveLocation match {
          case S3Bucket(_, keyOpt) => keyOpt.getOrElse(defaultFileName)
          case LocalFile(None) => defaultFileName
          case LocalFile(Some(fileName)) =>
            if (fileName.nonEmpty) fileName else defaultFileName
        }
        Try {
          require(!lengthOpt.exists(_ < 1), "walk length cannot be less than one.")
          require(!countOpt.exists(_ < 0), "walk count cannot be less than zero.")
          require(!inOutParamOpt.exists(_ < 0d), "in-out parameter cannot be less than zero.")
          require(!returnParamOpt.exists(_ < 0d), "return parameter cannot be less than zero.")
          require(parallelism >= 1, "parallelism cannot be less than one.")
          // Compile the query BEFORE touching the destination: creating the local file first would leave an
          // empty file behind whenever the query is rejected, and a retry with the same name would then fail
          // because the file already exists.
          val compiledQuery = compileWalkQuery(queryOpt)
          val saveSink = saveLocation match {
            case S3Bucket(bucketName, _) =>
              S3.multipartUpload(bucketName, fileName)
            case LocalFile(_) =>
              val p = Paths.get(fileName)
              Files.createFile(p) // Deliberately cause an error if it is not accessible
              FileIO.toPath(p)
          }
          saveSink -> compiledQuery
        }.map { case (sink, compiledQuery) =>
          // The walk is started here; only the file name is returned to the caller.
          graph.algorithms
            .saveRandomWalks(
              sink,
              compiledQuery,
              lengthOpt.getOrElse(AlgorithmGraph.defaults.walkLength),
              countOpt.getOrElse(AlgorithmGraph.defaults.walkCount),
              returnParamOpt.getOrElse(AlgorithmGraph.defaults.returnParam),
              inOutParamOpt.getOrElse(AlgorithmGraph.defaults.inOutParam),
              seedOpt,
              namespaceFromParam(namespaceParam),
              atTime,
              parallelism,
            )
          Some(fileName)
        }.toEither
          .left
          .map {
            case _: InvalidPathException | _: FileAlreadyExistsException | _: SecurityException |
                _: FileSystemException =>
              Invalid(s"Invalid file name: $fileName") // Return a Bad Request Error
            case e: CypherException => Invalid(s"Invalid query: ${e.getMessage}")
            case e: IllegalArgumentException => Invalid(e.getMessage)
            case NonFatal(e) => throw e // Return an Internal Server Error
            case other => throw other // This might expose more than we want
          }
      }
  }

  private val algorithmRandomWalkRoute = algorithmRandomWalk.implementedByAsync {
    case (qid, (lengthOpt, queryOpt, returnParamOpt, inOutParamOpt, seedOpt, atTime, namespaceParam)) =>
      val errors = Try {
        require(!lengthOpt.exists(_ < 1), "walk length cannot be less than one.")
        require(!inOutParamOpt.exists(_ < 0d), "in-out parameter cannot be less than zero.")
        require(!returnParamOpt.exists(_ < 0d), "return parameter cannot be less than zero.")
        Some(Nil)
      }.toEither.left
        .map(walkErrorToInvalid)
      if (errors.isLeft) Future.successful[Either[Invalid, Option[List[String]]]](errors)
      else {
        val ns = namespaceFromParam(namespaceParam)
        graph.requiredGraphIsReady()
        ifNamespaceFound(ns) {
          // Compile inside a Try so that a rejected query is reported as a client error instead of escaping
          // the handler as an exception.
          Try(compileWalkQuery(queryOpt)).toEither.left.map(walkErrorToInvalid) match {
            case Left(invalid) => Future.successful(Left(invalid))
            case Right(compiledQuery) =>
              graph.algorithms
                .randomWalk(
                  qid,
                  compiledQuery,
                  lengthOpt.getOrElse(AlgorithmGraph.defaults.walkLength),
                  returnParamOpt.getOrElse(AlgorithmGraph.defaults.returnParam),
                  inOutParamOpt.getOrElse(AlgorithmGraph.defaults.inOutParam),
                  None,
                  seedOpt,
                  ns,
                  atTime,
                )
                .map(w => Right(w.acc))(ExecutionContext.parasitic)
          }
        }
      }
  }

  final val algorithmRoutes: Route =
    algorithmSaveRandomWalksRoute ~
    algorithmRandomWalkRoute

  /** Evaluate `ifFound` only when `namespaceId` is a known namespace, wrapping its successful result in `Some`;
    * otherwise complete immediately with `Right(None)`.
    */
  final private def ifNamespaceFound[A](namespaceId: NamespaceId)(
    ifFound: => Future[Either[ClientErrors, A]],
  ): Future[Either[ClientErrors, Option[A]]] =
    if (!graph.getNamespaces.contains(namespaceId)) Future.successful(Right(None))
    else ifFound.map(_.map(Some(_)))(ExecutionContext.parasitic)
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/BaseAppRoutes.scala
================================================
package com.thatdot.quine.app.routes

import
java.nio.file.Paths

import scala.concurrent.Future
import scala.concurrent.duration.DurationInt
import scala.io.Source

import org.apache.pekko.http.scaladsl.model.headers._
import org.apache.pekko.http.scaladsl.model.{HttpCharsets, HttpEntity, MediaType, StatusCodes}
import org.apache.pekko.http.scaladsl.server.Directives._
import org.apache.pekko.http.scaladsl.server.Route
import org.apache.pekko.http.scaladsl.{ConnectionContext, Http}
import org.apache.pekko.stream.Materializer
import org.apache.pekko.util.Timeout

import nl.altindag.ssl.SSLFactory

import com.thatdot.common.logging.Log.{LazySafeLogging, Safe, SafeLoggableInterpolator}
import com.thatdot.quine.app.config.{UseMtls, WebServerBindConfig}
import com.thatdot.quine.graph.BaseGraph
import com.thatdot.quine.model.QuineIdProvider
import com.thatdot.quine.util.Tls.SSLFactoryBuilderOps

object MediaTypes {
  /** The `application/yaml` media type with a fixed UTF-8 charset and the `yaml` file extension. */
  val `application/yaml`: MediaType.WithFixedCharset =
    MediaType.applicationWithFixedCharset("yaml", HttpCharsets.`UTF-8`, "yaml")
}

/** Base for the application's HTTP routes: combines the static, OpenAPI and REST routes into the main route
  * and binds the web server, optionally with TLS / mTLS.
  */
trait BaseAppRoutes extends LazySafeLogging with endpoints4s.pekkohttp.server.Endpoints {

  val graph: BaseGraph

  val timeout: Timeout

  implicit def idProvider: QuineIdProvider = graph.idProvider

  implicit lazy val materializer: Materializer = graph.materializer

  /** Inject config values into JS resource and return as HttpEntity
    *
    * @param resourcePath path to the JS resource file
    * @param defaultV2Api whether to default to V2 API (true) or V1 API (false)
    * @return Route that serves the JS with injected config
    */
  protected def getJsWithInjectedConfig(resourcePath: String, defaultV2Api: Boolean): Route = {
    val resourceUrl = Option(getClass.getClassLoader.getResource(resourcePath))
    resourceUrl match {
      case Some(url) =>
        // Decode explicitly as UTF-8: the entity below is declared as UTF-8, so decoding with the platform
        // default charset could corrupt non-ASCII content on JVMs whose default differs.
        val source = Source.fromURL(url, "UTF-8")
        try {
          val content = source.mkString
          val injectedContent = content.replace("/*{{DEFAULT_V2_API}}*/true", defaultV2Api.toString)
          val jsContentType = MediaType.applicationWithFixedCharset("javascript", HttpCharsets.`UTF-8`)
          complete(HttpEntity(jsContentType, injectedContent))
        } finally source.close()
      case None =>
        complete(StatusCodes.NotFound, s"Resource not found: $resourcePath")
    }
  }

  /** Serves up the static assets from resources and for JS/CSS dependencies */
  def staticFilesRoute: Route

  /** OpenAPI route */
  def openApiRoute: Route

  /** Rest API route */
  def apiRoute: Route

  /** Final HTTP route */
  def mainRoute: Route = {
    import Util.RouteHardeningOps.syntax._
    staticFilesRoute.withSecurityHardening ~
    redirectToNoTrailingSlashIfPresent(StatusCodes.PermanentRedirect) {
      apiRoute.withHstsHardening ~
      respondWithHeader(`Access-Control-Allow-Origin`.*) {
        // NB the following resources will be available to request from ANY source (including evilsite.com):
        // be sure this is what you want!
        openApiRoute.withSecurityHardening
      }
    }
  }

  /** Bind a webserver to server up the main route
    *
    * @param interface network interface to bind to
    * @param port      port to bind to
    * @param useTls    whether to serve over HTTPS
    * @param useMTls   mTLS settings; only consulted when `useTls` is true
    * @return the server binding, once bound
    */
  def bindWebServer(
    interface: String,
    port: Int,
    useTls: Boolean,
    useMTls: UseMtls = UseMtls(),
  ): Future[Http.ServerBinding] = {
    import graph.system
    val serverBuilder = Http()(system)
      .newServerAt(interface, port)
      .adaptSettings(
        // See https://pekko.apache.org/docs/pekko-http/current//common/http-model.html#registering-custom-media-types
        _.mapWebsocketSettings(_.withPeriodicKeepAliveMaxIdle(10.seconds))
          .mapParserSettings(_.withCustomMediaTypes(MediaTypes.`application/yaml`)),
      )

    import Util.RouteHardeningOps.syntax._

    //capture unknown addresses with a 404
    val routeWithDefault =
      mainRoute ~ complete(
        StatusCodes.NotFound,
        HttpEntity("The requested resource could not be found."),
      ).withHstsHardening

    val sslFactory: Option[SSLFactory] = Option.when(useTls) {
      // Keystore override from the environment: used only when BOTH the path and the password are set.
      val keystoreOverride =
        (sys.env.get(WebServerBindConfig.KeystorePathEnvVar) -> sys.env.get(
          WebServerBindConfig.KeystorePasswordEnvVar,
        )) match {
          case (Some(keystorePath), Some(password)) => Some(keystorePath -> password.toCharArray)
          case (Some(_), None) =>
            logger.warn(
              safe"""'${Safe(WebServerBindConfig.KeystorePathEnvVar)}' was specified but
                    |'${Safe(WebServerBindConfig.KeystorePasswordEnvVar)}' was not. Ignoring.
                    |""".cleanLines,
            )
            None
          case (None, Some(_)) =>
            logger.warn(
              safe"""'${Safe(WebServerBindConfig.KeystorePasswordEnvVar)}' was specified but
                    |'${Safe(WebServerBindConfig.KeystorePathEnvVar)}' was not. Ignoring.
                    |""".cleanLines,
            )
            None
          case (None, None) => None
        }
      val baseBuilder = SSLFactory
        .builder()
        .withSystemPropertyDerivedIdentityMaterial()
        .withSystemPropertyDerivedCiphersSafe()
        .withSystemPropertyDerivedProtocolsSafe()

      val builderWithOverride = keystoreOverride.fold(baseBuilder) { case (file, password) =>
        baseBuilder.withIdentityMaterial(file, password)
      }

      // Add truststore material for mTLS if enabled
      val builderWithTruststore = if (useMTls.enabled) {
        val truststoreOverride =
          // First priority: explicit truststore configuration
          useMTls.trustStore
            .map { mtlsTs =>
              mtlsTs.path.getAbsolutePath -> mtlsTs.password.toCharArray
            }
            .orElse {
              // Fallback: system properties
              (sys.props.get("javax.net.ssl.trustStore") -> sys.props.get("javax.net.ssl.trustStorePassword")) match {
                case (Some(truststorePath), Some(password)) => Some(truststorePath -> password.toCharArray)
                case (Some(_), None) =>
                  logger.warn(
                    safe"""'javax.net.ssl.trustStore' was specified but 'javax.net.ssl.trustStorePassword' was not.
                          |Client certificate validation will not work as expected.
                          |""".cleanLines,
                  )
                  None
                case (None, Some(_)) =>
                  logger.warn(
                    safe"""'javax.net.ssl.trustStorePassword' was specified but 'javax.net.ssl.trustStore' was not.
                          |Client certificate validation will not work as expected.
                          |""".cleanLines,
                  )
                  None
                case (None, None) =>
                  logger.warn(
                    safe"""mTLS is enabled but no truststore is configured. Neither 'useMtls.trustStore' was set
                          |nor were 'javax.net.ssl.trustStore' and 'javax.net.ssl.trustStorePassword' system properties.
                          |Client certificates will not be validated.
                          |""".cleanLines,
                  )
                  None
              }
            }

        truststoreOverride.fold(builderWithOverride) { case (filePath, password) =>
          builderWithOverride.withTrustMaterial(Paths.get(filePath), password)
        }
      } else {
        builderWithOverride
      }

      builderWithTruststore.build()
    }

    // Create connection context with mTLS support if enabled
    val connectionContext = sslFactory.map { factory =>
      if (useMTls.enabled) {
        ConnectionContext.httpsServer { () =>
          val engine = factory.getSslContext.createSSLEngine()
          engine.setUseClientMode(false)
          // Require every client to present a certificate during the handshake.
          engine.setNeedClientAuth(true)
          engine
        }
      } else {
        ConnectionContext.httpsServer(factory.getSslContext)
      }
    }

    connectionContext
      .fold(serverBuilder)(serverBuilder.enableHttps(_))
      .bind(Route.toFunction(routeWithDefault)(system))
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/DebugRoutesImpl.scala
================================================
package com.thatdot.quine.app.routes

import scala.concurrent.{ExecutionContext, Future}

import org.apache.pekko.http.scaladsl.server.Directives._
import org.apache.pekko.http.scaladsl.server.Route
import org.apache.pekko.util.Timeout

import com.thatdot.common.logging.Log._
import com.thatdot.common.quineid.QuineId
import com.thatdot.quine.graph._
import com.thatdot.quine.graph.messaging.LiteralMessage.{
  DgnWatchableEventIndexSummary,
  LocallyRegisteredStandingQuery,
  NodeInternalState,
  SqStateResult,
  SqStateResults,
}
import com.thatdot.quine.model
import com.thatdot.quine.model.{EdgeDirection => _, _}
import com.thatdot.quine.routes.EdgeDirection._
import com.thatdot.quine.routes._
import com.thatdot.quine.routes.exts.NamespaceParameter

/** The Pekko HTTP implementation of [[DebugOpsRoutes]] */
trait DebugRoutesImpl
    extends DebugOpsRoutes
    with com.thatdot.quine.app.routes.exts.ServerQuineEndpoints
    with com.thatdot.quine.app.routes.exts.circe.JsonEntitiesFromSchemas {

  implicit protected def logConfig: LogConfig

  private def toEdgeDirection(dir: model.EdgeDirection): EdgeDirection = dir
match {
    case model.EdgeDirection.Outgoing => Outgoing
    case model.EdgeDirection.Incoming => Incoming
    case model.EdgeDirection.Undirected => Undirected
  }

  /** Convert an API-level [[EdgeDirection]] into the model-level edge direction */
  private def fromEdgeDirection(dir: EdgeDirection): model.EdgeDirection = dir match {
    case Outgoing => model.EdgeDirection.Outgoing
    case Incoming => model.EdgeDirection.Incoming
    case Undirected => model.EdgeDirection.Undirected
  }

  /* Not implicit since we use this only _explicitly_ to turn [[NodeInternalState]]
   * into JSON (the choice not to expose a JSON schema for the endpoint is
   * intentional, so as to discourage users from using this outside of debugging)
   *
   * TODO this should be possible to rewrite as just "define a schema for quinevalue, propertyvalue, and eventtime, then
   * derive the rest" -- The implicit resolution scope will need to be corrected but we could remove the redundant
   * intermediate implicits.
   */
  lazy val nodeInternalStateSchema: Record[NodeInternalState] = {
    // Hand-written leaf schemas: these three are defined via `xmap` over existing schemas.
    implicit val quineValueSchema: JsonSchema[QuineValue] =
      anySchema(None).xmap(QuineValue.fromJson)(QuineValue.toJson)
    // NOTE(review): `_.deserialized.get` will throw when encoding a property whose value failed to
    // deserialize (assuming `deserialized` is a Try/Option) -- confirm this is acceptable for a debug endpoint.
    implicit val propertyValueSchema: JsonSchema[PropertyValue] =
      quineValueSchema.xmap(PropertyValue.apply)(_.deserialized.get)
    implicit val eventTimeSchema: JsonSchema[EventTime] =
      longJsonSchema.xmap(EventTime.fromRaw)(_.eventTime)
    // Generically-derived schemas; they are declared as local implicits so later derivations can find them.
    implicit val msSchema: Record[Milliseconds] = genericRecord[Milliseconds]
    implicit val halfEdgeSchema: Record[HalfEdge] = genericRecord[HalfEdge]
    implicit val lSq: Record[LocallyRegisteredStandingQuery] = genericRecord[LocallyRegisteredStandingQuery]
    implicit val sqIdSchema: Record[StandingQueryId] = genericRecord[StandingQueryId]
    implicit val dgnLocalEventIndexSummarySchema: Record[DgnWatchableEventIndexSummary] =
      genericRecord[DgnWatchableEventIndexSummary]
    implicit val neSchema: Tagged[NodeEvent] = genericTagged[NodeEvent]
    implicit val newtSchema: Record[NodeEvent.WithTime[NodeEvent]] = genericRecord[NodeEvent.WithTime[NodeEvent]]
    implicit val sqResult: Record[SqStateResult] = genericRecord[SqStateResult]
    implicit val sqResults: Record[SqStateResults] = genericRecord[SqStateResults]
    genericRecord[NodeInternalState]
  }

  implicit def graph: LiteralOpsGraph
  implicit def timeout: Timeout

  /** Read a node's properties and edges (at `atTime`) and return them as a [[LiteralNode]] */
  private val debugGetRoute = debugOpsGet.implementedByAsync {
    case (qid: QuineId, atTime: AtTime, namespaceParam: NamespaceParameter) =>
      graph.requiredGraphIsReadyFuture {
        // Both lookups are started before either result is awaited.
        val propsF = graph.literalOps(namespaceFromParam(namespaceParam)).getProps(qid, atTime = atTime)
        val edgesF = graph.literalOps(namespaceFromParam(namespaceParam)).getEdges(qid, atTime = atTime)
        propsF
          .zip(edgesF)
          .map { case (props, edges) =>
            LiteralNode(
              props.map { case (k, v) =>
                k.name -> QuineValue.toJson(v.deserialized.get)(graph.idProvider, logConfig)
              },
              edges.toSeq.map { case HalfEdge(t, d, o) => RestHalfEdge(t.name, toEdgeDirection(d), o) },
            )
          }(graph.nodeDispatcherEC)
      }
  }

  /** Set every property and add every edge described by the supplied [[LiteralNode]].
    *
    * Note that, despite its name, this value implements the `debugOpsPut` endpoint.
    */
  private val debugPostRoute = debugOpsPut.implementedByAsync {
    case (qid: QuineId, namespaceParam: NamespaceParameter, node: LiteralNode[QuineId]) =>
      graph.requiredGraphIsReadyFuture {
        val namespaceId = namespaceFromParam(namespaceParam)
        val propsF = Future.traverse(node.properties.toList) { case (typ, value) =>
          graph.literalOps(namespaceId).setProp(qid, typ, QuineValue.fromJson(value))
        }(implicitly, graph.nodeDispatcherEC)
        // An incoming half edge is recorded as a directed edge from the other node to `qid`.
        val edgesF = Future.traverse(node.edges) {
          case RestHalfEdge(typ, Outgoing, to) =>
            graph.literalOps(namespaceId).addEdge(qid, to, typ, isDirected = true)
          case RestHalfEdge(typ, Incoming, to) =>
            graph.literalOps(namespaceId).addEdge(to, qid, typ, isDirected = true)
          case RestHalfEdge(typ, Undirected, to) =>
            graph.literalOps(namespaceId).addEdge(qid, to, typ, isDirected = false)
        }(implicitly, graph.nodeDispatcherEC)
        // `edgesF` was already started above; this only sequences completion and discards the results.
        propsF.flatMap(_ => edgesF)(ExecutionContext.parasitic).map(_ => ())(ExecutionContext.parasitic)
      }
  }

  /** Delete the node identified by `qid` */
  private val debugDeleteRoute = debugOpsDelete.implementedByAsync {
    case (qid: QuineId, namespaceParam: NamespaceParameter) =>
      graph.requiredGraphIsReadyFuture {
        graph.literalOps(namespaceFromParam(namespaceParam)).deleteNode(qid)
      }
  }

  /** Return the node's internal state, encoded to JSON with [[nodeInternalStateSchema]] */
  protected val debugVerboseRoute: Route = debugOpsVerbose.implementedByAsync {
    case (qid: QuineId, atTime: AtTime, namespaceParam: NamespaceParameter) =>
      graph.requiredGraphIsReadyFuture {
        graph
          .literalOps(namespaceFromParam(namespaceParam))
          .logState(qid, atTime)
          .map(nodeInternalStateSchema.encoder(_))(graph.nodeDispatcherEC)
      }
  }

  /** List a node's edges, optionally filtered by type, direction and other endpoint, up to `limit` */
  private val debugEdgesGetRoute = debugOpsEdgesGet.implementedByAsync {
    case (qid, (atTime, limit, edgeDirOpt, otherOpt, edgeTypeOpt, namespaceParam)) =>
      graph.requiredGraphIsReadyFuture {
        val edgeDirOpt2 = edgeDirOpt.map(fromEdgeDirection)
        graph
          .literalOps(namespaceFromParam(namespaceParam))
          .getEdges(qid, edgeTypeOpt.map(Symbol.apply), edgeDirOpt2, otherOpt, limit, atTime)
          .map(_.toVector.map { case HalfEdge(t, d, o) => RestHalfEdge(t.name, toEdgeDirection(d), o) })(
            graph.nodeDispatcherEC,
          )
      }
  }

  /** Add each of the supplied edges to the node identified by `qid` */
  private val debugEdgesPutRoute = debugOpsEdgesPut.implementedByAsync { case (qid, namespaceParam, edges) =>
    graph.requiredGraphIsReadyFuture {
      Future
        .traverse(edges) { case RestHalfEdge(edgeType, edgeDir, other) =>
          edgeDir match {
            case Undirected =>
              graph.literalOps(namespaceFromParam(namespaceParam)).addEdge(qid, other, edgeType, isDirected = false)
            case Outgoing =>
              graph.literalOps(namespaceFromParam(namespaceParam)).addEdge(qid, other, edgeType, isDirected = true)
            // Incoming: the directed edge starts at the other node and ends at `qid`.
            case Incoming =>
              graph.literalOps(namespaceFromParam(namespaceParam)).addEdge(other, qid, edgeType, isDirected = true)
          }
        }(implicitly, graph.nodeDispatcherEC)
        .map(_ => ())(ExecutionContext.parasitic)
    }
  }

  /** Remove each of the supplied edges from the node identified by `qid` */
  private val debugEdgesDeleteRoute = debugOpsEdgeDelete.implementedByAsync { case (qid, namespaceParam, edges) =>
    graph.requiredGraphIsReadyFuture {
      Future
        .traverse(edges) { case RestHalfEdge(edgeType, edgeDir, other) =>
          edgeDir match {
            case Undirected =>
              graph.literalOps(namespaceFromParam(namespaceParam)).removeEdge(qid, other, edgeType, isDirected = false)
            case Outgoing =>
              graph.literalOps(namespaceFromParam(namespaceParam)).removeEdge(qid, other, edgeType, isDirected = true)
            // Incoming: mirrors the argument order used when the edge is added.
            case Incoming =>
              graph.literalOps(namespaceFromParam(namespaceParam)).removeEdge(other, qid, edgeType, isDirected = true)
          }
        }(implicitly, graph.nodeDispatcherEC)
        .map(_ => ())(ExecutionContext.parasitic)
    }
  }

  /** Like [[debugEdgesGetRoute]], but reads via `getHalfEdges` instead of `getEdges` */
  private val debugHalfEdgesGetRoute = debugOpsHalfEdgesGet.implementedByAsync {
    case (qid, (atTime, limit, edgeDirOpt, otherOpt, edgeTypeOpt, namespaceParam)) =>
      graph.requiredGraphIsReadyFuture {
        val edgeDirOpt2 = edgeDirOpt.map(fromEdgeDirection)
        graph
          .literalOps(namespaceFromParam(namespaceParam))
          .getHalfEdges(qid, edgeTypeOpt.map(Symbol.apply), edgeDirOpt2, otherOpt, limit, atTime)
          .map(_.toVector.map { case HalfEdge(t, d, o) => RestHalfEdge(t.name, toEdgeDirection(d), o) })(
            graph.nodeDispatcherEC,
          )
      }
  }

  /** Look up a single property by key; yields `None` when the node has no such property.
    *
    * All of the node's properties are fetched and the requested key is then selected from the result.
    */
  private val debugPropertyGetRoute = debugOpsPropertyGet.implementedByAsync {
    case (qid, propKey, atTime, namespaceParam) =>
      graph.requiredGraphIsReadyFuture {
        graph
          .literalOps(namespaceFromParam(namespaceParam))
          .getProps(qid, atTime)
          .map(m =>
            m.get(Symbol(propKey)).map(_.deserialized.get).map(qv => QuineValue.toJson(qv)(graph.idProvider, logConfig)),
          )(
            graph.nodeDispatcherEC,
          )
      }
  }

  /** Set a single property, converting the supplied JSON into a [[QuineValue]] */
  private val debugPropertyPutRoute = debugOpsPropertyPut.implementedByAsync {
    case (qid, propKey, namespaceParam, value) =>
      graph.requiredGraphIsReadyFuture {
        graph
          .literalOps(namespaceFromParam(namespaceParam))
          .setProp(qid, propKey, QuineValue.fromJson(value))
      }
  }

  /** Remove a single property from the node */
  private val debugPropertyDeleteRoute = debugOpsPropertyDelete.implementedByAsync {
    case (qid, propKey, namespaceParam) =>
      graph.requiredGraphIsReadyFuture {
        graph.literalOps(namespaceFromParam(namespaceParam)).removeProp(qid, propKey)
      }
  }

  /** All debug routes, tried in the order listed */
  final val debugRoutes: Route = {
    debugGetRoute ~
    debugDeleteRoute ~
    debugPostRoute ~
    debugVerboseRoute ~
    debugEdgesGetRoute ~
    debugEdgesPutRoute ~
    debugEdgesDeleteRoute ~
    debugHalfEdgesGetRoute ~
    debugPropertyGetRoute ~
    debugPropertyPutRoute ~
    debugPropertyDeleteRoute
  }
}
================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/HealthAppRoutes.scala
================================================
package com.thatdot.quine.app.routes

import scala.concurrent.{ExecutionContext, Future}

import org.apache.pekko.http.scaladsl.server.Directives._
import org.apache.pekko.http.scaladsl.server.Route
import org.apache.pekko.util.Timeout

import sttp.apispec.openapi.Info
import sttp.tapir.server.ServerEndpoint
import sttp.tapir.server.pekkohttp.PekkoHttpServerInterpreter
import sttp.tapir.{EndpointInput, query}

import com.thatdot.common.logging.Log.{LazySafeLogging, LogConfig}
import com.thatdot.quine.app.QuineApp
import com.thatdot.quine.app.config.BaseConfig
import com.thatdot.quine.app.v2api.OssApiMethods
import com.thatdot.quine.app.v2api.definitions.{CommonParameters, TapirRoutes}
import com.thatdot.quine.app.v2api.endpoints.V2QuineAdministrationEndpoints
import com.thatdot.quine.graph.GraphService
import com.thatdot.quine.routes.exts.NamespaceParameter

/** Routes for a dedicated health-check binding.
  *
  * Only the liveness and readiness endpoints are served from here; the static-file and OpenAPI
  * routes reject every request. Intended for orchestration probes (e.g., Kubernetes).
  *
  * @param graph     underlying graph
  * @param quineApp  quine application state
  * @param appConfig current application config
  * @param timeout   timeout
  */
class HealthAppRoutes(
  val graph: GraphService,
  val quineApp: QuineApp,
  appConfig: BaseConfig,
  val timeout: Timeout,
)(implicit val ec: ExecutionContext, protected val logConfig: LogConfig)
    extends BaseAppRoutes
    with V2QuineAdministrationEndpoints
    with LazySafeLogging {

  implicit val system: org.apache.pekko.actor.ActorSystem = graph.system

  override lazy val idProvider = graph.idProvider

  val appMethods = new OssApiMethods(graph, quineApp, appConfig, timeout)

  // No ingest endpoints are exposed on the health binding
  val ingestEndpoints: List[ServerEndpoint[TapirRoutes.Requirements, Future]] = Nil

  // The only endpoints served here: liveness and readiness
  val apiEndpoints: List[ServerEndpoint[TapirRoutes.Requirements, Future]] =
    List(livenessServerEndpoint, readinessServerEndpoint)

  val apiInfo: Info =
    Info(title = "health", version = "1.0.0", description = Some("Health check endpoints"))

  /** `memberIdx` query parameter, hidden from the generated schema */
  override def memberIdxParameter: EndpointInput[Option[Int]] = {
    val memberIdxQuery = query[Option[Int]]("memberIdx")
    memberIdxQuery.schema(_.hidden(true))
  }

  override def namespaceParameter: EndpointInput[Option[NamespaceParameter]] =
    CommonParameters.hiddenValidatingNamespaceQuery

  // Neither static assets nor OpenAPI documents are served from the health binding
  override lazy val staticFilesRoute: Route = reject
  override lazy val openApiRoute: Route = reject

  override lazy val apiRoute: Route = {
    val interpreter = PekkoHttpServerInterpreter()
    interpreter.toRoute(apiEndpoints)
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/IngestApiMethods.scala
================================================
package com.thatdot.quine.app.routes

import scala.concurrent.{ExecutionContext, Future}
import scala.util.Failure
import scala.util.control.NoStackTrace

import org.apache.pekko.stream.{Materializer, StreamDetachedException}

import com.thatdot.common.logging.Log.LogConfig
import com.thatdot.quine.app.routes.IngestApiEntities.PauseOperationException
import
com.thatdot.quine.app.util.QuineLoggables._
import com.thatdot.quine.graph.{BaseGraph, NamespaceId}
import com.thatdot.quine.routes.{
  IngestStreamConfiguration,
  IngestStreamInfo,
  IngestStreamInfoWithName,
  IngestStreamStatus,
}
import com.thatdot.quine.util.SwitchMode

object IngestApiEntities {

  /** Raised when a pause/resume is requested for an ingest that can no longer be switched.
    *
    * @param statusMsg lowercase name of the status that prevented the operation
    */
  case class PauseOperationException(statusMsg: String) extends Exception with NoStackTrace

  object PauseOperationException {
    object Completed extends PauseOperationException("completed")
    object Terminated extends PauseOperationException("terminated")
    object Failed extends PauseOperationException("failed")
  }
}

trait IngestApiMethods {
  val graph: BaseGraph

  implicit def materializer: Materializer

  /** Summarise an ingest stream: its current status, the failure message if termination has already
    * failed, its settings and its metrics.
    */
  def stream2Info(conf: IngestStreamWithControl[IngestStreamConfiguration]): Future[IngestStreamInfo] = {
    def summarize(status: IngestStreamStatus): IngestStreamInfo = {
      // Only populated when the termination future has already completed with a failure
      val failureMessage = conf.terminated().value.collect { case Failure(cause) => cause.toString }
      IngestStreamInfo(status, failureMessage, conf.settings, conf.metrics.toEndpointResponse)
    }
    conf.status.map(summarize)(graph.shardDispatcherEC)
  }

  val quineApp: IngestStreamState

  /** Open or close the valve of the named ingest stream.
    *
    * @param name      name of the ingest stream
    * @param namespace namespace the stream belongs to
    * @param newState  the valve position to switch to
    * @return `None` when no such stream exists; otherwise the stream's info after the switch. The
    *         future fails with a [[PauseOperationException]] when the stream's initial status is
    *         completed, terminated or failed.
    */
  def setIngestStreamPauseState(
    name: String,
    namespace: NamespaceId,
    newState: SwitchMode,
  )(implicit logConfig: LogConfig): Future[Option[IngestStreamInfoWithName]] =
    quineApp.getIngestStreamFromState(name, namespace) match {
      case Some(ingest) =>
        // Streams in a final state cannot be paused or resumed
        val refusal: Option[PauseOperationException] = ingest.initialStatus match {
          case IngestStreamStatus.Completed => Some(PauseOperationException.Completed)
          case IngestStreamStatus.Terminated => Some(PauseOperationException.Terminated)
          case IngestStreamStatus.Failed => Some(PauseOperationException.Failed)
          case _ => None
        }
        refusal match {
          case Some(reason) => Future.failed(reason)
          case None =>
            ingest
              .valve()
              .flatMap(_.flip(newState))(graph.nodeDispatcherEC)
              .flatMap { _ =>
                // HACK: set the ingest's "initial status" to "Paused". `stream2Info` will use this as the stream
                // status when the valve is closed but the stream is not terminated. However, this assignment is
                // not threadsafe, and this directly violates the semantics of `initialStatus`. This should be
                // fixed in a future refactor.
                ingest.initialStatus = IngestStreamStatus.Paused
                stream2Info(ingest.copy(settings = ingest.settings.asV1Config))
              }(graph.nodeDispatcherEC)
              .map(info => Some(info.withName(name)))(ExecutionContext.parasitic)
        }
      case None => Future.successful(None)
    }

  /** Build a recovery function translating pause/resume failures into endpoint errors.
    *
    * @param operation verb used in the error message (e.g. "pause")
    * @param toError   wraps the message in the endpoint's error type
    */
  def mkPauseOperationError[ERROR_TYPE](
    operation: String,
    toError: String => ERROR_TYPE,
  ): PartialFunction[Throwable, Either[ERROR_TYPE, Nothing]] = {
    case refused: PauseOperationException =>
      Left(toError(s"Cannot $operation a ${refused.statusMsg} ingest."))
    case _: StreamDetachedException =>
      // A StreamDetachedException always occurs when the ingest has failed
      Left(toError(s"Cannot $operation a failed ingest."))
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/IngestMeter.scala
================================================
package com.thatdot.quine.app.routes

import com.codahale.metrics.{Meter, Metered, Timer}

import com.thatdot.quine.graph.NamespaceId
import com.thatdot.quine.graph.metrics.HostQuineMetrics

/** A pair of [[Metered]] counters tracked for an ingest: one for records, one for bytes */
sealed abstract class IngestMetered {
  def counts: Metered
  def bytes: Metered
  def getCount: Long = counts.getCount
}

object IngestMetered {

  /** Take a snapshot of the provided ingestMetered whose values will never change afterwards
    * @param im the [[IngestMetered]] to snapshot
    * @return the frozen copy
    */
  def freeze(im: IngestMetered): IngestMetered = {
    val frozenCounts = StoppedMeter.fromMeter(im.counts)
    val frozenBytes = StoppedMeter.fromMeter(im.bytes)
    new IngestMetered {
      override val counts: Metered = frozenCounts
      override val bytes: Metered = frozenBytes
    }
  }

  /** Returns an ingest meter whose meters are looked up in (or added to) the metric registry under
    * names derived from the namespace and ingest name
    * @see com.codahale.metrics.MetricRegistry#meter
    */
  def ingestMeter(namespaceId: NamespaceId, name: String, metrics: HostQuineMetrics): IngestMeter =
    IngestMeter(
      name,
namespaceId,
      metrics.metricRegistry.meter(metrics.metricName(namespaceId, List("ingest", name, "count"))),
      metrics.metricRegistry.meter(metrics.metricName(namespaceId, List("ingest", name, "bytes"))),
      metrics,
    )

  /** Removes any meters used in ingest meters for the provided ingest name
    *
    * Both the "count" and the "bytes" meter are always attempted, so a missing "count" meter can no
    * longer leave the "bytes" meter behind in the registry.
    *
    * @return true only when both meters were removed
    * @see com.codahale.metrics.MetricRegistry#remove
    */
  def removeIngestMeter(namespaceId: NamespaceId, name: String, metrics: HostQuineMetrics): Boolean = {
    // Evaluate both removals eagerly: combining the calls directly with `&&` would skip the second
    // removal whenever the first one returned false.
    val countRemoved =
      metrics.metricRegistry.remove(metrics.metricName(namespaceId, List("ingest", name, "count")))
    val bytesRemoved =
      metrics.metricRegistry.remove(metrics.metricName(namespaceId, List("ingest", name, "bytes")))
    countRemoved && bytesRemoved
  }
}

/** Live record/byte meters for a single named ingest in a namespace.
  *
  * @param name        ingest name
  * @param namespaceId namespace the ingest belongs to
  * @param countMeter  meter marked once per call to [[mark]]
  * @param bytesMeter  meter marked with the byte size passed to [[mark]]
  * @param metrics     metrics host, used to obtain the deserialization timer
  */
final case class IngestMeter private[routes] (
  name: String,
  namespaceId: NamespaceId,
  countMeter: Meter, // mutable
  bytesMeter: Meter, // mutable
  private val metrics: HostQuineMetrics,
) extends IngestMetered {

  /** Record one ingested item of the given size in bytes */
  def mark(bytes: Int): Unit = {
    countMeter.mark()
    bytesMeter.mark(bytes.toLong)
  }
  override def counts: Metered = countMeter
  override def bytes: Metered = bytesMeter

  /** Returns a timer that can be used to track deserializations.
    * CAUTION this timer has different lifecycle behavior than the other metrics in this class.
    * See [[metrics.ingestDeserializationTimer]] for more information.
    * Note that not all ingest types use this timer.
    */
  def unmanagedDeserializationTimer: Timer = metrics.ingestDeserializationTimer(namespaceId, name)
}

/** Meter that has been halted (so its rates/counts are no longer changing)
  *
  * This is handy for keeping track of rates of a stopped stream (completed or crashed), since we
  * don't want the rates to trend downwards after the stream has stopped.
*/
final case class StoppedMeter(
  getCount: Long,
  getFifteenMinuteRate: Double,
  getFiveMinuteRate: Double,
  getMeanRate: Double,
  getOneMinuteRate: Double,
) extends Metered

object StoppedMeter {

  /** Capture the current count and rates of `meter` in an immutable [[StoppedMeter]] */
  def fromMeter(meter: Metered): Metered =
    StoppedMeter(
      getCount = meter.getCount,
      getFifteenMinuteRate = meter.getFifteenMinuteRate,
      getFiveMinuteRate = meter.getFiveMinuteRate,
      getMeanRate = meter.getMeanRate,
      getOneMinuteRate = meter.getOneMinuteRate,
    )
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/IngestRoutesImpl.scala
================================================
package com.thatdot.quine.app.routes

import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success}

import org.apache.pekko.Done
import org.apache.pekko.http.scaladsl.server.Directives._
import org.apache.pekko.http.scaladsl.server.Route
import org.apache.pekko.stream.Materializer
import org.apache.pekko.util.Timeout

import com.thatdot.common.logging.Log.LogConfig
import com.thatdot.quine.app.model.ingest.util.KafkaSettingsValidator
import com.thatdot.quine.exceptions.NamespaceNotFoundException
import com.thatdot.quine.graph.NamespaceId
import com.thatdot.quine.routes._
import com.thatdot.quine.util.SwitchMode

/** The Pekko HTTP implementation of [[IngestRoutes]] */
trait IngestRoutesImpl
    extends IngestRoutes
    with com.thatdot.quine.app.routes.exts.PekkoQuineEndpoints
    with IngestApiMethods
    with endpoints4s.pekkohttp.server.Endpoints
    with com.thatdot.quine.app.routes.exts.circe.JsonEntitiesFromSchemas {

  implicit def timeout: Timeout
  implicit def materializer: Materializer

  val quineApp: IngestStreamState

  /** Try to register a new ingest stream.
    * The Either represents a bad request on the Left, and the inner Option represents Some(success) or that the
    * namespace was not found (404).
*/
  implicit protected def logConfig: LogConfig

  /** Register a new ingest stream.
    *
    * Result encoding: `Left` is a bad request (400), `Right(None)` means the namespace was not found
    * (404) and `Right(Some(()))` is success. Kafka ingests have their settings validated first.
    */
  private val ingestStreamStartRoute: Route = {
    val namespaceNotFound: Either[ClientErrors, Option[Nothing]] = Right(None)
    def badRequest(errors: ClientErrors): Either[ClientErrors, Option[Nothing]] = Left(errors)
    def created[A](a: A): Either[ClientErrors, Option[A]] = Right(Some(a))

    def registerIngest(
      name: String,
      intoNamespace: NamespaceId,
      settings: IngestStreamConfiguration,
    ): Either[ClientErrors, Option[Unit]] = {
      val attempt = quineApp.addIngestStream(
        name,
        settings,
        intoNamespace,
        previousStatus = None, // this ingest is being created, not restored, so it has no previous status
        shouldResumeRestoredIngests = false,
        timeout,
        memberIdx = None,
      )
      attempt match {
        case Success(true) => created(())
        case Success(false) =>
          badRequest(
            endpoints4s.Invalid(
              s"Cannot create ingest stream `$name` (a stream with this name already exists)",
            ),
          )
        case Failure(_: NamespaceNotFoundException) => namespaceNotFound
        case Failure(err) =>
          badRequest(endpoints4s.Invalid(s"Failed to create ingest stream `$name`: ${err.getMessage}"))
      }
    }

    ingestStreamStart.implementedBy { case (ingestName, namespaceParam, settings) =>
      graph.requiredGraphIsReady()
      val namespace = namespaceFromParam(namespaceParam)
      // Only Kafka ingests carry settings that need validating up front
      val validationErrors = settings match {
        case kafka: KafkaIngest =>
          KafkaSettingsValidator.validateInput(
            kafka.kafkaProperties,
            kafka.groupId,
            kafka.offsetCommitting,
          )
        case _ => None
      }
      validationErrors match {
        case None => registerIngest(ingestName, namespace, settings)
        case Some(errors) =>
          badRequest(
            endpoints4s.Invalid(
              s"Cannot create ingest stream `$ingestName`: ${errors.toList.mkString(",")}",
            ),
          )
      }
    }
  }

  /** Stop an ingest stream and report its final status once termination has finished */
  private val ingestStreamStopRoute = ingestStreamStop.implementedByAsync { case (ingestName, namespaceParam) =>
    graph.requiredGraphIsReadyFuture {
      quineApp.removeIngestStream(ingestName, namespaceFromParam(namespaceParam)) match {
        case Some(control @ IngestStreamWithControl(settings, metrics, _, terminated, close, _, _, _)) =>
          val statusAfterStop = {
            import IngestStreamStatus._
            control.status.map {
              // in these cases, the ingest was healthy and runnable/running
              case Running | Paused | Restored => Terminated
              // in these cases, the ingest was not running/runnable
              case unchanged @ (Completed | Failed | Terminated) => unchanged
            }(ExecutionContext.parasitic)
          }

          // start terminating the ingest
          close()
          // future will return when termination finishes
          val terminationMessage: Future[Option[String]] =
            terminated().flatMap { completion =>
              completion
                .map { case Done => None }(graph.shardDispatcherEC)
                .recover { case e => Some(e.toString) }(graph.shardDispatcherEC)
            }(graph.shardDispatcherEC)

          statusAfterStop
            .zip(terminationMessage)
            .map { case (newStatus, message) =>
              Some(
                IngestStreamInfoWithName(ingestName, newStatus, message, settings, metrics.toEndpointResponse),
              )
            }(graph.shardDispatcherEC)

        case None => Future.successful(None)
      }
    }
  }

  /** Look up a single ingest stream by name */
  private val ingestStreamLookupRoute = ingestStreamLookup.implementedByAsync { case (ingestName, namespaceParam) =>
    graph.requiredGraphIsReadyFuture {
      quineApp.getIngestStream(ingestName, namespaceFromParam(namespaceParam)) match {
        case Some(found) =>
          stream2Info(found).map(info => Some(info.withName(ingestName)))(graph.shardDispatcherEC)
        case None => Future.successful(None)
      }
    }
  }

  /** List every ingest stream known for the namespace, keyed by name */
  private val ingestStreamListRoute = ingestStreamList.implementedByAsync { namespaceParam =>
    graph.requiredGraphIsReadyFuture {
      val registered = quineApp.getIngestStreams(namespaceFromParam(namespaceParam)).toList
      val namedInfos = Future.traverse(registered) { case (name, ingest) =>
        stream2Info(ingest).map(name -> _)(graph.shardDispatcherEC)
      }(implicitly, graph.shardDispatcherEC)
      namedInfos.map(_.toMap)(graph.shardDispatcherEC)
    }
  }

  /** Pause an ingest stream by closing its valve */
  private val ingestStreamPauseRoute = ingestStreamPause.implementedByAsync { case (ingestName, namespaceParam) =>
    graph.requiredGraphIsReadyFuture {
      val paused = setIngestStreamPauseState(ingestName, namespaceFromParam(namespaceParam), SwitchMode.Close)
      paused
        .map(Right(_))(ExecutionContext.parasitic)
        .recover(mkPauseOperationError("pause", endpoints4s.Invalid(_)))(ExecutionContext.parasitic)
    }
  }

  /** Resume an ingest stream by opening its valve */
  private val ingestStreamUnpauseRoute = ingestStreamUnpause.implementedByAsync {
    case (ingestName, namespaceParam) =>
      graph.requiredGraphIsReadyFuture {
        val resumed = setIngestStreamPauseState(ingestName, namespaceFromParam(namespaceParam), SwitchMode.Open)
        resumed
          .map(Right(_))(ExecutionContext.parasitic)
          .recover(mkPauseOperationError("resume", endpoints4s.Invalid(_)))(ExecutionContext.parasitic)
      }
  }

  /** All ingest routes, tried in the order listed */
  final val ingestRoutes: Route = {
    ingestStreamStartRoute ~
    ingestStreamStopRoute ~
    ingestStreamLookupRoute ~
    ingestStreamListRoute ~
    ingestStreamPauseRoute ~
    ingestStreamUnpauseRoute
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/IngestStreamState.scala
================================================
package com.thatdot.quine.app.routes

import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success, Try}

import org.apache.pekko.Done
import org.apache.pekko.stream.Materializer
import org.apache.pekko.util.Timeout

import cats.data.Validated.{invalidNel, validNel}
import cats.data.ValidatedNel

import com.thatdot.common.logging.Log.LogConfig
import com.thatdot.quine.app.config.FileAccessPolicy
import com.thatdot.quine.app.model.ingest.QuineIngestSource
import com.thatdot.quine.app.model.ingest2.V2IngestEntities.{
  QuineIngestConfiguration => V2IngestConfiguration,
  QuineIngestStreamWithStatus,
  Transformation,
}
import com.thatdot.quine.app.model.ingest2.source.{DecodedSource, QuineValueIngestQuery}
import com.thatdot.quine.app.model.ingest2.sources.WebSocketFileUploadSource
import com.thatdot.quine.app.model.ingest2.{IngestSource, V1ToV2, V2IngestEntities}
import com.thatdot.quine.app.model.transformation.polyglot
import
com.thatdot.quine.app.model.transformation.polyglot.langauges.JavaScriptTransformation import com.thatdot.quine.app.util.QuineLoggables._ import com.thatdot.quine.exceptions.{DuplicateIngestException, NamespaceNotFoundException} import com.thatdot.quine.graph.{CypherOpsGraph, MemberIdx, NamespaceId, defaultNamespaceId, namespaceToString} import com.thatdot.quine.routes._ import com.thatdot.quine.serialization.{AvroSchemaCache, ProtobufSchemaCache} import com.thatdot.quine.util.{BaseError, SwitchMode} /** Store ingests allowing for either v1 or v2 types. */ case class UnifiedIngestConfiguration(config: Either[V2IngestConfiguration, IngestStreamConfiguration]) { def asV1Config: IngestStreamConfiguration = config match { case Left(v2) => v2.asV1IngestStreamConfiguration case Right(v1) => v1 } } trait IngestStreamState { type IngestName = String @volatile protected var ingestStreams: Map[NamespaceId, Map[IngestName, IngestStreamWithControl[UnifiedIngestConfiguration]]] = Map(defaultNamespaceId -> Map.empty) def defaultExecutionContext: ExecutionContext implicit def materializer: Materializer def fileAccessPolicy: FileAccessPolicy /** Add a new ingest stream to the running application. * * @param name Name of the stream to add * @param settings Configuration for the stream * @param intoNamespace Namespace into which the stream should ingest data * @param previousStatus Some previous status of the stream, if it was restored from persistence. * None for new ingests * @param shouldResumeRestoredIngests If restoring an ingest, should the ingest be resumed? When `previousStatus` * is None, this has no effect. * @param timeout How long to allow for the attempt to persist the stream to the metadata table * (when shouldSaveMetadata = true). Has no effect if !shouldSaveMetadata * @param shouldSaveMetadata Whether the application should persist this stream to the metadata table. 
* This should be false when restoring from persistence (i.e., from the metadata
    *                           table) and true otherwise.
    * @param memberIdx The cluster member index on which this ingest is being created
    * @return Success(true) when the operation was successful, or a Failure otherwise
    */
  def addIngestStream(
    name: String,
    settings: IngestStreamConfiguration,
    intoNamespace: NamespaceId,
    previousStatus: Option[IngestStreamStatus],
    shouldResumeRestoredIngests: Boolean,
    timeout: Timeout,
    shouldSaveMetadata: Boolean = true,
    memberIdx: Option[MemberIdx] = None,
  ): Try[Boolean]

  /** Create an ingest stream through the updated V2 ingest API.
    *
    * Abstract here; the implementation is not visible in this file section.
    * NOTE(review): the `Left` side presumably carries human-readable error messages -- confirm with implementors.
    */
  def addV2IngestStream(
    name: String,
    settings: V2IngestConfiguration,
    intoNamespace: NamespaceId,
    timeout: Timeout,
    memberIdx: MemberIdx,
  )(implicit logConfig: LogConfig): Future[Either[Seq[String], Unit]]

  /** Create a V2 ingest stream on this member. Abstract here. */
  def createV2IngestStream(
    name: String,
    settings: V2IngestConfiguration,
    intoNamespace: NamespaceId,
    timeout: Timeout,
  )(implicit logConfig: LogConfig): ValidatedNel[BaseError, Unit]

  /** Restore a previously created V2 ingest. Abstract here.
    *
    * @param name                        Name of the stream to add
    * @param settings                    Configuration for the stream
    * @param intoNamespace               Namespace into which the stream should ingest data
    * @param previousStatus              Some previous status of the stream, if it was restored from persistence
    * @param shouldResumeRestoredIngests If restoring an ingest, should the ingest be resumed? When `previousStatus`
    *                                    is None, this has no effect.
    * @param timeout                     How long to allow for the attempt to persist the stream to the metadata table.
    *                                    NOTE(review): the earlier wording tied this to a `shouldSaveMetadata` flag, which
    *                                    is not a parameter of this method -- confirm whether it still applies.
    * @param thisMemberIdx               This cluster member's index, in case the graph is still initializing
    * @return a valid result when the operation was successful, the accumulated errors otherwise
    */
  def restoreV2IngestStream(
    name: String,
    settings: V2IngestConfiguration,
    intoNamespace: NamespaceId,
    previousStatus: Option[IngestStreamStatus],
    shouldResumeRestoredIngests: Boolean,
    timeout: Timeout,
    thisMemberIdx: MemberIdx,
  )(implicit logConfig: LogConfig): ValidatedNel[BaseError, Unit]

  /** Pick the initial valve mode and the initial status for an ingest.
    *
    * Without a previous status the ingest is opened and marked `Running`. Otherwise the status comes from
    * `IngestStreamStatus.decideRestoredStatus` and the valve mode mirrors that status's valve position.
    */
  protected def determineSwitchModeAndStatus(
    previousStatus: Option[IngestStreamStatus],
    shouldResumeRestoredIngests: Boolean,
  ): (SwitchMode, IngestStreamStatus) =
    previousStatus.fold[(SwitchMode, IngestStreamStatus)](
      // Freshly-created ingest: there is no status to restore
      (SwitchMode.Open, IngestStreamStatus.Running),
    ) { lastKnownStatus =>
      val restoredStatus = IngestStreamStatus.decideRestoredStatus(lastKnownStatus, shouldResumeRestoredIngests)
      val valveMode = restoredStatus.position match {
        case ValvePosition.Open => SwitchMode.Open
        case ValvePosition.Closed => SwitchMode.Close
      }
      (valveMode, restoredStatus)
    }

  /** Try to build a [[QuineIngestSource]] from the configuration and stream components, and register it.
    *
    * On success the new ingest has been added to the `ingestStreams` state map under `intoNamespace`.
    *
    * This method must be called within a synchronized block, since it changes the shared saved state of the
    * ingest map (and, eventually, persistence).
    *
    * Fails (returns an invalid result)
    *  - if the namespace doesn't exist in the state map
    *  - if an ingest with this name already exists in the namespace
    *  - if the transformation or the decoded source cannot be constructed
    */
  def createV2IngestSource(
    name: String,
    settings: V2IngestConfiguration,
    intoNamespace: NamespaceId,
    previousStatus: Option[IngestStreamStatus], // None when the stream was not restored at all
    shouldResumeRestoredIngests: Boolean,
    metrics: IngestMetrics,
    meter: IngestMeter,
    graph: CypherOpsGraph,
  )(implicit
    protobufCache: ProtobufSchemaCache,
    avroCache: AvroSchemaCache,
    logConfig: LogConfig,
  ): ValidatedNel[BaseError, QuineIngestSource] =
    ingestStreams.get(intoNamespace) match {
      // TODO Note for review comparison: v1 version fails silently here.
      // TODO Also, shouldn't this just add the namespace if it's not found?
      case None =>
        invalidNel(NamespaceNotFoundException(intoNamespace))

      // An ingest with this name is already registered in the namespace.
      case Some(registered) if registered.contains(name) =>
        invalidNel(DuplicateIngestException(name, Some(namespaceToString(intoNamespace))))

      case Some(registered) =>
        val (initialValveSwitchMode, initialStatus) =
          determineSwitchModeAndStatus(previousStatus, shouldResumeRestoredIngests)

        val decodedSourceNel: ValidatedNel[BaseError, DecodedSource] =
          DecodedSource.apply(name, settings, meter, graph.system, fileAccessPolicy)(
            protobufCache,
            avroCache,
            logConfig,
          )

        // No configured transformation is valid; a configured one must instantiate successfully.
        val transformationNel: ValidatedNel[BaseError, Option[polyglot.Transformation]] =
          settings.transformation match {
            case None => validNel(Option.empty)
            case Some(Transformation.JavaScript(function)) =>
              JavaScriptTransformation.makeInstance(function) match {
                case Left(err) => invalidNel(err)
                case Right(value) => validNel(Some(value))
              }
          }

        transformationNel.andThen { transformation =>
          decodedSourceNel.map { (decoded: DecodedSource) =>
            val deadLetterOutputs =
              decoded.getDeadLetterQueues(settings.onRecordError.deadLetterQueueSettings)(protobufCache, graph.system)

            val quineIngestSource: QuineIngestSource = decoded.toQuineIngestSource(
              name,
              QuineValueIngestQuery.apply(settings, graph, intoNamespace),
              transformation,
              graph,
              initialValveSwitchMode,
              settings.parallelism,
              settings.maxPerSecond,
              onDecodeError = deadLetterOutputs,
              retrySettings = settings.onRecordError.retrySettings,
              logRecordError = settings.onRecordError.logRecord,
              onStreamErrorHandler = settings.onStreamError,
            )

            val controlled: IngestStreamWithControl[UnifiedIngestConfiguration] =
              IngestStreamWithControl(
                UnifiedIngestConfiguration(Left(settings)),
                metrics,
                quineIngestSource,
                initialStatus,
              )

            // For V2 WebSocket file upload, extract and store the packaging in optWsV2
            decoded match {
              case wsUpload: WebSocketFileUploadSource =>
                controlled.optWsV2 = Some(wsUpload.decodingHub)
              case _ => // Other source types don't need special handling
            }

            //TODO this is blocking in QuineEnterpriseApp
            ingestStreams += intoNamespace -> (registered + (name -> controlled))

            quineIngestSource
          }
        }
    }

  /** Look up an ingest by name, presenting its settings as a V1 configuration (via `asV1Config`). */
  def getIngestStream(
    name: String,
    namespace: NamespaceId,
  )(implicit logConfig: LogConfig): Option[IngestStreamWithControl[IngestStreamConfiguration]] =
    for {
      unified <- getIngestStreamFromState(name, namespace)
    } yield unified.copy(settings = unified.settings.asV1Config)

  /** Look up a V2 ingest by name. Abstract here. */
  def getV2IngestStream(
    name: String,
    namespace: NamespaceId,
    memberIdx: MemberIdx,
  )(implicit logConfig: LogConfig): Future[Option[V2IngestEntities.IngestStreamInfoWithName]]

  /** Get the unified ingest stream stored in memory. The value returned here will _not_ be a copy.
    * Note: Once v1 and v2 ingests are no longer both supported, distinguishing this method from
    * [[getIngestStream]] should no longer be necessary.
    */
  def getIngestStreamFromState(
    name: String,
    namespace: NamespaceId,
  ): Option[IngestStreamWithControl[UnifiedIngestConfiguration]] =
    ingestStreams.get(namespace).flatMap(_.get(name))

  /** All ingests of a namespace, keyed by name, with V1-style settings. Abstract here. */
  def getIngestStreams(namespace: NamespaceId): Map[String, IngestStreamWithControl[IngestStreamConfiguration]]

  /** All V2 ingests of a namespace, keyed by name. Abstract here. */
  def getV2IngestStreams(
    namespace: NamespaceId,
    memberIdx: MemberIdx,
  ): Future[Map[String, V2IngestEntities.IngestStreamInfo]]

  /** The in-memory ingests registered for `namespace`; empty when the namespace has no entry in the state map. */
  protected def getIngestStreamsFromState(
    namespace: NamespaceId,
  ): Map[IngestName, IngestStreamWithControl[UnifiedIngestConfiguration]] =
    ingestStreams.getOrElse(namespace, Map.empty)

  /** All ingests of a namespace together with their status, as either the V1 or the V2 representation.
    * Abstract here.
    */
  protected def getIngestStreamsWithStatus(
    namespace: NamespaceId,
  ): Future[Map[String, Either[IngestStreamWithStatus, QuineIngestStreamWithStatus]]]

  /** Remove an ingest by name. Abstract here.
    * NOTE(review): presumably returns the removed ingest, if one existed -- confirm with implementors.
    */
  def removeIngestStream(
    name: String,
    namespace: NamespaceId,
  ): Option[IngestStreamWithControl[IngestStreamConfiguration]]

  /** Remove a V2 ingest by name. Abstract here. */
  def removeV2IngestStream(
    name: String,
    namespace: NamespaceId,
    memberIdx: MemberIdx,
  ): Future[Option[V2IngestEntities.IngestStreamInfoWithName]]

  def
pauseV2IngestStream(
    name: String,
    namespace: NamespaceId,
    memberIdx: MemberIdx,
  ): Future[Option[V2IngestEntities.IngestStreamInfoWithName]]

  /** Resume a paused V2 ingest by name. Abstract here. */
  def unpauseV2IngestStream(
    name: String,
    namespace: NamespaceId,
    memberIdx: MemberIdx,
  ): Future[Option[V2IngestEntities.IngestStreamInfoWithName]]

  /** Close the ingest stream and return a future that completes when the stream terminates.
    *
    * @return `None` when the stream terminated normally, or `Some(error.toString)` when its termination signal
    *         failed
    */
  def terminateIngestStream(stream: IngestStreamWithControl[_]): Future[Option[String]] = {
    stream.close()
    stream
      .terminated()
      .flatMap { terminationSignal =>
        // Fold both outcomes of the termination signal into a successful Option[String]
        terminationSignal.transform[Option[String]] {
          case Success(_) => Success(None)
          case Failure(e) => Success(Some(e.toString))
        }(ExecutionContext.parasitic)
      }(ExecutionContext.parasitic)
  }

  /** Flip the valve of the named ingest to `newState` and report the resulting stream info.
    *
    * @return a future of `None` when no such ingest is registered; a failed future carrying the matching
    *         `PauseOperationException` when the ingest's recorded status is `Completed`, `Terminated` or `Failed`;
    *         otherwise the stream info (with its name) after the valve has been flipped
    */
  protected def setIngestStreamPauseState(
    name: String,
    namespace: NamespaceId,
    newState: SwitchMode,
  )(implicit logConfig: LogConfig): Future[Option[V2IngestEntities.IngestStreamInfoWithName]] =
    getIngestStreamFromState(name, namespace) match {
      case None => Future.successful(None)
      case Some(ingest) =>
        ingest.initialStatus match {
          case IngestStreamStatus.Completed => Future.failed(IngestApiEntities.PauseOperationException.Completed)
          case IngestStreamStatus.Terminated => Future.failed(IngestApiEntities.PauseOperationException.Terminated)
          case IngestStreamStatus.Failed => Future.failed(IngestApiEntities.PauseOperationException.Failed)
          case _ =>
            ingest
              .valve()
              .flatMap(_.flip(newState))(defaultExecutionContext)
              .flatMap { _ =>
                // HACK: set the ingest's "initial status" to "Paused". `stream2Info` will use this as the stream status
                // when the valve is closed but the stream is not terminated. However, this assignment is not threadsafe,
                // and this directly violates the semantics of `initialStatus`. This should be fixed in a future refactor.
                ingest.initialStatus = IngestStreamStatus.Paused
                streamToInternalModel(ingest.copy(settings = IngestSource(ingest.settings)))
              }(defaultExecutionContext)
              .map(info => Some(info.withName(name)))(ExecutionContext.parasitic)
        }
    }

  /** Render a controlled V2 ingest as the internal `IngestStreamInfo` model.
    *
    * The error message is only populated when the termination signal has already completed with a failure at the
    * time the status becomes available. Exceptions without a message fall back to `toString`, so a failure is
    * never reported as `Some(null)`.
    */
  protected def streamToInternalModel(
    stream: IngestStreamWithControl[IngestSource],
  ): Future[V2IngestEntities.IngestStreamInfo] =
    stream.status
      .map { status =>
        val terminationError: Option[String] =
          stream.terminated().value match {
            case Some(Success(terminationSignal)) =>
              terminationSignal.value match {
                case Some(Failure(exception)) =>
                  // `getMessage` may be null (e.g. exceptions constructed without a message)
                  Option(exception.getMessage).orElse(Some(exception.toString))
                case _ => None
              }
            case _ => None
          }

        V2IngestEntities.IngestStreamInfo(
          V1ToV2(status),
          terminationError,
          stream.settings,
          V1ToV2(stream.metrics.toEndpointResponse),
        )
      }(defaultExecutionContext)

  /** Render a unified ingest as the internal model when it wraps a V2 configuration.
    *
    * @return a future of `None` for ingests that do not hold a V2 configuration
    */
  protected def unifiedIngestStreamToInternalModel(
    conf: IngestStreamWithControl[UnifiedIngestConfiguration],
  )(implicit logConfig: LogConfig): Future[Option[V2IngestEntities.IngestStreamInfo]] =
    conf.settings match {
      case UnifiedIngestConfiguration(Left(v2Config: V2IngestConfiguration)) =>
        // Same controls and metrics, re-typed around the V2 source definition
        val ingestV2 = conf.copy[IngestSource](settings = v2Config.source)
        streamToInternalModel(ingestV2).map(Some(_))(ExecutionContext.parasitic)
      case _ => Future.successful(None)
    }

  /** Decide the status to record once an ingest has been terminated, given its status at termination time. */
  protected def determineFinalStatus(statusAtTermination: IngestStreamStatus): IngestStreamStatus = {
    import com.thatdot.quine.routes.IngestStreamStatus._
    statusAtTermination match {
      // the ingest was healthy and runnable/running: it is now terminated
      case Running | Paused | Restored => Terminated
      // the ingest was not running/runnable: keep whatever it already was
      case alreadyStopped @ (Completed | Failed | Terminated) => alreadyStopped
    }
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/IngestStreamWithControl.scala
================================================
package com.thatdot.quine.app.routes

import
java.time.Instant
import java.time.temporal.ChronoUnit.MILLIS

import scala.concurrent.duration.DurationInt
import scala.concurrent.{ExecutionContext, Future, Promise}
import scala.util.{Failure, Success}

import org.apache.pekko.stream.Materializer
import org.apache.pekko.stream.scaladsl.Sink
import org.apache.pekko.{Done, NotUsed, pattern}

import com.codahale.metrics.Metered
import io.circe.Json

import com.thatdot.common.logging.Log.{LazySafeLogging, LogConfig, Loggable, SafeLoggableInterpolator}
import com.thatdot.quine.app.model.ingest.QuineIngestSource
import com.thatdot.quine.app.model.ingest2.sources.DecodingHub
import com.thatdot.quine.routes.{IngestStreamStats, IngestStreamStatus, RatesSummary}
import com.thatdot.quine.util.{SwitchMode, ValveSwitch}

/** Adds to the ingest stream configuration extra information that will be
  * materialized only once the ingest stream is running and which may be
  * needed for stopping the stream
  *
  * @param settings      the product-specific stream configuration being managed
  * @param metrics       the metrics handle for this ingest stream
  * @param valve         asynchronous function to get a handle to the ingest's pause valve. Because of the possibility
  *                      that stream materialization is attempted multiple times, this function is not idempotent
  * @param terminated    asynchronous function to get a handle to the ingest's termination signal. Because of the
  *                      possibility that stream materialization is attempted multiple times, this function is not
  *                      idempotent
  * @param close         Callback to request the ingest stream to stop. Once this is called, `terminated`'s inner
  *                      future will eventually complete. This should be a val, but it's constructed out of order by
  *                      Novelty streams.
  * @param initialStatus the status of the ingest stream when it was first created. This is `Running` for
  *                      newly-created ingests, but may have any value except `Terminated` for ingests restored from
  *                      persistence. To get the ingest's current status, use `status` instead. This should be a val,
  *                      but it's used to patch in a rendered status in setIngestStreamPauseState
  * @param optWs         HACK: opaque stash of additional information for Novelty websocket streams. This should be
  *                      refactored out of this class.
  * @param optWsV2       HACK: Like optWs for V1, but with the decoding flow packaged up with the hub to the data
  *                      format to be chosen rather than fixed to just JSON.
  */
final case class IngestStreamWithControl[+Conf: Loggable](
  settings: Conf,
  metrics: IngestMetrics,
  valve: () => Future[ValveSwitch],
  terminated: () => Future[Future[Done]],
  var close: () => Unit,
  var initialStatus: IngestStreamStatus,
  var optWs: Option[(Sink[Json, NotUsed], IngestMeter)] = None,
  var optWsV2: Option[DecodingHub] = None,
)(implicit logConfig: LogConfig)
    extends LazySafeLogging {

  /** A coarse view of the status based only on the termination signal: `Completed` or `Failed` once the signal has
    * completed, `Running` while it is still pending. A failure is logged as a warning.
    */
  private def checkTerminated(implicit materializer: Materializer): Future[IngestStreamStatus] = {
    implicit val ec: ExecutionContext = materializer.executionContext
    terminated().map { terminationSignal =>
      terminationSignal.value match {
        case Some(Success(Done)) => IngestStreamStatus.Completed
        case Some(Failure(e)) =>
          // If exception occurs, it means that the ingest stream has failed
          logger.warn(log"Ingest stream failed: $settings" withException e)
          IngestStreamStatus.Failed
        case None => IngestStreamStatus.Running
      }
    }
  }

  /** Status of a stream that has not terminated, derived from the valve's current mode.
    *
    * The call to `getMode` is raced against a one-second fallback that reports `Terminated`.
    * NOTE(review): the original comment said this timeout works around an issue, but the issue reference is missing.
    * The fallback timer is cancelled as soon as a status is available, so repeated status checks do not leave
    * pending scheduler tasks behind.
    */
  private def pendingStatusFuture(
    valveSwitch: ValveSwitch,
  )(implicit materializer: Materializer): Future[IngestStreamStatus] = {
    val ec = materializer.executionContext
    val statusPromise = Promise[IngestStreamStatus]()

    statusPromise.completeWith(
      valveSwitch
        .getMode()
        .map {
          case SwitchMode.Open => IngestStreamStatus.Running
          case SwitchMode.Close =>
            // NB this may return an incorrect or outdated status due to thread-unsafe updates to initialStatus and
            // incomplete information about terminal states across restarts. See discussion and linked diagram on
            // QU-2003.
            initialStatus
        }(ec)
        .recover { case _: org.apache.pekko.stream.StreamDetachedException =>
          IngestStreamStatus.Terminated
        }(ec),
    )

    val fallback = materializer.system.scheduler.scheduleOnce(1.second) {
      val _ = statusPromise.trySuccess(IngestStreamStatus.Terminated)
    }(ec)
    // Whichever side wins, the timer is no longer needed once the promise is complete
    statusPromise.future.onComplete(_ => fallback.cancel())(ec)

    statusPromise.future
  }

  /** Current status of the ingest.
    *
    * `Completed` / `Failed` come from the termination signal. Otherwise the valve is consulted; if that answer does
    * not arrive within 200 milliseconds the ingest is reported as `Running`.
    */
  def status(implicit materializer: Materializer): Future[IngestStreamStatus] = {
    implicit val ec: ExecutionContext = materializer.executionContext

    // Both sides of the race are started eagerly, before the termination check below
    val valveBasedStatus: Future[IngestStreamStatus] = valve().flatMap(vs => pendingStatusFuture(vs))
    val timeout = pattern.after(200.millis)(Future.successful(IngestStreamStatus.Running))(materializer.system)
    val valveBasedStatusWithTimeout = Future.firstCompletedOf(Seq(valveBasedStatus, timeout))

    checkTerminated.flatMap {
      case terminal @ (IngestStreamStatus.Completed | IngestStreamStatus.Failed) => Future.successful(terminal)
      case _ => valveBasedStatusWithTimeout
    }
  }
}

object IngestStreamWithControl {

  /** Build the controls of an ingest from its [[QuineIngestSource]]: the valve, termination signal and close
    * callback are all derived from the source's `getControl`.
    */
  def apply[Conf: Loggable](
    conf: Conf,
    metrics: IngestMetrics,
    quineIngestSource: QuineIngestSource,
    initialStatus: IngestStreamStatus,
  )(implicit logConfig: LogConfig): IngestStreamWithControl[Conf] =
    IngestStreamWithControl(
      settings = conf,
      metrics = metrics,
      valve = () => quineIngestSource.getControl.map(control => control.valveHandle)(ExecutionContext.parasitic),
      terminated = () => quineIngestSource.getControl.map(control => control.termSignal)(ExecutionContext.parasitic),
      close = () => {
        quineIngestSource.getControl.flatMap(control => control.terminate())(ExecutionContext.parasitic)
        () // Intentional fire and forget
      },
      initialStatus = initialStatus,
    )
}

/** Start/completion times and meters of one ingest stream.
  *
  * `stop` records the completion time and replaces the meter with `IngestMetered.freeze(meter)`.
  */
final case class IngestMetrics(
  startTime: Instant,
  private var completionTime: Option[Instant],
  private var meter: IngestMetered,
) {

  /** Record that the ingest completed at `completedAt` and freeze its meter. */
  def stop(completedAt: Instant): Unit = {
    completionTime = Some(completedAt)
    meter = IngestMetered.freeze(meter)
  }

  /** Milliseconds elapsed between the start of the ingest and `t`. */
  def millisSinceStart(t: Instant): Long = MILLIS.between(startTime, t)

  /** Snapshot the count and the 1/5/15-minute and mean rates of a meter. */
  private def meterToIngestRates(metered: Metered): RatesSummary =
    RatesSummary(
      metered.getCount,
      metered.getOneMinuteRate,
      metered.getFiveMinuteRate,
      metered.getFifteenMinuteRate,
      metered.getMeanRate,
    )

  /** Render these metrics for API responses. The runtime is measured up to the completion time, or up to now for
    * an ingest that has not been stopped.
    */
  def toEndpointResponse: IngestStreamStats =
    IngestStreamStats(
      ingestedCount = meter.getCount,
      rates = meterToIngestRates(meter.counts),
      byteRates = meterToIngestRates(meter.bytes),
      startTime = startTime,
      totalRuntime = millisSinceStart(completionTime.getOrElse(Instant.now())),
    )
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/QueryUiConfigurationRoutesImpl.scala
================================================
package com.thatdot.quine.app.routes

import org.apache.pekko.http.scaladsl.server.Directives._
import org.apache.pekko.http.scaladsl.server.Route

import com.thatdot.quine.app.routes.exts.circe.JsonEntitiesFromSchemas
import com.thatdot.quine.graph.BaseGraph
import com.thatdot.quine.routes.QueryUiConfigurationRoutes

trait QueryUiConfigurationRoutesImpl
    extends QueryUiConfigurationRoutes
    with JsonEntitiesFromSchemas
    with exts.PekkoQuineEndpoints {

  protected val quineApp: QueryUiConfigurationState

  val graph: BaseGraph

  def queryUiConfigurationRoutes: Route =
    queryUiSampleQueries.implementedByAsync(_ => graph.requiredGraphIsReadyFuture(quineApp.getSampleQueries)) ~
    updateQueryUiSampleQueries.implementedByAsync(q => graph.requiredGraphIsReadyFuture(quineApp.setSampleQueries(q))) ~
    queryUiQuickQueries.implementedByAsync(_ => graph.requiredGraphIsReadyFuture(quineApp.getQuickQueries)) ~
    updateQueryUiQuickQueries.implementedByAsync(q => graph.requiredGraphIsReadyFuture(quineApp.setQuickQueries(q))) ~
    queryUiAppearance.implementedByAsync(_ => graph.requiredGraphIsReadyFuture(quineApp.getNodeAppearances)) ~
    updateQueryUiAppearance.implementedByAsync(q =>
graph.requiredGraphIsReadyFuture(quineApp.setNodeAppearances(q)))
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/QueryUiConfigurationState.scala
================================================
package com.thatdot.quine.app.routes

import scala.concurrent.Future
import scala.io.{Codec, Source}
import scala.util.Try
import scala.util.matching.Regex

import com.thatdot.quine.routes.{SampleQuery, UiNodeAppearance, UiNodeQuickQuery}

object QueryUiConfigurationState {

  /** regex to match hex codepoints in several styles:
    * - c0de
    * - &#xc0de;
    * - \c0de
    * - \uc0de
    *
    * plus a few minor variations thereof -- this is used to parse user input, so accepting
    * too many strings is better than accepting too few. Matching is case-insensitive, so `&#xF11F;`
    * is accepted as well as `&#xf11f;`.
    *
    * inv: if this matches, it will return exactly 1 capture group containing hex characters
    *
    * NB this will match an odd number of hex characters if provided
    */
  val codepointRegex: Regex = raw"(?i)(?:\\|&#x|\\u)?([a-f0-9]+);?".r

  /** Map of full `ion-` icon name to rendered unicode icon, loaded from the `ionicons.tsv` classpath resource.
    * Rows that do not consist of exactly two tab-separated fields are skipped. The resource is closed once the
    * map has been built.
    */
  val icons: Map[String, String] =
    scala.util.Using.resource(Source.fromResource("ionicons.tsv")(Codec.UTF8)) { tsv =>
      tsv
        .getLines()
        .map(_.split("\t"))
        .collect { case Array(name, rendered) => name -> rendered.trim }
        .toMap
    }

  /** Given a node appearance, return a copy of that appearance where the icon specified (if any)
    * is rendered to a unicode string. The icon may be specified by its ionicons v2 name or a
    * hex codepoint prefixed by either \\ or \\u, or hex-escaped as an HTML character
    *
    * Resolution order:
    *  1. a value starting with "ion" is looked up verbatim in [[icons]] (unknown names yield None)
    *  1. a value that is a known icon name once prefixed with "ion-" is looked up in [[icons]]
    *  1. a value matching [[codepointRegex]] is rendered as that code point, including code points above
    *     U+FFFF; values that are not valid code points yield None
    *  1. anything else is left unchanged
    *
    * @example a node with icon = Some("cash") => an otherwise-identical node whose icon is the rendered
    *          `ion-cash` glyph
    * @example a node with icon = Some("&#xF11F;") => an otherwise-identical node whose icon is the single
    *          character U+F11F
    * @param node a node with an icon specification
    * @return a node with a rendered unicode icon
    */
  def renderNodeIcons(node: UiNodeAppearance): UiNodeAppearance = node.copy(
    icon = node.icon match {
      case Some(namedWithPrefix) if namedWithPrefix.startsWith("ion") =>
        icons.get(namedWithPrefix)
      case Some(named) if icons.contains("ion-" + named) =>
        icons.get("ion-" + named)
      case Some(codepointRegex(codepointHex)) =>
        // Character.toChars handles supplementary code points (and rejects invalid ones) instead of
        // silently truncating to 16 bits
        Try(new String(Character.toChars(Integer.parseInt(codepointHex, 16)))).toOption
      case other => other
    },
  )
}

/** Read and write access to the query UI configuration: sample queries, quick queries and node appearances.
  * All members are abstract here.
  */
trait QueryUiConfigurationState {

  def getSampleQueries: Future[Vector[SampleQuery]]

  def getQuickQueries: Future[Vector[UiNodeQuickQuery]]

  def getNodeAppearances: Future[Vector[UiNodeAppearance]]

  def setSampleQueries(newSampleQueries: Vector[SampleQuery]): Future[Unit]

  def setQuickQueries(newQuickQueries: Vector[UiNodeQuickQuery]): Future[Unit]

  def setNodeAppearances(newNodeAppearances: Vector[UiNodeAppearance]): Future[Unit]
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/QueryUiCypherApiMethods.scala
================================================
package com.thatdot.quine.app.routes

import scala.concurrent.Promise
import scala.concurrent.duration.Duration
import scala.util.matching.Regex

import org.apache.pekko.NotUsed
import org.apache.pekko.stream.scaladsl.Source

import io.circe.Json

import com.thatdot.common.logging.Log.{
  LazySafeLogging,
  LogConfig,
  OnlySafeStringInterpolator,
  Safe,
  SafeLoggableInterpolator,
}
import com.thatdot.common.logging.Pretty.PrettyHelper
import com.thatdot.common.quineid.QuineId
import com.thatdot.quine.compiler.cypher
import
com.thatdot.quine.compiler.cypher.CypherProcedures
import com.thatdot.quine.graph.cypher.quinepattern.{
  OutputTarget,
  QueryContext => QPQueryContext,
  QueryPlanner,
  QuinePatternHelpers,
  RuntimeMode,
}
import com.thatdot.quine.graph.cypher.{
  CypherException,
  Expr => CypherExpr,
  RunningCypherQuery => CypherRunningQuery,
  Type => CypherType,
  Value => CypherValue,
}
import com.thatdot.quine.graph.quinepattern.{LoadQuery, QuinePatternOpsGraph}
import com.thatdot.quine.graph.{CypherOpsGraph, LiteralOpsGraph, NamespaceId, StandingQueryId}
import com.thatdot.quine.language.{ast => Pattern}
import com.thatdot.quine.model._
import com.thatdot.quine.routes._
import com.thatdot.quine.util.Log.implicits._

trait QueryUiCypherApiMethods extends LazySafeLogging {
  import QueryUiCypherApiMethods._

  implicit def graph: LiteralOpsGraph with CypherOpsGraph
  implicit def idProvider: QuineIdProvider
  implicit protected def logConfig: LogConfig

  /** Compute the host of a quine ID */
  def hostIndex(qid: QuineId): Int

  /** Convert JSON query parameters to Cypher values by way of `QuineValue.fromJson`. */
  private def guessCypherParameters(params: Map[String, Json]): Map[String, CypherValue] =
    params.map { case (paramName, json) =>
      paramName -> CypherExpr.fromQuineValue(QuineValue.fromJson(json))
    }

  /** Post-process UI nodes. This serves as a hook for last minute modifications to the nodes sent out to the UI.
    *
    * @param uiNode UI node to modify
    * @return updated UI node
    */
  protected def transformUiNode(uiNode: UiNode[QuineId]): UiNode[QuineId]

  /** Query nodes with a given Cypher query
    *
    * Every column of every row is inspected: node values become UI nodes, nulls are skipped, and any other value
    * fails the stream with a `CypherException.TypeMismatch`. When QuinePattern is enabled the query is delegated to
    * `quinePatternQueryNodes` instead.
    *
    * @note this filters out nodes whose IDs are not supported by the provider
    *
    * @param query     Cypher query expected to return nodes
    * @param namespace Which namespace to query in.
    * @param atTime    possibly historical time to query
    * @return tuple of nodes produced by the query, whether the query is read-only, and whether the query may cause
    *         full node scan
    */
  final def queryCypherNodes(
    query: CypherQuery,
    namespace: NamespaceId,
    atTime: Option[Milliseconds],
  ): (Source[UiNode[QuineId], NotUsed], Boolean, Boolean) =
    if (isQuinePatternEnabled) quinePatternQueryNodes(query, namespace, atTime)
    else {
      val running: CypherRunningQuery = cypher.queryCypherValues(
        query.text,
        parameters = guessCypherParameters(query.parameters),
        namespace = namespace,
        atTime = atTime,
      )

      val uiNodes = running.results
        .mapConcat(identity) // flatten: all columns from all rows as 1 sequence without any grouping
        .mapConcat[UiNode[QuineId]] {
          case CypherExpr.Node(qid, labels, properties) =>
            // Unlabelled nodes are displayed by their ID
            val displayLabel =
              if (labels.isEmpty) "ID: " + qid.pretty
              else labels.map(_.name).mkString(":")

            Some(
              UiNode(
                id = qid,
                hostIndex = hostIndex(qid),
                label = displayLabel,
                properties = properties.map { case (key, value) => (key.name, CypherValue.toJson(value)) },
              ),
            )

          case CypherExpr.Null =>
            // node-typed values that are null are just ignored rather than generating an error, because they are
            // easily introduced with eg `OPTIONAL MATCH`
            None

          case other =>
            // non-null, non-node values cannot be handled by the pre-UI post-query processing logic, so we need
            // to drop or error on them. Since the usage contract for this functionality is "I have a query that
            // returns nodes", we consider this case as bad user input and return an error.
            throw CypherException.TypeMismatch(
              expected = Seq(CypherType.Node),
              actualValue = other,
              context = "node query return value",
            )
        }
        .map(transformUiNode)

      (uiNodes, running.compiled.isReadOnly, running.compiled.canContainAllNodeScan)
    }

  /** Query edges with a given Cypher query
    *
    * Relationship values become UI edges, nulls are skipped, and any other value fails the stream with a
    * `CypherException.TypeMismatch`. When QuinePattern is enabled the query is delegated to
    * `quinePatternQueryEdges` instead.
    *
    * @note this filters out nodes whose IDs are not supported by the provider
    *
    * @param query          Cypher query expected to return edges
    * @param namespace      the namespace in which to run this query
    * @param atTime         possibly historical time to query
    * @param requestTimeout timeout signalling output results no longer matter (not referenced by this
    *                       implementation)
    * @return tuple of edges produced by the query, readonly, and canContainAllNodeScan
    */
  def queryCypherEdges(
    query: CypherQuery,
    namespace: NamespaceId,
    atTime: Option[Milliseconds],
    requestTimeout: Duration = Duration.Inf,
  ): (Source[UiEdge[QuineId], NotUsed], Boolean, Boolean) =
    if (isQuinePatternEnabled) quinePatternQueryEdges(query, namespace, atTime)
    else {
      val running: CypherRunningQuery = cypher.queryCypherValues(
        query.text,
        parameters = guessCypherParameters(query.parameters),
        namespace = namespace,
        atTime = atTime,
      )

      val uiEdges = running.results
        .mapConcat(identity) // flatten: all columns from all rows as 1 sequence without any grouping
        .mapConcat[UiEdge[QuineId]] {
          case CypherExpr.Relationship(src, lbl, _, tgt) =>
            Some(UiEdge(from = src, to = tgt, edgeType = lbl.name))
          case CypherExpr.Null =>
            None // possibly from OPTIONAL MATCH, see comments in [[queryCypherNodes]]
          case other =>
            throw CypherException.TypeMismatch(
              expected = Seq(CypherType.Relationship),
              actualValue = other,
              context = "edge query return value",
            )
        }

      (uiEdges, running.compiled.isReadOnly, running.compiled.canContainAllNodeScan)
    }

  /** Query anything with a given cypher query
    *
    * @note queries starting with `EXPLAIN` are intercepted (since they are
    *       anyways not valid Cypher) and return one value which represents the
    *       execution plan of the query without running the query. `SHOW PROCEDURES` is rewritten to a
    *       procedure call and re-dispatched. When QuinePattern is enabled the query is delegated to
    *       `quinePatternQueryGeneric` instead.
    *
    * @param query     Cypher query
    * @param namespace the namespace in which to run this query
    * @param atTime    possibly historical time to query
    * @return tuple of:
    *         - columns of the result
    *         - rows of the result as a Source (each row is a sequence of JSON values whose length matches the
    *           length of the columns)
    *         - boolean isReadOnly
    *         - boolean canContainAllNodeScan
    */
  def queryCypherGeneric(
    query: CypherQuery,
    namespace: NamespaceId,
    atTime: Option[Milliseconds],
  ): (Seq[String], Source[Seq[Json], NotUsed], Boolean, Boolean) =
    if (isQuinePatternEnabled) quinePatternQueryGeneric(query, namespace, atTime)
    else
      query.text match {
        case Explain(toExplain) =>
          val compiledQuery = cypher
            .compile(queryText = toExplain, unfixedParameters = query.parameters.keys.toSeq)
            .query
          val plan = cypher.Plan.fromQuery(compiledQuery)
          logger.debug(safe"User requested EXPLAIN of query: $compiledQuery")
          (Vector("plan"), Source.single(Seq(CypherValue.toJson(plan.toValue))), true, false)

        // rewrite "SHOW PROCEDURES" to an equivalent `help.procedures` call, if possible
        case ShowProcedures(rewritten, warning) =>
          warning.foreach(logger.warn(_))
          queryCypherGeneric(CypherQuery(rewritten, query.parameters), namespace, atTime)

        // TODO add support for PROFILE statement

        case queryText =>
          val running = cypher.queryCypherValues(
            queryText,
            parameters = guessCypherParameters(query.parameters),
            namespace = namespace,
            atTime = atTime,
          )
          val columnNames = running.columns.map(_.name)
          val jsonRows = running.results.map(row => row.map(CypherValue.toJson))
          (columnNames, jsonRows, running.compiled.isReadOnly, running.compiled.canContainAllNodeScan)
      }

  /** Shared helper that executes a QuinePattern query and returns the raw context stream plus planned metadata.
    * Each quinePatternQuery* method calls this, then applies its own result-mapping step.
*/
  private def executeQuinePattern(
    query: CypherQuery,
    namespace: NamespaceId,
    atTime: Option[Milliseconds],
  ): (Source[QPQueryContext, NotUsed], QueryPlanner.PlannedQuery) = {
    requireQuinePatternEnabled()

    val parameters = toQuinePatternParameters(query.parameters)
    val qpGraph: QuinePatternOpsGraph = graph.asInstanceOf[QuinePatternOpsGraph]
    implicit val ec = qpGraph.system.dispatcher

    val planned = QueryPlanner.planFromString(query.text) match {
      case Left(error) =>
        throw new IllegalArgumentException(
          s"Failed to parse query. QuinePattern does not support this query syntax: ${query.text.take(100)}: $error",
        )
      case Right(plan) => plan
    }

    // The loader completes this promise with every result context once the eager run has finished
    val collected = Promise[Seq[QPQueryContext]]()
    qpGraph.getLoader ! LoadQuery(
      standingQueryId = StandingQueryId.fresh(),
      queryPlan = planned.plan,
      mode = RuntimeMode.Eager,
      params = parameters,
      namespace = namespace,
      output = OutputTarget.EagerCollector(collected),
      returnColumns = planned.returnColumns,
      outputNameMapping = planned.outputNameMapping,
      atTime = atTime,
    )

    val contexts = Source
      .futureSource(collected.future.map(results => Source(results)))
      .mapMaterializedValue(_ => NotUsed)

    (contexts, planned)
  }

  /** Run a node query through the QuinePattern interpreter.
    *
    * Every binding of every result context is inspected; node values become UI nodes and everything else
    * (including nulls) is dropped. The read-only / all-node-scan flags are fixed to `false` / `true`.
    */
  private[app] def quinePatternQueryNodes(
    query: CypherQuery,
    namespace: NamespaceId,
    atTime: Option[Milliseconds],
  ): (Source[UiNode[QuineId], NotUsed], Boolean, Boolean) = {
    logger.info(safe"Executing node query using QuinePattern interpreter: ${Safe(query.text.take(100))}")

    val (contexts, _) = executeQuinePattern(query, namespace, atTime)

    val uiNodes = contexts
      .mapConcat { qpCtx =>
        qpCtx.bindings.values.collect { case Pattern.Value.Node(qid, labels, props) =>
          // Unlabelled nodes are displayed by their ID
          val displayLabel =
            if (labels.isEmpty) "ID: " + qid.pretty
            else labels.map(_.name).mkString(":")
          val jsonProperties = props.values.map { case (key, value) =>
            key.name -> CypherValue.toJson(QuinePatternHelpers.patternValueToCypherValue(value))
          }
          UiNode(id = qid, hostIndex = hostIndex(qid), label = displayLabel, properties = jsonProperties)
        }.toList
      }
      .map(transformUiNode)

    (uiNodes, false, true)
  }

  /** Run an edge query through the QuinePattern interpreter.
    *
    * Every binding is converted to a Cypher value; relationships become UI edges and everything else is dropped.
    * The read-only / all-node-scan flags are fixed to `false` / `true`.
    */
  private[app] def quinePatternQueryEdges(
    query: CypherQuery,
    namespace: NamespaceId,
    atTime: Option[Milliseconds],
  ): (Source[UiEdge[QuineId], NotUsed], Boolean, Boolean) = {
    logger.info(safe"Executing edge query using QuinePattern interpreter: ${Safe(query.text.take(100))}")

    val (contexts, _) = executeQuinePattern(query, namespace, atTime)

    val uiEdges = contexts.mapConcat { qpCtx =>
      qpCtx.bindings.values
        .map(value => QuinePatternHelpers.patternValueToCypherValue(value))
        .collect { case CypherExpr.Relationship(src, lbl, _, tgt) =>
          UiEdge(from = src, to = tgt, edgeType = lbl.name)
        }
        .toList
    }

    (uiEdges, false, true)
  }

  /** Run an arbitrary query through the QuinePattern interpreter.
    *
    * Columns follow the iteration order of the planner's `outputNameMapping`. Each cell is looked up by the
    * column's own binding id (not by its display name), so two outputs that share a display name keep their own
    * values. Missing bindings are rendered as null. The read-only / all-node-scan flags are fixed to
    * `false` / `true`.
    */
  private[app] def quinePatternQueryGeneric(
    query: CypherQuery,
    namespace: NamespaceId,
    atTime: Option[Milliseconds],
  ): (Seq[String], Source[Seq[Json], NotUsed], Boolean, Boolean) = {
    logger.info(safe"Executing query using QuinePattern interpreter: ${Safe(query.text.take(100))}")

    val (contexts, planned) = executeQuinePattern(query, namespace, atTime)

    // Fix the column order once; names and cells are both derived from these (binding id, name) pairs
    val outputColumns = planned.outputNameMapping.toSeq
    val columnNames: Seq[String] = outputColumns.map { case (_, sym) => sym.name }

    val rowsSource = contexts.map { qpCtx =>
      outputColumns.map { case (bindingId, _) =>
        val patternValue = qpCtx.bindings.get(bindingId).getOrElse(Pattern.Value.Null)
        CypherValue.toJson(QuinePatternHelpers.patternValueToCypherValue(patternValue))
      }
    }

    (columnNames, rowsSource, false, true)
  }

  // Helper methods for QuinePattern support

  /** True only when the `qp.enabled` system property is set and parses as the boolean `true`. */
  private def isQuinePatternEnabled: Boolean =
    sys.props.get("qp.enabled").flatMap(_.toBooleanOption).contains(true)

  /** Guard for QuinePattern-only code paths; throws when the feature flag is off. */
  private def requireQuinePatternEnabled(): Unit =
    if (!isQuinePatternEnabled) {
      throw new IllegalStateException("QuinePattern requires -Dqp.enabled=true to be set")
    }

  /** Convert JSON query parameters to QuinePattern values by way of `QuineValue.fromJson`. */
  private def toQuinePatternParameters(params: Map[String, Json]): Map[Symbol, com.thatdot.quine.language.ast.Value] = {
    import com.thatdot.quine.graph.cypher.quinepattern.CypherAndQuineHelpers.quineValueToPatternValue
    params.map { case (paramName, json) =>
      Symbol(paramName) -> quineValueToPatternValue(QuineValue.fromJson(json))
    }
  }
}

object QueryUiCypherApiMethods extends LazySafeLogging {

  // EXPLAIN (1 argument: query)
  private val Explain: Regex = raw"(?is)\s*explain\s+(.*)".r

  // SHOW PROCEDURES matcher. Matches return 2 values: a converted query using `help.procedures` and an optional
  // SafeInterpolator with a warning to log back to the user
  private object ShowProcedures {
    private val cypherProceduresInvocation = s"CALL ${CypherProcedures.name}()"

    // see https://regex101.com/r/CwK80x/1
    // SHOW PROCEDURES [executable-by filter] [query suffix] (2 arguments).
    // The first argument is unsupported and used only for warnings.
    // The second is usable in-place on the procedure call.
    private val ShowProceduresStatement =
      raw"(?is)(?:\h*)show\h+procedures?\h*(executable(?: by \S+)?)?\h*(.*)".r

    def unapply(s: String): Option[(String, Option[OnlySafeStringInterpolator])] = s match {
      case ShowProceduresStatement(ignoredArgs, querySuffix) =>
        val rewritten = s"$cypherProceduresInvocation $querySuffix".trim
        // The optional capture group is null when absent, hence the Option(...) wrapper
        val warning = Option(ignoredArgs).filter(_.nonEmpty).map { args =>
          safe"Ignoring unsupported arguments to SHOW PROCEDURES: `${Safe(args)}`"
        }
        Some(rewritten -> warning)
      case _ =>
        None
    }
  }
}

/** [[QueryUiCypherApiMethods]] backed by a single graph: every node is reported on host 0 and UI nodes are passed
  * through unchanged.
  */
class OSSQueryUiCypherMethods(quineGraph: LiteralOpsGraph with CypherOpsGraph)(implicit
  protected val logConfig: LogConfig,
) extends QueryUiCypherApiMethods() {
  def hostIndex(qid: com.thatdot.common.quineid.QuineId): Int = 0
  override def idProvider: QuineIdProvider = graph.idProvider
  def transformUiNode(uiNode: com.thatdot.quine.routes.UiNode[com.thatdot.common.quineid.QuineId]) = uiNode
  override def graph = quineGraph
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/QueryUiRoutesImpl.scala
================================================
package com.thatdot.quine.app.routes

import scala.concurrent.{ExecutionContext, Future}
import scala.jdk.CollectionConverters._
import scala.reflect.ClassTag
import scala.util.{Failure, Success, Try}

import org.apache.pekko.NotUsed
import org.apache.pekko.http.scaladsl.server.Directives._
import org.apache.pekko.http.scaladsl.server.Route
import org.apache.pekko.stream.Materializer
import org.apache.pekko.stream.scaladsl.{Sink, Source}
import org.apache.pekko.util.Timeout

import io.circe.Json

import com.thatdot.common.logging.Log.LazySafeLogging
import com.thatdot.common.logging.Pretty.PrettyHelper
import com.thatdot.common.quineid.QuineId
import com.thatdot.quine.graph.cypher.CypherException
import com.thatdot.quine.graph.{CypherOpsGraph, LiteralOpsGraph, NamespaceId}
import com.thatdot.quine.gremlin._
import com.thatdot.quine.model._
import
com.thatdot.quine.routes.{CypherQueryResult, GremlinQuery, QueryUiRoutes, UiEdge, UiNode}

/** Pekko HTTP implementation of the query UI routes (Gremlin and Cypher endpoints). */
trait QueryUiRoutesImpl
    extends QueryUiRoutes
    with exts.PekkoQuineEndpoints
    with QueryUiCypherApiMethods
    with endpoints4s.pekkohttp.server.Endpoints
    with exts.circe.JsonEntitiesFromSchemas
    with exts.ServerRequestTimeoutOps
    with LazySafeLogging {

  val gremlin: GremlinQueryRunner

  implicit def graph: LiteralOpsGraph with CypherOpsGraph
  implicit def idProvider: QuineIdProvider
  implicit def timeout: Timeout
  implicit def materializer: Materializer

  // Stable reference to the ID provider, needed for the path-dependent `CustomIdType` below
  private[this] lazy val idProv = idProvider
  private[this] lazy val CustomIdTypeClassTag: ClassTag[idProv.CustomIdType] = idProv.customIdTag

  /** Compute the host of a quine ID */
  def hostIndex(qid: QuineId): Int = 0

  /** Format a value as JSON. This is how Gremlin values will be formatted.
    *
    * NB: this is tuned to consume values coming out of the Gremlin interpreter.
    *
    * Java collections are converted element by element. Recursing on `asScala` would hand back a
    * mutable `Buffer`/`Map` wrapper, which matches none of the immutable `List`/`Vector`/`Map`
    * cases and would end up in the `toString` fallback as a JSON string.
    *
    * @param any value to render; `null`, unit and `None` all become JSON null
    * @return the JSON rendering; unrecognized values are rendered via `toString`
    */
  private def writeGremlinValue(any: Any): Json = any match {
    // Null value
    case null | () => Json.Null

    // Option
    case None => Json.Null
    case Some(x) => writeGremlinValue(x)

    // Numbers
    case n: Byte => Json.fromInt(n.intValue)
    case n: Int => Json.fromInt(n)
    case n: Long => Json.fromLong(n)
    case n: Float => Json.fromFloatOrString(n)
    case n: Double => Json.fromDoubleOrString(n)
    case n: java.lang.Long => Json.fromLong(n)
    case n: java.lang.Double => Json.fromDoubleOrString(n)

    // Strings
    case s: String => Json.fromString(s)

    // Booleans
    case b: Boolean => Json.fromBoolean(b)
    case b: java.lang.Boolean => Json.fromBoolean(b)

    // Lists
    case l: java.util.List[_] => Json.fromValues(l.asScala.map(writeGremlinValue))
    case l: List[_] => Json.fromValues(l.map(writeGremlinValue))
    case a: Array[_] => Json.fromValues(a.map(writeGremlinValue))
    case a: Vector[_] => Json.fromValues(a.map(writeGremlinValue))

    // Maps
    case m: java.util.Map[_, _] =>
      Json.fromFields(m.asScala.map { case (k, v) => (k.toString, writeGremlinValue(v)) })
    case m: Map[_, _] => Json.fromFields(m map { case (k, v) => (k.toString, writeGremlinValue(v)) })

    // Vertex and edges
    case Vertex(qid) => Json.fromString(s"Vertex($qid)")
    case Edge(src, lbl, tgt) => Json.fromString(s"Edge($src, ${lbl.name}, $tgt)")

    // Custom id type
    case CustomIdTypeClassTag(a) => Json.fromString(idProv.customIdToString(a))

    // Other: Any custom 'toString'
    case o => Json.fromString(o.toString)
  }

  /** Convert JSON request parameters into Gremlin query parameters keyed by symbol. */
  private def guessGremlinParameters(params: Map[String, Json]): Map[Symbol, QuineValue] =
    params.map { case (k, v) => Symbol(k) -> QuineValue.fromJson(v) }

  /** Given a [[QuineId]], query out a [[UiNode]]
    *
    * @note this is not used by Cypher because those nodes already have the needed information!
    * @param id ID of the node
    * @param namespace the namespace in which to run this query
    * @param atTime possibly historical time to query
    * @return representation of the node for the UI
    */
  private def queryUiNode(
    id: QuineId,
    namespace: NamespaceId,
    atTime: AtTime,
  ): Future[UiNode[QuineId]] =
    graph
      .literalOps(namespace)
      .getPropsAndLabels(id, atTime)
      .map { case (props, labels) =>
        val parsedProperties = props.map { case (propKey, pickledValue) =>
          // Fall back to the raw serialized form when the property cannot be deserialized
          val unpickledValue = pickledValue.deserialized.fold[Any](
            _ => pickledValue.serialized,
            _.underlyingJvmValue,
          )
          propKey.name -> writeGremlinValue(unpickledValue)
        }

        // Join the labels with ':'; nodes with no (or empty) labels are labelled by their ID
        val nodeLabel = labels
          .filter(_.nonEmpty)
          .fold("ID: " + id.pretty)(_.map(_.name).mkString(":"))

        UiNode(
          id = id,
          hostIndex = hostIndex(id),
          label = nodeLabel,
          properties = parsedProperties,
        )
      }(graph.shardDispatcherEC)

  /** Post-process UI nodes. This serves as a hook for last minute modifications to the nodes sent
    * out to the UI.
*
    * @param uiNode UI node to modify
    * @return updated UI node
    */
  protected def transformUiNode(uiNode: UiNode[QuineId]): UiNode[QuineId] = uiNode

  /** Query nodes with a given gremlin query
    *
    * Each vertex produced by the query is looked up (up to 4 lookups in flight) and then passed
    * through [[transformUiNode]].
    *
    * @note this filters out nodes whose IDs are not supported by the provider
    * @param query Gremlin query expected to return nodes
    * @param namespace the namespace in which to run this query
    * @param atTime possibly historical time to query
    * @return nodes produced by the query
    */
  final def queryGremlinNodes(
    query: GremlinQuery,
    namespace: NamespaceId,
    atTime: AtTime,
  ): Source[UiNode[QuineId], NotUsed] =
    gremlin
      .queryExpecting[Vertex](
        query.text,
        guessGremlinParameters(query.parameters),
        namespace,
        atTime,
      )
      .mapAsync(parallelism = 4)((vertex: Vertex) => queryUiNode(vertex.id, namespace, atTime))
      .map(transformUiNode)

  /** Query edges with a given gremlin query
    *
    * @note this filters out nodes whose IDs are not supported by the provider
    * @param query Gremlin query expected to return edges
    * @param namespace the namespace in which to run this query
    * @param atTime possibly historical time to query
    * @return edges produced by the query
    */
  final def queryGremlinEdges(
    query: GremlinQuery,
    namespace: NamespaceId,
    atTime: AtTime,
  ): Source[UiEdge[QuineId], NotUsed] =
    gremlin
      .queryExpecting[Edge](
        query.text,
        guessGremlinParameters(query.parameters),
        namespace,
        atTime,
      )
      .map { case Edge(src, lbl, tgt) => UiEdge(from = src, to = tgt, edgeType = lbl.name) }

  /** Query anything with a given Gremlin query
    *
    * @param query Gremlin query
    * @param namespace the namespace in which to run this query
    * @param atTime possibly historical time to query
    * @return data produced by the query formatted as JSON
    */
  final def queryGremlinGeneric(query: GremlinQuery, namespace: NamespaceId, atTime: AtTime): Source[Json, NotUsed] =
    gremlin
      .query(query.text, guessGremlinParameters(query.parameters), namespace, atTime)
      .map[Json](writeGremlinValue)

  // This could be made more general, but the dependency on ClientErrors makes it get "stuck in the cake" here and some
  // other route implementation traits that share similar private methods.
  //
  // Evaluates `ifFound` only when the namespace currently exists, wrapping a successful result in `Some`;
  // an unknown namespace yields `Right(None)` without evaluating `ifFound`.
  final private def ifNamespaceFound[A](namespaceId: NamespaceId)(
    ifFound: => Future[Either[ClientErrors, A]],
  ): Future[Either[ClientErrors, Option[A]]] =
    if (!graph.getNamespaces.contains(namespaceId)) Future.successful(Right(None))
    else ifFound.map(_.map(Some(_)))(ExecutionContext.parasitic)

  // The Query UI relies heavily on a couple Gremlin endpoints for making queries.
  final val gremlinApiRoute: Route = {

    // Turns Gremlin errors (thrown synchronously or failing the future) into a client-facing `Invalid`;
    // any other failure is propagated unchanged.
    def catchGremlinException[A](futA: => Future[A]): Future[Either[ClientErrors, A]] =
      Future
        .fromTry(Try(futA))
        .flatten
        .transform {
          case Success(a) => Success(Right(a))
          case Failure(qge: QuineGremlinException) => Success(Left(endpoints4s.Invalid(qge.toString)))
          case Failure(err) => Failure(err)
        }(graph.shardDispatcherEC)

    gremlinPost.implementedByAsyncWithRequestTimeout(_._2) { case ((atTime, _, namespaceParam, query), t) =>
      graph.requiredGraphIsReadyFuture {
        val ns = namespaceFromParam(namespaceParam)
        ifNamespaceFound(ns)(catchGremlinException {
          queryGremlinGeneric(query, ns, atTime)
            .via(Util.completionTimeoutOpt(t))
            .named(s"gremlin-query-atTime-${atTime.fold("none")(_.millis.toString)}")
            .runWith(Sink.seq)
        })
      }
    } ~
    gremlinNodesPost.implementedByAsyncWithRequestTimeout(_._2) { case ((atTime, _, namespaceParam, query), t) =>
      graph.requiredGraphIsReadyFuture {
        val ns = namespaceFromParam(namespaceParam)
        ifNamespaceFound(ns)(catchGremlinException {
          queryGremlinNodes(query, ns, atTime)
            .via(Util.completionTimeoutOpt(t))
            .named(s"gremlin-node-query-atTime-${atTime.fold("none")(_.millis.toString)}")
            .runWith(Sink.seq)
        })
      }
    } ~
    gremlinEdgesPost.implementedByAsyncWithRequestTimeout(_._2) { case ((atTime, _, namespaceParam, query), t) =>
      graph.requiredGraphIsReadyFuture {
        val ns = namespaceFromParam(namespaceParam)
        ifNamespaceFound(ns)(catchGremlinException {
          queryGremlinEdges(query, ns, atTime)
            .via(Util.completionTimeoutOpt(t))
            .named(s"gremlin-edge-query-atTime-${atTime.fold("none")(_.millis.toString)}")
            .runWith(Sink.seq)
        })
      }
    }
  }

  // The Query UI relies heavily on a couple Cypher endpoints for making queries.
  final val cypherApiRoute: Route = {

    // Same shape as the Gremlin variant above, but for CypherException (rendered with `pretty`) and
    // run on the parasitic execution context.
    def catchCypherException[A](futA: => Future[A]): Future[Either[ClientErrors, A]] =
      Future
        .fromTry(Try(futA))
        .flatten
        .transform {
          case Success(a) => Success(Right(a))
          case Failure(qce: CypherException) => Success(Left(endpoints4s.Invalid(qce.pretty)))
          case Failure(err) => Failure(err)
        }(ExecutionContext.parasitic)

    cypherPost.implementedByAsyncWithRequestTimeout(_._2) { case ((atTime, _, namespaceParam, query), t) =>
      graph.requiredGraphIsReadyFuture {
        val ns = namespaceFromParam(namespaceParam)
        ifNamespaceFound(ns)(catchCypherException {
          val (columns, results, isReadOnly, _) =
            queryCypherGeneric(query, ns, atTime) // TODO read canContainAllNodeScan
          // The completion timeout is only allowed for read-only queries
          results
            .via(Util.completionTimeoutOpt(t, allowTimeout = isReadOnly))
            .named(s"cypher-query-atTime-${atTime.fold("none")(_.millis.toString)}")
            .runWith(Sink.seq)
            .map(CypherQueryResult(columns, _))(ExecutionContext.parasitic)
        })
      }
    } ~
    cypherNodesPost.implementedByAsyncWithRequestTimeout(_._2) { case ((atTime, _, namespaceParam, query), t) =>
      graph.requiredGraphIsReadyFuture {
        val ns = namespaceFromParam(namespaceParam)
        ifNamespaceFound(ns)(catchCypherException {
          val (results, isReadOnly, _) =
            queryCypherNodes(query, ns, atTime) // TODO read canContainAllNodeScan
          results
            .via(Util.completionTimeoutOpt(t, allowTimeout = isReadOnly))
            .named(s"cypher-nodes-query-atTime-${atTime.fold("none")(_.millis.toString)}")
            .runWith(Sink.seq)
        })
      }
    } ~
    cypherEdgesPost.implementedByAsyncWithRequestTimeout(_._2) { case ((atTime, _, namespaceParam, query), t) =>
      graph.requiredGraphIsReadyFuture {
        val ns = namespaceFromParam(namespaceParam)
        ifNamespaceFound(ns)(catchCypherException {
          val (results, isReadOnly, _) =
            queryCypherEdges(query, ns, atTime) // TODO read canContainAllNodeScan
          results
.via(Util.completionTimeoutOpt(t, allowTimeout = isReadOnly))
            .named(s"cypher-edges-query-atTime-${atTime.fold("none")(_.millis.toString)}")
            .runWith(Sink.seq)
        })
      }
    }
  }

  /** All query UI routes: the Gremlin endpoints followed by the Cypher endpoints. */
  final val queryUiRoutes: Route = {
    gremlinApiRoute ~
    cypherApiRoute
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/QuineAppOpenApiDocs.scala
================================================
package com.thatdot.quine.app.routes

import java.net.URL

import org.apache.pekko.http.scaladsl.server.Route

import endpoints4s.openapi.model._

import com.thatdot.common.logging.Log.LogConfig
import com.thatdot.quine.app.BuildInfo
import com.thatdot.quine.app.config.QuineConfig
import com.thatdot.quine.app.util.OpenApiRenderer
import com.thatdot.quine.graph.BaseGraph
import com.thatdot.quine.model.QuineIdProvider
import com.thatdot.quine.routes._

/** The OpenAPI docs for our API
  *
  * @param idProvider the Quine ID provider (relevant for serialization of IDs and examples)
  */
final class QuineAppOpenApiDocs(val idProvider: QuineIdProvider)(implicit protected val logConfig: LogConfig)
    extends DebugOpsRoutes
    with AlgorithmRoutes
    with AdministrationRoutes
    with QueryUiRoutes
    with QueryUiConfigurationRoutes
    with IngestRoutes
    with StandingQueryRoutes
    with endpoints4s.openapi.Endpoints
    with endpoints4s.openapi.JsonEntitiesFromSchemas
    with com.thatdot.quine.app.routes.exts.ServerQuineEndpoints
    with com.thatdot.quine.routes.exts.OpenApiEntitiesWithExamples
    with com.thatdot.quine.routes.exts.OpenApiAnySchema {

  // Every endpoint that should appear in the rendered spec. Each endpoint is listed exactly once.
  private[this] val endpoints = List(
    buildInfo,
    config(QuineConfig().loadedConfigJson),
    readinessProbe,
    livenessProbe,
    metrics,
    shutdown,
    shardSizes,
    requestNodeSleep,
    graphHashCode,
    debugOpsGet,
    debugOpsVerbose,
    debugOpsEdgesGet,
    debugOpsHalfEdgesGet,
    debugOpsPropertyGet,
    // non-readonly debugOps (intentionally left registered but undocumented, QU-1045):
    // debugOpsPost,
    // debugOpsDelete,
    // debugOpsEdgesPut,
    // debugOpsEdgeDelete,
    // debugOpsPropertyPut,
    // debugOpsPropertyDelete,
    algorithmSaveRandomWalks,
    algorithmRandomWalk,
    cypherPost,
    cypherNodesPost,
    cypherEdgesPost,
    gremlinPost,
    gremlinNodesPost,
    gremlinEdgesPost,
    queryUiSampleQueries,
    updateQueryUiSampleQueries,
    queryUiQuickQueries,
    updateQueryUiQuickQueries,
    queryUiAppearance,
    updateQueryUiAppearance,
    ingestStreamList,
    ingestStreamStart,
    ingestStreamStop,
    ingestStreamLookup,
    ingestStreamPause,
    ingestStreamUnpause,
    standingList,
    standingIssue,
    standingAddOut,
    standingRemoveOut,
    standingCancel,
    standingGet,
    standingPropagate,
  )

  /** The OpenAPI document, with server-relative paths. */
  // NOTE(review): the description's last sentence ends right after "please visit" -- confirm whether
  // a target URL is meant to follow.
  val api: OpenApi =
    openApi(
      Info(title = "Quine API", version = BuildInfo.version).withDescription(
        Some(
          """The following is autogenerated from the OpenAPI specification [`openapi.json`]({{openapi_url}})
            |and is included in Quine as fully interactive documentation. When running
            |Quine, you can issue API calls directly from the embedded documentation pages.
            |
            |For docs, guides, and tutorials, please visit """.stripMargin,
        ),
      ),
    )(
      endpoints: _*,
    )
}

/** The Pekko HTTP implementation of routes serving up the OpenAPI specification
  * of our API
  *
  * @param graph the Quine graph
  * @param url   server URL advertised in the absolute-paths variant of the spec
  */
final case class QuineAppOpenApiDocsRoutes(graph: BaseGraph, url: URL)(implicit protected val logConfig: LogConfig)
    extends endpoints4s.pekkohttp.server.Endpoints
    with endpoints4s.pekkohttp.server.JsonEntitiesFromEncodersAndDecoders {

  private val relativePathsApi = new QuineAppOpenApiDocs(graph.idProvider).api
  private val absolutePathsApi = relativePathsApi.withServers(Seq(Server(url.toString)))

  /** `GET docs/openapi.json[?relative=...]`: serves the spec, with absolute paths unless
    * `relative=true` is passed.
    */
  val route: Route = {
    val docEndpoint = endpoint(
      get(
        path / "docs" / "openapi.json" /? qs[Option[Boolean]](
          "relative",
          Some("Whether to use relative paths in the rendered API spec. Defaults to false."),
        ),
      ),
      ok(
        jsonResponse[endpoints4s.openapi.model.OpenApi](
          OpenApiRenderer(isEnterprise = false).stringEncoder,
        ),
      ),
    )

    // Only an explicit `relative=true` selects the relative spec; absent or false means absolute
    docEndpoint.implementedBy { relative =>
      if (relative.contains(true)) relativePathsApi else absolutePathsApi
    }
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/QuineAppRoutes.scala
================================================
package com.thatdot.quine.app.routes

import java.net.URL

import scala.concurrent.ExecutionContext
import scala.util.{Failure, Success, Try}

import org.apache.pekko.actor.ActorSystem
import org.apache.pekko.http.scaladsl.model.{HttpEntity, StatusCodes}
import org.apache.pekko.http.scaladsl.server.Directives._
import org.apache.pekko.http.scaladsl.server.{Directives, Route}
import org.apache.pekko.util.Timeout

import org.webjars.WebJarAssetLocator

import com.thatdot.common.logging.Log.{LazySafeLogging, LogConfig, Safe, SafeLoggableInterpolator}
import com.thatdot.common.quineid.QuineId
import com.thatdot.quine.app.config.BaseConfig
import com.thatdot.quine.app.routes.websocketquinepattern.WebSocketQuinePatternServer
import com.thatdot.quine.app.v2api.{OssApiMethods, V2OssRoutes}
import com.thatdot.quine.app.{BaseApp, BuildInfo, QuineApp}
import com.thatdot.quine.graph._
import com.thatdot.quine.gremlin.GremlinQueryRunner

/** Main webserver routes for Quine
  *
  * This is responsible for serving up the REST API as well as static resources.
*
  * @param graph underlying graph
  * @param quineApp quine application state
  * @param config current application config
  * @param uri The url from which these routes will be served (used for docs generation)
  * @param timeout timeout
  */
class QuineAppRoutes(
  val graph: LiteralOpsGraph with AlgorithmGraph with CypherOpsGraph with StandingQueryOpsGraph,
  val quineApp: BaseApp
    with AdministrationRoutesState
    with QueryUiConfigurationState
    with StandingQueryStoreV1
    with IngestStreamState,
  val config: BaseConfig,
  val uri: URL,
  val timeout: Timeout,
)(implicit val ec: ExecutionContext, protected val logConfig: LogConfig)
    extends BaseAppRoutes
    with QueryUiRoutesImpl
    with WebSocketQueryProtocolServer
    with QueryUiConfigurationRoutesImpl
    with DebugRoutesImpl
    with AlgorithmRoutesImpl
    with AdministrationRoutesImpl
    with IngestRoutesImpl
    with StandingQueryRoutesV1Impl
    with exts.ServerEntitiesWithExamples
    with com.thatdot.quine.routes.exts.CirceJsonAnySchema
    with LazySafeLogging {

  //
  //override val app: BaseApp with StandingQueryStore with IngestStreamState = ???

  implicit val system: ActorSystem = graph.system

  val currentConfig = config.loadedConfigJson
  private val webSocketQuinePatternServer = new WebSocketQuinePatternServer(system)

  val version = BuildInfo.version
  val gremlin: GremlinQueryRunner = GremlinQueryRunner(graph)(timeout)

  val webJarAssetLocator = new WebJarAssetLocator()

  override def hostIndex(qid: QuineId): Int = 0

  override def namespaceExists(namespace: String): Boolean =
    graph.getNamespaces.contains(namespaceFromString(namespace))

  /** Static resources: the UI page, its startup script, icons/manifests, and finally any asset
    * the webjar locator can resolve from the unmatched path.
    */
  lazy val staticFilesRoute: Route = {
    Directives.pathEndOrSingleSlash {
      getFromResource("web/quine-ui.html")
    } ~
    // These paths all serve the same UI page
    Directives.path("dashboard" | "docs" | "v2docs") {
      getFromResource("web/quine-ui.html")
    } ~
    Directives.path("quine-ui-startup.js") {
      getJsWithInjectedConfig("web/quine-ui-startup.js", config.defaultApiVersion == "v2")
    } ~
    Directives.path("browserconfig.xml") {
      getFromResource("web/browserconfig.xml")
    } ~
    Directives.path("favicon.svg") {
      redirect("favicon.ico", StatusCodes.PermanentRedirect)
    } ~
    Directives.path("favicon.ico") {
      getFromResource("web/favicon.ico")
    } ~
    Directives.path("apple-touch-icon.png") {
      getFromResource("web/apple-touch-icon.png")
    } ~
    Directives.path("favicon-32x32.png") {
      getFromResource("web/favicon-32x32.png")
    } ~
    Directives.path("favicon-16x16.png") {
      getFromResource("web/favicon-16x16.png")
    } ~
    Directives.path("site.webmanifest") {
      getFromResource("web/site.webmanifest")
    } ~
    Directives.path("safari-pinned-tab.svg") {
      getFromResource("web/safari-pinned-tab.svg")
    } ~
    Directives.extractUnmatchedPath { path =>
      // An IllegalArgumentException from the locator is treated as "no such asset" and rejects so
      // other routes can be tried; any other failure fails the request.
      Try(webJarAssetLocator.getFullPath(path.toString)) match {
        case Success(fullPath) => getFromResource(fullPath)
        case Failure(_: IllegalArgumentException) => reject
        case Failure(err) => failWith(err)
      }
    }
  }

  /** OpenAPI route */
  lazy val openApiRoute: Route = QuineAppOpenApiDocsRoutes(graph, uri).route

  // Answers any request carrying a `namespace` query parameter with 400 Bad Request
  private val namespacesUnsupportedRoute =
    parameter("namespace")(_ => complete(StatusCodes.BadRequest, HttpEntity("Namespaces not supported")))

  /** Rest API route */
  lazy val apiRoute: Route = {
    // The language server websocket is only exposed when -Dls.enabled=true is set
    val enableLanguageServerRoute: Boolean = sys.props.get("ls.enabled").flatMap(_.toBooleanOption).getOrElse(false)

    val v1Routes = {
      namespacesUnsupportedRoute ~
      queryUiRoutes ~
      queryProtocolWS ~
      (if (enableLanguageServerRoute) webSocketQuinePatternServer.languageServerWebsocketRoute else reject) ~
      queryUiConfigurationRoutes ~
      debugRoutes ~
      algorithmRoutes ~
      administrationRoutes ~
      ingestRoutes ~
      standingQueryRoutes
    }

    // Always serve both V1 and V2 routes
    // NOTE(review): the casts below assume `graph` is a GraphService and `quineApp` is a QuineApp at
    // runtime; the constructor types do not guarantee it -- confirm against the call sites.
    val v2Route = new V2OssRoutes(
      new OssApiMethods(graph.asInstanceOf[GraphService], quineApp.asInstanceOf[QuineApp], config, timeout),
    ).v2Routes(ingestOnly = false)

    logger.info(safe"API V1 and V2 endpoints available (UI default: ${Safe(config.defaultApiVersion)})")

    v1Routes ~ v2Route
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/StandingQueryInterfaceV2.scala
================================================
package com.thatdot.quine.app.routes

import scala.concurrent.Future

import com.thatdot.quine.app.v2api.definitions.query.{standing => V2ApiStanding}
import com.thatdot.quine.graph.{NamespaceId, StandingQueryId}

/** Standing query operations in terms of the V2 API model types. */
trait StandingQueryInterfaceV2 {

  def addStandingQueryV2(
    queryName: String,
    inNamespace: NamespaceId,
    standingQueryDefinition: V2ApiStanding.StandingQuery.StandingQueryDefinition,
  ): Future[StandingQueryInterfaceV2.Result]

  def cancelStandingQueryV2(
    queryName: String,
    inNamespace: NamespaceId,
  ): Future[Option[V2ApiStanding.StandingQuery.RegisteredStandingQuery]]

  def addStandingQueryOutputV2(
    queryName: String,
    outputName: String,
    inNamespace: NamespaceId,
    standingQueryResultWorkflow: V2ApiStanding.StandingQueryResultWorkflow,
  ): Future[StandingQueryInterfaceV2.Result]

  def removeStandingQueryOutputV2(
    queryName: String,
    outputName: String,
    inNamespace: NamespaceId,
  ): Future[Option[V2ApiStanding.StandingQueryResultWorkflow]]

  def getStandingQueriesV2(inNamespace: NamespaceId): 
Future[List[V2ApiStanding.StandingQuery.RegisteredStandingQuery]]

  def getStandingQueryV2(
    queryName: String,
    inNamespace: NamespaceId,
  ): Future[Option[V2ApiStanding.StandingQuery.RegisteredStandingQuery]]

  def getStandingQueryIdV2(queryName: String, inNamespace: NamespaceId): Option[StandingQueryId]
}

object StandingQueryInterfaceV2 {

  /** Outcome of the add operations declared on [[StandingQueryInterfaceV2]]. */
  sealed trait Result

  object Result {
    case object Success extends Result
    case class AlreadyExists(name: String) extends Result
    case class NotFound(name: String) extends Result
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/StandingQueryRoutesV1Impl.scala
================================================
package com.thatdot.quine.app.routes

import scala.concurrent.{ExecutionContext, Future}

import org.apache.pekko.http.scaladsl.model.sse.ServerSentEvent
import org.apache.pekko.http.scaladsl.model.ws
import org.apache.pekko.http.scaladsl.server.Directives._
import org.apache.pekko.http.scaladsl.server.{Route, ValidationRejection}
import org.apache.pekko.stream.scaladsl.{Flow, Sink}
import org.apache.pekko.stream.{Materializer, OverflowStrategy}
import org.apache.pekko.util.Timeout

import cats.data.NonEmptyList
import endpoints4s.{Invalid, Valid}

import com.thatdot.common.logging.Log.LogConfig
import com.thatdot.quine.app.model.ingest.util.KafkaSettingsValidator
import com.thatdot.quine.app.model.ingest.util.KafkaSettingsValidator.ErrorString
import com.thatdot.quine.exceptions.NamespaceNotFoundException
import com.thatdot.quine.graph.cypher.CypherException
import com.thatdot.quine.graph.{
  InvalidQueryPattern,
  StandingQueryOpsGraph,
  StandingQueryResult,
  StandingQueryResultStructure,
}
import com.thatdot.quine.{routes => V1}

/** The Pekko HTTP implementation of [[V1.StandingQueryRoutes]] */
trait StandingQueryRoutesV1Impl
    extends V1.StandingQueryRoutes
    with endpoints4s.circe.JsonSchemas
    with com.thatdot.quine.app.routes.exts.PekkoQuineEndpoints
    with com.thatdot.quine.app.routes.exts.circe.JsonEntitiesFromSchemas {

  implicit def graph: StandingQueryOpsGraph
  implicit def timeout: Timeout
  implicit def materializer: Materializer
  implicit protected def logConfig: LogConfig

  def quineApp: StandingQueryStoreV1

  // Only Kafka outputs are validated here; every other output kind yields no errors
  private def validateOutputDef(outputDef: V1.StandingQueryResultOutputUserDef): Option[NonEmptyList[ErrorString]] =
    outputDef match {
      case k: V1.StandingQueryResultOutputUserDef.WriteToKafka =>
        KafkaSettingsValidator.validateProperties(k.kafkaProperties)
      case _ => None
    }

  // Register a standing query. A `false` from the store is reported as a name conflict, an unknown
  // namespace as `Right(None)`, and pattern/Cypher errors thrown synchronously as `Invalid`.
  private val standingIssueRoute = standingIssue.implementedByAsync { case (name, namespaceParam, query) =>
    graph.requiredGraphIsReadyFuture {
      try quineApp
        .addStandingQuery(name, namespaceFromParam(namespaceParam), query)
        .map {
          case false => Left(endpoints4s.Invalid(s"There is already a standing query named '$name'"))
          case true => Right(Some(()))
        }(graph.nodeDispatcherEC)
        .recoverWith { case _: NamespaceNotFoundException =>
          Future.successful(Right(None))
        }(graph.nodeDispatcherEC)
      catch {
        case iqp: InvalidQueryPattern => Future.successful(Left(endpoints4s.Invalid(iqp.message)))
        case cypherException: CypherException => Future.successful(Left(endpoints4s.Invalid(cypherException.pretty)))
      }
    }
  }

  private val standingRemoveOutRoute = standingRemoveOut.implementedByAsync {
    case (name, outputName, namespaceParam) =>
      graph.requiredGraphIsReadyFuture {
        quineApp.removeStandingQueryOutput(name, outputName, namespaceFromParam(namespaceParam))
      }
  }

  private val standingCancelRoute = standingCancel.implementedByAsync { case (name: String, namespaceParam) =>
    graph.requiredGraphIsReadyFuture {
      quineApp.cancelStandingQuery(name, namespaceFromParam(namespaceParam))
    }
  }

  private val standingGetRoute = standingGet.implementedByAsync { case (queryName, namespaceParam) =>
    graph.requiredGraphIsReadyFuture {
      quineApp.getStandingQuery(queryName, namespaceFromParam(namespaceParam))
    }
  }

  // Add an output to a standing query. The output definition is validated first; a `false` from the
  // store is reported as an output-name conflict.
  private val standingAddOutRoute = standingAddOut.implementedByAsync {
    case (name, outputName, namespaceParam, sqResultOutput) =>
      graph.requiredGraphIsReadyFuture {
        validateOutputDef(sqResultOutput) match {
          case Some(errors) =>
            Future.successful(
              Some(Left(Invalid(s"Cannot create output `$outputName`: ${errors.toList.mkString(",")}"))),
            )

          case None =>
            quineApp
              .addStandingQueryOutput(name, outputName, namespaceFromParam(namespaceParam), sqResultOutput)
              .map {
                _.map {
                  case false =>
                    Left(endpoints4s.Invalid(s"There is already a standing query output named '$outputName'"))
                  case true => Right(())
                }
              }(graph.shardDispatcherEC)
        }
      }
  }

  // Streams the results of a named standing query over a websocket. Incoming messages are ignored.
  private val standingGetWebsocketRoute =
    (standing / standingName /? namespace).directive {
      case Valid((name, namespaceParam)) =>
        quineApp
          .getStandingQueryId(name, namespaceFromParam(namespaceParam))
          .flatMap(sqid =>
            graph
              .standingQueries(namespaceFromParam(namespaceParam))
              // Silently ignores SQs in any absent namespace, returning `None`
              .flatMap((sq: StandingQueryOpsGraph#NamespaceStandingQueries) => sq.standingResultsHub(sqid)),
          ) match {
          case None => reject(ValidationRejection("No Standing Query with the provided name was found"))
          case Some(source) =>
            handleWebSocketMessages(
              Flow
                .fromSinkAndSource(
                  Sink.ignore,
                  source
                    // Keep at most 128 buffered results, dropping the oldest on overflow
                    .buffer(size = 128, overflowStrategy = OverflowStrategy.dropHead)
                    // todo: Verify this is the correct behavior and it shouldn't depend on some configuration option somewhere
                    .map((r: StandingQueryResult) =>
                      ws.TextMessage(r.toJson(StandingQueryResultStructure.WithMetaData()).noSpaces),
                    ),
                )
                .named(s"sq-results-websocket-for-$name"),
            )
        }
      case Invalid(nameValidationErrors) =>
        // ValidationRejection is a safe "semantics violated" rejection -- but this case should not be reachable anyway
        reject(nameValidationErrors.map(ValidationRejection(_)): _*)
    }

  // Streams the results of a named standing query as server-sent events
  private val standingGetResultsRoute: Route =
    (standing / standingName / "results" /? namespace).directive {
      case Valid((name, namespaceParam)) =>
        quineApp
          .getStandingQueryId(name, namespaceFromParam(namespaceParam))
          .flatMap(sqid =>
            // Silently ignores any SQs in an absent namespace, returning `None`
            graph.standingQueries(namespaceFromParam(namespaceParam)).flatMap(_.standingResultsHub(sqid)),
          ) match {
          case None => reject(ValidationRejection("No Standing Query with the provided name was found"))
          case Some(source) =>
            Util.sseRoute(
              source
                .map(sqResult =>
                  ServerSentEvent(
                    // todo: Verify this is the correct behavior and it shouldn't depend on some configuration option somewhere
                    data = sqResult.toJson(StandingQueryResultStructure.WithMetaData()).noSpaces,
                    // Positive matches are tagged "result"; anything else is tagged "cancellation"
                    eventType = Some(if (sqResult.meta.isPositiveMatch) "result" else "cancellation"),
                    id = Some(sqResult.dataHashCode.toString),
                  ),
                ),
            )
        }
      case Invalid(nameValidationErrors) =>
        // ValidationRejection is a safe "semantics violated" rejection -- but this case should not be reachable anyway
        reject(nameValidationErrors.map(ValidationRejection(_)): _*)
    }

  private val standingListRoute = standingList.implementedByAsync { namespaceParam =>
    graph.requiredGraphIsReadyFuture {
      quineApp.getStandingQueries(namespaceFromParam(namespaceParam))
    }
  }

  // Propagate standing queries; `par` is only passed along when `wakeUpNodes` is true. An absent
  // namespace yields `None`.
  private val standingPropagateRoute = standingPropagate.implementedByAsync { case (wakeUpNodes, par, namespaceParam) =>
    graph.requiredGraphIsReadyFuture {
      graph
        .standingQueries(namespaceFromParam(namespaceParam))
        .fold(Future.successful[Option[Unit]](None)) {
          _.propagateStandingQueries(Some(par).filter(_ => wakeUpNodes)).map(_ => Some(()))(ExecutionContext.parasitic)
        }
    }
  }

  /** All V1 standing query routes, tried in the order listed. */
  final val standingQueryRoutes: Route = {
    standingIssueRoute ~
    standingAddOutRoute ~
    standingRemoveOutRoute ~
    standingCancelRoute ~
    standingGetWebsocketRoute ~
    standingGetResultsRoute ~
    standingGetRoute ~
    standingListRoute ~
    standingPropagateRoute
  }
}

================================================
FILE: quine/src/main/scala/com/thatdot/quine/app/routes/StandingQueryStoreV1.scala
================================================ package com.thatdot.quine.app.routes import scala.concurrent.Future import com.thatdot.quine.graph.{NamespaceId, StandingQueryId} import com.thatdot.quine.routes.{RegisteredStandingQuery, StandingQueryDefinition, StandingQueryResultOutputUserDef} trait StandingQueryStoreV1 { def addStandingQuery(queryName: String, inNamespace: NamespaceId, query: StandingQueryDefinition): Future[Boolean] def cancelStandingQuery(queryName: String, inNamespace: NamespaceId): Future[Option[RegisteredStandingQuery]] def addStandingQueryOutput( queryName: String, outputName: String, inNamespace: NamespaceId, sqResultOutput: StandingQueryResultOutputUserDef, ): Future[Option[Boolean]] def removeStandingQueryOutput( queryName: String, outputName: String, inNamespace: NamespaceId, ): Future[Option[StandingQueryResultOutputUserDef]] def getStandingQueries(inNamespace: NamespaceId): Future[List[RegisteredStandingQuery]] def getStandingQuery(queryName: String, inNamespace: NamespaceId): Future[Option[RegisteredStandingQuery]] def getStandingQueryId(queryName: String, inNamespace: NamespaceId): Option[StandingQueryId] } ================================================ FILE: quine/src/main/scala/com/thatdot/quine/app/routes/Util.scala ================================================ package com.thatdot.quine.app.routes import scala.annotation.unused import scala.concurrent.duration.{Duration, DurationInt, FiniteDuration} import org.apache.pekko.NotUsed import org.apache.pekko.http.scaladsl.model.HttpHeader import org.apache.pekko.http.scaladsl.model.headers.{CacheDirectives, RawHeader, `Cache-Control`} import org.apache.pekko.http.scaladsl.model.sse.ServerSentEvent import org.apache.pekko.http.scaladsl.server import org.apache.pekko.http.scaladsl.server.Directives.{complete, respondWithHeader, respondWithHeaders} import org.apache.pekko.stream.scaladsl.{Flow, Source} object Util { /** Given a stream of ServerSentEvents, produce a pekko-http Route 
to stream results from behind * a reverse proxy (assuming the proxy allows for long-running http/1.1 connections and respects * cache headers + X-Accel-Buffering) * @see https://serverfault.com/questions/801628/for-server-sent-events-sse-what-nginx-proxy-configuration-is-appropriate * @param events the serversentevents stream to lift to a pekko route * @return the constructed route */ def sseRoute(events: Source[ServerSentEvent, NotUsed]): server.Route = respondWithHeaders( `Cache-Control`(CacheDirectives.`no-cache`), RawHeader("X-Accel-Buffering", "no"), ) { // reverse proxy friendly headers // this implicit allows marshalling a Source[ServerSentEvent] to an SSE endpoint import org.apache.pekko.http.scaladsl.marshalling.sse.EventStreamMarshalling.toEventStream complete { events // promptly reply with _something_, so the client event stream can be opened .prepend(Source.single(ServerSentEvent.heartbeat)) // pekko defaults to 20sec, firefox's default http request timeout is 15sec // most importantly, this keeps reverse proxies from dropping the keepalive connection over http/1.1 .keepAlive(10.seconds, () => ServerSentEvent.heartbeat) .named("sse-server-flow") } } /** Constant values for use in Content Security Policy (CSP) headers. Abstracted to mitigate the * risk of introducing a security issue due to a silly typo. */ private case object CspConstants { val self = "'self'" val none = "'none'" val inline = "'unsafe-inline'" val eval = "'unsafe-eval'" @unused val any = "'*'" val anyDataBlob = "data:" @unused val anyHttp = "http:" @unused val anyHttps = "https:" } /** Constants describing the frame embedding settings (to mitigate the risk of clickjacking attacks). * These should be kept in sync with one another. * When both X-Frame-Options and a CSP directive for `frame-ancestors` are set, modern browsers should, * per specification, prefer the CSP setting -- but older browsers may not have full CSP support. 
*
    * The policy currently encoded is same-origin: the UI pages may be framed only by a page served
    * from the same domain, port, and protocol. This keeps the UI embeddable in environments that
    * sit behind simple reverse proxies, without requiring the proxy to rewrite the CSP or
    * X-Frame-Options headers.
    *
    * @see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options
    * @see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Security-Policy/frame-ancestors
    * @see https://caniuse.com/mdn-http_headers_content-security-policy_frame-ancestors
    */
  private case object FrameEmbedSettings {
    import CspConstants.self

    /** `X-Frame-Options` header carrying the same-origin policy, for browsers lacking full CSP support. */
    val legacyFrameOptionsHeader: HttpHeader =
      RawHeader(
        com.google.common.net.HttpHeaders.X_FRAME_OPTIONS,
        "SAMEORIGIN",
      )

    /** CSP directive name paired with its allowed sources, expressing the same same-origin policy. */
    val modernCspSetting: (String, Vector[String]) =
      ("frame-ancestors", Vector(self))
  }

  /** Route-hardening operations, implicitly available via {{{import RouteHardeningOps.syntax._}}}.
    *
* Consider improving these implementations if and when https://github.com/akka/akka-http/issues/155 ideas are * implemented in `pekko-http` (consider writing and offering the necessary changes to the library). */ trait RouteHardeningOps { /** Harden the underlying route against XSS by providing a Content Security Policy * * @param underlying the route to protect * @return the augmented route * @see https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/CSP */ private def xssHarden(underlying: server.Route): server.Route = respondWithHeader( RawHeader( com.google.common.net.HttpHeaders.CONTENT_SECURITY_POLICY, { import CspConstants._ val Csp = Map( "default-src" -> Vector(self), // in general, allow resources when they match the same origin policy "script-src" -> Vector(self), // only allow scripts that match the same origin policy "object-src" -> Vector(none), // don't allow , , or "style-src" -> Vector(self, inline), // allow scripts that match same origin or are provided inline "img-src" -> Vector( // allow images that match same origin or are provided as data: blobs self, anyDataBlob, ), "media-src" -> Vector(none), // don't allow