Repository: googlecreativelab/gemini-demos
Branch: main
Commit: dcc8d8a70a3b
Files: 618
Total size: 467.5 KB
Directory structure:
gitextract_zg4slyrs/
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── image-to-code/
│ ├── .gcloudignore
│ ├── .gitignore
│ ├── CONTRIBUTING.md
│ ├── LICENSE
│ ├── README.md
│ ├── components/
│ │ ├── CodePreview.js
│ │ ├── Header.js
│ │ └── ToggleButton.js
│ ├── jsconfig.json
│ ├── next.config.mjs
│ ├── package.json
│ ├── pages/
│ │ ├── _app.js
│ │ ├── _document.js
│ │ ├── api/
│ │ │ └── hello.js
│ │ └── index.js
│ ├── postcss.config.mjs
│ ├── styles/
│ │ └── globals.css
│ ├── tailwind.config.js
│ └── tailwind.config.mjs
├── multimodal-embeddings/
│ ├── .gcloudignore
│ ├── .npmrc
│ ├── .prettierignore
│ ├── .prettierrc
│ ├── README.md
│ ├── app.yaml
│ ├── components.json
│ ├── cors.json
│ ├── emulator-export/
│ │ ├── firebase-export-metadata.json
│ │ ├── firestore_export/
│ │ │ ├── all_namespaces/
│ │ │ │ └── all_kinds/
│ │ │ │ ├── all_namespaces_all_kinds.export_metadata
│ │ │ │ └── output-0
│ │ │ └── firestore_export.overall_export_metadata
│ │ └── storage_export/
│ │ ├── blobs/
│ │ │ ├── 0001871f-3e7a-461d-b428-92efcd1ce0fd
│ │ │ ├── 03da0657-bb18-42fd-9831-7e48c0b723cf
│ │ │ ├── 03f9df1b-0c0d-4014-8a7d-3a5eccad3faf
│ │ │ ├── 04b454df-1216-4ec1-84fb-f59afd85fe90
│ │ │ ├── 04f6c107-9611-4db9-aba8-98841ba5ae6f
│ │ │ ├── 06640a30-4d8e-4812-890e-e0374ad30f58
│ │ │ ├── 06919cbe-0678-4bfd-8cec-4d09e8e2d51b
│ │ │ ├── 0a86afce-8076-4ab8-bd65-c1eaf5ec966f
│ │ │ ├── 0b286de2-4d59-442c-933e-76f0f8e3cd0b
│ │ │ ├── 0c96fb25-144b-46eb-bdf1-74c575ddb6e9
│ │ │ ├── 0cf49b23-1d83-41c8-85d3-5c31fbf7b123
│ │ │ ├── 0d5f8ef7-dd37-432a-a0eb-80ccb822abc5
│ │ │ ├── 0d961bbe-aa5c-4d0e-9e53-dc7a7ec2b96c
│ │ │ ├── 0e7ec18e-93a5-4d40-845b-92cf98040f13
│ │ │ ├── 0ea29a34-dbcf-487a-b0e4-dd1c4028c7e2
│ │ │ ├── 0fb276ab-ad9f-4cd4-95ea-335d18ea72fe
│ │ │ ├── 0ff12c88-f320-4ef7-9356-26fbb9f3590e
│ │ │ ├── 114b11eb-de0d-441b-85d9-d70db5353156
│ │ │ ├── 125f8010-6ab0-463f-8278-5565cfc9d618
│ │ │ ├── 1514936f-769c-40b4-bfe4-4746fbc79fcd
│ │ │ ├── 16227ec4-2585-40aa-a5fd-e25e0d227b66
│ │ │ ├── 17f5cac8-e8c5-45c8-b6b3-afaa11b556ff
│ │ │ ├── 18964cd0-de01-46bd-88c3-ffad0b15b020
│ │ │ ├── 1aca1226-11c8-42f4-8137-339fe674e55c
│ │ │ ├── 1b831e3c-a1e8-4eb4-b3d6-de18b656814a
│ │ │ ├── 1ba5a0d9-2902-4a7a-8f33-862cf0abcb38
│ │ │ ├── 1bd97095-c1d4-4104-ae2a-f6c0e56dde89
│ │ │ ├── 1ccff42b-89c8-4a66-9d8c-3c7f1c307625
│ │ │ ├── 1d363b0e-cea6-4e58-aa6f-17c005c32f60
│ │ │ ├── 1e2e00f2-eb8c-432d-91a9-1f49c9bb2087
│ │ │ ├── 1e72b424-5c06-42c0-99eb-140545880ccb
│ │ │ ├── 1e859368-8d5b-4ecc-b30f-116ec3377f39
│ │ │ ├── 1f67aea5-0f3e-4b12-9554-aae3a84d5354
│ │ │ ├── 1fb32b8f-72d0-43ac-8955-d3804be94bc2
│ │ │ ├── 2166725e-18e6-4982-aeb7-2f4925461bf1
│ │ │ ├── 21af1f86-eb45-421d-90ff-962fc5509c16
│ │ │ ├── 2883e822-976c-4d81-9601-2df44f96bcd7
│ │ │ ├── 28a96371-1742-4e03-ae5c-ba309216af49
│ │ │ ├── 2ae61d7e-7bc1-4695-bd44-3637adbef998
│ │ │ ├── 2c938160-fe51-4222-a7d1-8220ad4ee300
│ │ │ ├── 2de6149c-40aa-471f-94a1-2dd2824724d8
│ │ │ ├── 2eeccd3b-575d-4055-95d9-c5fbf26b3a8d
│ │ │ ├── 2efc3c4b-2601-4d8f-ba2b-8d84712c436e
│ │ │ ├── 2f4a0b48-dc3f-4fd4-be4d-726c56619302
│ │ │ ├── 30621326-86e9-4f15-b5ea-6eaad59947df
│ │ │ ├── 30d12518-b643-48cf-8608-7aa9771fed14
│ │ │ ├── 31500630-b5a2-4092-86b1-5f2565716d86
│ │ │ ├── 31719f53-507c-4a3f-8b29-0be61178f3e7
│ │ │ ├── 354b878a-20c1-4b79-8a41-2b96e63eedaf
│ │ │ ├── 362a4319-eac2-4157-82f8-4cc5624ab0ee
│ │ │ ├── 36a45009-4146-4860-ab0f-2d41d6e1b8d2
│ │ │ ├── 36b5a479-0181-4f1b-98f4-86a64d4e1191
│ │ │ ├── 37be70be-0250-4281-9532-401a7bf29f41
│ │ │ ├── 3954ab46-6a25-4bb6-ab32-f0d55c546cb2
│ │ │ ├── 3b33aa14-5d04-43bd-9ef6-e30b8861d7db
│ │ │ ├── 3b688492-90d3-4d82-b6fc-499bc09e56eb
│ │ │ ├── 3dccd76d-64a7-47c2-a9b5-eea9b2efa12f
│ │ │ ├── 3e64b976-d7b3-4356-92d7-0aeb15f45fd5
│ │ │ ├── 3e966f04-2910-4104-8f1e-412bf33917c0
│ │ │ ├── 412215ae-10af-413a-9f75-861667cf12fe
│ │ │ ├── 41e09631-0791-4051-96ce-4e18d05f2be9
│ │ │ ├── 4223ad2c-c8fc-4f31-8454-284312b22905
│ │ │ ├── 42694520-432b-4928-adce-006db533f6f8
│ │ │ ├── 4418a01d-24ec-4e4b-b29a-e0689b395969
│ │ │ ├── 45131a81-db3a-4f9b-9330-5e2eb2b3eb44
│ │ │ ├── 454a16c7-6ec4-4c29-838f-6d235d729c0d
│ │ │ ├── 45b0127e-7f92-437f-97e4-f96b50ca60f6
│ │ │ ├── 460d8563-7063-413d-aa9c-d50fb8c9a0a3
│ │ │ ├── 464ea502-b1a7-47a1-af48-7b4296b13896
│ │ │ ├── 477f1478-d154-44c1-9b3e-f7bc08b81b1b
│ │ │ ├── 47a173ba-dd3f-4e02-8409-e46bf5c831ca
│ │ │ ├── 47eb894d-5a60-4c64-b22a-f496f63a869e
│ │ │ ├── 487cbb33-22c6-41f6-8e6e-4125927e2f20
│ │ │ ├── 495964a6-f764-487d-aec8-a1c1c02d8be3
│ │ │ ├── 4ad91f5f-7c9e-40e4-9c4f-c03a22bd8b2f
│ │ │ ├── 5012b076-ef07-41c8-9da7-033e2af2f579
│ │ │ ├── 50b673c2-e1b9-4b84-bbcc-1c47cca0cace
│ │ │ ├── 530a9038-98b8-4925-b26b-f5c7776d882f
│ │ │ ├── 55e55a5c-a59a-4c7f-846a-73784ee2f25d
│ │ │ ├── 563a8d9a-0285-4496-aaba-59138c349dad
│ │ │ ├── 58852f43-76ea-4ad1-9c6a-a33c2c53ff56
│ │ │ ├── 5a3ce50b-2658-4f68-b12d-0eec92a7223a
│ │ │ ├── 5a3ecd67-d20b-405d-83f7-86ff772f6d6f
│ │ │ ├── 5b388511-e3b8-4d3f-b0bf-d085f75c7769
│ │ │ ├── 5b62fd41-e2a2-4ba6-b305-a854619a947b
│ │ │ ├── 5ef2346d-d39e-49ce-b26a-9e5069dba421
│ │ │ ├── 5fd6d66a-8bf1-4f81-b0eb-9895552a8ec4
│ │ │ ├── 61556f93-4dd2-43e5-bc4d-78cbe3809d42
│ │ │ ├── 620c1be4-e353-46d9-8dbd-c3079d5efd6e
│ │ │ ├── 62b85256-0f54-47cb-ae83-d45cd0a66030
│ │ │ ├── 64609c87-07e8-48a4-b8c2-a26210e1dd0b
│ │ │ ├── 64f13cdd-ee8c-4238-b66b-14c1baa578e3
│ │ │ ├── 65c4dee4-4429-4eae-9250-2e969f3c190a
│ │ │ ├── 66569702-2074-4047-a794-4f11e3b285f2
│ │ │ ├── 67aaca3d-2acc-4987-8ed9-3f2fbf075f7c
│ │ │ ├── 67b6aeb1-b7d0-4402-8a60-b2f9c0c8b447
│ │ │ ├── 686d4f0b-5c28-48f4-b656-b354a13981a7
│ │ │ ├── 69657785-7299-408d-8453-8cb32f848150
│ │ │ ├── 6b6f7c8b-a5b9-4436-80a0-f128c1eb3bcf
│ │ │ ├── 6d8f5f9c-fef6-4179-ba05-79e361503c7a
│ │ │ ├── 70772163-4c3b-493a-ad04-87b67ee35d71
│ │ │ ├── 70c43a85-254c-49b1-bf14-d13687913f2d
│ │ │ ├── 70db2153-5bbf-42f6-91bc-f3cb4249dc01
│ │ │ ├── 70ef85a5-fb7a-41ff-ac02-0d00927d0274
│ │ │ ├── 70fc8a82-bfeb-4bb7-bb95-2a654168ead1
│ │ │ ├── 71894824-0cd9-4acf-9178-abba58090e42
│ │ │ ├── 721bdd57-7850-42e4-9fb9-747525b187c8
│ │ │ ├── 7706c66e-3fd9-45eb-9df5-8308b84dd194
│ │ │ ├── 7846c694-8e7a-47dd-b8b8-a2448285dcc0
│ │ │ ├── 78a3c767-265f-419d-bb3f-c3eba6e22017
│ │ │ ├── 78dd9bfd-a0ca-4ac9-9473-45d0d051fb1e
│ │ │ ├── 799fcdd0-1054-4c53-a218-48cb8f3ed044
│ │ │ ├── 7a5a7379-d29b-412b-9a50-565e171ab712
│ │ │ ├── 7c12f79f-d474-446f-b5fe-f669e60f8227
│ │ │ ├── 7c4a2540-75b9-4e37-b4b1-a6d7f62830cb
│ │ │ ├── 7c689600-cf26-4953-8be5-ae58faa5cad0
│ │ │ ├── 7d321a59-d6c9-433e-bfcb-2ee10dcd2745
│ │ │ ├── 7ff005e0-3f17-423f-9a04-1d762447eb07
│ │ │ ├── 8278f4eb-708a-4054-b408-9b4a2a8d6b7a
│ │ │ ├── 828f697d-fd40-470e-8326-8a40dccf9096
│ │ │ ├── 832cacff-0ed7-4685-a383-a0d94e6a366e
│ │ │ ├── 84307d32-0903-4a1c-8599-9d3938995da6
│ │ │ ├── 84491769-76dd-4667-825d-95156d99ba3f
│ │ │ ├── 848c006e-a50c-4d24-9e96-f9b12ab5f66b
│ │ │ ├── 8628a1cb-e588-483c-b00f-ba449d2c27b1
│ │ │ ├── 87039e64-1d6f-4f28-92e5-322ae686133d
│ │ │ ├── 884b9432-28fb-465b-a826-35daec2593d7
│ │ │ ├── 8bc987b7-d22f-4331-8de5-7149d9b73361
│ │ │ ├── 8cd45f77-4225-412e-9c61-bb90285356c5
│ │ │ ├── 8e0d391e-c779-4a34-a8de-f6abccc701ab
│ │ │ ├── 8f014805-5f51-439d-9447-b686abf889fa
│ │ │ ├── 91006bcb-22a2-480d-83ba-5393a41c6558
│ │ │ ├── 91bbc49d-9748-4638-a75c-573f23ff7c5e
│ │ │ ├── 92e62795-a847-46bc-a983-8f2f975f4ac3
│ │ │ ├── 92e944f9-bcfa-4e3c-95ad-2d6619cc377b
│ │ │ ├── 934f914b-21f8-45e6-812c-ac29926508c3
│ │ │ ├── 96d4fbff-3d58-4cef-b6f0-393751f39a80
│ │ │ ├── 98ca7091-441b-471c-a3cc-7a7e94826534
│ │ │ ├── 991c80af-1b3f-454c-8716-c45ed7023269
│ │ │ ├── 9ab4a412-9175-410b-8281-65999c677106
│ │ │ ├── 9d4fbb56-2a44-4866-9489-03d9312fdd55
│ │ │ ├── 9edac21b-4d29-4a5a-a5cd-5aac086224df
│ │ │ ├── a0eb55dd-239b-4908-8acd-a89c2bf639c2
│ │ │ ├── a1539350-b2f4-4c95-a3d4-6b85d493f2b1
│ │ │ ├── a1daa863-1f24-4f91-964b-55e133f8c36d
│ │ │ ├── a1f4669b-2187-4feb-92dd-1a321a0d3023
│ │ │ ├── a3f79d63-f265-4971-997b-4e64a25f806f
│ │ │ ├── a3fcb879-7be9-4a4f-8099-d237cf97418f
│ │ │ ├── a764ed81-66b1-4d49-9013-b75c1c6ba383
│ │ │ ├── a7855fa7-b5be-4ac3-a4e5-d4893ad338f6
│ │ │ ├── a8c3e42a-8ebd-490f-b1da-d9046b7aa4ed
│ │ │ ├── a99818ab-a299-4e87-9662-265cbe8e1160
│ │ │ ├── ab183f61-767e-4020-bae5-928ca15abd23
│ │ │ ├── ad7b8aa9-f8f7-43d5-96cf-b222b5e57318
│ │ │ ├── ad8f7da2-4988-40ec-9042-85c3f4ca6404
│ │ │ ├── ae4d2687-9c0e-49b2-9401-271fb42e62e1
│ │ │ ├── b17a8866-876a-4d32-8b84-5e19b8bcc5f4
│ │ │ ├── b1d4c0bb-0181-45cc-a1d7-7f2832b49b6d
│ │ │ ├── b2e7394a-32cb-45ca-abea-73d0b2df628f
│ │ │ ├── b499dd2d-cd18-488c-9927-c2f1f786a762
│ │ │ ├── b7ab64f1-76c2-459d-b6a9-cc7d52e689d1
│ │ │ ├── b8008ef2-2fb5-40ed-8fef-211863635165
│ │ │ ├── b8f8d67b-85c4-4734-a413-7e1c5b1a7a9d
│ │ │ ├── ba5f66c2-65e8-4ecc-829d-28dd2ddf625a
│ │ │ ├── ba7eb319-928d-45ba-bce3-4edd8f3955bd
│ │ │ ├── ba9ba128-b58e-47c1-97ee-4b2f52524cdf
│ │ │ ├── bc45b1d1-ec5d-4b77-94d9-2ba6f83c9397
│ │ │ ├── c20333af-a455-4b2f-baf0-f852e684543b
│ │ │ ├── c27f43ab-cdee-4ea2-b993-85e04502d1e7
│ │ │ ├── c32a1d25-a547-46b5-9d66-b2cce1b62c6c
│ │ │ ├── c340aa08-9ea1-4b1e-9155-0ffc9c35f7d4
│ │ │ ├── c4005e1a-a820-4fc6-b4d7-36de986a6508
│ │ │ ├── c65e9606-9e1a-4aa3-a546-fd07eaba08a3
│ │ │ ├── ca44d9db-7998-4cf3-bc92-aa69d366900a
│ │ │ ├── cb8c98b6-f0f0-48d5-be12-e27ab75dbc89
│ │ │ ├── cc2b2031-0170-43c4-bf8a-7a7267d38a96
│ │ │ ├── cea1146f-f63d-421e-b778-5ed8f39c1304
│ │ │ ├── cf2d1050-806b-4d32-9a18-083649e9142c
│ │ │ ├── d0920669-7709-4aab-97df-dd5e6c6719f9
│ │ │ ├── d12b9178-857a-4567-a7ef-3660f81813b9
│ │ │ ├── d21827e3-c794-4fa8-8cbc-6b46bfff25d2
│ │ │ ├── d223daa7-afa8-4108-8965-2ac9c45d9bab
│ │ │ ├── d2efd3f2-cf9a-4d75-89ad-3c8d873a1ff1
│ │ │ ├── d35aeaf2-da34-4207-a602-a9b236d62fca
│ │ │ ├── d4bb491a-51aa-48ac-a946-faf94bdb98f3
│ │ │ ├── d51f715d-c5b7-485b-8718-7112b700252b
│ │ │ ├── d62ac9e8-14ff-4a51-a65c-13b0be4a382d
│ │ │ ├── d7400d59-6e7d-438d-a433-7e248a89d7ef
│ │ │ ├── d8efb7b3-3d17-47f7-b46a-d4d5933d1c6c
│ │ │ ├── daff4d68-e7ba-4876-a389-f6379792db29
│ │ │ ├── db685a34-6e9f-480c-998f-74d9dfa33ded
│ │ │ ├── dd83acba-c2c2-471f-96e6-02a7564ccdf2
│ │ │ ├── ddd82ca8-addd-461f-a409-10ba4f2384f2
│ │ │ ├── de2c3001-b81a-43cb-bc8d-430cc5f31ecd
│ │ │ ├── df252df6-207a-4b5c-b429-517a9188442a
│ │ │ ├── dfc781e1-8984-4900-9fd9-4d67a8d06165
│ │ │ ├── e1a86ae0-794e-48c7-a521-6184fe47baf7
│ │ │ ├── e24996a9-4e92-47da-b164-afe68646772e
│ │ │ ├── e4a1e47d-a111-4f8f-aa81-fa8305cacc02
│ │ │ ├── e5411091-9430-440b-b167-06666aaa6282
│ │ │ ├── e68b7951-4436-4b51-9352-b8478a7016a2
│ │ │ ├── e745dd10-3a36-4c3c-9f6d-a0df1af88db5
│ │ │ ├── e7f85be0-c8af-4658-ae23-37f0fec2d3e8
│ │ │ ├── e96f6208-3db5-4e59-92fd-13bcc17a219f
│ │ │ ├── ec636ad8-59af-4d37-899d-7e38b6820142
│ │ │ ├── eed1b24c-8d17-474c-a8c5-abbb1253f3ac
│ │ │ ├── efcf28ed-64c0-4fb0-889e-adb9f7826bd2
│ │ │ ├── f0c05fe7-cce3-4ae7-81a7-04eb885a858d
│ │ │ ├── f1ee8d78-1966-45ac-899b-0fb7fdbef35c
│ │ │ ├── f1fdb37a-b154-4165-9964-31831b40be8b
│ │ │ ├── f45cae23-5852-45c2-a36d-923768351808
│ │ │ ├── f4c6688d-2e98-47bb-9880-d8beb1569036
│ │ │ ├── f511acae-1e9a-4fea-a426-acb3fd6aa5bd
│ │ │ ├── f699b6ac-7812-4564-adb5-8260b225f33b
│ │ │ ├── f77367b4-cad3-4d6a-961a-f4f4d6e24817
│ │ │ ├── f7bae092-4e7b-40f5-adae-a5c89bd9065a
│ │ │ ├── f8c2b960-f80e-488d-8126-04fe12057048
│ │ │ ├── f9d73365-1492-4efa-aabd-b730650157d0
│ │ │ ├── faf1a8f8-0cdf-43ba-8af8-d53a2d1a060f
│ │ │ ├── fb7828b5-0a12-46a7-8c5c-6c220d116e36
│ │ │ ├── fb857ec0-d80d-4404-ba0e-1ba436305d7d
│ │ │ ├── fda16ac8-d1cf-49db-93b7-833c274cd0d2
│ │ │ ├── fe3a8d7c-3dff-489e-abca-0b515db9420c
│ │ │ ├── fe78e904-37eb-46de-9195-1264c9acc754
│ │ │ ├── ff05e63d-c45b-499f-bc6e-a507047b4f10
│ │ │ └── ffe18889-8e9a-4b8c-8a81-867a03a3c274
│ │ ├── buckets.json
│ │ └── metadata/
│ │ ├── 0001871f-3e7a-461d-b428-92efcd1ce0fd.json
│ │ ├── 03da0657-bb18-42fd-9831-7e48c0b723cf.json
│ │ ├── 03f9df1b-0c0d-4014-8a7d-3a5eccad3faf.json
│ │ ├── 04b454df-1216-4ec1-84fb-f59afd85fe90.json
│ │ ├── 04f6c107-9611-4db9-aba8-98841ba5ae6f.json
│ │ ├── 06640a30-4d8e-4812-890e-e0374ad30f58.json
│ │ ├── 06919cbe-0678-4bfd-8cec-4d09e8e2d51b.json
│ │ ├── 0a86afce-8076-4ab8-bd65-c1eaf5ec966f.json
│ │ ├── 0b286de2-4d59-442c-933e-76f0f8e3cd0b.json
│ │ ├── 0c96fb25-144b-46eb-bdf1-74c575ddb6e9.json
│ │ ├── 0cf49b23-1d83-41c8-85d3-5c31fbf7b123.json
│ │ ├── 0d5f8ef7-dd37-432a-a0eb-80ccb822abc5.json
│ │ ├── 0d961bbe-aa5c-4d0e-9e53-dc7a7ec2b96c.json
│ │ ├── 0e7ec18e-93a5-4d40-845b-92cf98040f13.json
│ │ ├── 0ea29a34-dbcf-487a-b0e4-dd1c4028c7e2.json
│ │ ├── 0fb276ab-ad9f-4cd4-95ea-335d18ea72fe.json
│ │ ├── 0ff12c88-f320-4ef7-9356-26fbb9f3590e.json
│ │ ├── 114b11eb-de0d-441b-85d9-d70db5353156.json
│ │ ├── 125f8010-6ab0-463f-8278-5565cfc9d618.json
│ │ ├── 1514936f-769c-40b4-bfe4-4746fbc79fcd.json
│ │ ├── 16227ec4-2585-40aa-a5fd-e25e0d227b66.json
│ │ ├── 17f5cac8-e8c5-45c8-b6b3-afaa11b556ff.json
│ │ ├── 18964cd0-de01-46bd-88c3-ffad0b15b020.json
│ │ ├── 1aca1226-11c8-42f4-8137-339fe674e55c.json
│ │ ├── 1b831e3c-a1e8-4eb4-b3d6-de18b656814a.json
│ │ ├── 1ba5a0d9-2902-4a7a-8f33-862cf0abcb38.json
│ │ ├── 1bd97095-c1d4-4104-ae2a-f6c0e56dde89.json
│ │ ├── 1ccff42b-89c8-4a66-9d8c-3c7f1c307625.json
│ │ ├── 1d363b0e-cea6-4e58-aa6f-17c005c32f60.json
│ │ ├── 1e2e00f2-eb8c-432d-91a9-1f49c9bb2087.json
│ │ ├── 1e72b424-5c06-42c0-99eb-140545880ccb.json
│ │ ├── 1e859368-8d5b-4ecc-b30f-116ec3377f39.json
│ │ ├── 1f67aea5-0f3e-4b12-9554-aae3a84d5354.json
│ │ ├── 1fb32b8f-72d0-43ac-8955-d3804be94bc2.json
│ │ ├── 2166725e-18e6-4982-aeb7-2f4925461bf1.json
│ │ ├── 21af1f86-eb45-421d-90ff-962fc5509c16.json
│ │ ├── 2883e822-976c-4d81-9601-2df44f96bcd7.json
│ │ ├── 28a96371-1742-4e03-ae5c-ba309216af49.json
│ │ ├── 2ae61d7e-7bc1-4695-bd44-3637adbef998.json
│ │ ├── 2c938160-fe51-4222-a7d1-8220ad4ee300.json
│ │ ├── 2de6149c-40aa-471f-94a1-2dd2824724d8.json
│ │ ├── 2eeccd3b-575d-4055-95d9-c5fbf26b3a8d.json
│ │ ├── 2efc3c4b-2601-4d8f-ba2b-8d84712c436e.json
│ │ ├── 2f4a0b48-dc3f-4fd4-be4d-726c56619302.json
│ │ ├── 30621326-86e9-4f15-b5ea-6eaad59947df.json
│ │ ├── 30d12518-b643-48cf-8608-7aa9771fed14.json
│ │ ├── 31500630-b5a2-4092-86b1-5f2565716d86.json
│ │ ├── 31719f53-507c-4a3f-8b29-0be61178f3e7.json
│ │ ├── 354b878a-20c1-4b79-8a41-2b96e63eedaf.json
│ │ ├── 362a4319-eac2-4157-82f8-4cc5624ab0ee.json
│ │ ├── 36a45009-4146-4860-ab0f-2d41d6e1b8d2.json
│ │ ├── 36b5a479-0181-4f1b-98f4-86a64d4e1191.json
│ │ ├── 37be70be-0250-4281-9532-401a7bf29f41.json
│ │ ├── 3954ab46-6a25-4bb6-ab32-f0d55c546cb2.json
│ │ ├── 3b33aa14-5d04-43bd-9ef6-e30b8861d7db.json
│ │ ├── 3b688492-90d3-4d82-b6fc-499bc09e56eb.json
│ │ ├── 3dccd76d-64a7-47c2-a9b5-eea9b2efa12f.json
│ │ ├── 3e64b976-d7b3-4356-92d7-0aeb15f45fd5.json
│ │ ├── 3e966f04-2910-4104-8f1e-412bf33917c0.json
│ │ ├── 412215ae-10af-413a-9f75-861667cf12fe.json
│ │ ├── 41e09631-0791-4051-96ce-4e18d05f2be9.json
│ │ ├── 4223ad2c-c8fc-4f31-8454-284312b22905.json
│ │ ├── 42694520-432b-4928-adce-006db533f6f8.json
│ │ ├── 4418a01d-24ec-4e4b-b29a-e0689b395969.json
│ │ ├── 45131a81-db3a-4f9b-9330-5e2eb2b3eb44.json
│ │ ├── 454a16c7-6ec4-4c29-838f-6d235d729c0d.json
│ │ ├── 45b0127e-7f92-437f-97e4-f96b50ca60f6.json
│ │ ├── 460d8563-7063-413d-aa9c-d50fb8c9a0a3.json
│ │ ├── 464ea502-b1a7-47a1-af48-7b4296b13896.json
│ │ ├── 477f1478-d154-44c1-9b3e-f7bc08b81b1b.json
│ │ ├── 47a173ba-dd3f-4e02-8409-e46bf5c831ca.json
│ │ ├── 47eb894d-5a60-4c64-b22a-f496f63a869e.json
│ │ ├── 487cbb33-22c6-41f6-8e6e-4125927e2f20.json
│ │ ├── 495964a6-f764-487d-aec8-a1c1c02d8be3.json
│ │ ├── 4ad91f5f-7c9e-40e4-9c4f-c03a22bd8b2f.json
│ │ ├── 5012b076-ef07-41c8-9da7-033e2af2f579.json
│ │ ├── 50b673c2-e1b9-4b84-bbcc-1c47cca0cace.json
│ │ ├── 530a9038-98b8-4925-b26b-f5c7776d882f.json
│ │ ├── 55e55a5c-a59a-4c7f-846a-73784ee2f25d.json
│ │ ├── 563a8d9a-0285-4496-aaba-59138c349dad.json
│ │ ├── 58852f43-76ea-4ad1-9c6a-a33c2c53ff56.json
│ │ ├── 5a3ce50b-2658-4f68-b12d-0eec92a7223a.json
│ │ ├── 5a3ecd67-d20b-405d-83f7-86ff772f6d6f.json
│ │ ├── 5b388511-e3b8-4d3f-b0bf-d085f75c7769.json
│ │ ├── 5b62fd41-e2a2-4ba6-b305-a854619a947b.json
│ │ ├── 5ef2346d-d39e-49ce-b26a-9e5069dba421.json
│ │ ├── 5fd6d66a-8bf1-4f81-b0eb-9895552a8ec4.json
│ │ ├── 61556f93-4dd2-43e5-bc4d-78cbe3809d42.json
│ │ ├── 620c1be4-e353-46d9-8dbd-c3079d5efd6e.json
│ │ ├── 62b85256-0f54-47cb-ae83-d45cd0a66030.json
│ │ ├── 64609c87-07e8-48a4-b8c2-a26210e1dd0b.json
│ │ ├── 64f13cdd-ee8c-4238-b66b-14c1baa578e3.json
│ │ ├── 65c4dee4-4429-4eae-9250-2e969f3c190a.json
│ │ ├── 66569702-2074-4047-a794-4f11e3b285f2.json
│ │ ├── 67aaca3d-2acc-4987-8ed9-3f2fbf075f7c.json
│ │ ├── 67b6aeb1-b7d0-4402-8a60-b2f9c0c8b447.json
│ │ ├── 686d4f0b-5c28-48f4-b656-b354a13981a7.json
│ │ ├── 69657785-7299-408d-8453-8cb32f848150.json
│ │ ├── 6b6f7c8b-a5b9-4436-80a0-f128c1eb3bcf.json
│ │ ├── 6d8f5f9c-fef6-4179-ba05-79e361503c7a.json
│ │ ├── 70772163-4c3b-493a-ad04-87b67ee35d71.json
│ │ ├── 70c43a85-254c-49b1-bf14-d13687913f2d.json
│ │ ├── 70db2153-5bbf-42f6-91bc-f3cb4249dc01.json
│ │ ├── 70ef85a5-fb7a-41ff-ac02-0d00927d0274.json
│ │ ├── 70fc8a82-bfeb-4bb7-bb95-2a654168ead1.json
│ │ ├── 71894824-0cd9-4acf-9178-abba58090e42.json
│ │ ├── 721bdd57-7850-42e4-9fb9-747525b187c8.json
│ │ ├── 7706c66e-3fd9-45eb-9df5-8308b84dd194.json
│ │ ├── 7846c694-8e7a-47dd-b8b8-a2448285dcc0.json
│ │ ├── 78a3c767-265f-419d-bb3f-c3eba6e22017.json
│ │ ├── 78dd9bfd-a0ca-4ac9-9473-45d0d051fb1e.json
│ │ ├── 799fcdd0-1054-4c53-a218-48cb8f3ed044.json
│ │ ├── 7a5a7379-d29b-412b-9a50-565e171ab712.json
│ │ ├── 7c12f79f-d474-446f-b5fe-f669e60f8227.json
│ │ ├── 7c4a2540-75b9-4e37-b4b1-a6d7f62830cb.json
│ │ ├── 7c689600-cf26-4953-8be5-ae58faa5cad0.json
│ │ ├── 7d321a59-d6c9-433e-bfcb-2ee10dcd2745.json
│ │ ├── 7ff005e0-3f17-423f-9a04-1d762447eb07.json
│ │ ├── 8278f4eb-708a-4054-b408-9b4a2a8d6b7a.json
│ │ ├── 828f697d-fd40-470e-8326-8a40dccf9096.json
│ │ ├── 832cacff-0ed7-4685-a383-a0d94e6a366e.json
│ │ ├── 84307d32-0903-4a1c-8599-9d3938995da6.json
│ │ ├── 84491769-76dd-4667-825d-95156d99ba3f.json
│ │ ├── 848c006e-a50c-4d24-9e96-f9b12ab5f66b.json
│ │ ├── 8628a1cb-e588-483c-b00f-ba449d2c27b1.json
│ │ ├── 87039e64-1d6f-4f28-92e5-322ae686133d.json
│ │ ├── 884b9432-28fb-465b-a826-35daec2593d7.json
│ │ ├── 8bc987b7-d22f-4331-8de5-7149d9b73361.json
│ │ ├── 8cd45f77-4225-412e-9c61-bb90285356c5.json
│ │ ├── 8e0d391e-c779-4a34-a8de-f6abccc701ab.json
│ │ ├── 8f014805-5f51-439d-9447-b686abf889fa.json
│ │ ├── 91006bcb-22a2-480d-83ba-5393a41c6558.json
│ │ ├── 91bbc49d-9748-4638-a75c-573f23ff7c5e.json
│ │ ├── 92e62795-a847-46bc-a983-8f2f975f4ac3.json
│ │ ├── 92e944f9-bcfa-4e3c-95ad-2d6619cc377b.json
│ │ ├── 934f914b-21f8-45e6-812c-ac29926508c3.json
│ │ ├── 96d4fbff-3d58-4cef-b6f0-393751f39a80.json
│ │ ├── 98ca7091-441b-471c-a3cc-7a7e94826534.json
│ │ ├── 991c80af-1b3f-454c-8716-c45ed7023269.json
│ │ ├── 9ab4a412-9175-410b-8281-65999c677106.json
│ │ ├── 9d4fbb56-2a44-4866-9489-03d9312fdd55.json
│ │ ├── 9edac21b-4d29-4a5a-a5cd-5aac086224df.json
│ │ ├── a0eb55dd-239b-4908-8acd-a89c2bf639c2.json
│ │ ├── a1539350-b2f4-4c95-a3d4-6b85d493f2b1.json
│ │ ├── a1daa863-1f24-4f91-964b-55e133f8c36d.json
│ │ ├── a1f4669b-2187-4feb-92dd-1a321a0d3023.json
│ │ ├── a3f79d63-f265-4971-997b-4e64a25f806f.json
│ │ ├── a3fcb879-7be9-4a4f-8099-d237cf97418f.json
│ │ ├── a764ed81-66b1-4d49-9013-b75c1c6ba383.json
│ │ ├── a7855fa7-b5be-4ac3-a4e5-d4893ad338f6.json
│ │ ├── a8c3e42a-8ebd-490f-b1da-d9046b7aa4ed.json
│ │ ├── a99818ab-a299-4e87-9662-265cbe8e1160.json
│ │ ├── ab183f61-767e-4020-bae5-928ca15abd23.json
│ │ ├── ad7b8aa9-f8f7-43d5-96cf-b222b5e57318.json
│ │ ├── ad8f7da2-4988-40ec-9042-85c3f4ca6404.json
│ │ ├── ae4d2687-9c0e-49b2-9401-271fb42e62e1.json
│ │ ├── b17a8866-876a-4d32-8b84-5e19b8bcc5f4.json
│ │ ├── b1d4c0bb-0181-45cc-a1d7-7f2832b49b6d.json
│ │ ├── b2e7394a-32cb-45ca-abea-73d0b2df628f.json
│ │ ├── b499dd2d-cd18-488c-9927-c2f1f786a762.json
│ │ ├── b7ab64f1-76c2-459d-b6a9-cc7d52e689d1.json
│ │ ├── b8008ef2-2fb5-40ed-8fef-211863635165.json
│ │ ├── b8f8d67b-85c4-4734-a413-7e1c5b1a7a9d.json
│ │ ├── ba5f66c2-65e8-4ecc-829d-28dd2ddf625a.json
│ │ ├── ba7eb319-928d-45ba-bce3-4edd8f3955bd.json
│ │ ├── ba9ba128-b58e-47c1-97ee-4b2f52524cdf.json
│ │ ├── bc45b1d1-ec5d-4b77-94d9-2ba6f83c9397.json
│ │ ├── c20333af-a455-4b2f-baf0-f852e684543b.json
│ │ ├── c27f43ab-cdee-4ea2-b993-85e04502d1e7.json
│ │ ├── c32a1d25-a547-46b5-9d66-b2cce1b62c6c.json
│ │ ├── c340aa08-9ea1-4b1e-9155-0ffc9c35f7d4.json
│ │ ├── c4005e1a-a820-4fc6-b4d7-36de986a6508.json
│ │ ├── c65e9606-9e1a-4aa3-a546-fd07eaba08a3.json
│ │ ├── ca44d9db-7998-4cf3-bc92-aa69d366900a.json
│ │ ├── cb8c98b6-f0f0-48d5-be12-e27ab75dbc89.json
│ │ ├── cc2b2031-0170-43c4-bf8a-7a7267d38a96.json
│ │ ├── cea1146f-f63d-421e-b778-5ed8f39c1304.json
│ │ ├── cf2d1050-806b-4d32-9a18-083649e9142c.json
│ │ ├── d0920669-7709-4aab-97df-dd5e6c6719f9.json
│ │ ├── d12b9178-857a-4567-a7ef-3660f81813b9.json
│ │ ├── d21827e3-c794-4fa8-8cbc-6b46bfff25d2.json
│ │ ├── d223daa7-afa8-4108-8965-2ac9c45d9bab.json
│ │ ├── d2efd3f2-cf9a-4d75-89ad-3c8d873a1ff1.json
│ │ ├── d35aeaf2-da34-4207-a602-a9b236d62fca.json
│ │ ├── d4bb491a-51aa-48ac-a946-faf94bdb98f3.json
│ │ ├── d51f715d-c5b7-485b-8718-7112b700252b.json
│ │ ├── d62ac9e8-14ff-4a51-a65c-13b0be4a382d.json
│ │ ├── d7400d59-6e7d-438d-a433-7e248a89d7ef.json
│ │ ├── d8efb7b3-3d17-47f7-b46a-d4d5933d1c6c.json
│ │ ├── daff4d68-e7ba-4876-a389-f6379792db29.json
│ │ ├── db685a34-6e9f-480c-998f-74d9dfa33ded.json
│ │ ├── dd83acba-c2c2-471f-96e6-02a7564ccdf2.json
│ │ ├── ddd82ca8-addd-461f-a409-10ba4f2384f2.json
│ │ ├── de2c3001-b81a-43cb-bc8d-430cc5f31ecd.json
│ │ ├── df252df6-207a-4b5c-b429-517a9188442a.json
│ │ ├── dfc781e1-8984-4900-9fd9-4d67a8d06165.json
│ │ ├── e1a86ae0-794e-48c7-a521-6184fe47baf7.json
│ │ ├── e24996a9-4e92-47da-b164-afe68646772e.json
│ │ ├── e4a1e47d-a111-4f8f-aa81-fa8305cacc02.json
│ │ ├── e5411091-9430-440b-b167-06666aaa6282.json
│ │ ├── e68b7951-4436-4b51-9352-b8478a7016a2.json
│ │ ├── e745dd10-3a36-4c3c-9f6d-a0df1af88db5.json
│ │ ├── e7f85be0-c8af-4658-ae23-37f0fec2d3e8.json
│ │ ├── e96f6208-3db5-4e59-92fd-13bcc17a219f.json
│ │ ├── ec636ad8-59af-4d37-899d-7e38b6820142.json
│ │ ├── eed1b24c-8d17-474c-a8c5-abbb1253f3ac.json
│ │ ├── efcf28ed-64c0-4fb0-889e-adb9f7826bd2.json
│ │ ├── f0c05fe7-cce3-4ae7-81a7-04eb885a858d.json
│ │ ├── f1ee8d78-1966-45ac-899b-0fb7fdbef35c.json
│ │ ├── f1fdb37a-b154-4165-9964-31831b40be8b.json
│ │ ├── f45cae23-5852-45c2-a36d-923768351808.json
│ │ ├── f4c6688d-2e98-47bb-9880-d8beb1569036.json
│ │ ├── f511acae-1e9a-4fea-a426-acb3fd6aa5bd.json
│ │ ├── f699b6ac-7812-4564-adb5-8260b225f33b.json
│ │ ├── f77367b4-cad3-4d6a-961a-f4f4d6e24817.json
│ │ ├── f7bae092-4e7b-40f5-adae-a5c89bd9065a.json
│ │ ├── f8c2b960-f80e-488d-8126-04fe12057048.json
│ │ ├── f9d73365-1492-4efa-aabd-b730650157d0.json
│ │ ├── faf1a8f8-0cdf-43ba-8af8-d53a2d1a060f.json
│ │ ├── fb7828b5-0a12-46a7-8c5c-6c220d116e36.json
│ │ ├── fb857ec0-d80d-4404-ba0e-1ba436305d7d.json
│ │ ├── fda16ac8-d1cf-49db-93b7-833c274cd0d2.json
│ │ ├── fe3a8d7c-3dff-489e-abca-0b515db9420c.json
│ │ ├── fe78e904-37eb-46de-9195-1264c9acc754.json
│ │ ├── ff05e63d-c45b-499f-bc6e-a507047b4f10.json
│ │ └── ffe18889-8e9a-4b8c-8a81-867a03a3c274.json
│ ├── eslint.config.js
│ ├── fb/
│ │ ├── .firebaserc
│ │ ├── README.md
│ │ ├── firebase.json
│ │ ├── functions/
│ │ │ ├── package.json
│ │ │ ├── src/
│ │ │ │ ├── gemini.ts
│ │ │ │ └── index.ts
│ │ │ └── tsconfig.json
│ │ └── storage.rules
│ ├── package.json
│ ├── postcss.config.js
│ ├── src/
│ │ ├── app.css
│ │ ├── app.d.ts
│ │ ├── app.html
│ │ ├── lib/
│ │ │ ├── api.ts
│ │ │ ├── cloud-firebase.ts
│ │ │ ├── components/
│ │ │ │ ├── CollectionsList.svelte
│ │ │ │ ├── Droppable.svelte
│ │ │ │ └── ui/
│ │ │ │ ├── button/
│ │ │ │ │ ├── button.svelte
│ │ │ │ │ └── index.ts
│ │ │ │ ├── dropdown-menu/
│ │ │ │ │ ├── dropdown-menu-checkbox-item.svelte
│ │ │ │ │ ├── dropdown-menu-content.svelte
│ │ │ │ │ ├── dropdown-menu-item.svelte
│ │ │ │ │ ├── dropdown-menu-label.svelte
│ │ │ │ │ ├── dropdown-menu-radio-group.svelte
│ │ │ │ │ ├── dropdown-menu-radio-item.svelte
│ │ │ │ │ ├── dropdown-menu-separator.svelte
│ │ │ │ │ ├── dropdown-menu-shortcut.svelte
│ │ │ │ │ ├── dropdown-menu-sub-content.svelte
│ │ │ │ │ ├── dropdown-menu-sub-trigger.svelte
│ │ │ │ │ └── index.ts
│ │ │ │ ├── input/
│ │ │ │ │ ├── index.ts
│ │ │ │ │ └── input.svelte
│ │ │ │ └── label/
│ │ │ │ ├── index.ts
│ │ │ │ └── label.svelte
│ │ │ ├── consts.ts
│ │ │ ├── embedder.ts
│ │ │ ├── firebase.ts
│ │ │ ├── store.ts
│ │ │ └── utils.ts
│ │ └── routes/
│ │ ├── +layout.svelte
│ │ ├── +page.svelte
│ │ ├── api/
│ │ │ ├── getEmbeddings/
│ │ │ │ └── +server.ts
│ │ │ ├── listCollections/
│ │ │ │ └── +server.ts
│ │ │ ├── search/
│ │ │ │ └── +server.ts
│ │ │ └── umap/
│ │ │ └── +server.ts
│ │ ├── search/
│ │ │ ├── +page.svelte
│ │ │ └── ImageResult.svelte
│ │ └── viz/
│ │ ├── +page.svelte
│ │ ├── Image.svelte
│ │ ├── Scene.svelte
│ │ ├── Settings.svelte
│ │ └── Viz.svelte
│ ├── svelte.config.js
│ ├── tailwind.config.ts
│ ├── tsconfig.json
│ └── vite.config.ts
├── video-scrubber/
│ ├── .eslintrc.cjs
│ ├── .gcloudignore
│ ├── LICENSE
│ ├── README.md
│ ├── app.yaml
│ ├── index.html
│ ├── package.json
│ ├── postcss.config.js
│ ├── server/
│ │ ├── gemini.js
│ │ └── index.js
│ ├── src/
│ │ ├── Annotations.tsx
│ │ ├── App.tsx
│ │ ├── ClickableTimestamps.tsx
│ │ ├── ClipTimeline.tsx
│ │ ├── Controls.tsx
│ │ ├── Gemini.tsx
│ │ ├── PlayTimeline.tsx
│ │ ├── Timelines.tsx
│ │ ├── TimestampText.tsx
│ │ ├── Video.tsx
│ │ ├── VideoInput.tsx
│ │ ├── VideoState.tsx
│ │ ├── atoms.tsx
│ │ ├── consts.tsx
│ │ ├── index.css
│ │ ├── main.tsx
│ │ ├── utils.tsx
│ │ └── vite-env.d.ts
│ ├── tailwind.config.js
│ ├── tsconfig.json
│ ├── tsconfig.node.json
│ └── vite.config.ts
└── voice-cursor/
├── .gcloudignore
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── components.json
├── next.config.mjs
├── package.json
├── postcss.config.js
├── src/
│ ├── app/
│ │ ├── default-value.ts
│ │ ├── globals.css
│ │ ├── layout.tsx
│ │ ├── page.tsx
│ │ ├── prosemirror.css
│ │ └── test-audio/
│ │ └── page.tsx
│ ├── components/
│ │ ├── editor/
│ │ │ ├── advanced-editor.tsx
│ │ │ ├── audio-menu.tsx
│ │ │ ├── client-editor.tsx
│ │ │ ├── extensions/
│ │ │ │ └── audio-highlight.ts
│ │ │ ├── extensions.ts
│ │ │ ├── image-upload.ts
│ │ │ ├── prompt-popover.tsx
│ │ │ ├── selectors/
│ │ │ │ └── voice-popover.tsx
│ │ │ ├── slash-command.tsx
│ │ │ └── unused-selectors/
│ │ │ ├── color-selector.tsx
│ │ │ ├── highlight-selector.tsx
│ │ │ ├── link-selector.tsx
│ │ │ ├── node-selector.tsx
│ │ │ └── text-buttons.tsx
│ │ ├── theme-provider.tsx
│ │ ├── theme-toggle.tsx
│ │ └── ui/
│ │ ├── button.tsx
│ │ ├── dropdown-menu.tsx
│ │ ├── popover.tsx
│ │ ├── separator.tsx
│ │ ├── sonner.tsx
│ │ └── tooltip.tsx
│ └── lib/
│ ├── tone-options.ts
│ ├── utils.ts
│ └── voice-options.ts
├── tailwind.config.ts
└── tsconfig.json
================================================
FILE CONTENTS
================================================
================================================
FILE: CONTRIBUTING.md
================================================
# How to Contribute
We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.
## Contributor License Agreement
Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution;
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.
You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.
## Code reviews
All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.
## Community Guidelines
This project follows [Google's Open Source Community
Guidelines](https://opensource.google.com/conduct/).
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
# Gemini API Demos
Hello! 👋 This is a repository of examples built with Google's Gemini API, which lets you build multimodal AI applications with text, images, and more.
## What You'll Find:
Examples: These demos show the latest Gemini models (Flash 1.5, Pro, and others) in action. Dive into projects that demonstrate:
- Image and Video understanding: Analyze content, classify objects, and even generate timestamped summaries.
- Multimodal interaction: Combine text and image inputs to create engaging user experiences.
- Technical Inspiration: Get hands-on with code examples that show you how to use the Gemini API effectively. Learn best practices for prompt engineering, caching and embedding, and integrating Gemini into your own applications.
## Getting Started:
1. Obtain an API Key: To use the Gemini API, you'll need an API key. You can get one [here](https://ai.google.dev/gemini-api/docs/api-key) or from [AI Studio](https://aistudio.google.com/app/apikey)
2. Explore the Docs: The official documentation is your comprehensive guide to the Gemini API: https://ai.google.dev/gemini-api/docs/
3. Dive into the Demos: Choose a demo that sparks your interest and follow the instructions in its README. You'll be up and running in no time!
## Important Notes:
1. API Usage Limits: Google may have usage limits and associated costs for the Gemini API. Be sure to review the details on their website.
2. Responsible AI: Please use the Gemini API responsibly and ethically. Avoid generating harmful or misleading content.
3. Feedback Welcome: We value your input! If you encounter issues, have suggestions, or want to share your creations, please open an issue or pull request.
## Current Projects
| Name | Description | Tools |
| ----------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| [Multimodal Embeddings](/multimodal-embeddings) | Using Gemini's new Multimodal Embeddings API, we'll explore high dimensional embedding space of text, images, and videos. | [Multimodal Embeddings API](https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-multimodal-embeddings), [Firestore Vector Search](https://firebase.google.com/docs/firestore/vector-search) |
| [Gemini Video Scrubber](/video-scrubber) | "GVS" is a prototype that uses Gemini's multimodal video understanding capabilities to create timestamped summaries of videos with a simple UI to play those timestamps back in sequence, giving you the ability to quickly scan videos for interesting moments, common occurrences, and more! | Multimodal Gemini, File API, Caching |
| [Voice Cursor](/voice-cursor) | An experimental text editor that lets you highlight phrases and instantly hear them spoken by Gemini 2.0 in different expressive styles. Simply select text, choose a tone, and hear AI-generated speech with customizable prompts. | [Gemini 2.0 Native Audio Output](https://ai.google.dev/gemini-api/docs/models/gemini-v2#speech-generation-early-accessallowlist) |
| [Image to Code](/image-to-code) | An experimental site that uses Gemini 2.0 Flash to turn an image --> into a creative code sketch (p5.js). | [Gemini 2.0 Flash](https://deepmind.google/technologies/gemini/flash/) |
## Experiments for all
This is an experiment, not an official Google product. We'll do our best to support and maintain this experiment but your mileage may vary.
We encourage open sourcing projects as a way of learning from each other. Please respect our and other creators' rights, including copyright and trademark rights when present, when sharing these works and creating derivative work. If you want more info on Google's policy, you can find that [here](https://www.google.com/permissions/).
================================================
FILE: image-to-code/.gcloudignore
================================================
# This file specifies files that are *not* uploaded to Google Cloud
# using gcloud. It follows the same syntax as .gitignore, with the addition of
# "#!include" directives (which insert the entries of the given .gitignore-style
# file at that point).
#
# For more information, run:
# $ gcloud topic gcloudignore
#
.gcloudignore
# If you would like to upload your .git directory, .gitignore file or files
# from your .gitignore file, remove the corresponding line
# below:
.git
.gitignore
# Node.js dependencies:
node_modules/
================================================
FILE: image-to-code/.gitignore
================================================
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules
/.pnp
.pnp.*
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/versions
# testing
/coverage
# next.js
/.next/
/out/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*
# env files (can opt-in for committing if needed)
.env*
.env.local
# vercel
.vercel
# typescript
*.tsbuildinfo
next-env.d.ts
# app yaml
*.yaml
app.yaml
================================================
FILE: image-to-code/CONTRIBUTING.md
================================================
# How to contribute
We'd love to accept your patches and contributions to this project.
## Before you begin
### Sign our Contributor License Agreement
Contributions to this project must be accompanied by a Contributor License Agreement (CLA). You (or your employer) retain the copyright to your contribution; this simply gives us permission to use and redistribute your contributions as part of the project.
If you or your current employer have already signed the Google CLA (even if it was for a different project), you probably don't need to do it again.
Visit https://cla.developers.google.com/ to see your current agreements or to sign a new one.
### Review our community guidelines
This project follows [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/).
## Contribution process
### Code reviews
All submissions, including submissions by project members, require review. We use GitHub pull requests for this purpose. Consult GitHub Help for more information on using pull requests.
================================================
FILE: image-to-code/LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: image-to-code/README.md
================================================
# Image to Code Generator 🎨

Get started with [p5js](https://p5js.org/) sketches using [Gemini 2.0 Flash](https://deepmind.google/technologies/gemini/flash/). Upload any photo, and this web app uses [Gemini 2.0 Flash](https://deepmind.google/technologies/gemini/flash/) to generate a [p5.js](https://p5js.org/) sketch that captures the essence of the image in an interactive way.
## Getting Started
### 1. Clone this repository and install dependencies:
```
git clone https://github.com/googlecreativelab/gemini-demos/
cd gemini-demos/image-to-code
npm install
```
### 2. Create a .env.local file with your AI Studio API key:
Get your API key from [Google AI Studio](https://aistudio.google.com/apikey), then create a `.env.local` file in the root of the project with the following content:
```
NEXT_PUBLIC_GEMINI_API_KEY=your_api_key_here
```
### 3. Start the development server:
```
npm run dev
```
Open http://localhost:3000 and start uploading images!
## Prompt Transparency
The prompt to transform images into p5js sketches can be found in `pages/index.js`.
```
You are a creative coding expert who turns images into clever code sketches using p5js. A user will upload an image and you will generate a interactive p5js sketch that represents the image. The code sketch always has some sort of interactive element that connects to the nature of the object in the real world.
## EXAMPLES
Here are some examples of what I mean by how the type of image could be turned into a clever creative coding sketch to capture the essence of the image.
- A photo of birds --> a boids flocking algorithm sketch where the boids follow your mouse
- A photo of a tree --> a recursive fractal tree that grows as you move your mouse up and down
- A photo of a pond --> a sketch that has a ripple animation on mouse click
- A photo of a wristwatch --> beautiful functioning clock that accesses system time and displays it like the wristwatch
- A photo of a lamp --> a sketch of the lamp, but when you click the screen the lamp turns on and off
- A photo of a zipper --> a sketch representing the shapes of the zipper, and when you move your mouse up and down the zipper opens and closes like a real zipper
## PROCESS
To achieve creating this sketch, you reflect and meditate on the nature of the object BEFORE picking an algorithmic approach to represent the image. You are an agent that is thoughtful, clever, delightful, and playful.
Before you start, think about the image and the best way to represent it in p5js.
1. Describe the behavioral properties of the image. List some ways it behaves in the real world or some patterns it exhibits. Describe the colors and vibe of the image as well.
2. Given the behavorial properties of the image, identify a common creative coding algorithm that can be paired up to this image to make a delightful p5js sketch.
3. State the bounding boxes of the important parts of the composition of the photo. We will need to use these bounding boxes to make sure our composition of our sketch resembles the composition of the photo uploaded. Our sketch's composition needs to resemble the composition of the uploaded photo.
4. Implement a algorithm in p5js, using the properties of the image described earlier. Use either mouseMoved() or mouseClicked() to make it interactive. Generate a SINGLE, COMPLETE code snippet. We parse out the response you generate, so we should have only ONE code snippet that incorporates all of the information from steps 1 (behavioral description), 2 (creative coding algorithm to bring this to life), 3 (bounding boxes to preserve compositional integrity).
## EXECUTION
Complete all of these steps. When you write your code, be sure to leave clear comments to describe the different parts of the code and what you are doing.
Do not EVER try to load in external images or any other libraries. Everything must be self contained in the one file and code snippet.
And don't be too verbose.
```
## Credits
Code by [Trudy Painter](https://www.trudy.computer/). Design by [Jose Guizar](https://joseguizar.com/).
## Contributing 🤝
Contributions are welcome! See the `CONTRIBUTING.md` file for more information.
## Disclaimer
This is an experiment showcasing Gemini 2.0's capabilities, not an official Google product. We'll do our best to support and maintain this experiment but your mileage may vary. We encourage open sourcing projects as a way of learning from each other. Please respect our and other creators' rights, including copyright and trademark rights when present, when sharing these works and creating derivative work. If you want more info on Google's policy, you can find that [here](https://www.google.com/permissions/).
## License
Licensed under the Apache-2.0 license.
================================================
FILE: image-to-code/components/CodePreview.js
================================================
/**
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import React, { useState } from 'react';
import { Code2, Play, Copy, Check, MessageCircle } from 'lucide-react';
import Editor from '@monaco-editor/react';
import ReactMarkdown from 'react-markdown';
import remarkGfm from 'remark-gfm';
import ToggleButton from './ToggleButton';
const CodePreview = ({ output, onCodeChange, fullResponse }) => {
const [showCode, setShowCode] = useState(false);
const [showReasoning, setShowReasoning] = useState(false);
const [isCopied, setIsCopied] = useState(false);
// Copies the current sketch source to the clipboard and shows the "copied"
// state for two seconds. Failures are logged rather than surfaced to the user.
const handleCopy = async () => {
  const clearCopiedFlag = () => setIsCopied(false);
  try {
    const { code } = output;
    await navigator.clipboard.writeText(code);
    setIsCopied(true);
    setTimeout(clearCopiedFlag, 2000);
  } catch (copyError) {
    console.error('Failed to copy code:', copyError);
  }
};
/**
 * Prepares p5.js source for preview.
 *
 * - Non-string input is coerced with `toString()`, so `null`/`undefined` would throw here.
 * - Code that already contains the literal text `function setup()` is used unchanged;
 *   anything else is wrapped in a generated `setup()` that creates a 500x500 canvas,
 *   followed by a `draw()` stub.
 *
 * NOTE(review): the returned markup is empty in this view, and `wrappedCode` /
 * `formattedCodeResponse` are never read — presumably the JSX/HTML was lost during
 * extraction; confirm against the real file.
 */
const renderSketch = (code) => {
// Make sure we're working with a string
const codeString = typeof code === 'string' ? code : code.toString();
const wrappedCode = codeString.includes('function setup()') ? codeString : `
function setup() {
createCanvas(500, 500);
${codeString}
}
function draw() {
// Add default draw function if not present
if (typeof window.draw !== 'function') {
window.draw = function() {};
}
}
`;
const formattedCodeResponse = `
p5.js Sketch
`;
return (
);
};
// Make sure we're passing the actual code string to renderSketch
const sketchCode = output?.code || '';
return (
);
};
export default Header;
================================================
FILE: image-to-code/components/ToggleButton.js
================================================
import React from 'react';
/**
 * Pill-shaped toggle button component.
 *
 * Props: `icon` (aliased locally to `Icon`), `label`, `isSelected`, `onClick`.
 *
 * NOTE(review): the returned markup is empty in this view, so how the props and the
 * three Tailwind class strings below are combined cannot be confirmed from here —
 * presumably `selectedStyles` applies when `isSelected` is true; verify against the real file.
 */
const ToggleButton = ({ icon: Icon, label, isSelected, onClick }) => {
// Layout, padding, fully rounded corners and a 200ms transition.
const baseStyles = "inline-flex items-center gap-1 px-3.5 py-2.5 text-sm rounded-full transition-all duration-200";
// Black background with white text.
const selectedStyles = "bg-black text-white";
// Light gray background with dark gray text.
const unselectedStyles = "bg-gray-200 text-gray-700";
return (
);
};
export default ToggleButton;
================================================
FILE: image-to-code/jsconfig.json
================================================
{
"compilerOptions": {
"paths": {
"@/*": ["./*"]
}
}
}
================================================
FILE: image-to-code/next.config.mjs
================================================
/**
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** @type {import('next').NextConfig} */
// Next.js configuration object, exported as the module default below.
const nextConfig = {
// Turns on React Strict Mode for the app (Next.js `reactStrictMode` option).
reactStrictMode: true,
};
export default nextConfig;
================================================
FILE: image-to-code/package.json
================================================
{
"name": "service-image-code-2",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "next dev --turbopack",
"build": "next build",
"start": "next start",
"lint": "next lint"
},
"dependencies": {
"@google/generative-ai": "^0.21.0",
"@monaco-editor/react": "^4.6.0",
"lucide-react": "^0.469.0",
"next": "15.1.3",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"react-dropzone": "^14.3.5",
"react-markdown": "^9.0.3",
"remark-gfm": "^4.0.0"
},
"devDependencies": {
"@tailwindcss/typography": "^0.5.16",
"postcss": "^8",
"tailwindcss": "^3.4.1"
}
}
================================================
FILE: image-to-code/pages/_app.js
================================================
/**
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import "@/styles/globals.css";
// Custom App component (pages/_app.js); receives the active page as `Component`
// and that page's props as `pageProps`.
// NOTE(review): the return value is empty in this view — presumably the JSX that
// renders `Component` with `pageProps` was lost during extraction; confirm against the real file.
export default function App({ Component, pageProps }) {
return ;
}
================================================
FILE: image-to-code/pages/_document.js
================================================
/**
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { Html, Head, Main, NextScript } from "next/document";
// Custom Document component (pages/_document.js).
// NOTE(review): the returned markup is empty in this view — presumably it was built from
// the imported `Html`, `Head`, `Main` and `NextScript`; confirm against the real file.
export default function Document() {
return (
);
}
================================================
FILE: image-to-code/pages/api/hello.js
================================================
/**
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
export default function handler(req, res) {
res.status(200).json({ name: "John Doe" });
}
================================================
FILE: image-to-code/pages/index.js
================================================
/**
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import React, { useState, useCallback, useEffect } from "react";
import { useDropzone } from "react-dropzone";
import { GoogleGenerativeAI } from "@google/generative-ai";
import { ChevronDown, Image, Upload, Settings, Send, History, ArrowRight, Pen, Layers } from "lucide-react";
import Head from "next/head";
import CodePreview from "../components/CodePreview";
import Header from '../components/Header';
// File names of the sample images offered by the page.
const SAMPLE_IMAGES = [
  "beeripple.jpeg",
  "bubbles.jpeg",
  "clock.png",
  "flower.jpeg",
  "garage.jpeg",
  "sconce.jpeg",
  "steam.jpeg",
  "tree.png",
  "birds.jpeg",
  "bubblemachine.png",
];
// Initialize Gemini AI model
// The client and model handle are created once at module load and shared by every request.
// NOTE(review): Next.js inlines `NEXT_PUBLIC_`-prefixed variables into the browser bundle,
// so this API key is presumably visible to end users — confirm that is intended.
const MODEL_NAME = "gemini-2.0-flash-exp";
const genAI = new GoogleGenerativeAI(process.env.NEXT_PUBLIC_GEMINI_API_KEY);
const model = genAI.getGenerativeModel({ model: MODEL_NAME });
// Helper function to generate code from image
async function generateCodeFromImage(imageBase64, prompt, userInput) {
const image = {
inlineData: {
data: imageBase64.split(",")[1],
mimeType: "image/jpeg",
},
};
const finalPrompt = userInput.trim()
? `${prompt}\n\nUser input: ${userInput}`
: prompt;
const result = await model.generateContent([finalPrompt, image]);
const response = result.response.text();
const regex = /```(?:javascript|js)?\s*([\s\S]*?)```/g;
const match = regex.exec(response);
const extractedCode = match ? match[1].trim() : response;
return {
fullResponse: response,
code: extractedCode
};
}
export default function Home() {
const [imageBase64, setImageBase64] = useState("");
const [outputs, setOutputs] = useState([]);
const [loading, setLoading] = useState(false);
const [hasStartedGenerating, setHasStartedGenerating] = useState(false);
const [selectedOutput, setSelectedOutput] = useState(null);
const [concurrentRequests, setConcurrentRequests] = useState(5);
const [showPrompt, setShowPrompt] = useState(false);
const [prompt, setPrompt] = useState("");
// Load prompt from localStorage on initial render
useEffect(() => {
const savedPrompt = localStorage.getItem('savedPrompt');
if (savedPrompt) {
setPrompt(savedPrompt);
} else {
const defaultPrompt = `You are a creative coding expert who turns images into
clever code sketches using p5js. A user will upload an image and you will
generate a interactive p5js sketch that represents the image.
The code sketch always has some sort of interactive element that
connects to the nature of the object in the real world.
## EXAMPLES
Here are some examples of what I mean by how the type of image could
be turned into a clever creative coding sketch to capture the essence of the image.
- A photo of birds --> a boids flocking algorithm sketch where the boids follow your mouse
- A photo of a tree --> a recursive fractal tree that grows as you move your mouse up and down
- A photo of a pond --> a sketch that has a ripple animation on mouse click
- A photo of a wristwatch --> beautiful functioning clock that
accesses system time and displays it like the wristwatch
- A photo of a lamp --> a sketch of the lamp, but when you click
the screen the lamp turns on and off
- A photo of a zipper --> a sketch representing the shapes of the zipper,
and when you move your mouse up and down the zipper opens and closes like a real zipper
## PROCESS
To achieve creating this sketch, you reflect and
meditate on the nature of the object BEFORE picking an algorithmic
approach to represent the image. You are an agent that is thoughtful,
clever, delightful, and playful.
Before you start, think about the image and the best way to represent it in p5js.
1. Describe the behavioral properties of the image. List some ways it
behaves in the real world or some patterns it exhibits. Describe the
colors and vibe of the image as well.
2. Given the behavorial properties of the image, identify a common creative
coding algorithm that can be paired up to this image to make a delightful p5js sketch.
3. State the bounding boxes of the important parts of the composition
of the photo. We will need to use these bounding boxes to make sure our
composition of our sketch resembles the composition of the photo uploaded.
Our sketch's composition needs to resemble the composition of the uploaded photo.
4. Implement a algorithm in p5js, using the properties of the image described
earlier. Use either mouseMoved() or mouseClicked() to make it interactive.
Generate a SINGLE, COMPLETE code snippet. We parse out the response you generate,
so we should have only ONE code snippet that incorporates all of the information
from steps 1 (behavioral description), 2 (creative coding algorithm to bring this to life),
3 (bounding boxes to preserve compositional integrity).
## EXECUTION
Complete all of these steps. When you write your code, be sure to leave clear
comments to describe the different parts of the code and what you are doing.
Do not EVER try to load in external images or any other libraries.
Everything must be self contained in the one file and code snippet.
And don't be too verbose.`
.trim();
setPrompt(defaultPrompt);
localStorage.setItem('savedPrompt', defaultPrompt);
}
}, []);
// Persist the prompt under the `savedPrompt` key every time it changes;
// an empty prompt is never written.
useEffect(() => {
  if (!prompt) {
    return;
  }
  localStorage.setItem('savedPrompt', prompt);
}, [prompt]);
const [showSamples, setShowSamples] = useState(false);
const [selectedSample, setSelectedSample] = useState(null);
const [userInput, setUserInput] = useState("");
const [imageDetails, setImageDetails] = useState(null);
/**
 * Dropzone callback: reads the first accepted file, rescales it to 512px wide
 * (height follows the aspect ratio) on an off-screen canvas, and stores the result
 * as a data URL together with the file's name, size and type.
 */
const onDrop = useCallback((acceptedFiles) => {
  const file = acceptedFiles[0];
  // Nothing usable was dropped (e.g. every file was rejected). FileReader.readAsDataURL
  // throws on `undefined`, so stop here.
  if (!file) return;

  const reader = new FileReader();
  reader.onload = (event) => {
    const img = document.createElement("img");
    // Attach the handler before assigning `src` so the load event cannot be missed.
    img.onload = () => {
      const canvas = document.createElement("canvas");
      const scaleFactor = 512 / img.width;
      canvas.width = 512;
      canvas.height = img.height * scaleFactor;
      const ctx = canvas.getContext("2d");
      ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
      setImageBase64(canvas.toDataURL());
      setImageDetails({
        name: file.name,
        size: `${(file.size / 1024).toFixed(2)}kB`,
        type: file.type
      });
    };
    img.src = event.target.result;
  };
  reader.readAsDataURL(file);
}, []);
const { getRootProps, getInputProps, isDragActive } = useDropzone({
  onDrop,
  // react-dropzone v14 takes `accept` as a map of MIME type -> allowed extensions
  // (the string form is no longer supported); an empty list allows every extension.
  accept: { "image/*": [] },
});
const generateCode = async () => {
if (!imageBase64) return;
setLoading(true);
setHasStartedGenerating(true);
setOutputs([]);
try {
const requests = Array(concurrentRequests)
.fill()
.map(() => generateCodeFromImage(imageBase64, prompt, userInput));
const results = await Promise.all(requests);
setOutputs(results.map((result, index) => ({
id: index + 1,
code: result.code,
fullResponse: result.fullResponse
})));
} catch (error) {
console.error("Error generating code:", error);
} finally {
setLoading(false);
}
};
// NOTE(review): the returned markup is empty in this view and neither `code` nor
// `formattedCodeResponse` is read — presumably the JSX/HTML was lost during extraction;
// confirm against the real file before relying on this helper.
const renderSketch = (code) => {
const formattedCodeResponse = `
p5.js Sketch
`;
return (
);
};
const handleCodeChange = (id, newCode) => {
setOutputs((prevOutputs) =>
prevOutputs.map((output) =>
output.id === id ? { ...output, code: newCode } : output
)
);
};
/**
 * Loads a bundled sample image from `/samples/<imageName>`, rescales it to 512px wide
 * on an off-screen canvas and stores the result as the current image data URL.
 * Any failure is logged; the selection state is still updated.
 */
const handleSampleSelect = async (imageName) => {
  setSelectedSample(imageName);
  try {
    const response = await fetch(`/samples/${imageName}`);
    // fetch() resolves for 404/500 too; without this check an error page would be
    // handed to the image decoder and fail silently.
    if (!response.ok) {
      throw new Error(`Failed to fetch sample "${imageName}" (HTTP ${response.status})`);
    }
    const blob = await response.blob();
    const reader = new FileReader();
    reader.onload = (event) => {
      const img = document.createElement("img");
      // Attach the handler before assigning `src` so the load event cannot be missed.
      img.onload = () => {
        const canvas = document.createElement("canvas");
        const scaleFactor = 512 / img.width;
        canvas.width = 512;
        canvas.height = img.height * scaleFactor;
        const ctx = canvas.getContext("2d");
        ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
        setImageBase64(canvas.toDataURL());
      };
      img.src = event.target.result;
    };
    reader.readAsDataURL(blob);
  } catch (error) {
    console.error('Error loading sample image:', error);
  }
};
return (
<>
Image to Code
{imageBase64 ? (
) : (
<>
{isDragActive
? "Drop the image here"
: "Drag & drop an image here, or click to select one"}
================================================
FILE: multimodal-embeddings/src/lib/api.ts
================================================
import * as THREE from 'three';
/**
 * An image record carrying its embedding vector.
 * NOTE(review): `filePath` / `thumbPath` are presumably storage paths — confirm against the writer.
 */
export type StoredImage = {
embeddings: number[];
filePath: string;
thumbPath: string;
label?: string;
};
/**
 * Input for an embedding request: an image, a text, or both.
 * A string `imageBytes` is treated by the embedder as already base64-encoded.
 */
export type EmbeddingRequest = {
imageBytes?: Buffer | string;
text?: string;
};
/** A vector search: the collection name to search and the query embedding. */
export type SearchRequest = {
collection: string;
embeddings: number[];
};
/** Result of a search: an embedding vector plus the matching stored images. */
export type SearchResponse = {
embeddings: number[];
searchResults: StoredImage[];
};
/** Identifier and path of a Firestore collection. */
export type Collection = {
id: string;
path: string;
};
// TypeScript helper: all threlte interaction events use this shape, but it is not exported.
export type InteractionEvent = THREE.Intersection & {
	intersections: THREE.Intersection[]; // The first intersection of each intersected object
	object: THREE.Object3D; // The object that was actually hit
	eventObject: THREE.Object3D; // The object that registered the event
	camera: THREE.Camera; // The camera used for raycasting
	delta: THREE.Vector2; // Distance between mouse down and mouse up event in pixels
	nativeEvent: MouseEvent | PointerEvent | WheelEvent; // The native browser event
	pointer: THREE.Vector2; // The pointer position in normalized device coordinates
	ray: THREE.Ray; // The ray used for raycasting
	stopPropagation: () => void; // Function to stop propagation of the event
	// Primitive `boolean`, not the boxed `Boolean` wrapper type.
	stopped: boolean; // Whether the event propagation has been stopped
};
================================================
FILE: multimodal-embeddings/src/lib/cloud-firebase.ts
================================================
/**
* Yes, there is another firebase.ts in $lib/ but this one has to use
* special `@google-cloud/firestore` package instead, which contains
* the necessary VectorQuery findNearest() method and a simple listCollections() method
*
* Sadly, this library completely ignores the emulator API, so we can't include
* data for you to test with specific collections. It will always try to reach
* your prod Firestore database online.
*/
import { Firestore, FieldValue } from '@google-cloud/firestore';
import { FIRESTORE_DB_ID } from './consts';
import type { VectorQuery, VectorQuerySnapshot } from '@google-cloud/firestore';
const db = new Firestore({ databaseId: FIRESTORE_DB_ID });
/**
 * Lists the database's top-level collections as `{ id, path }` pairs.
 *
 * Does not throw: on failure the error is logged and returned as `{ error }`,
 * so callers must check the shape of the result.
 */
export const listCollections = async () => {
	try {
		const collectionRefs = await db.listCollections();
		return collectionRefs.map(({ id, path }) => ({ id, path }));
	} catch (error) {
		console.error(error);
		return { error };
	}
};
/**
 * Runs a nearest-neighbour search over the `embeddings` field of a collection.
 *
 * @param collection Name of the Firestore collection to search.
 * @param imageEmbedding Query vector.
 * @param options Optional tuning: `limit` (default 30) and `distanceMeasure`
 *   (default `'EUCLIDEAN'`; `'COSINE'` and `'DOT_PRODUCT'` are the other options).
 * @returns The matching document snapshots.
 */
export const vectorSearch = async (
	collection: string,
	imageEmbedding: number[],
	options: { limit?: number; distanceMeasure?: 'EUCLIDEAN' | 'COSINE' | 'DOT_PRODUCT' } = {}
) => {
	const { limit = 30, distanceMeasure = 'EUCLIDEAN' } = options;
	const coll = db.collection(collection);
	// findNearest() returns the query object directly, so it is not awaited;
	// the asynchronous work happens in get().
	const vectorQuery: VectorQuery = coll.findNearest(
		'embeddings',
		FieldValue.vector(imageEmbedding),
		{ limit, distanceMeasure }
	);
	const snapshot: VectorQuerySnapshot = await vectorQuery.get();
	return snapshot.docs;
};
================================================
FILE: multimodal-embeddings/src/lib/components/CollectionsList.svelte
================================================
{#if collections}
{#each collections as collection}
{collection.id}
{/each}
{/if}
================================================
FILE: multimodal-embeddings/src/lib/components/Droppable.svelte
================================================
================================================
FILE: multimodal-embeddings/src/lib/components/ui/button/button.svelte
================================================
================================================
FILE: multimodal-embeddings/src/lib/components/ui/button/index.ts
================================================
import type { Button as ButtonPrimitive } from "bits-ui";
import { type VariantProps, tv } from "tailwind-variants";
import Root from "./button.svelte";
// Tailwind class recipe for the button, built with tailwind-variants' `tv`.
const buttonVariants = tv({
// Classes applied to every button regardless of variant or size.
base: "inline-flex items-center justify-center whitespace-nowrap rounded-md text-sm font-medium transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:pointer-events-none disabled:opacity-50",
variants: {
// Colour/emphasis options.
variant: {
default: "bg-primary text-primary-foreground shadow hover:bg-primary/90",
destructive:
"bg-destructive text-destructive-foreground shadow-sm hover:bg-destructive/90",
outline:
"border border-input bg-background shadow-sm hover:bg-accent hover:text-accent-foreground",
secondary: "bg-secondary text-secondary-foreground shadow-sm hover:bg-secondary/80",
ghost: "hover:bg-accent hover:text-accent-foreground",
link: "text-primary underline-offset-4 hover:underline",
},
// Height/padding options; `icon` is a fixed square.
size: {
default: "h-9 px-4 py-2",
sm: "h-8 rounded-md px-3 text-xs",
lg: "h-10 rounded-md px-8",
icon: "h-9 w-9",
},
},
// Used when a caller passes no `variant` / `size`.
defaultVariants: {
variant: "default",
size: "default",
},
});
// `VariantProps` is generic and needs the recipe type; deriving from `buttonVariants`
// keeps these unions in sync with the variants declared there.
type Variant = VariantProps<typeof buttonVariants>["variant"];
type Size = VariantProps<typeof buttonVariants>["size"];
// Props of the underlying bits-ui button plus the two styling options.
type Props = ButtonPrimitive.Props & {
	variant?: Variant;
	size?: Size;
};
type Events = ButtonPrimitive.Events;
export {
Root,
type Props,
type Events,
//
Root as Button,
type Props as ButtonProps,
type Events as ButtonEvents,
buttonVariants,
};
================================================
FILE: multimodal-embeddings/src/lib/components/ui/dropdown-menu/dropdown-menu-checkbox-item.svelte
================================================
================================================
FILE: multimodal-embeddings/src/lib/components/ui/dropdown-menu/dropdown-menu-content.svelte
================================================
================================================
FILE: multimodal-embeddings/src/lib/components/ui/dropdown-menu/dropdown-menu-item.svelte
================================================
================================================
FILE: multimodal-embeddings/src/lib/components/ui/dropdown-menu/dropdown-menu-label.svelte
================================================
================================================
FILE: multimodal-embeddings/src/lib/components/ui/dropdown-menu/dropdown-menu-radio-group.svelte
================================================
================================================
FILE: multimodal-embeddings/src/lib/components/ui/dropdown-menu/dropdown-menu-radio-item.svelte
================================================
================================================
FILE: multimodal-embeddings/src/lib/components/ui/dropdown-menu/dropdown-menu-separator.svelte
================================================
================================================
FILE: multimodal-embeddings/src/lib/components/ui/dropdown-menu/dropdown-menu-shortcut.svelte
================================================
================================================
FILE: multimodal-embeddings/src/lib/components/ui/dropdown-menu/dropdown-menu-sub-content.svelte
================================================
================================================
FILE: multimodal-embeddings/src/lib/components/ui/dropdown-menu/dropdown-menu-sub-trigger.svelte
================================================
================================================
FILE: multimodal-embeddings/src/lib/components/ui/dropdown-menu/index.ts
================================================
import { DropdownMenu as DropdownMenuPrimitive } from "bits-ui";
import Item from "./dropdown-menu-item.svelte";
import Label from "./dropdown-menu-label.svelte";
import Content from "./dropdown-menu-content.svelte";
import Shortcut from "./dropdown-menu-shortcut.svelte";
import RadioItem from "./dropdown-menu-radio-item.svelte";
import Separator from "./dropdown-menu-separator.svelte";
import RadioGroup from "./dropdown-menu-radio-group.svelte";
import SubContent from "./dropdown-menu-sub-content.svelte";
import SubTrigger from "./dropdown-menu-sub-trigger.svelte";
import CheckboxItem from "./dropdown-menu-checkbox-item.svelte";
// These four parts are taken straight from bits-ui; the other parts exported
// below come from the local Svelte components imported above.
const Sub = DropdownMenuPrimitive.Sub;
const Root = DropdownMenuPrimitive.Root;
const Trigger = DropdownMenuPrimitive.Trigger;
const Group = DropdownMenuPrimitive.Group;
export {
Sub,
Root,
Item,
Label,
Group,
Trigger,
Content,
Shortcut,
Separator,
RadioItem,
SubContent,
SubTrigger,
RadioGroup,
CheckboxItem,
//
Root as DropdownMenu,
Sub as DropdownMenuSub,
Item as DropdownMenuItem,
Label as DropdownMenuLabel,
Group as DropdownMenuGroup,
Content as DropdownMenuContent,
Trigger as DropdownMenuTrigger,
Shortcut as DropdownMenuShortcut,
RadioItem as DropdownMenuRadioItem,
Separator as DropdownMenuSeparator,
RadioGroup as DropdownMenuRadioGroup,
SubContent as DropdownMenuSubContent,
SubTrigger as DropdownMenuSubTrigger,
CheckboxItem as DropdownMenuCheckboxItem,
};
================================================
FILE: multimodal-embeddings/src/lib/components/ui/input/index.ts
================================================
import Root from "./input.svelte";
/**
 * A DOM event whose `currentTarget` is known to be an `<input>` element.
 * `T` is the underlying event type and defaults to the base `Event`.
 */
export type FormInputEvent<T extends Event = Event> = T & {
	currentTarget: EventTarget & HTMLInputElement;
};
/** Events forwarded by the input component, each typed with its DOM event class. */
export type InputEvents = {
	blur: FormInputEvent<FocusEvent>;
	change: FormInputEvent<Event>;
	click: FormInputEvent<MouseEvent>;
	focus: FormInputEvent<FocusEvent>;
	focusin: FormInputEvent<FocusEvent>;
	focusout: FormInputEvent<FocusEvent>;
	keydown: FormInputEvent<KeyboardEvent>;
	keypress: FormInputEvent<KeyboardEvent>;
	keyup: FormInputEvent<KeyboardEvent>;
	mouseover: FormInputEvent<MouseEvent>;
	mouseenter: FormInputEvent<MouseEvent>;
	mouseleave: FormInputEvent<MouseEvent>;
	mousemove: FormInputEvent<MouseEvent>;
	paste: FormInputEvent<ClipboardEvent>;
	input: FormInputEvent<InputEvent>;
	wheel: FormInputEvent<WheelEvent>;
};
export {
Root,
//
Root as Input,
};
================================================
FILE: multimodal-embeddings/src/lib/components/ui/input/input.svelte
================================================
================================================
FILE: multimodal-embeddings/src/lib/components/ui/label/index.ts
================================================
import Root from "./label.svelte";
export {
Root,
//
Root as Label,
};
================================================
FILE: multimodal-embeddings/src/lib/components/ui/label/label.svelte
================================================
================================================
FILE: multimodal-embeddings/src/lib/consts.ts
================================================
// We can set some reasonable defaults for testing with exported data
// using the Firebase emulators. Follow along in the README for more info.
// Env values arrive as strings, so a plain truthiness check would treat
// VITE_USE_EMULATOR=false (or =0) as "enabled". Unset, empty, "false" and "0"
// all disable the emulator; any other value enables it.
const rawUseEmulator = String(import.meta.env.VITE_USE_EMULATOR ?? '')
	.trim()
	.toLowerCase();
export const useEmulator =
	rawUseEmulator !== '' && rawUseEmulator !== 'false' && rawUseEmulator !== '0';
// due to our emulator data being a direct, binary export of our testing databases
// the name here reflects the way for the emulator to properly ingest it for testing
// DONT FORGET - if using the emulator UI - when viewing Firestore, replace 'default'
// with 'cl-demos-firestore' in the URL so you can see the imported test data
export const FIRESTORE_DB_ID = useEmulator ? 'cl-demos-firestore' : 'your-project-firestore-db';
export const FIREBASE_STORAGE_EMULATOR_BUCKET = 'mm-demo.appspot.com'; // same as above, for emulator
export const DEFAULT_EMULATOR_FIRESTORE_COLLECTION = 'weather';
export const FIREBASE_PROJECT_ID = 'your-project-id'; // this should match the auto generated firebase.rc after running `firebase init`
export const FIREBASE_CONFIG = {
projectId: FIREBASE_PROJECT_ID,
storageBucket: useEmulator
? FIREBASE_STORAGE_EMULATOR_BUCKET
: `${FIREBASE_PROJECT_ID}.appspot.com`
// while the above might work for initial testing, follow the link below for proper values
// rest of your config, from the firebase console
// directions: https://firebase.google.com/docs/web/setup
};
================================================
FILE: multimodal-embeddings/src/lib/embedder.ts
================================================
/**
* See https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-multimodal-embedding
* for more info on how this is built.
*/
import * as aiplatform from '@google-cloud/aiplatform';
import type { EmbeddingRequest } from './api';
import { FIREBASE_PROJECT_ID } from './consts';
const { PredictionServiceClient } = aiplatform.v1;
const { helpers } = aiplatform;
const clientOptions = {
apiEndpoint: 'us-central1-aiplatform.googleapis.com'
};
const project = FIREBASE_PROJECT_ID; // your firebase/cloud project
const location = 'us-central1';
const publisher = 'google';
const model = 'multimodalembedding@001';
const predictionServiceClient = new PredictionServiceClient(clientOptions);
const endpoint = `projects/${project}/locations/${location}/publishers/${publisher}/models/${model}`;
type Prompt = {
image?: {
bytesBase64Encoded: string;
};
text?: string;
};
/**
 * Requests multimodal embeddings for an image, a text, or both.
 *
 * All request parameters:
 * https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-embeddings-api#parameter-list
 *
 * @param embedRequest `imageBytes` (a Buffer, or a string that is already base64-encoded)
 *   and/or `text`.
 * @returns An object with `imageEmbeddings` and/or `textEmbeddings`, depending on which
 *   fields the prediction contains.
 * @throws Error when the request carries neither image nor text, or when the service
 *   returns no usable prediction.
 */
export const getEmbeddings = async (embedRequest: EmbeddingRequest) => {
	const prompt: Prompt = {};
	if (embedRequest.imageBytes) {
		prompt.image = {
			bytesBase64Encoded:
				typeof embedRequest.imageBytes === 'string'
					? embedRequest.imageBytes
					: embedRequest.imageBytes.toString('base64')
		};
	}
	if (embedRequest.text) {
		prompt.text = embedRequest.text;
	}
	// Fail fast instead of sending an empty instance to the prediction service.
	if (!prompt.image && !prompt.text) {
		throw new Error('Embedding request needs imageBytes or text');
	}
	console.log('prompt:', prompt);

	const request = {
		endpoint,
		instances: [helpers.toValue(prompt)],
		parameters: helpers.toValue({ sampleCount: 1 })
	};

	// Predict request
	const [response] = await predictionServiceClient.predict(request);
	const availableFields = response.predictions?.[0]?.structValue?.fields;
	if (!availableFields) {
		throw new Error('Failure to create embeddings');
	}

	// Unwraps a protobuf list value into plain numbers, skipping non-numeric entries.
	const toNumbers = (field: {
		listValue?: { values?: { numberValue?: number | null }[] | null } | null;
	}): number[] =>
		(field.listValue?.values ?? [])
			.map((value) => value.numberValue)
			.filter((value): value is number => typeof value === 'number');

	const embedResponse: { imageEmbeddings?: number[]; textEmbeddings?: number[] } = {};
	if (availableFields.imageEmbedding) {
		embedResponse.imageEmbeddings = toNumbers(availableFields.imageEmbedding);
	}
	if (availableFields.textEmbedding) {
		embedResponse.textEmbeddings = toNumbers(availableFields.textEmbedding);
	}
	console.log('Response generated:', embedResponse);
	return embedResponse;
};
================================================
FILE: multimodal-embeddings/src/lib/firebase.ts
================================================
import { initializeApp } from 'firebase/app';
import { collection, connectFirestoreEmulator, getDocs, getFirestore } from 'firebase/firestore';
import { getStorage, ref, getDownloadURL, connectStorageEmulator } from 'firebase/storage';
import {
FIREBASE_CONFIG,
FIREBASE_STORAGE_EMULATOR_BUCKET,
FIRESTORE_DB_ID,
useEmulator
} from './consts';
export const app = initializeApp(FIREBASE_CONFIG);
export const fs = getFirestore(FIRESTORE_DB_ID);
// The emulator export comes with its own storage bucket name, used here.
// Exported as `const`: the binding is never reassigned in this module, and a mutable
// export would let the shared storage handle change underneath importers.
export const storage = useEmulator
	? getStorage(app, FIREBASE_STORAGE_EMULATOR_BUCKET)
	: getStorage(app);
if (useEmulator) {
connectFirestoreEmulator(fs, 'localhost', 8080);
connectStorageEmulator(storage, 'localhost', 9199);
}
/**
 * Fetches every document of the named collection.
 *
 * @param collName Name of the collection to read.
 * @returns The result of `getDocs` for that collection.
 */
export const allDocsInCollection = async (collName: string) => {
	const collectionRef = collection(fs, collName);
	const snapshot = await getDocs(collectionRef);
	return snapshot;
};
/**
 * Resolves the download URL for a thumbnail.
 *
 * @param thumbPath Path of the thumbnail inside the storage bucket.
 * @returns The URL produced by `getDownloadURL`.
 */
export const getThumb = async (thumbPath: string) => {
	const thumbRef = ref(storage, thumbPath);
	const url = await getDownloadURL(thumbRef);
	return url;
};
================================================
FILE: multimodal-embeddings/src/lib/store.ts
================================================
import { writable } from 'svelte/store';
import type { UMAPParameters } from 'umap-js';
import type { DocumentData } from 'firebase/firestore';
/**
 * One image as held in the client-side stores.
 * NOTE(review): `umap` presumably holds the UMAP-projected coordinates for this image and
 * `ref` a component/object reference — neither is established in this file; confirm against the callers.
 */
export type ImageData = {
collection: string;
doc: DocumentData;
umap?: number[];
label?: string;
ref?: any;
};
/** A named group of images. */
export type ImageCollection = {
name: string;
images: ImageData[];
};
// component access hack
// `Promise` needs a type argument to compile; `unknown` accepts any async function
// without promising callers a usable result.
type RemapFunction = () => Promise<unknown>;
// Starts out undefined until a function is stored.
export const remapFn = writable<RemapFunction>();
export const collections = writable([]);
export const allImages = writable([]);
// Settings
// UMAP bindings — typed with umap-js's parameter type so consumers can also set
// options that are not listed here.
export const umapParams = writable<UMAPParameters>({
	nComponents: 3,
	nNeighbors: 15,
	nEpochs: 400,
	minDist: 0.5,
	spread: 1.5
});
================================================
FILE: multimodal-embeddings/src/lib/utils.ts
================================================
import { type ClassValue, clsx } from 'clsx';
import { twMerge } from 'tailwind-merge';
import { cubicOut } from 'svelte/easing';
import type { TransitionConfig } from 'svelte/transition';
/**
 * Builds a class string: the inputs are combined with `clsx` and the result is
 * passed through `twMerge`.
 */
export function cn(...inputs: ClassValue[]) {
	const combined = clsx(inputs);
	return twMerge(combined);
}
type FlyAndScaleParams = {
y?: number;
x?: number;
start?: number;
duration?: number;
};
/**
 * Svelte transition that fades a node in while sliding and scaling it into place.
 *
 * @param node Element being transitioned; its current computed `transform` is kept
 *   as a prefix of the generated transform.
 * @param params Offsets (`x`, `y`, in px), initial scale (`start`) and `duration` (ms).
 *   When omitted entirely the defaults are y -8, x 0, start 0.95, duration 150.
 *   When a partial object is passed, missing fields fall back to y 5, x 0,
 *   start 0.95, duration 200 instead.
 * @returns A transition config whose `css(t)` yields `transform` and `opacity` for
 *   progress `t`, eased with `cubicOut`.
 */
export const flyAndScale = (
	node: Element,
	params: FlyAndScaleParams = { y: -8, x: 0, start: 0.95, duration: 150 }
): TransitionConfig => {
	const style = getComputedStyle(node);
	const transform = style.transform === 'none' ? '' : style.transform;

	// Linearly maps `valueA` from the range `scaleA` onto the range `scaleB`.
	const scaleConversion = (valueA: number, scaleA: [number, number], scaleB: [number, number]) => {
		const [minA, maxA] = scaleA;
		const [minB, maxB] = scaleB;
		const percentage = (valueA - minA) / (maxA - minA);
		const valueB = percentage * (maxB - minB) + minB;
		return valueB;
	};

	// Serialises a style map to `key:value;` pairs, skipping undefined values.
	const styleToString = (style: Record<string, number | string | undefined>): string => {
		return Object.keys(style).reduce((str, key) => {
			if (style[key] === undefined) return str;
			return str + `${key}:${style[key]};`;
		}, '');
	};

	return {
		duration: params.duration ?? 200,
		delay: 0,
		css: (t) => {
			const y = scaleConversion(t, [0, 1], [params.y ?? 5, 0]);
			const x = scaleConversion(t, [0, 1], [params.x ?? 0, 0]);
			const scale = scaleConversion(t, [0, 1], [params.start ?? 0.95, 1]);
			return styleToString({
				transform: `${transform} translate3d(${x}px, ${y}px, 0) scale(${scale})`,
				opacity: t
			});
		},
		easing: cubicOut
	};
};
/**
 * POSTs `data` as JSON to `url` and returns the parsed JSON response.
 *
 * @param url Endpoint to call.
 * @param data Payload; serialised with `JSON.stringify`.
 * @returns The parsed response body.
 * @throws The body's `error` value (as sent by the server) when present; otherwise an
 *   `Error` when the body is not JSON or the HTTP status is not a success.
 */
export const postJson = async (url: string, data: any) => {
	console.log(`Call ${url}`, data);
	const response = await fetch(url, {
		method: 'POST',
		headers: {
			'Content-Type': 'application/json'
		},
		body: JSON.stringify(data)
	});

	let json: any;
	try {
		json = await response.json();
	} catch {
		// e.g. an HTML error page — report the status instead of a bare parse error.
		throw new Error(`POST ${url} returned a non-JSON response (HTTP ${response.status})`);
	}

	// `?.` because a literal `null` body is valid JSON.
	if (json?.error) throw json.error;
	if (!response.ok) throw new Error(`POST ${url} failed with HTTP ${response.status}`);
	return json;
};
================================================
FILE: multimodal-embeddings/src/routes/+layout.svelte
================================================
================================================
FILE: multimodal-embeddings/src/routes/+page.svelte
================================================
Nope.
================================================
FILE: multimodal-embeddings/src/routes/api/getEmbeddings/+server.ts
================================================
import { json } from '@sveltejs/kit';
import { getEmbeddings } from '$lib/embedder';
import type { EmbeddingRequest } from '$lib/api.js';
/**
 * Embeddings API method: computes embeddings for the posted request.
 * @param request incoming request whose JSON body is treated as an `EmbeddingRequest`
 * @returns JSON `{ embeddings }` on success, or JSON `{ error }` with HTTP status 500 on failure
 */
export async function POST({ request }) {
  try {
    const embedRequest = (await request.json()) as EmbeddingRequest;
    console.log('/api/getEmbeddings:', embedRequest);
    const embeddings = await getEmbeddings(embedRequest);
    // console.log('embedResult:', embeddings);
    return json({ embeddings });
  } catch (error) {
    console.error(error);
    // An Error's name and message are not enumerable own properties, so serialising the
    // Error directly yields `{}`. Copy them explicitly so the client sees what went wrong.
    const detail =
      error instanceof Error
        ? { ...error, name: error.name, message: error.message }
        : (error ?? 'Unknown error');
    return json({ error: detail }, { status: 500 });
  }
}
================================================
FILE: multimodal-embeddings/src/routes/api/listCollections/+server.ts
================================================
import { json } from '@sveltejs/kit';
import { listCollections } from '$lib/cloud-firebase';
/**
 * Collections API method: returns the result of `listCollections()`.
 * @returns JSON `{ collections }` on success, or JSON `{ error }` with HTTP status 500 on failure
 */
export async function POST() {
  try {
    const collections = await listCollections();
    console.log('/api/listCollections:', collections);
    return json({ collections });
  } catch (error) {
    console.error(error);
    // An Error's name and message are not enumerable own properties, so serialising the
    // Error directly yields `{}`. Copy them explicitly so the client sees what went wrong.
    const detail =
      error instanceof Error
        ? { ...error, name: error.name, message: error.message }
        : (error ?? 'Unknown error');
    return json({ error: detail }, { status: 500 });
  }
}
================================================
FILE: multimodal-embeddings/src/routes/api/search/+server.ts
================================================
import { json } from '@sveltejs/kit';
import { vectorSearch } from '$lib/cloud-firebase';
/**
 * Search API method using image embeddings for use in the /search route
 * @param request incoming request whose JSON body supplies `collection` and `embeddings`
 * @returns JSON `{ searchResults }` (the data of each matching document) on success,
 *   or JSON `{ error }` with HTTP status 500 on failure
 */
export async function POST({ request }) {
  try {
    const body = await request.json();
    console.log('/api/search:', body);
    const searchResultDocs = await vectorSearch(body.collection, body.embeddings);
    const searchResults = searchResultDocs.map((doc) => doc.data());
    return json({ searchResults });
  } catch (error) {
    console.error(error);
    // An Error's name and message are not enumerable own properties, so serialising the
    // Error directly yields `{}`. Copy them explicitly so the client sees what went wrong.
    const detail =
      error instanceof Error
        ? { ...error, name: error.name, message: error.message }
        : (error ?? 'Unknown error');
    return json({ error: detail }, { status: 500 });
  }
}
================================================
FILE: multimodal-embeddings/src/routes/api/umap/+server.ts
================================================
import { UMAP, type UMAPParameters } from 'umap-js';
import { json } from '@sveltejs/kit';
/**
 * UMAP API method: fits a UMAP model to the posted embeddings.
 *
 * The JSON body supplies `embeddings` plus the settings `dimensions`, `epochs`,
 * `neighbors`, `minDist` and `spread`, which are mapped onto `UMAPParameters`.
 *
 * @returns JSON `{ data }` with the fitted coordinates on success. On failure, responds
 *   with HTTP status 500 and the failure under both `err` (the original key) and
 *   `error` (the key used by the other API routes).
 */
export const POST = async ({ request }) => {
  try {
    const data = await request.json();
    const modelOpts: UMAPParameters = {
      nComponents: data.dimensions,
      nEpochs: data.epochs,
      nNeighbors: data.neighbors,
      minDist: data.minDist,
      spread: data.spread
    };
    console.debug(modelOpts);
    const model = new UMAP(modelOpts);
    const mapped = await model.fitAsync(data.embeddings);
    console.log(mapped[0]);
    return json({ data: mapped });
  } catch (err) {
    console.error(err);
    // An Error's name and message are not enumerable own properties, so serialising the
    // Error directly yields `{}`. Copy them explicitly so the client sees what went wrong.
    const detail =
      err instanceof Error ? { ...err, name: err.name, message: err.message } : (err ?? 'Unknown error');
    return json({ err: detail, error: detail }, { status: 500 });
  }
};
================================================
FILE: multimodal-embeddings/src/routes/search/+page.svelte
================================================
My Personal Search
{#if file}
{/if}
{#if $searchState != SearchState.Waiting}
{$searchState}
{/if}
{#if errorResponse.length > 0}
{errorResponse}
{/if}
{#if searchResponse}
{#each searchResponse.searchResults as result}
{/each}
{/if}
================================================
FILE: multimodal-embeddings/svelte.config.js
================================================
import adapter from 'svelte-adapter-appengine';
import { vitePreprocess } from '@sveltejs/vite-plugin-svelte';
/** @type {import('@sveltejs/kit').Config} */
const config = {
// Consult https://kit.svelte.dev/docs/integrations#preprocessors
// for more information about preprocessors
preprocess: vitePreprocess(),
kit: {
// This project uses svelte-adapter-appengine rather than adapter-auto.
// `nodejsRuntime: 20` presumably selects the Node.js 20 runtime for the deployment — TODO confirm
// against the adapter's documentation.
// See https://kit.svelte.dev/docs/adapters for more information about adapters.
adapter: adapter({ nodejsRuntime: 20 })
}
};
export default config;
================================================
FILE: multimodal-embeddings/tailwind.config.ts
================================================
import { fontFamily } from "tailwindcss/defaultTheme";
import type { Config } from "tailwindcss";
/**
 * Tailwind configuration.
 *
 * Theme colors are backed by CSS custom properties (`--border`, `--primary`, ...).
 * Each value ends in Tailwind's `<alpha-value>` placeholder so that opacity
 * modifiers such as `bg-primary/50` can substitute an alpha into the `hsl()` call;
 * without the placeholder the value `hsl(var(--x) / )` is not valid CSS.
 */
const config: Config = {
  darkMode: ["class"],
  content: ["./src/**/*.{html,js,svelte,ts}"],
  safelist: ["dark"],
  theme: {
    container: {
      center: true,
      padding: "2rem",
      screens: {
        "2xl": "1400px"
      }
    },
    extend: {
      colors: {
        border: "hsl(var(--border) / <alpha-value>)",
        input: "hsl(var(--input) / <alpha-value>)",
        ring: "hsl(var(--ring) / <alpha-value>)",
        background: "hsl(var(--background) / <alpha-value>)",
        foreground: "hsl(var(--foreground) / <alpha-value>)",
        primary: {
          DEFAULT: "hsl(var(--primary) / <alpha-value>)",
          foreground: "hsl(var(--primary-foreground) / <alpha-value>)"
        },
        secondary: {
          DEFAULT: "hsl(var(--secondary) / <alpha-value>)",
          foreground: "hsl(var(--secondary-foreground) / <alpha-value>)"
        },
        destructive: {
          DEFAULT: "hsl(var(--destructive) / <alpha-value>)",
          foreground: "hsl(var(--destructive-foreground) / <alpha-value>)"
        },
        muted: {
          DEFAULT: "hsl(var(--muted) / <alpha-value>)",
          foreground: "hsl(var(--muted-foreground) / <alpha-value>)"
        },
        accent: {
          DEFAULT: "hsl(var(--accent) / <alpha-value>)",
          foreground: "hsl(var(--accent-foreground) / <alpha-value>)"
        },
        popover: {
          DEFAULT: "hsl(var(--popover) / <alpha-value>)",
          foreground: "hsl(var(--popover-foreground) / <alpha-value>)"
        },
        card: {
          DEFAULT: "hsl(var(--card) / <alpha-value>)",
          foreground: "hsl(var(--card-foreground) / <alpha-value>)"
        }
      },
      borderRadius: {
        // Medium and small radii are derived from the single --radius custom property.
        lg: "var(--radius)",
        md: "calc(var(--radius) - 2px)",
        sm: "calc(var(--radius) - 4px)"
      },
      fontFamily: {
        sans: [...fontFamily.sans]
      }
    }
  },
};
export default config;
================================================
FILE: multimodal-embeddings/tsconfig.json
================================================
{
"extends": "./.svelte-kit/tsconfig.json",
"compilerOptions": {
"allowJs": true,
"checkJs": true,
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"resolveJsonModule": true,
"skipLibCheck": true,
"sourceMap": true,
"strict": true,
"moduleResolution": "Bundler"
}
// Path aliases are handled by https://kit.svelte.dev/docs/configuration#alias
// except $lib which is handled by https://kit.svelte.dev/docs/configuration#files
//
// If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes
// from the referenced tsconfig.json - TypeScript does not merge them in
}
================================================
FILE: multimodal-embeddings/vite.config.ts
================================================
import { sveltekit } from '@sveltejs/kit/vite';
import { defineConfig } from 'vite';
/**
 * Vite configuration, resolved per invocation from the Vite `mode` and the
 * NODE_ENV / USE_EM environment variables.
 *
 * - production: `mode` is 'production'
 * - local: not production and NODE_ENV is not 'test'
 * - emulator: local and USE_EM is the string 'true'
 *
 * The emulator flag is exposed to application code as `import.meta.env.VITE_USE_EMULATOR`.
 */
export default defineConfig(({ mode }) => {
  const isProduction = mode === 'production';
  const isLocal = !(isProduction || process.env.NODE_ENV === 'test');
  // Set in the npm run dev:emulate command
  const useEmulator = isLocal ? process.env.USE_EM === 'true' : false;

  console.log(`isProduction: ${isProduction}, isLocal: ${isLocal}, useEmulator: ${useEmulator} `);

  const plugins = [sveltekit()];
  const ssr = { noExternal: ['three'] };
  const define = { 'import.meta.env.VITE_USE_EMULATOR': useEmulator };

  return { plugins, ssr, define };
});
================================================
FILE: video-scrubber/.eslintrc.cjs
================================================
// ESLint configuration for the video-scrubber project.
module.exports = {
// Stop ESLint from looking for configuration files in parent directories.
root: true,
env: { browser: true, es2020: true },
extends: [
'eslint:recommended',
'plugin:@typescript-eslint/recommended',
'plugin:react-hooks/recommended',
],
// Skip build output and this configuration file itself.
ignorePatterns: ['dist', '.eslintrc.cjs'],
parser: '@typescript-eslint/parser',
plugins: ['react-refresh'],
rules: {
// Warn (rather than error) when a module exports something other than components;
// constant exports are explicitly allowed.
'react-refresh/only-export-components': [
'warn',
{ allowConstantExport: true },
],
},
}
================================================
FILE: video-scrubber/.gcloudignore
================================================
# This file specifies files that are *not* uploaded to Google Cloud
# using gcloud. It follows the same syntax as .gitignore, with the addition of
# "#!include" directives (which insert the entries of the given .gitignore-style
# file at that point).
#
# For more information, run:
# $ gcloud topic gcloudignore
#
.gcloudignore
# If you would like to upload your .git directory, .gitignore file or files
# from your .gitignore file, remove the corresponding line
# below:
.git
.gitignore
.git*
# Node.js dependencies:
node_modules/
================================================
FILE: video-scrubber/LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: video-scrubber/README.md
================================================
# Gemini Video Scrubber
GVS is a simple demo application showcasing the power of Gemini 1.5 Pro's video understanding abilities.

Specifically, it allows you to use Gemini to quickly analyze video content by prompting for associated timestamps, and making them clickable and playable as a stream. Additionally, any textual descriptions are added to the video element and played along their timestamps as well.
Just make sure to prompt Gemini with a time-specific request like:
```
Give me 3 cute moments in this video of my cat oscar, returning the timestamps and descriptions. Be playful with your descriptions
# check the "Auto-format" to append format-forcing language onto the prompt.
// Model response:
00:11 Oscar leaps for the toy like a graceful gymnast.
00:16 Oscar's adorable struggle to catch the toy is too cute!
00:20 Oscar's triumphant pounce on the toy, like a champion.
```
Then click "Play all timestamps" to watch a playthrough of the relevant timestamps (+ their duration and 'captions') to see if it's what you were looking for!
This example is playful but we've used this technique internally to find clips from longer videos (up to 1hr!) that are interesting to share across teams. Gemini as your own research assistant!
## Run GVS (locally)
> Hate reading? [Click here for the 🎬Video Walkthrough🎬](https://youtu.be/-kRxs7mrRXU)
We suggest you stick to local usage/development, as large video uploads tend to complicate deployments.
Like any good prototype, getting started is simple:
### Obtain a Gemini API Key
First, create a local `.env` file containing your `GEMINI_API_KEY`, obtained from [aistudio.google.com](https://aistudio.google.com/app/apikey) or your [cloud console](https://ai.google.dev/gemini-api/docs/api-key):
```bash
$ echo "GEMINI_API_KEY='your_api_key'" >> .env
```
### Install deps and run the project:
```bash
$ npm i
$ npm run dev
```
Once running at [localhost:3000](http://localhost:3000), the UI will ask you to open a video file.
Once opened, you can do one of two things:
1. Paste a timestamped response you might have gotten from another UI (like [AI Studio](https://aistudio.google.com))
2. Click the Upload To Gemini button to send that video file to the [Gemini File API](https://ai.google.dev/gemini-api/docs/prompting_with_media?lang=node), which handles breaking the video
into its individual frames (at 1fps) and audio stream, creating a short-lived identifier, and beginning the
tokenization process.
We then simply poll the File API until the video becomes `ACTIVE`, at which point we can use the video
identifier alongside any text prompt we want to [send to Gemini](https://ai.google.dev/gemini-api/docs/prompting_with_media?lang=node#generate-content-from-image). The File API handles caching the video's
tokens for 48 hours, so any prompts sent from here on out won't need to re-upload or tokenize the video (as long as you continue to use the correct ID in your calls).
### Additional info
The UI can handle both single (##:##) and ranged (##:##-##:##) timestamps, with singular being padded by the "Default clip duration" option under the video. "Pad clip start" refers to extra time at the start of a timestamp, which we found helpful when doing single-word "supercut"-esque tests. ("ai ai ai ai ai ai")

See [src/Gemini.tsx](https://github.com/trippedout/gemini-video-scrubber/blob/main/src/Gemini.tsx) for the UI implementation and handling of timecodes in responses, and [server/gemini.js](https://github.com/trippedout/gemini-video-scrubber/blob/main/server/gemini.js) for the API calls we make. The server component is necessary as the File API uses node `fs` commands that are unavailable on the client. More _adventurous_ devs could bypass this by wrapping the [media.upload](https://ai.google.dev/api/rest/v1beta/media/upload) call itself :)
Made in collab with the legendary [GrantCuster](https://github.com/GrantCuster), who won the React vs Svelte battle _this time_ 😀
================================================
FILE: video-scrubber/app.yaml
================================================
# Copyright 2024 Google LLC
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
runtime: nodejs20
service: gvs
default_expiration: "0s"
================================================
FILE: video-scrubber/index.html
================================================
Gemini Video Scrubber
================================================
FILE: video-scrubber/package.json
================================================
{
"name": "gemini-video-scrubber",
"displayName": "Gemini Video Scrubber",
"description": "Simple demo app for Gemini multimodal capabilities with video understanding.",
"repository": {
"url": "https://github.com/trippedout/gemini-video-scrubber"
},
"contributors": [
{
"name": "Anthony Tripaldi",
"url": "http://github.com/trippedout"
},
{
"name": "Grant Custer",
"url": "http://github.com/grantcuster"
}
],
"version": "0.1.0",
"type": "module",
"scripts": {
"start": "NODE_ENV=production node --env-file=.env server/index.js",
"dev": "node --env-file=.env server/index.js",
"preview": "vite preview",
"build": "vite build",
"deploy": "vite build && gcloud app deploy app.yaml --version demo"
},
"dependencies": {
"@google/generative-ai": "^0.11.3",
"@use-gesture/react": "^10.3.1",
"compromise": "^14.11.2",
"cors": "^2.8.5",
"express": "^4.2.0",
"jotai": "^2.6.0",
"jotai-effect": "^1.0.0",
"lucide-react": "^0.294.0",
"markdown-to-jsx": "^7.4.1",
"multer": "^1.4.5-lts.1",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-markdown": "^9.0.1",
"vite-express": "^0.14.1"
},
"devDependencies": {
"@tailwindcss/typography": "^0.5.10",
"@types/react": "^18.2.43",
"@types/react-dom": "^18.2.17",
"@typescript-eslint/eslint-plugin": "^6.14.0",
"@typescript-eslint/parser": "^6.14.0",
"@vitejs/plugin-react": "^4.2.1",
"autoprefixer": "^10.4.16",
"eslint": "^8.55.0",
"eslint-plugin-react-hooks": "^4.6.0",
"eslint-plugin-react-refresh": "^0.4.5",
"postcss": "^8.4.32",
"tailwindcss": "^3.3.6",
"typescript": "^5.2.2",
"vite": "^5.0.8"
}
}
================================================
FILE: video-scrubber/postcss.config.js
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// PostCSS configuration: enables the tailwindcss and autoprefixer plugins,
// each with an empty options object.
export default {
plugins: {
tailwindcss: {},
autoprefixer: {},
},
}
================================================
FILE: video-scrubber/server/gemini.js
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { GoogleGenerativeAI } from "@google/generative-ai";
import { GoogleAIFileManager } from "@google/generative-ai/files";
// API key read from the GEMINI_API_KEY environment variable.
// NOTE(review): nothing here checks that the variable is set — confirm how the SDK
// behaves when constructed with an undefined key.
const KEY = process.env["GEMINI_API_KEY"];
// Module-level clients shared by the functions below: `fileManager` is used for
// uploading files and fetching their status, `genAI` for generating content.
const fileManager = new GoogleAIFileManager(KEY);
const genAI = new GoogleGenerativeAI(KEY);
/**
 * Uploads a file from local disk through the shared `fileManager`.
 *
 * @param file object with `path`, `originalname` and `mimetype` properties
 *   (the shape of the upload object passed in by the /api/upload route)
 * @returns the `file` entry of the upload result
 * @throws rethrows any upload error after logging it
 */
export const uploadVideo = async (file) => {
  // TODO check if it exists already ... how?
  try {
    const uploadResult = await fileManager.uploadFile(file.path, {
      displayName: file.originalname,
      mimeType: file.mimetype,
    })
    // Pass the object as its own argument: interpolating it into a template
    // literal would print "[object Object]" instead of the file details.
    console.log('uploadComplete:', uploadResult.file)
    return uploadResult.file
  } catch (error) {
    console.error(error);
    throw error;
  }
}
/**
 * Fetches the current record for an uploaded file through the shared `fileManager`.
 *
 * This function does not throw on lookup failure: the error is logged and
 * returned wrapped as `{ error }`.
 *
 * @param name identifier passed to `fileManager.getFile`
 * @returns the file record, or `{ error }` when the lookup fails
 */
export const checkProgress = async (name) => {
  try {
    return await fileManager.getFile(name);
  } catch (error) {
    console.error(error);
    return { error };
  }
}
/**
 * Sends a text prompt together with a reference to an uploaded video to the given model.
 *
 * This function does not throw: any failure is logged and returned wrapped as `{ error }`.
 *
 * @param processedVideo object whose `mimeType` and `uri` identify the uploaded video
 * @param prompt text prompt sent alongside the video reference
 * @param model model name passed to `genAI.getGenerativeModel`
 * @returns `{ text, candidates, feedback }` taken from the response, or `{ error }`
 */
export const promptVideo = async (processedVideo, prompt, model) => {
  try {
    const videoPart = {
      fileData: {
        mimeType: processedVideo.mimeType,
        fileUri: processedVideo.uri
      }
    };
    const parts = [{ text: prompt }, videoPart];
    console.log(`promptVideo with ${model}`, parts)

    const generativeModel = genAI.getGenerativeModel({ model });
    const { response } = await generativeModel.generateContent(parts);
    const text = response.text();
    console.log(`promptVideo response`, text)

    return {
      text,
      candidates: response.candidates,
      feedback: response.promptFeedback
    }
  } catch (error) {
    console.error(error)
    return { error }
  }
}
================================================
FILE: video-scrubber/server/index.js
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import express from "express";
import ViteExpress from "vite-express";
import multer from "multer";
import { checkProgress, promptVideo, uploadVideo } from "./gemini.js";
const app = express();
// Parse JSON request bodies so the route handlers can read `req.body`.
app.use(express.json());
// need /tmp for appengine and gemini api to access
// Uploaded files are stored under /tmp/; the resulting file object is what the
// /api/upload route hands to uploadVideo.
const upload = multer({ dest: "/tmp/" })
// POST /api/upload — accepts a multipart upload in the `video` field and forwards
// the stored file to uploadVideo. Responds with { data } on success, 400 when no
// file was sent, and 500 with { error } when the upload fails.
app.post("/api/upload", upload.single('video'), async (req, res) => {
  const file = req.file;
  if (!file) {
    // Without this guard a missing file only fails later inside uploadVideo and
    // surfaces as an opaque 500.
    res.status(400).json({ error: "No file received; expected a multipart field named 'video'." })
    return
  }
  try {
    const resp = await uploadVideo(file)
    console.log(resp);
    res.json({ data: resp });
  } catch (error) {
    // uploadVideo has already logged the error before rethrowing it.
    res.status(500).json({ error })
  }
})
// POST /api/progress — looks up the file named by `gemFileName` in the JSON body
// and responds with whatever checkProgress returns; responds 500 with { error }
// if the handler itself throws.
app.post("/api/progress", async (req, res) => {
  try {
    console.log('/api/progress request', req.body)
    const { gemFileName } = req.body
    const status = await checkProgress(gemFileName)
    console.log('/api/progress', status)
    res.json(status)
  } catch (error) {
    console.error(error)
    res.status(500).json({ error })
  }
})
// POST /api/prompt — forwards `processedVideo`, `prompt` and `model` from the JSON
// body to promptVideo and responds with its result; responds 400 with { error }
// if the handler itself throws.
app.post("/api/prompt", async (req, res) => {
  try {
    const reqData = req.body
    console.log('/api/prompt', reqData)
    const videoResponse = await promptVideo(reqData.processedVideo, reqData.prompt, reqData.model)
    res.json(videoResponse)
  } catch (error) {
    console.error(error)
    // res.json takes only the body. Passing an options object as a second argument
    // makes Express treat the first argument as the status code, so the status
    // must be set with res.status().
    res.status(400).json({ error })
  }
})
// Listen on 8080 when NODE_ENV is "production", otherwise on 3000.
// eslint-disable-next-line no-undef
const port = process.env.NODE_ENV === "production" ? 8080 : 3000;
ViteExpress.listen(app, port, () => console.log("Server is listening..."));
================================================
FILE: video-scrubber/src/Annotations.tsx
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { useAtom } from "jotai";
import { playPositionAtom, timestampDefaultDurationAtom, timestampTextAtom } from "./atoms";
import { parseTimestamps, timestampToSeconds } from "./utils";
// Renders the annotation of the first parsed timestamp range that contains the
// current play position, or nothing when no range matches.
export function Annotations() {
const [timestampText] = useAtom(timestampTextAtom);
const [playPosition] = useAtom(playPositionAtom);
const [defaultDuration] = useAtom(timestampDefaultDurationAtom);
const timestamps = parseTimestamps(timestampText);
// Convert each parsed timestamp into a numeric { start, end } range. A timestamp
// without an explicit end is given one `defaultDuration` after its start
// (presumably in seconds, matching timestampToSeconds — TODO confirm).
const timestampSeconds = timestamps.map((tobject) => {
return {
start: timestampToSeconds(tobject.start),
end: tobject.end ? timestampToSeconds(tobject.end) : timestampToSeconds(tobject.start) + defaultDuration,
annotation: tobject.annotation
};
});
// Both range bounds are inclusive; when ranges overlap, the first one in parse order wins.
const activeAnnotation = timestampSeconds.find((range) => {
return playPosition >= range.start && playPosition <= range.end;
})?.annotation;
return
{activeAnnotation ? {activeAnnotation} : null}
;
}
================================================
FILE: video-scrubber/src/App.tsx
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { useAtom } from "jotai";
import {
videoElAtom,
} from "./atoms";
import { VideoInput } from "./VideoInput";
import { Video } from "./Video";
import { VideoState } from "./VideoState";
import { Controls } from "./Controls";
import { Timelines } from "./Timelines";
import { ClickableTimestamps } from "./ClickableTimestamps";
import { TimestampText } from "./TimestampText";
import { Annotations } from "./Annotations";
import { Gemini } from "./Gemini";
/**
 * Root component of the video scrubber. Reads the video element atom and
 * branches its markup on whether a video element has been registered.
 *
 * NOTE(review): the JSX below appears to have been stripped from this copy of
 * the file (only fragments remain); restore it from the original source.
 */
function App() {
const [videoEl] = useAtom(videoElAtom);
return (
<>
Gemini Video Scrubber
{videoEl ?
<>
> :
}
>
);
}
export default App;
================================================
FILE: video-scrubber/src/ClickableTimestamps.tsx
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { useAtom } from "jotai";
import {
padStartAtom,
timelineScrollRefAtom,
timeoutRefAtom,
timestampDefaultDurationAtom,
timestampTextAtom,
videoElAtom,
videoLengthAtom
} from "./atoms";
import {
parseTimestamps,
timestampToSeconds
} from "./utils";
import { secondWidth } from "./consts";
/**
 * Component built around the parsed timestamp list: reads the timestamp text,
 * the video element, the timeline scroll container and related settings from
 * atoms and converts the timestamps to second ranges.
 *
 * NOTE(review): the returned JSX is missing from this copy of the file, so
 * how `player`, `padStart`, `scrollEl`, `videoLength` and `timeoutRef` are
 * used cannot be seen here; restore the markup from the original source.
 */
export function ClickableTimestamps() {
const [timestampText] = useAtom(timestampTextAtom);
const [videoEl] = useAtom(videoElAtom);
// Non-null assertion: assumes a video element is already registered.
const player = videoEl!;
const [padStart] = useAtom(padStartAtom);
const [timelineScrollRef] = useAtom(timelineScrollRefAtom);
const [videoLength] = useAtom(videoLengthAtom);
const [timeoutRef] = useAtom(timeoutRefAtom);
const [defaultDuration] = useAtom(timestampDefaultDurationAtom);
// Non-null assertion: assumes the scroll container ref has been populated.
const scrollEl = timelineScrollRef.current!;
const timestamps = parseTimestamps(timestampText);
// Numeric start/end seconds per entry; a missing end defaults to
// start + defaultDuration.
const timestampSeconds = timestamps.map((tobject) => {
return {
start: timestampToSeconds(tobject.start),
end: tobject.end ? timestampToSeconds(tobject.end) : timestampToSeconds(tobject.start) + defaultDuration,
annotation: tobject.annotation
};
});
return (
);
}
================================================
FILE: video-scrubber/src/ClipTimeline.tsx
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { useAtom, useSetAtom } from "jotai";
import {
playPositionAtom, timestampDefaultDurationAtom,
timestampTextAtom,
videoElAtom,
videoLengthAtom
} from "./atoms";
import {
parseTimestamps,
timestampToSeconds
} from "./utils";
import { useDrag } from "@use-gesture/react";
import { secondWidth } from "./consts";
/**
 * Timeline component for the parsed timestamp ranges. Prepares the ranges in
 * seconds, a drag handler that seeks the video, and the timeline width.
 *
 * NOTE(review): the returned JSX is missing from this copy of the file;
 * restore the markup from the original source.
 */
export function ClipTimeline() {
const [videoEl] = useAtom(videoElAtom);
const setPlayPosition = useSetAtom(playPositionAtom);
const [playPosition] = useAtom(playPositionAtom);
// Non-null assertion: assumes a video element is already registered.
const player = videoEl!;
const [timestampText] = useAtom(timestampTextAtom);
const [defaultDuration] = useAtom(timestampDefaultDurationAtom);
const [videoLength] = useAtom(videoLengthAtom);
const timestamps = parseTimestamps(timestampText);
// Numeric start/end seconds per entry; a missing end defaults to
// start + defaultDuration.
const timestampSeconds = timestamps.map((tobject) => {
return {
start: timestampToSeconds(tobject.start),
end: tobject.end ? timestampToSeconds(tobject.end) : timestampToSeconds(tobject.start) + defaultDuration,
annotation: tobject.annotation
};
});
// While a drag is active, map the pointer's horizontal offset inside the
// dragged element to a proportional time in the video and seek there.
const timelineDrag = useDrag(({ active, xy: [x], currentTarget }) => {
if (active) {
const el = currentTarget as HTMLElement;
const offset = el.getBoundingClientRect().left;
const newPosition = ((x - offset) / el.clientWidth) * player.duration;
setPlayPosition(newPosition);
player.currentTime = newPosition;
}
});
// Timeline width: `secondWidth` units per second of video.
const timelineWidth = videoLength * secondWidth;
return (
);
}
================================================
FILE: video-scrubber/src/Controls.tsx
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { useAtom, useSetAtom } from "jotai";
import {
isPlayingAllAtom,
isPlayingAtom,
padStartAtom,
playPositionAtom,
timeoutRefAtom,
timestampDefaultDurationAtom,
timestampTextAtom,
videoElAtom,
} from "./atoms";
import { parseTimestamps, timestampToSeconds } from "./utils";
/**
 * Playback/settings controls. Reads playback state, the default timestamp
 * duration, start padding and the timestamp text from atoms and converts the
 * timestamps to second ranges.
 *
 * NOTE(review): the returned JSX is missing from this copy of the file, so
 * the actual controls and their handlers cannot be seen here; restore the
 * markup from the original source.
 */
export function Controls() {
const [videoEl] = useAtom(videoElAtom);
const [isPlaying] = useAtom(isPlayingAtom);
const [timestampDefaultDuration, setTimestampDefaultDuration] = useAtom(
timestampDefaultDurationAtom,
);
const [padStart, setPadStart] = useAtom(padStartAtom);
const [timestampText] = useAtom(timestampTextAtom);
const [timeoutRef] = useAtom(timeoutRefAtom);
const setPlayPosition = useSetAtom(playPositionAtom);
const [isPlayingAll, setIsPlayingAll] = useAtom(isPlayingAllAtom);
// Same atom as `timestampDefaultDuration` above, read a second time.
const [defaultDuration] = useAtom(timestampDefaultDurationAtom);
const timestamps = parseTimestamps(timestampText);
// Numeric start/end seconds per entry; a missing end defaults to
// start + defaultDuration.
const timestampSeconds = timestamps.map((tobject) => {
return {
start: timestampToSeconds(tobject.start),
end: tobject.end
? timestampToSeconds(tobject.end)
: timestampToSeconds(tobject.start) + defaultDuration,
annotation: tobject.annotation,
};
});
// Non-null assertion: assumes a video element is already registered.
const player = videoEl!;
return (
);
}
================================================
FILE: video-scrubber/src/PlayTimeline.tsx
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { useAtom, useSetAtom } from "jotai";
import {
playPositionAtom, videoElAtom,
videoLengthAtom
} from "./atoms";
import { secondsToTimestamp } from "./utils";
import { useDrag } from "@use-gesture/react";
import { secondWidth } from "./consts";
/**
 * Timeline showing the play position. Prepares a drag handler that seeks the
 * video and renders "<current position> / <video length>" as timestamps.
 *
 * NOTE(review): most of the returned JSX appears to have been stripped from
 * this copy of the file; restore the markup from the original source.
 */
export function PlayTimeline() {
const [videoEl] = useAtom(videoElAtom);
const setPlayPosition = useSetAtom(playPositionAtom);
const [playPosition] = useAtom(playPositionAtom);
const [videoLength] = useAtom(videoLengthAtom);
// Non-null assertion: assumes a video element is already registered.
const player = videoEl!;
// While a drag is active, map the pointer's horizontal offset inside the
// dragged element to a proportional time in the video and seek there.
const timelineDrag = useDrag(({ active, xy: [x], currentTarget }) => {
if (active) {
const el = currentTarget as HTMLElement;
const offset = el.getBoundingClientRect().left;
const newPosition = ((x - offset) / el.clientWidth) * player.duration;
setPlayPosition(newPosition);
player.currentTime = newPosition;
}
});
// Timeline width: `secondWidth` units per second of video.
const timelineWidth = videoLength * secondWidth;
return (
{secondsToTimestamp(Math.round(playPosition))}
/{" "}
{secondsToTimestamp(Math.round(videoLength))}
);
}
================================================
FILE: video-scrubber/src/Timelines.tsx
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { useAtom } from "jotai";
import { timelineScrollRefAtom } from "./atoms";
import { ClipTimeline } from "./ClipTimeline";
/**
 * Container for the timelines; reads the shared timeline scroll ref atom.
 *
 * NOTE(review): the returned JSX is missing from this copy of the file;
 * restore the markup from the original source.
 */
export function Timelines() {
const [timelineScrollRef] = useAtom(timelineScrollRefAtom);
return (
);
}
================================================
FILE: video-scrubber/src/TimestampText.tsx
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { useAtom } from "jotai";
import { timestampTextAtom } from "./atoms";
/**
 * Component bound to the raw timestamp text atom (value and setter).
 *
 * NOTE(review): the returned JSX is missing from this copy of the file;
 * presumably an editable text input — restore the markup from the original.
 */
export function TimestampText() {
const [timestampText, setTimestampText] = useAtom(timestampTextAtom);
return (
);
}
================================================
FILE: video-scrubber/src/Video.tsx
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { useAtom } from "jotai";
import { videoElAtom, videoSrcAtom } from "./atoms";
/**
 * Video component: reads the current video source and holds the setter for
 * the shared video element atom.
 *
 * NOTE(review): the returned JSX was stripped from this copy of the file
 * (only the `videoSrc ?` condition remains); restore it from the original.
 */
export function Video() {
const [, setVideoEl] = useAtom(videoElAtom);
const [videoSrc] = useAtom(videoSrcAtom);
return (
{videoSrc ? (
);
}
================================================
FILE: video-scrubber/src/VideoInput.tsx
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { useAtom } from "jotai";
import { videoSrcAtom, videoFileAtom, storedVideosAtom } from "./atoms";
import { useEffect } from "react";
/**
 * Video selection component. Holds setters for the video source and file
 * atoms and keeps the stored (previously processed) video list free of
 * expired entries.
 *
 * NOTE(review): most of the returned JSX was stripped from this copy of the
 * file; restore the markup from the original source.
 */
export function VideoInput() {
const [, setVideoSrc] = useAtom(videoSrcAtom);
const [, setVideoFile] = useAtom(videoFileAtom);
const [storedVideos, setStoredVideos] = useAtom(storedVideosAtom);
useEffect(() => {
// Check for and remove any processed videos in the stored list whose
// expirationTime is already in the past.
let toRemove: number[] = []
if (storedVideos.length) {
storedVideos.forEach((vid, i) => {
if (new Date(vid.expirationTime) < new Date()) {
toRemove.push(i)
}
})
}
// Only write back when something expired, so the effect (which depends on
// storedVideos) does not update the atom on every run.
if (toRemove.length) {
const remaining = storedVideos.filter((_, index) => !toRemove.includes(index))
setStoredVideos(remaining)
}
}, [storedVideos])
return (
{vid.name} modified on {new Date(vid.lastModified).toDateString()}available until {new Date(vid.expirationTime).toDateString()}
)}
>
}
);
}
================================================
FILE: video-scrubber/src/VideoState.tsx
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { useAtom } from "jotai";
import {
isPlayingAtom, playPositionAtom,
timelineScrollRefAtom, videoElAtom,
videoLengthAtom
} from "./atoms";
import { useEffect } from "react";
import { secondWidth } from "./consts";
/**
 * Render-less component that mirrors the state of the video element into
 * atoms: playing flag, play position and video length. It also keeps the
 * timeline scroll container scrolled so the play position stays in view.
 */
export function VideoState() {
  const [videoEl] = useAtom(videoElAtom);
  const [, setIsPlaying] = useAtom(isPlayingAtom);
  const [, setPlayPosition] = useAtom(playPositionAtom);
  const [videoLength, setVideoLength] = useAtom(videoLengthAtom);
  const [timelineScrollRef] = useAtom(timelineScrollRefAtom);
  const player = videoEl!;
  const timelineWidth = videoLength * secondWidth;

  useEffect(() => {
    const syncIsPlaying = () => {
      setIsPlaying(!player.paused);
    };

    const syncVideoLength = () => {
      setVideoLength(player.duration);
    };

    const syncPlayPosition = () => {
      // Adjust the scroll offset here, together with the position update, so
      // the timeline does not jump around.
      const scrollEl = timelineScrollRef.current;
      if (scrollEl) {
        const headLeft = (player.currentTime / player.duration) * timelineWidth;
        // Play head left of the visible area, or within 64 of its right edge:
        // scroll so the head sits 64 from the left edge.
        if (
          headLeft < scrollEl.scrollLeft ||
          headLeft + 64 > scrollEl.scrollLeft + scrollEl.clientWidth
        ) {
          scrollEl.scrollLeft = headLeft - 64;
        }
      }
      // Store the position rounded to the nearest 1/20 of a second.
      setPlayPosition(Math.round(player.currentTime * 20) / 20);
    };

    const listeners: Array<[string, () => void]> = [
      ["play", syncIsPlaying],
      ["pause", syncIsPlaying],
      ["ended", syncIsPlaying],
      ["timeupdate", syncPlayPosition],
      ["loadedmetadata", syncVideoLength],
    ];

    for (const [eventName, listener] of listeners) {
      player.addEventListener(eventName, listener);
    }
    return () => {
      for (const [eventName, listener] of listeners) {
        player.removeEventListener(eventName, listener);
      }
    };
  }, [player, timelineScrollRef, videoLength]);

  return null;
}
================================================
FILE: video-scrubber/src/atoms.tsx
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { atomWithStorage, } from "jotai/utils"
import { atom } from "jotai";
import { AsyncStorage } from "jotai/vanilla/utils/atomWithStorage";
// Shared jotai atoms for the video scrubber UI.

// Selected video file; starts as null (setter is held by VideoInput).
export const videoFileAtom = atom(null);
// Source of the current video; starts as null (read by Video).
export const videoSrcAtom = atom(null);
// The video element itself; components treat it as the player once set.
export const videoElAtom = atom(null);
// True while the player is not paused (kept in sync by VideoState).
export const isPlayingAtom = atom(false);
// Current play position in seconds.
export const playPositionAtom = atom(0);
// Video duration in seconds (set from the player's duration by VideoState).
export const videoLengthAtom = atom(0);
// Raw timestamp text; components parse it with parseTimestamps().
export const timestampTextAtom = atom("");
// Seconds a timestamp lasts when it has no explicit end.
export const timestampDefaultDurationAtom = atom(1.5);
// Start padding setting; presumably in seconds — TODO confirm against usage.
export const padStartAtom = atom(0.2);
// Ref-style holder for the scrollable timeline container element.
export const timelineScrollRefAtom = atom<{ current: HTMLDivElement | null }>({
current: null,
});
// Ref-style holder for a numeric timeout handle; presumably a setTimeout id — TODO confirm.
export const timeoutRefAtom = atom<{ current: number }>({ current: 0 });
// Flag for a "play all" mode (read and set by Controls).
export const isPlayingAllAtom = atom(false);
// Prompt text; presumably the prompt sent to the model — TODO confirm.
export const promptAtom = atom("");
// Type and custom storage for use with our localStorage video store,
// which is shared by Gemini and VideoInput
/** One entry of the locally stored list of processed videos. */
export type ProcessedVideo = {
  /** Name of the video. */
  name: string;
  /** Last-modified time as a number (passed to `new Date(...)` for display). */
  lastModified: number;
  /** Size of the video; presumably in bytes — TODO confirm. */
  size: number;
  /** URI of the processed video. */
  uri: string;
  /** Date string after which the entry is treated as expired. */
  expirationTime: string;
  /** MIME type of the video. */
  mimeType: string;
};
/**
 * localStorage-backed storage object for the processed-video list.
 *
 * - `getItem` returns the parsed array stored under `key`, or an empty array
 *   when nothing is stored, the stored text is not valid JSON, or the parsed
 *   value is not an array. The second argument is ignored.
 * - `setItem` / `removeItem` return promises that reject if localStorage
 *   throws (for example when the quota is exceeded).
 */
const videoStorage: AsyncStorage = {
  getItem(key, _initialValue) {
    const raw = localStorage.getItem(key);
    if (!raw) {
      return [];
    }
    try {
      const parsed = JSON.parse(raw);
      // Consumers call array methods on the value, so anything else is
      // treated like an empty store.
      return Array.isArray(parsed) ? parsed : [];
    } catch {
      // Corrupted entry: fall back to an empty list instead of throwing.
      return [];
    }
  },
  // async turns a synchronous localStorage exception into a rejected promise.
  async setItem(key, val) {
    localStorage.setItem(key, JSON.stringify(val));
  },
  async removeItem(key) {
    localStorage.removeItem(key);
  },
};
// List of processed videos persisted through videoStorage under the
// 'processedVideos' key; starts as an empty list.
export const storedVideosAtom = atomWithStorage('processedVideos', [], videoStorage)
// Currently selected processed video; starts as null.
export const processedVideoAtom = atom(null)
================================================
FILE: video-scrubber/src/consts.tsx
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Timeline width allotted to one second of video; components multiply the
// video length by this to get the timeline width (presumably CSS pixels).
export const secondWidth = 12
================================================
FILE: video-scrubber/src/index.css
================================================
@tailwind base;
@tailwind components;
@tailwind utilities;
/* Page theme: black background with white text. */
html {
background: black;
color: white;
}
/* App mount element: horizontally centered, at most 1280px wide, 2rem padding. */
#root {
max-width: 1280px;
margin: 0 auto;
padding: 2rem;
}
================================================
FILE: video-scrubber/src/main.tsx
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import React from 'react'
import ReactDOM from 'react-dom/client'
import App from './App.tsx'
import './index.css'
// Mount the React tree on the #root element.
// NOTE(review): the JSX argument to render() appears to have been stripped
// from this copy of the file; restore it from the original source.
ReactDOM.createRoot(document.getElementById('root')!).render(
,
)
================================================
FILE: video-scrubber/src/utils.tsx
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
 * Formats a duration in seconds as `m:ss` or, from one hour upwards, `h:mm:ss`.
 *
 * The seconds field is truncated to one decimal place and zero-padded to two
 * integer digits. When an hours field is present the minutes field is
 * zero-padded to two digits as well, so 3661 seconds becomes "1:01:01".
 *
 * @param seconds Duration in seconds.
 * @returns The formatted timestamp.
 */
export function secondsToTimestamp(seconds: number): string {
  const totalMinutes = Math.floor(seconds / 60);
  const hours = Math.floor(totalMinutes / 60);
  const minutes = totalMinutes % 60;
  // Keep at most one decimal place, truncating rather than rounding.
  const secs = Math.floor((seconds % 60) * 10) / 10;
  const secsText = `${secs < 10 ? "0" : ""}${secs}`;
  if (hours > 0) {
    return `${hours}:${String(minutes).padStart(2, "0")}:${secsText}`;
  }
  return `${minutes}:${secsText}`;
}
/** One parsed line of timestamp text. */
type ParsedTimestamp = {
  start: string;
  end: string | null;
  annotation: string | null;
};

/**
 * Parses timestamp text, one entry per line, in the form
 * `start[-end][ annotation]`.
 *
 * Everything after the first space of a (trimmed) line is the annotation;
 * the part before it is split on "-" into start and end. Every line produces
 * an entry, so a blank line yields an entry whose `start` is the empty string.
 *
 * @param text Newline-separated timestamp lines.
 * @returns One parsed entry per input line, in order.
 */
export function parseTimestamps(text: string): ParsedTimestamp[] {
  return text.split("\n").map((rawLine): ParsedTimestamp => {
    const line = rawLine.trim();
    let range = line;
    let annotation: string | null = null;

    // Split off the annotation at the first space, if there is one.
    const firstSpace = line.indexOf(" ");
    if (firstSpace !== -1) {
      const remainder = line.slice(firstSpace + 1).trim();
      if (remainder.length > 0) {
        range = line.slice(0, firstSpace).trim();
        annotation = remainder;
      }
    }

    // No dash: the whole range is the start and there is no end.
    if (!range.includes("-")) {
      return { start: range, end: null, annotation };
    }

    // Only the first two dash-separated pieces are used.
    const pieces = range.split("-");
    return { start: pieces[0].trim(), end: pieces[1].trim(), annotation };
  });
}
/**
 * Converts a `h:mm:ss`, `m:ss` or plain-seconds timestamp into seconds.
 *
 * Fields are converted with `Number`, so non-numeric fields give `NaN` and an
 * empty string gives 0. With any other number of ":"-separated fields only
 * the first field is used.
 *
 * @param timestamp Timestamp text.
 * @returns The number of seconds.
 */
export function timestampToSeconds(timestamp: string): number {
  const fields = timestamp.split(":");
  switch (fields.length) {
    case 3: {
      const [hours, minutes, secs] = fields.map(Number);
      return hours * 3600 + minutes * 60 + secs;
    }
    case 2: {
      const [minutes, secs] = fields.map(Number);
      return minutes * 60 + secs;
    }
    default:
      return Number(fields[0]);
  }
}
================================================
FILE: video-scrubber/src/vite-env.d.ts
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///
================================================
FILE: video-scrubber/tailwind.config.js
================================================
/**
Copyright 2024 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/** @type {import('tailwindcss').Config} */
export default {
// Glob patterns for the TypeScript/TSX sources under pages, components, app
// and src.
content: [
"./pages/**/*.{ts,tsx}",
"./components/**/*.{ts,tsx}",
"./app/**/*.{ts,tsx}",
"./src/**/*.{ts,tsx}",
],
theme: {
extend: {
// Project colour palette added on top of the default theme.
colors: {
blueAccent: {
DEFAULT: "#293EFF",
},
purpleAccent: {
DEFAULT: "#9FA9FF",
},
gBlack: {
600: "#6F6C72",
800: "#1E1F20",
900: "#000000",
},
gWhite: {
100: "#ffffff",
200: "#E4E3E3",
400: "#A7A7A7",
},
},
},
},
// NOTE(review): require() is used inside a file that also uses
// `export default`; confirm the config loader supports this mix.
plugins: [require("@tailwindcss/typography")],
};
================================================
FILE: video-scrubber/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2020",
"useDefineForClassFields": true,
"lib": [
"ES2020",
"DOM",
"DOM.Iterable"
],
"module": "ESNext",
"skipLibCheck": true,
/* Bundler mode */
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"resolveJsonModule": true,
"isolatedModules": true,
"noEmit": true,
"jsx": "react-jsx",
/* Linting */
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noFallthroughCasesInSwitch": true
},
"include": [
"src",
"server"
],
"references": [
{
"path": "./tsconfig.node.json"
}
]
}
================================================
FILE: video-scrubber/tsconfig.node.json
================================================
{
"compilerOptions": {
"composite": true,
"skipLibCheck": true,
"module": "ESNext",
"moduleResolution": "bundler",
"allowSyntheticDefaultImports": true
},
"include": ["vite.config.ts"]
}
================================================
FILE: video-scrubber/vite.config.ts
================================================
// Copyright 2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { defineConfig } from "vite";
import react from "@vitejs/plugin-react";
// https://vitejs.dev/config/
export default defineConfig({
plugins: [react()],
});
================================================
FILE: voice-cursor/.gcloudignore
================================================
# This file specifies files that are *not* uploaded to Google Cloud
# using gcloud. It follows the same syntax as .gitignore, with the addition of
# "#!include" directives (which insert the entries of the given .gitignore-style
# file at that point).
#
# For more information, run:
# $ gcloud topic gcloudignore
#
.gcloudignore
# If you would like to upload your .git directory, .gitignore file or files
# from your .gitignore file, remove the corresponding line
# below:
.git
.gitignore
# Node.js dependencies:
node_modules/
================================================
FILE: voice-cursor/.gitignore
================================================
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules
/.pnp
.pnp.js
.yarn/install-state.gz
# testing
/coverage
# next.js
/.next/
/out/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# local env files
.env*.local
# vercel
.vercel
# typescript
*.tsbuildinfo
next-env.d.ts
# For Google Cloud deployment
app.yaml
!app.yaml.template
================================================
FILE: voice-cursor/CONTRIBUTING.md
================================================
# How to contribute
We'd love to accept your patches and contributions to this project.
## Before you begin
### Sign our Contributor License Agreement
Contributions to this project must be accompanied by a Contributor License Agreement (CLA). You (or your employer) retain the copyright to your contribution; this simply gives us permission to use and redistribute your contributions as part of the project.
If you or your current employer have already signed the Google CLA (even if it was for a different project), you probably don't need to do it again.
Visit https://cla.developers.google.com/ to see your current agreements or to sign a new one.
### Review our community guidelines
This project follows Google's Open Source Community Guidelines.
## Contribution process
### Code reviews
All submissions, including submissions by project members, require review. We use GitHub pull requests for this purpose. Consult GitHub Help for more information on using pull requests.
================================================
FILE: voice-cursor/LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: voice-cursor/README.md
================================================
# Voice Cursor
An experimental text editor showcasing Gemini 2.0's Native Audio capabilities. Built on top of [Novel](https://novel.sh), Voice Cursor demonstrates how Gemini's new text-to-speech API can be integrated into a text editor for fluid, in-context voice generation.

## What is Gemini 2.0 Native Audio?
Gemini 2.0 introduces multilingual native audio output - a powerful new capability that lets developers generate natural-sounding speech directly from the Gemini API. This project demonstrates how to use this feature in a real application.
🎥 [Watch the Gemini 2.0 Native Audio Demo](https://www.youtube.com/watch?v=qE673AY-WEI) 🔊
## Features
- 🎯 **Native Gemini Audio**: Direct integration with Gemini 2.0's text-to-speech capabilities
- 🎭 **Rich Voice Options**: 8 different Gemini voices with distinct characteristics
- 😊 **Emotional Control**: 15 different tones to shape how Gemini expresses the text
- 🎨 **Visual Integration**: Color-coded highlights show which voice and tone were used
- ⚡ **Instant Generation**: Quick audio synthesis powered by Gemini's latest model
## Getting Started
### 1. Clone this repository and install dependencies:
```bash
git clone https://github.com/googlecreativelab/gemini-demos.git
cd gemini-demos/voice-cursor
```
```bash
npm install
```
### 2. Create a `.env.local` file with your AI Studio API key:
Get your API key from [Google AI Studio](https://aistudio.google.com/apikey)
```env
NEXT_PUBLIC_GEMINI_API_KEY=your_api_key_here
```
### 3. Start the development server:
```bash
npm run dev
```
Open [http://localhost:3000](http://localhost:3000) and start highlighting text!
## How It Works
The magic happens in [`src/components/editor/selectors/voice-popover.tsx`](src/components/editor/selectors/voice-popover.tsx). When text is highlighted, we construct a prompt that includes both the text and the desired emotional tone (for example, `Say rapidly and energetically: "How-are-you-feeling?"`).
This prompt is then sent to Gemini 2.0's API with audio generation enabled.
### Tone Options
The voice cursor supports various emotional tones through the [`src/lib/tone-options.ts`](src/lib/tone-options.ts) file. Each tone has an emoji and a transformation function that constructs the prompt.
Edit, add, or remove tones in [`src/lib/tone-options.ts`](src/lib/tone-options.ts):
```typescript
export const TONE_OPTIONS: ToneOption[] = [
// How are you feeling?
// --> Prompt transformation -->
// Say rapidly and energetically: "How-are-you-feeling?"
{
emoji: "🐰",
name: "Fast",
transform: (text) => `Say rapidly and energetically: "${text.split(' ').join('-')}"`
},
];
```
Then that tone is used in the [`src/components/editor/selectors/voice-popover.tsx`](src/components/editor/selectors/voice-popover.tsx) file where we make a request to Gemini 2.0 Native Audio:
```typescript
const response = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?key=${process.env.NEXT_PUBLIC_GEMINI_API_KEY}`,
{
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
contents: [{
parts: [{ text: textToSpeak }]
}],
generationConfig: {
response_modalities: ["AUDIO"],
speech_config: {
voice_config: {
prebuilt_voice_config: {
voice_name: voice
}
}
}
}
})
}
);
```
## Try Gemini 2.0 Native Audio

You can experiment with Gemini 2.0's Native Audio in AI Studio:
1. Visit [AI Studio](https://aistudio.google.com/app/)
2. Select "Gemini 2.0 Flash Experimental" model
3. Set output format to "Audio"
4. Enter your prompt
5. Click "Generate"
## Credits
- Built with [Novel](https://novel.sh), a Notion-style WYSIWYG editor
- Powered by [Google's Gemini 2.0](https://blog.google/products/gemini/google-gemini-ai-collection-2024/) Native Audio
- Code from [Trudy Painter](https://www.trudy.computer), [@trudypainter](https://github.com/trudypainter)
- Design from [Jose Guizar](https://joseguizar.com/)
## Disclaimer
This is an experiment showcasing Gemini 2.0's Native Audio capabilities, not an official Google product. We'll do our best to support and maintain this experiment but your mileage may vary.
We encourage open sourcing projects as a way of learning from each other. Please respect our and other creators' rights, including copyright and trademark rights when present, when sharing these works and creating derivative work. If you want more info on Google's policy, you can find that [here](https://developers.google.com/terms/site-policies).
## License
Licensed under the Apache-2.0 license.
================================================
FILE: voice-cursor/components.json
================================================
{
"$schema": "https://ui.shadcn.com/schema.json",
"style": "default",
"rsc": true,
"tsx": true,
"tailwind": {
"config": "tailwind.config.ts",
"css": "src/app/globals.css",
"baseColor": "slate",
"cssVariables": true,
"prefix": ""
},
"aliases": {
"components": "@/components",
"utils": "@/lib/utils"
}
}
================================================
FILE: voice-cursor/next.config.mjs
================================================
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** @type {import('next').NextConfig} */
const nextConfig = {};
export default nextConfig;
================================================
FILE: voice-cursor/package.json
================================================
{
"name": "novel-tailwind",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "next lint"
},
"dependencies": {
"@radix-ui/react-dropdown-menu": "^2.0.6",
"@radix-ui/react-popover": "^1.0.7",
"@radix-ui/react-separator": "^1.0.3",
"@radix-ui/react-slot": "^1.0.2",
"@radix-ui/react-tooltip": "^1.1.6",
"@tailwindcss/typography": "^0.5.10",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.0",
"lucide-react": "^0.363.0",
"next": "14.1.4",
"next-themes": "^0.3.0",
"novel": "^0.2.13",
"react": "^18",
"react-dom": "^18",
"sonner": "^1.4.41",
"tailwind-merge": "^2.2.2",
"tailwindcss-animate": "^1.0.7",
"uuid": "^11.0.3"
},
"devDependencies": {
"@types/node": "^20",
"@types/react": "^18",
"@types/react-dom": "^18",
"@types/uuid": "^10.0.0",
"autoprefixer": "^10.0.1",
"postcss": "^8",
"tailwindcss": "^3.3.0",
"typescript": "^5"
}
}
================================================
FILE: voice-cursor/postcss.config.js
================================================
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
module.exports = {
plugins: {
tailwindcss: {},
autoprefixer: {},
},
};
================================================
FILE: voice-cursor/src/app/default-value.ts
================================================
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { getVoiceColor } from "@/lib/voice-options";
/** One highlighted span of the starter document that ships with audio. */
interface VoicedSpan {
  /** Text shown in the editor. */
  text: string;
  /** Identifier stored on the mark for the span's audio clip. */
  audioKey: string;
  /** Tone name stored on the mark. */
  tone: string;
  /** Emoji stored on the mark alongside the tone. */
  toneEmoji: string;
  /** Voice name; also passed to `getVoiceColor` to pick the highlight color. */
  voice: string;
  /** Prompt string stored on the mark. */
  prompt: string;
}

/** Shape of the single "highlight" mark attached to a voiced text node. */
interface VoicedMark {
  type: string;
  attrs: {
    audioKey: string;
    tone: string;
    toneEmoji: string;
    color: ReturnType<typeof getVoiceColor>;
    voice: string;
    prompt: string;
  };
}

/** Text node as used in the starter document; `marks` is absent on plain text. */
interface StarterTextNode {
  type: string;
  text: string;
  marks?: VoicedMark[];
}

/**
 * Builds a text node carrying exactly one "highlight" mark.
 *
 * The mark's `color` is always derived from the span's own voice, which is
 * what every entry of the starter document does.
 */
const voicedText = ({
  text,
  audioKey,
  tone,
  toneEmoji,
  voice,
  prompt,
}: VoicedSpan): StarterTextNode => ({
  type: "text",
  text,
  marks: [
    {
      type: "highlight",
      attrs: {
        audioKey,
        tone,
        toneEmoji,
        color: getVoiceColor(voice),
        voice,
        prompt,
      },
    },
  ],
});

/**
 * Initial editor document: a title, a short introduction, a set of
 * pre-highlighted example sentences, and a closing hint.
 */
export const defaultValue = {
  type: "doc",
  content: [
    {
      type: "heading",
      attrs: { level: 1 },
      content: [{ type: "text", text: "✨ Voice Cursor" }],
    },
    {
      type: "paragraph",
      content: [
        {
          type: "text",
          text: "👋 Hello! This is a starter demo using native audio in Gemini 2.0. Just write text below, then highlight it to hear it spoken in different ways.",
        },
      ],
    },
    {
      type: "paragraph",
      content: [
        voicedText({
          text: "You can hear things read verrrrry mysteriously.",
          audioKey: "orus-mysterious",
          tone: "mysterious",
          toneEmoji: "🔮",
          voice: "Orus",
          prompt: "Say this like a dramatic wizard speaking very mysteriously: \"You can hear things read verrrrry mysteriously.\"",
        }),
      ],
    },
    {
      type: "paragraph",
      content: [
        voicedText({
          text: "Or whispered, like a secret.",
          audioKey: "orus-whispered",
          tone: "whispering",
          toneEmoji: "🦗",
          voice: "Orus",
          // NOTE(review): the quoted text here starts with a lowercase "or",
          // unlike the displayed text above; kept exactly as it was.
          prompt: "Whisper in a hushed, secretive tone: \"or whispered, like a secret.\"",
        }),
      ],
    },
    {
      type: "paragraph",
      content: [
        voicedText({
          text: "Or spoken in … with lots … and lots … of DRAMA!",
          audioKey: "orus-dramatic",
          tone: "dramatic",
          toneEmoji: "🎭",
          voice: "Orus",
          prompt: "Say this like a Shakespearean actor speaking a very dramatic monologue: \"Or spoken in … with lots … and lots … of DRAMA!\"",
        }),
      ],
    },
    {
      type: "heading",
      attrs: { level: 2 },
      content: [{ type: "text", text: "Examples" }],
    },
    {
      type: "paragraph",
      content: [
        voicedText({
          text: "\"Trudy, my friend, I must tell you about some ancient mysteries.\"",
          audioKey: "charon-mysterious",
          tone: "mysterious",
          toneEmoji: "🔮",
          voice: "Charon",
          prompt: "Say this like a dramatic wizard speaking very mysteriously: \"Trudy, my friend, I must tell you about some ancient mysteries.\"",
        }),
        { type: "text", text: " Alex said." },
      ],
    },
    {
      type: "paragraph",
      content: [
        voicedText({
          text: "\"Oh hey Alex, what's going on?\"",
          audioKey: "kore-neutral",
          tone: "casual",
          toneEmoji: "💬",
          voice: "Kore",
          prompt: "Say: \"Oh hey Alex, what's going on?\"",
        }),
        { type: "text", text: " Trudy asked." },
      ],
    },
    {
      type: "paragraph",
      content: [
        voicedText({
          text: "\"Mysterious, mysteries, oh my gosh I love mysteries!!!\"",
          audioKey: "zephyr-excited",
          tone: "excited",
          toneEmoji: "😃",
          voice: "Zephyr",
          prompt: "Say this like a very excited person: \"Mysterious, mysteries, oh my gosh I love mysteries!!!\"",
        }),
        { type: "text", text: " Jordan exclaimed, running towards the group excitedly." },
      ],
    },
    {
      type: "paragraph",
      content: [
        voicedText({
          text: "\"Woah.. can everyone, just chill …\"",
          audioKey: "orus-surfer",
          tone: "surfer",
          toneEmoji: "🏄",
          voice: "Orus",
          prompt: "Say this like a chill surfer: \"Woah.. can everyone, just chill …\"",
        }),
        { type: "text", text: " said Dan." },
      ],
    },
    {
      type: "paragraph",
      content: [
        voicedText({
          text: "\"Can you make this quick. I gotta run in like 2 minutes\"",
          audioKey: "leda-fast",
          tone: "fast",
          toneEmoji: "🐰",
          voice: "Leda",
          prompt: "Say this like a fast person: \"Can you make this quick. I gotta run in like 2 minutes\"",
        }),
        { type: "text", text: " Suz said." },
      ],
    },
    {
      type: "heading",
      attrs: { level: 2 },
      content: [{ type: "text", text: "Give it a try" }],
    },
    {
      type: "paragraph",
      content: [{ type: "text", text: "Press '/' for commands" }],
    },
  ],
};
================================================
FILE: voice-cursor/src/app/globals.css
================================================
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
@tailwind base;
@tailwind components;
@tailwind utilities;
@layer base {
:root {
--background: 0 0% 100%;
--foreground: 222.2 84% 4.9%;
--card: 0 0% 100%;
--card-foreground: 222.2 84% 4.9%;
--popover: 0 0% 100%;
--popover-foreground: 222.2 84% 4.9%;
--primary: 222.2 47.4% 11.2%;
--primary-foreground: 210 40% 98%;
--secondary: 210 40% 96.1%;
--secondary-foreground: 222.2 47.4% 11.2%;
--muted: 210 40% 96.1%;
--muted-foreground: 215.4 16.3% 46.9%;
--accent: 210 40% 96.1%;
--accent-foreground: 222.2 47.4% 11.2%;
--destructive: 0 84.2% 60.2%;
--destructive-foreground: 210 40% 98%;
--border: 214.3 31.8% 91.4%;
--input: 214.3 31.8% 91.4%;
--ring: 222.2 84% 4.9%;
--radius: 0.5rem;
/* Novel highlight colors */
--novel-highlight-default: #ffffff;
--novel-highlight-purple: rgba(147, 51, 234, 0.2);
--novel-highlight-red: rgba(224, 0, 0, 0.2);
--novel-highlight-yellow: rgba(234, 179, 8, 0.2);
--novel-highlight-blue: rgba(37, 99, 235, 0.2);
--novel-highlight-green: rgba(0, 138, 0, 0.2);
--novel-highlight-orange: rgba(255, 165, 0, 0.2);
--novel-highlight-pink: rgba(186, 64, 129, 0.2);
--novel-highlight-gray: rgba(168, 162, 158, 0.2);
}
}
@layer base {
* {
@apply border-border;
}
body {
@apply bg-background text-foreground;
}
}
/* Tippy tooltip styles */
.tippy-box {
@apply bg-white border border-gray-200 shadow-lg rounded-md !important;
}
.tippy-box[data-placement^='bottom'] > .tippy-arrow:before {
@apply border-b-gray-200 !important;
}
/* Audio highlight styles */
.audio-highlight-text {
position: relative;
cursor: pointer;
padding: 4px 8px;
border-radius: 8px;
margin: 0px -4px;
}
mark {
border-radius: 8px !important;
padding: 8px 0px !important;
}
mark:hover {
cursor: pointer !important;
}
/*
 * Tone emoji badge: renders the element's data-tone-emoji attribute as
 * generated content positioned above the top-left of the highlighted text.
 * (Absolute positioning is relative to .audio-highlight-text, which is
 * position: relative.)
 */
.audio-highlight-text::before {
  content: attr(data-tone-emoji);
  position: absolute;
  top: -20px;
  left: 6px;
  padding: 4px;
  border-top-left-radius: 4px;
  border-top-right-radius: 4px;
  line-height: 1;
  /* A second, earlier `font-size: 14px` was dead code: this later value always won. */
  font-size: 18px;
}
================================================
FILE: voice-cursor/src/app/layout.tsx
================================================
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import type { Metadata } from "next";
import { Inter as FontSans } from "next/font/google";
import "./prosemirror.css";
import "./globals.css";
import { cn } from "@/lib/utils";
import { ThemeProvider } from "@/components/theme-provider";
import { Toaster } from "@/components/ui/sonner";
const fontSans = FontSans({
subsets: ["latin"],
variable: "--font-sans",
});
export const metadata: Metadata = {
title: "Voice Cursor",
description: "Powered by Gemini 2.0 Native Audio",
};
/**
 * Root layout component for the app.
 *
 * @param children - Content to render inside the layout.
 *
 * NOTE(review): the module also imports `cn`, `ThemeProvider`, `Toaster` and
 * defines `fontSans`; presumably they wrap `children` in the rendered markup —
 * confirm against the full file.
 */
export default function RootLayout({
children,
}: Readonly<{
children: React.ReactNode;
}>) {
return (
{children}
);
}
================================================
FILE: voice-cursor/src/app/page.tsx
================================================
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
"use client";
import dynamic from "next/dynamic";
import type { JSONContent } from "novel";
import { useState, useEffect } from "react";
import { defaultValue } from "./default-value";
const EditorWrapper = dynamic(() => import("@/components/editor/client-editor"), {
ssr: false
});
/**
 * Collects the unique `audioKey` values found on "highlight" marks anywhere
 * in a document tree.
 *
 * A node's children (`content`) are visited before the node's own `marks`,
 * and keys are returned in first-seen order.
 *
 * Malformed input is skipped rather than throwing: non-object nodes or marks,
 * highlight marks without `attrs`, and `audioKey` values that are not
 * non-empty strings are all ignored.
 *
 * @param content - Top-level nodes of the document (e.g. `doc.content`).
 * @returns De-duplicated audio keys.
 */
const extractAudioKeys = (content: readonly unknown[]): string[] => {
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null;

  // A Set preserves insertion order, so it de-duplicates and keeps first-seen order.
  const audioKeys = new Set<string>();

  const traverse = (node: unknown): void => {
    if (!isRecord(node)) return;

    if (Array.isArray(node.content)) {
      for (const child of node.content) traverse(child);
    }

    if (Array.isArray(node.marks)) {
      for (const mark of node.marks) {
        if (!isRecord(mark) || mark.type !== "highlight") continue;
        // `attrs` is optional on a mark; reading through it unguarded would throw.
        const attrs = mark.attrs;
        if (!isRecord(attrs)) continue;
        const key = attrs.audioKey;
        if (typeof key === "string" && key !== "") audioKeys.add(key);
      }
    }
  };

  for (const node of content) traverse(node);
  return Array.from(audioKeys);
};
export default function Home() {
const [value, setValue] = useState(defaultValue);
const [initializationComplete, setInitializationComplete] = useState(false);
// Initialize the audio blobs for all highlighted text
useEffect(() => {
let mounted = true;
console.log('🎵 Starting to initialize audio files...');
/**
 * Loads the audio clip for every `audioKey` in the default document and
 * stores each one in `window.editorContext`.
 *
 * Steps:
 *  1. Wait (bounded polling) until `window.editorContext` exists.
 *  2. For each key, fetch `/audio/<key>.wav` and hand the blob to
 *     `editorContext.setAudioBlob`.
 *  3. Mark initialization complete if the effect is still mounted.
 *
 * A failure for one clip is logged and does not stop the remaining clips.
 * The work stops early once the surrounding effect has been cleaned up
 * (`mounted === false`).
 */
const initializeDefaultAudios = async () => {
  if (typeof window === 'undefined') return;

  // NOTE(review): `editorContext` is presumably published on `window` by the
  // dynamically imported editor — confirm. Poll for it rather than sleeping
  // once, so a slow load does not permanently skip audio initialization.
  const POLL_INTERVAL_MS = 100;
  const MAX_WAIT_MS = 5000;
  const getEditorContext = () => (window as any).editorContext;

  if (!getEditorContext()) {
    console.log('Waiting for EditorContext...');
    for (
      let waitedMs = 0;
      waitedMs < MAX_WAIT_MS && mounted && !getEditorContext();
      waitedMs += POLL_INTERVAL_MS
    ) {
      await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS));
    }
  }

  // The effect was cleaned up while we were waiting; nothing left to do.
  if (!mounted) return;

  if (!getEditorContext()) {
    console.error('EditorContext not available after waiting');
    return;
  }
  console.log('EditorContext available, loading audio files...');

  // Extract audio keys from default value
  const audioKeys = extractAudioKeys(defaultValue.content);
  console.log('Found audio keys:', audioKeys);

  for (const audioKey of audioKeys) {
    // Do not keep fetching on behalf of an effect that has been torn down.
    if (!mounted) return;

    try {
      const filename = `${audioKey}.wav`;
      console.log(`Loading audio file: ${filename}`);

      // Fetch the audio file
      const response = await fetch(`/audio/${filename}`);
      if (!response.ok) {
        throw new Error(`Failed to load audio file: ${filename}`);
      }

      // Get the audio data as a blob
      const audioBlob = await response.blob();
      console.log(`Successfully loaded ${filename}:`, {
        size: audioBlob.size,
        type: audioBlob.type
      });

      // Store in EditorContext
      getEditorContext().setAudioBlob(audioKey, audioBlob);
      console.log(`Stored audio blob for ${audioKey}`);

      // Diagnostic read-back only. Optional chaining keeps a missing
      // `audioBlobs` map from being reported as a load failure below.
      const storedBlob = getEditorContext()?.audioBlobs?.get(audioKey);
      console.log(`Verification - Retrieved blob for ${audioKey}:`, {
        exists: Boolean(storedBlob),
        size: storedBlob?.size,
        type: storedBlob?.type
      });
    } catch (error) {
      console.error(`Error loading audio for ${audioKey}:`, error);
    }
  }

  if (mounted) {
    setInitializationComplete(true);
  }
};
initializeDefaultAudios();
return () => {
mounted = false;
};
}, []);
// Log when initialization is complete
useEffect(() => {
if (initializationComplete) {
console.log('🎵 Audio initialization complete. Available audio:',
Array.from((window as any).editorContext?.audioBlobs?.keys() || []));
}
}, [initializationComplete]);
return (
);
}
================================================
FILE: voice-cursor/src/app/prosemirror.css
================================================
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Placeholder text, taken from the element's data-placeholder attribute.
   Applies to an .is-editor-empty first child and to any .is-empty element. */
.ProseMirror .is-editor-empty:first-child::before,
.ProseMirror .is-empty::before {
  content: attr(data-placeholder);
  float: left;
  color: hsl(var(--muted-foreground));
  pointer-events: none;
  height: 0;
}
/* Custom image styles: images dim slightly on hover and while selected. */
.ProseMirror img {
  transition: filter 0.1s ease-in-out;
}

.ProseMirror img:hover {
  cursor: pointer;
  filter: brightness(90%);
}

.ProseMirror img.ProseMirror-selectednode {
  outline: 3px solid #5abbf7;
  filter: brightness(90%);
}
/* Image placeholder: positioning context for the spinner drawn by :before. */
.img-placeholder {
  position: relative;
}

/* Spinner ring: a round bordered box whose darker top edge rotates. */
.img-placeholder:before {
  content: "";
  box-sizing: border-box;
  position: absolute;
  top: 50%;
  left: 50%;
  width: 36px;
  height: 36px;
  border-radius: 50%;
  border: 3px solid var(--novel-stone-200);
  border-top-color: var(--novel-stone-800);
  animation: spinning 0.6s linear infinite;
}
/* One full clockwise turn; used by the image placeholder spinner. */
@keyframes spinning {
  100% {
    transform: rotate(360deg);
  }
}
/* Custom TODO list checkboxes – shoutout to this awesome tutorial: https://moderncss.dev/pure-css-custom-checkbox-style/ */
/* Label wrapping the checkbox: not text-selectable, with a wider gap on narrow screens. */
ul[data-type="taskList"] li > label {
  margin-right: 0.2rem;
  user-select: none;

  @media screen and (max-width: 768px) {
    margin-right: 0.5rem;
  }
}
/* Task-list checkbox: the native control is replaced by a bordered box, and the
   check mark is a clipped ::before element scaled in when :checked. */
ul[data-type="taskList"] li > label input[type="checkbox"] {
  -webkit-appearance: none;
  appearance: none;
  background-color: hsl(var(--background));
  margin: 0 0.3rem 0 0;
  cursor: pointer;
  width: 1.2em;
  height: 1.2em;
  position: relative;
  top: 5px;
  border: 2px solid hsl(var(--border));
  display: grid;
  place-content: center;

  &:hover,
  &:active {
    background-color: hsl(var(--accent));
  }

  /* Check mark: hidden at scale(0) until the box is checked. */
  &::before {
    content: "";
    width: 0.65em;
    height: 0.65em;
    transform: scale(0);
    transition: 120ms transform ease-in-out;
    box-shadow: inset 1em 1em;
    transform-origin: center;
    clip-path: polygon(14% 44%, 0 65%, 50% 100%, 100% 16%, 80% 0%, 43% 62%);
  }

  &:checked::before {
    transform: scale(1);
  }
}
/* Checked task items: muted, struck-through text.
   --muted-foreground holds bare HSL channels (the placeholder rules in this
   stylesheet consume it as hsl(var(--muted-foreground))), so it must be wrapped
   in hsl() here too; used bare, the declaration is not a valid color. */
ul[data-type="taskList"] li[data-checked="true"] > div > p {
  color: hsl(var(--muted-foreground));
  text-decoration: line-through;
  text-decoration-thickness: 2px;
}
/* Overwrite tippy-box original max-width.
   !important is presumably needed to win over the width the tooltip library
   sets itself — confirm before removing it. */
.tippy-box {
max-width: 400px !important;
}
/* Selected node while the editor does not carry the .dragging class:
   no outline or box shadow, just a highlight-blue background that fades in.
   NOTE(review): because of !important, this outline: none also suppresses the
   3px outline declared for `.ProseMirror img.ProseMirror-selectednode`
   whenever .dragging is absent — confirm that is intended. */
.ProseMirror:not(.dragging) .ProseMirror-selectednode {
outline: none !important;
background-color: var(--novel-highlight-blue);
transition: background-color 0.2s;
box-shadow: none;
}
/* Drag handle: a fixed-position grip whose six-dot icon is an inline SVG
   data URI filled with half-transparent black. */
.drag-handle {
position: fixed;
opacity: 1;
transition: opacity ease-in 0.2s;
border-radius: 0.25rem;
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 10 10' style='fill: rgba(0, 0, 0, 0.5)'%3E%3Cpath d='M3,2 C2.44771525,2 2,1.55228475 2,1 C2,0.44771525 2.44771525,0 3,0 C3.55228475,0 4,0.44771525 4,1 C4,1.55228475 3.55228475,2 3,2 Z M3,6 C2.44771525,6 2,5.55228475 2,5 C2,4.44771525 2.44771525,4 3,4 C3.55228475,4 4,4.44771525 4,5 C4,5.55228475 3.55228475,6 3,6 Z M3,10 C2.44771525,10 2,9.55228475 2,9 C2,8.44771525 2.44771525,8 3,8 C3.55228475,8 4,8.44771525 4,9 C4,9.55228475 3.55228475,10 3,10 Z M7,2 C6.44771525,2 6,1.55228475 6,1 C6,0.44771525 6.44771525,0 7,0 C7.55228475,0 8,0.44771525 8,1 C8,1.55228475 7.55228475,2 7,2 Z M7,6 C6.44771525,6 6,5.55228475 6,5 C6,4.44771525 6.44771525,4 7,4 C7.55228475,4 8,4.44771525 8,5 C8,5.55228475 7.55228475,6 7,6 Z M7,10 C6.44771525,10 6,9.55228475 6,9 C6,8.44771525 6.44771525,8 7,8 C7.55228475,8 8,8.44771525 8,9 C8,9.55228475 7.55228475,10 7,10 Z'%3E%3C/path%3E%3C/svg%3E");
background-size: calc(0.5em + 0.375rem) calc(0.5em + 0.375rem);
background-repeat: no-repeat;
background-position: center;
width: 1.2rem;
height: 1.5rem;
z-index: 50;
cursor: grab;
/* Hover and pressed states tint the background; pressing also switches the cursor to grabbing. */
&:hover {
background-color: var(--novel-stone-100);
transition: background-color 0.2s;
}
&:active {
background-color: var(--novel-stone-200);
transition: background-color 0.2s;
cursor: grabbing;
}
/* .hide fades the handle out (via the opacity transition above) and makes it non-interactive. */
&.hide {
opacity: 0;
pointer-events: none;
}
/* The handle is removed entirely on screens up to 600px wide. */
@media screen and (max-width: 600px) {
display: none;
pointer-events: none;
}
}
/* Under a .dark ancestor, the same grip icon is drawn with a half-transparent white fill. */
.dark .drag-handle {
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 10 10' style='fill: rgba(255, 255, 255, 0.5)'%3E%3Cpath d='M3,2 C2.44771525,2 2,1.55228475 2,1 C2,0.44771525 2.44771525,0 3,0 C3.55228475,0 4,0.44771525 4,1 C4,1.55228475 3.55228475,2 3,2 Z M3,6 C2.44771525,6 2,5.55228475 2,5 C2,4.44771525 2.44771525,4 3,4 C3.55228475,4 4,4.44771525 4,5 C4,5.55228475 3.55228475,6 3,6 Z M3,10 C2.44771525,10 2,9.55228475 2,9 C2,8.44771525 2.44771525,8 3,8 C3.55228475,8 4,8.44771525 4,9 C4,9.55228475 3.55228475,10 3,10 Z M7,2 C6.44771525,2 6,1.55228475 6,1 C6,0.44771525 6.44771525,0 7,0 C7.55228475,0 8,0.44771525 8,1 C8,1.55228475 7.55228475,2 7,2 Z M7,6 C6.44771525,6 6,5.55228475 6,5 C6,4.44771525 6.44771525,4 7,4 C7.55228475,4 8,4.44771525 8,5 C8,5.55228475 7.55228475,6 7,6 Z M7,10 C6.44771525,10 6,9.55228475 6,9 C6,8.44771525 6.44771525,8 7,8 C7.55228475,8 8,8.44771525 8,9 C8,9.55228475 7.55228475,10 7,10 Z'%3E%3C/path%3E%3C/svg%3E");
}
================================================
FILE: voice-cursor/src/app/test-audio/page.tsx
================================================
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
"use client";
import { useState, useEffect } from "react";
/**
 * Voice names offered by this page, grouped under a category label.
 * The list is based on the API documentation.
 */
const VOICE_OPTIONS = {
  "Named Voices": [
    "Zephyr",
    "Puck",
    "Charon",
    "Kore",
    "Fenrir",
    "Leda",
    "Orus",
    "Gemini H",
  ],
};
/**
 * Audio test page.
 *
 * Sends the text in `input` to the Gemini `generateContent` REST endpoint,
 * requesting an AUDIO response spoken with `selectedVoice`. The base64 payload
 * that comes back is wrapped in a 44-byte WAV header and exposed as an object
 * URL in `audioUrl`.
 */
export default function TestAudio() {
// Prompt text sent to the model.
const [input, setInput] = useState('Say in a cheerful tone: "Hello world, how are you today?"');
// Human-readable status line (success or failure).
const [output, setOutput] = useState("");
const [isLoading, setIsLoading] = useState(false);
const [selectedVoice, setSelectedVoice] = useState("Kore");
// Object URL of the most recently generated WAV blob, or null before the first run.
const [audioUrl, setAudioUrl] = useState(null);
const [errorDetails, setErrorDetails] = useState("");
// Runs one request/decode cycle; every failure is caught and reported through
// `errorDetails` and `output`.
const handleSubmit = async () => {
setIsLoading(true);
setErrorDetails("");
try {
// NOTE(review): Next.js inlines NEXT_PUBLIC_-prefixed variables into the client
// bundle, so this API key is visible to anyone who loads the page.
const response = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?key=${process.env.NEXT_PUBLIC_GEMINI_API_KEY}`,
{
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
contents: [{
parts: [{ text: input }]
}],
generationConfig: {
response_modalities: ["AUDIO"],
speech_config: {
voice_config: {
prebuilt_voice_config: {
voice_name: selectedVoice
}
}
}
}
})
}
);
// NOTE(review): response.ok is never checked, so an HTTP error body is parsed
// like a success and surfaces only as the generic "No audio data" error below.
const data = await response.json();
console.log("API Response:", data);
// Only the first part of the first candidate is inspected for audio.
if (!data.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data) {
throw new Error('No audio data received in response');
}
const base64Audio = data.candidates[0].content.parts[0].inlineData.data;
// NOTE(review): this logs the entire base64 payload to the console.
console.log("Base64 Audio:", base64Audio);
// Parse the audio format parameters.
// The MIME type is split on ';': each `key=value` segment becomes an entry,
// and a segment with no '=' is stored under `mimeBase`.
const mimeType = data.candidates[0].content.parts[0].inlineData.mimeType || 'audio/wav';
const mimeParams = mimeType.split(';').reduce((acc: Record, param: string) => {
const [key, value] = param.split('=');
if (value) {
acc[key.trim()] = value.trim();
} else {
acc.mimeBase = key.trim();
}
return acc;
}, {} as Record);
console.log("MIME params:", mimeParams);
// Convert base64 to PCM audio data (one byte per decoded character)
const byteCharacters = atob(base64Audio);
const byteNumbers = new Array(byteCharacters.length);
for (let i = 0; i < byteCharacters.length; i++) {
byteNumbers[i] = byteCharacters.charCodeAt(i);
}
const byteArray = new Uint8Array(byteNumbers);
// Create WAV header for PCM data.
// Chunk tags are written big-endian (false); all numeric fields little-endian (true).
// The header hard-codes mono, 16-bit PCM; only the sample rate comes from the
// MIME parameters, falling back to 24000 when `rate` is absent or unparsable.
const wavHeader = new ArrayBuffer(44);
const view = new DataView(wavHeader);
// "RIFF" chunk descriptor
view.setUint32(0, 0x52494646, false); // "RIFF"
view.setUint32(4, 36 + byteArray.length, true); // file length
view.setUint32(8, 0x57415645, false); // "WAVE"
// "fmt " sub-chunk
view.setUint32(12, 0x666D7420, false); // "fmt "
view.setUint32(16, 16, true); // subchunk size
view.setUint16(20, 1, true); // PCM audio format
view.setUint16(22, 1, true); // Mono channel
view.setUint32(24, parseInt(mimeParams.rate) || 24000, true); // sample rate
view.setUint32(28, (parseInt(mimeParams.rate) || 24000) * 2, true); // byte rate = sample rate * block align
view.setUint16(32, 2, true); // block align (1 channel * 2 bytes per sample)
view.setUint16(34, 16, true); // bits per sample
// "data" sub-chunk
view.setUint32(36, 0x64617461, false); // "data"
view.setUint32(40, byteArray.length, true); // data length
// Combine header and PCM data without spread operator
const wavBytes = new Uint8Array(wavHeader.byteLength + byteArray.length);
wavBytes.set(new Uint8Array(wavHeader), 0);
wavBytes.set(byteArray, wavHeader.byteLength);
// Create blob with WAV format
const audioBlob = new Blob([wavBytes], { type: 'audio/wav' });
// Create URL from blob.
// This local `audioUrl` shadows the state variable of the same name.
const audioUrl = URL.createObjectURL(audioBlob);
setAudioUrl(audioUrl);
setOutput("Audio generated successfully!");
} catch (error) {
console.error("Error:", error);
setErrorDetails(error instanceof Error ? error.message : 'An error occurred');
setOutput("Failed to generate audio response.");
} finally {
setIsLoading(false);
}
};
// Clean up object URL when component unmounts or when audioUrl changes:
// the cleanup closes over the previous URL and revokes it.
useEffect(() => {
return () => {
if (audioUrl) {
URL.revokeObjectURL(audioUrl);
}
};
}, [audioUrl]);
return (
Audio Test Page
);
}
================================================
FILE: voice-cursor/src/components/editor/advanced-editor.tsx
================================================
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
"use client";
import React, { useEffect, useState } from "react";
import {
EditorRoot,
EditorCommand,
EditorCommandItem,
EditorCommandEmpty,
EditorContent,
type JSONContent,
EditorCommandList,
EditorBubble,
} from "novel";
import { ImageResizer, handleCommandNavigation } from "novel/extensions";
import { defaultExtensions } from "./extensions";
import { slashCommand, suggestionItems } from "./slash-command";
import { handleImageDrop, handleImagePaste } from "novel/plugins";
import { uploadFn } from "./image-upload";
import { VoicePopover } from "./selectors/voice-popover";
import { AudioHighlight } from "./extensions/audio-highlight";
// Extension list for the editor: the shared defaults followed by the
// slash-command and AudioHighlight extensions.
const extensions = [
  ...defaultExtensions,
  slashCommand,
  AudioHighlight,
];
/** Props accepted by the `Editor` component. */
interface EditorProp {
/** Optional document passed in as the editor's initial value. */
initialValue?: JSONContent;
/**
 * Callback that receives a document as `JSONContent`.
 * Presumably invoked with the updated document on each edit — the call
 * site is outside this view; confirm.
 */
onChange: (value: JSONContent) => void;
}
/**
 * React context exposing the audio blobs held by the editor.
 *
 * - `audioBlobs`: map from audio key to its audio `Blob`.
 * - `setAudioBlob`: stores (or replaces) the blob for a key.
 *
 * The default value is an empty map and a no-op setter, which is what a
 * consumer rendered without a provider receives.
 */
export const AudioBlobContext = React.createContext<{
  // `Map` is generic and needs explicit type arguments; the key and value
  // types mirror `setAudioBlob`'s parameters.
  audioBlobs: Map<string, Blob>;
  setAudioBlob: (key: string, blob: Blob) => void;
}>({
  audioBlobs: new Map<string, Blob>(),
  setAudioBlob: () => {},
});
const Editor = ({ initialValue, onChange }: EditorProp) => {
const [showBubbleMenu, setShowBubbleMenu] = useState(false);
const [audioBlobs, setAudioBlobs] = useState