Repository: hortonworks/hive-testbench Branch: hdp3 Commit: 35dd91a1d614 Files: 302 Total size: 473.3 KB Directory structure: gitextract_i96bxtes/ ├── .gitignore ├── README.md ├── ddl-tpcds/ │ ├── bin_partitioned/ │ │ ├── add_constraints.sql │ │ ├── analyze.sql │ │ ├── call_center.sql │ │ ├── catalog_page.sql │ │ ├── catalog_returns.sql │ │ ├── catalog_sales.sql │ │ ├── customer.sql │ │ ├── customer_address.sql │ │ ├── customer_demographics.sql │ │ ├── date_dim.sql │ │ ├── household_demographics.sql │ │ ├── income_band.sql │ │ ├── inventory.sql │ │ ├── item.sql │ │ ├── promotion.sql │ │ ├── reason.sql │ │ ├── ship_mode.sql │ │ ├── store.sql │ │ ├── store_returns.sql │ │ ├── store_sales.sql │ │ ├── time_dim.sql │ │ ├── warehouse.sql │ │ ├── web_page.sql │ │ ├── web_returns.sql │ │ ├── web_sales.sql │ │ └── web_site.sql │ └── text/ │ ├── alltables.sql │ └── analyze_everything.sql ├── ddl-tpch/ │ ├── bin_flat/ │ │ ├── alltables.sql │ │ ├── analyze.sql │ │ ├── customer.sql │ │ ├── lineitem.sql │ │ ├── nation.sql │ │ ├── orders.sql │ │ ├── part.sql │ │ ├── partsupp.sql │ │ ├── region.sql │ │ └── supplier.sql │ └── bin_partitioned/ │ ├── analyze.sql │ ├── customer.sql │ ├── lineitem.sql │ ├── nation.sql │ ├── orders.sql │ ├── part.sql │ ├── partsupp.sql │ ├── region.sql │ └── supplier.sql ├── runSuite.pl ├── sample-queries-tpcds/ │ ├── README.md │ ├── query1.sql │ ├── query10.sql │ ├── query11.sql │ ├── query12.sql │ ├── query13.sql │ ├── query14.sql │ ├── query15.sql │ ├── query16.sql │ ├── query17.sql │ ├── query18.sql │ ├── query19.sql │ ├── query2.sql │ ├── query20.sql │ ├── query21.sql │ ├── query22.sql │ ├── query23.sql │ ├── query24.sql │ ├── query25.sql │ ├── query26.sql │ ├── query27.sql │ ├── query28.sql │ ├── query29.sql │ ├── query3.sql │ ├── query30.sql │ ├── query31.sql │ ├── query32.sql │ ├── query33.sql │ ├── query34.sql │ ├── query35.sql │ ├── query36.sql │ ├── query37.sql │ ├── query38.sql │ ├── query39.sql │ ├── query4.sql │ ├── query40.sql │ ├── 
query41.sql │ ├── query42.sql │ ├── query43.sql │ ├── query44.sql │ ├── query45.sql │ ├── query46.sql │ ├── query47.sql │ ├── query48.sql │ ├── query49.sql │ ├── query5.sql │ ├── query50.sql │ ├── query51.sql │ ├── query52.sql │ ├── query53.sql │ ├── query54.sql │ ├── query55.sql │ ├── query56.sql │ ├── query57.sql │ ├── query58.sql │ ├── query59.sql │ ├── query6.sql │ ├── query60.sql │ ├── query61.sql │ ├── query62.sql │ ├── query63.sql │ ├── query64.sql │ ├── query65.sql │ ├── query66.sql │ ├── query67.sql │ ├── query68.sql │ ├── query69.sql │ ├── query7.sql │ ├── query70.sql │ ├── query71.sql │ ├── query72.sql │ ├── query73.sql │ ├── query74.sql │ ├── query75.sql │ ├── query76.sql │ ├── query77.sql │ ├── query78.sql │ ├── query79.sql │ ├── query8.sql │ ├── query80.sql │ ├── query81.sql │ ├── query82.sql │ ├── query83.sql │ ├── query84.sql │ ├── query85.sql │ ├── query86.sql │ ├── query87.sql │ ├── query88.sql │ ├── query89.sql │ ├── query9.sql │ ├── query90.sql │ ├── query91.sql │ ├── query92.sql │ ├── query93.sql │ ├── query94.sql │ ├── query95.sql │ ├── query96.sql │ ├── query97.sql │ ├── query98.sql │ └── query99.sql ├── sample-queries-tpch/ │ ├── README.md │ ├── testbench-withATS.settings │ ├── testbench.settings │ ├── tpch_query1.sql │ ├── tpch_query10.sql │ ├── tpch_query11.sql │ ├── tpch_query12.sql │ ├── tpch_query13.sql │ ├── tpch_query14.sql │ ├── tpch_query15.sql │ ├── tpch_query16.sql │ ├── tpch_query17.sql │ ├── tpch_query18.sql │ ├── tpch_query19.sql │ ├── tpch_query2.sql │ ├── tpch_query20.sql │ ├── tpch_query21.sql │ ├── tpch_query22.sql │ ├── tpch_query3.sql │ ├── tpch_query4.sql │ ├── tpch_query5.sql │ ├── tpch_query6.sql │ ├── tpch_query7.sql │ ├── tpch_query8.sql │ └── tpch_query9.sql ├── settings/ │ ├── init.sql │ ├── load-flat.sql │ └── load-partitioned.sql ├── spark-queries-tpcds/ │ ├── LICENSE │ ├── README.md │ ├── q1.sql │ ├── q10.sql │ ├── q11.sql │ ├── q12.sql │ ├── q13.sql │ ├── q14a.sql │ ├── q14b.sql │ ├── q15.sql │ ├── q16.sql │ 
├── q17.sql │ ├── q18.sql │ ├── q19.sql │ ├── q2.sql │ ├── q20.sql │ ├── q21.sql │ ├── q22.sql │ ├── q23a.sql │ ├── q23b.sql │ ├── q24a.sql │ ├── q24b.sql │ ├── q25.sql │ ├── q26.sql │ ├── q27.sql │ ├── q28.sql │ ├── q29.sql │ ├── q3.sql │ ├── q30.sql │ ├── q31.sql │ ├── q32.sql │ ├── q33.sql │ ├── q34.sql │ ├── q35.sql │ ├── q36.sql │ ├── q37.sql │ ├── q38.sql │ ├── q39a.sql │ ├── q39b.sql │ ├── q4.sql │ ├── q40.sql │ ├── q41.sql │ ├── q42.sql │ ├── q43.sql │ ├── q44.sql │ ├── q45.sql │ ├── q46.sql │ ├── q47.sql │ ├── q48.sql │ ├── q49.sql │ ├── q5.sql │ ├── q50.sql │ ├── q51.sql │ ├── q52.sql │ ├── q53.sql │ ├── q54.sql │ ├── q55.sql │ ├── q56.sql │ ├── q57.sql │ ├── q58.sql │ ├── q59.sql │ ├── q6.sql │ ├── q60.sql │ ├── q61.sql │ ├── q62.sql │ ├── q63.sql │ ├── q64.sql │ ├── q65.sql │ ├── q66.sql │ ├── q67.sql │ ├── q68.sql │ ├── q69.sql │ ├── q7.sql │ ├── q70.sql │ ├── q71.sql │ ├── q72.sql │ ├── q73.sql │ ├── q74.sql │ ├── q75.sql │ ├── q76.sql │ ├── q77.sql │ ├── q78.sql │ ├── q79.sql │ ├── q8.sql │ ├── q80.sql │ ├── q81.sql │ ├── q82.sql │ ├── q83.sql │ ├── q84.sql │ ├── q85.sql │ ├── q86.sql │ ├── q87.sql │ ├── q88.sql │ ├── q89.sql │ ├── q9.sql │ ├── q90.sql │ ├── q91.sql │ ├── q92.sql │ ├── q93.sql │ ├── q94.sql │ ├── q95.sql │ ├── q96.sql │ ├── q97.sql │ ├── q98.sql │ └── q99.sql ├── tpcds-build.sh ├── tpcds-gen/ │ ├── Makefile │ ├── README.md │ ├── patches/ │ │ ├── Darwin/ │ │ │ └── macosx.patch │ │ └── all/ │ │ ├── tpcds-buffered.patch │ │ ├── tpcds-strcpy.patch │ │ └── tpcds_misspelled_header_guard.patch │ ├── pom.xml │ └── src/ │ └── main/ │ └── java/ │ └── org/ │ └── notmysock/ │ └── tpcds/ │ └── GenTable.java ├── tpcds-setup.sh ├── tpch-build.sh ├── tpch-gen/ │ ├── Makefile │ ├── README.md │ ├── ddl/ │ │ ├── orc.sql │ │ └── text.sql │ ├── patches/ │ │ └── Darwin/ │ │ └── macosx.patch │ ├── pom.xml │ └── src/ │ └── main/ │ └── java/ │ └── org/ │ └── notmysock/ │ └── tpch/ │ └── GenTable.java └── tpch-setup.sh 
================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ target/ tpcds_kit.zip tpch_kit.zip *.sql.log derby.log ================================================ FILE: README.md ================================================ hive-testbench ============== A testbench for experimenting with Apache Hive at any data scale. Overview ======== The hive-testbench is a data generator and set of queries that let you experiment with Apache Hive at scale. The testbench allows you to experience base Hive performance on large datasets, and gives an easy way to see the impact of Hive tuning parameters and advanced settings. Prerequisites ============= You will need: * Hadoop 2.2 or later cluster or Sandbox. * Apache Hive. * Between 15 minutes and 2 days to generate data (depending on the Scale Factor you choose and available hardware). * If you plan to generate 1TB or more of data, using Apache Hive 13+ to generate the data is STRONGLY suggested. Install and Setup ================= All of these steps should be carried out on your Hadoop cluster. - Step 1: Prepare your environment. In addition to Hadoop and Hive, before you begin ensure ```gcc``` is installed and available on your system path. If your system does not have it, install it using yum or apt-get. - Step 2: Decide which test suite(s) you want to use. hive-testbench comes with data generators and sample queries based on both the TPC-DS and TPC-H benchmarks. You can choose to use either or both of these benchmarks for experimentation. More information about these benchmarks can be found at the Transaction Processing Performance Council homepage. - Step 3: Compile and package the appropriate data generator. For TPC-DS, ```./tpcds-build.sh``` downloads, compiles and packages the TPC-DS data generator. 
For TPC-H, ```./tpch-build.sh``` downloads, compiles and packages the TPC-H data generator. - Step 4: Decide how much data you want to generate. You need to decide on a "Scale Factor" which represents how much data you will generate. Scale Factor roughly translates to gigabytes, so a Scale Factor of 100 is about 100 gigabytes and one terabyte is Scale Factor 1000. Decide how much data you want and keep it in mind for the next step. If you have a cluster of 4-10 nodes or just want to experiment at a smaller scale, scale 1000 (1 TB) of data is a good starting point. If you have a large cluster, you may want to choose Scale 10000 (10 TB) or more. The notion of scale factor is similar between TPC-DS and TPC-H. If you want to generate a large amount of data, you should use Hive 13 or later. Hive 13 introduced an optimization that allows far more scalable data partitioning. Hive 12 and lower will likely crash if you generate more than a few hundred GB of data and tuning around the problem is difficult. You can generate text or RCFile data in Hive 13 and use it in multiple versions of Hive. - Step 5: Generate and load the data. The scripts ```tpcds-setup.sh``` and ```tpch-setup.sh``` generate and load data for TPC-DS and TPC-H, respectively. General usage is ```tpcds-setup.sh scale_factor [directory]``` or ```tpch-setup.sh scale_factor [directory]``` Some examples: Build 1 TB of TPC-DS data: ```./tpcds-setup.sh 1000``` Build 1 TB of TPC-H data: ```./tpch-setup.sh 1000``` Build 100 TB of TPC-DS data: ```./tpcds-setup.sh 100000``` Build 30 TB of text formatted TPC-DS data: ```FORMAT=textfile ./tpcds-setup.sh 30000``` Build 30 TB of RCFile formatted TPC-DS data: ```FORMAT=rcfile ./tpcds-setup.sh 30000``` Also check the other parameters in the setup scripts; an important one is BUCKET_DATA. - Step 6: Run queries. More than 50 sample TPC-DS queries and all TPC-H queries are included for you to try. You can use ```hive```, ```beeline``` or the SQL tool of your choice. 
The testbench also includes a set of suggested settings. This example assumes you have generated 1 TB of TPC-DS data during Step 5: ``` cd sample-queries-tpcds hive -i testbench.settings hive> use tpcds_bin_partitioned_orc_1000; hive> source query55.sql; ``` Note that the database is named based on the Data Scale chosen in Step 4. At Data Scale 10000, your TPC-DS database will be named tpcds_bin_partitioned_orc_10000. For TPC-H at Data Scale 1000, it would be named tpch_flat_orc_1000. You can always run ```show databases``` to get a list of available databases. Similarly, if you generated 1 TB of TPC-H data during Step 5: ``` cd sample-queries-tpch hive -i testbench.settings hive> use tpch_flat_orc_1000; hive> source tpch_query1.sql; ``` Feedback ======== If you have questions, comments or problems, visit the [Hortonworks Hive forum](http://hortonworks.com/community/forums/forum/hive/). If you have improvements, pull requests are accepted. ================================================ FILE: ddl-tpcds/bin_partitioned/add_constraints.sql ================================================ -- set hivevar:DB=tpcds_bin_partitioned_orc_10000 alter table customer_address add constraint ${DB}_pk_ca primary key (ca_address_sk) disable novalidate rely; alter table customer_demographics add constraint ${DB}_pk_cd primary key (cd_demo_sk) disable novalidate rely; alter table date_dim add constraint ${DB}_pk_dd primary key (d_date_sk) disable novalidate rely; alter table warehouse add constraint ${DB}_pk_w primary key (w_warehouse_sk) disable novalidate rely; alter table ship_mode add constraint ${DB}_pk_sm primary key (sm_ship_mode_sk) disable novalidate rely; alter table time_dim add constraint ${DB}_pk_td primary key (t_time_sk) disable novalidate rely; alter table reason add constraint ${DB}_pk_r primary key (r_reason_sk) disable novalidate rely; alter table income_band add constraint ${DB}_pk_ib primary key (ib_income_band_sk) disable novalidate rely; alter table item add constraint ${DB}_pk_i 
primary key (i_item_sk) disable novalidate rely; alter table store add constraint ${DB}_pk_s primary key (s_store_sk) disable novalidate rely; alter table call_center add constraint ${DB}_pk_cc primary key (cc_call_center_sk) disable novalidate rely; alter table customer add constraint ${DB}_pk_c primary key (c_customer_sk) disable novalidate rely; alter table web_site add constraint ${DB}_pk_ws primary key (web_site_sk) disable novalidate rely; alter table store_returns add constraint ${DB}_pk_sr primary key (sr_item_sk, sr_ticket_number) disable novalidate rely; alter table household_demographics add constraint ${DB}_pk_hd primary key (hd_demo_sk) disable novalidate rely; alter table web_page add constraint ${DB}_pk_wp primary key (wp_web_page_sk) disable novalidate rely; alter table promotion add constraint ${DB}_pk_p primary key (p_promo_sk) disable novalidate rely; alter table catalog_page add constraint ${DB}_pk_cp primary key (cp_catalog_page_sk) disable novalidate rely; -- partition_col case alter table inventory add constraint ${DB}_pk_in primary key (inv_date_sk, inv_item_sk, inv_warehouse_sk) disable novalidate rely; alter table catalog_returns add constraint ${DB}_pk_cr primary key (cr_item_sk, cr_order_number) disable novalidate rely; alter table web_returns add constraint ${DB}_pk_wr primary key (wr_item_sk, wr_order_number) disable novalidate rely; alter table web_sales add constraint ${DB}_pk_ws2 primary key (ws_item_sk, ws_order_number) disable novalidate rely; alter table catalog_sales add constraint ${DB}_pk_cs primary key (cs_item_sk, cs_order_number) disable novalidate rely; alter table store_sales add constraint ${DB}_pk_ss primary key (ss_item_sk, ss_ticket_number) disable novalidate rely; alter table call_center add constraint ${DB}_cc_d1 foreign key (cc_closed_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table call_center add constraint ${DB}_cc_d2 foreign key (cc_open_date_sk) references date_dim (d_date_sk) 
disable novalidate rely; alter table catalog_page add constraint ${DB}_cp_d1 foreign key (cp_end_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table catalog_page add constraint ${DB}_cp_d2 foreign key (cp_start_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table catalog_returns add constraint ${DB}_cr_cc foreign key (cr_call_center_sk) references call_center (cc_call_center_sk) disable novalidate rely; alter table catalog_returns add constraint ${DB}_cr_cp foreign key (cr_catalog_page_sk) references catalog_page (cp_catalog_page_sk) disable novalidate rely; alter table catalog_returns add constraint ${DB}_cr_cs foreign key (cr_item_sk, cr_order_number) references catalog_sales (cs_item_sk, cs_order_number) disable novalidate rely; alter table catalog_returns add constraint ${DB}_cr_i foreign key (cr_item_sk) references item (i_item_sk) disable novalidate rely; alter table catalog_returns add constraint ${DB}_cr_r foreign key (cr_reason_sk) references reason (r_reason_sk) disable novalidate rely; alter table catalog_returns add constraint ${DB}_cr_a1 foreign key (cr_refunded_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; alter table catalog_returns add constraint ${DB}_cr_cd1 foreign key (cr_refunded_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; alter table catalog_returns add constraint ${DB}_cr_c1 foreign key (cr_refunded_customer_sk) references customer (c_customer_sk) disable novalidate rely; alter table catalog_returns add constraint ${DB}_cr_hd1 foreign key (cr_refunded_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; -- partition_col case alter table catalog_returns add constraint ${DB}_cr_d1 foreign key (cr_returned_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table catalog_returns add constraint ${DB}_cr_t foreign key (cr_returned_time_sk) references time_dim (t_time_sk) disable 
novalidate rely; alter table catalog_returns add constraint ${DB}_cr_a2 foreign key (cr_returning_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; alter table catalog_returns add constraint ${DB}_cr_cd2 foreign key (cr_returning_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; alter table catalog_returns add constraint ${DB}_cr_c2 foreign key (cr_returning_customer_sk) references customer (c_customer_sk) disable novalidate rely; alter table catalog_returns add constraint ${DB}_cr_hd2 foreign key (cr_returning_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; -- alter table catalog_returns add constraint ${DB}_cr_d2 foreign key (cr_ship_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table catalog_returns add constraint ${DB}_cr_sm foreign key (cr_ship_mode_sk) references ship_mode (sm_ship_mode_sk) disable novalidate rely; alter table catalog_returns add constraint ${DB}_cr_w2 foreign key (cr_warehouse_sk) references warehouse (w_warehouse_sk) disable novalidate rely; alter table catalog_sales add constraint ${DB}_cs_b_a foreign key (cs_bill_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; alter table catalog_sales add constraint ${DB}_cs_b_cd foreign key (cs_bill_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; alter table catalog_sales add constraint ${DB}_cs_b_c foreign key (cs_bill_customer_sk) references customer (c_customer_sk) disable novalidate rely; alter table catalog_sales add constraint ${DB}_cs_b_hd foreign key (cs_bill_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; alter table catalog_sales add constraint ${DB}_cs_cc foreign key (cs_call_center_sk) references call_center (cc_call_center_sk) disable novalidate rely; alter table catalog_sales add constraint ${DB}_cs_cp foreign key (cs_catalog_page_sk) references catalog_page 
(cp_catalog_page_sk) disable novalidate rely; alter table catalog_sales add constraint ${DB}_cs_i foreign key (cs_item_sk) references item (i_item_sk) disable novalidate rely; alter table catalog_sales add constraint ${DB}_cs_p foreign key (cs_promo_sk) references promotion (p_promo_sk) disable novalidate rely; alter table catalog_sales add constraint ${DB}_cs_s_a foreign key (cs_ship_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; alter table catalog_sales add constraint ${DB}_cs_s_cd foreign key (cs_ship_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; alter table catalog_sales add constraint ${DB}_cs_s_c foreign key (cs_ship_customer_sk) references customer (c_customer_sk) disable novalidate rely; alter table catalog_sales add constraint ${DB}_cs_d1 foreign key (cs_ship_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table catalog_sales add constraint ${DB}_cs_s_hd foreign key (cs_ship_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; alter table catalog_sales add constraint ${DB}_cs_sm foreign key (cs_ship_mode_sk) references ship_mode (sm_ship_mode_sk) disable novalidate rely; -- partition_col case alter table catalog_sales add constraint ${DB}_cs_d2 foreign key (cs_sold_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table catalog_sales add constraint ${DB}_cs_t foreign key (cs_sold_time_sk) references time_dim (t_time_sk) disable novalidate rely; alter table catalog_sales add constraint ${DB}_cs_w foreign key (cs_warehouse_sk) references warehouse (w_warehouse_sk) disable novalidate rely; alter table customer add constraint ${DB}_c_a foreign key (c_current_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; alter table customer add constraint ${DB}_c_cd foreign key (c_current_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; alter table customer add constraint 
${DB}_c_hd foreign key (c_current_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; alter table customer add constraint ${DB}_c_fsd foreign key (c_first_sales_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table customer add constraint ${DB}_c_fsd2 foreign key (c_first_shipto_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table household_demographics add constraint ${DB}_hd_ib foreign key (hd_income_band_sk) references income_band (ib_income_band_sk) disable novalidate rely; -- partition_col case alter table inventory add constraint ${DB}_inv_d foreign key (inv_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table inventory add constraint ${DB}_inv_i foreign key (inv_item_sk) references item (i_item_sk) disable novalidate rely; alter table inventory add constraint ${DB}_inv_w foreign key (inv_warehouse_sk) references warehouse (w_warehouse_sk) disable novalidate rely; alter table promotion add constraint ${DB}_p_end_date foreign key (p_end_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table promotion add constraint ${DB}_p_i foreign key (p_item_sk) references item (i_item_sk) disable novalidate rely; alter table promotion add constraint ${DB}_p_start_date foreign key (p_start_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table store add constraint ${DB}_s_close_date foreign key (s_closed_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table store_returns add constraint ${DB}_sr_a foreign key (sr_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; alter table store_returns add constraint ${DB}_sr_cd foreign key (sr_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; alter table store_returns add constraint ${DB}_sr_c foreign key (sr_customer_sk) references customer (c_customer_sk) disable novalidate rely; alter table store_returns 
add constraint ${DB}_sr_hd foreign key (sr_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; alter table store_returns add constraint ${DB}_sr_i foreign key (sr_item_sk) references item (i_item_sk) disable novalidate rely; alter table store_returns add constraint ${DB}_sr_r foreign key (sr_reason_sk) references reason (r_reason_sk) disable novalidate rely; -- partition_col case alter table store_returns add constraint ${DB}_sr_ret_d foreign key (sr_returned_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table store_returns add constraint ${DB}_sr_t foreign key (sr_return_time_sk) references time_dim (t_time_sk) disable novalidate rely; alter table store_returns add constraint ${DB}_sr_s foreign key (sr_store_sk) references store (s_store_sk) disable novalidate rely; alter table store_returns add constraint ${DB}_sr_ss foreign key (sr_item_sk, sr_ticket_number) references store_sales (ss_item_sk, ss_ticket_number) disable novalidate rely; alter table store_sales add constraint ${DB}_ss_a foreign key (ss_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; alter table store_sales add constraint ${DB}_ss_cd foreign key (ss_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; alter table store_sales add constraint ${DB}_ss_c foreign key (ss_customer_sk) references customer (c_customer_sk) disable novalidate rely; alter table store_sales add constraint ${DB}_ss_hd foreign key (ss_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; alter table store_sales add constraint ${DB}_ss_i foreign key (ss_item_sk) references item (i_item_sk) disable novalidate rely; alter table store_sales add constraint ${DB}_ss_p foreign key (ss_promo_sk) references promotion (p_promo_sk) disable novalidate rely; -- partition_col case alter table store_sales add constraint ${DB}_ss_d foreign key (ss_sold_date_sk) references date_dim (d_date_sk) disable 
novalidate rely; alter table store_sales add constraint ${DB}_ss_t foreign key (ss_sold_time_sk) references time_dim (t_time_sk) disable novalidate rely; alter table store_sales add constraint ${DB}_ss_s foreign key (ss_store_sk) references store (s_store_sk) disable novalidate rely; alter table web_page add constraint ${DB}_wp_ad foreign key (wp_access_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table web_page add constraint ${DB}_wp_cd foreign key (wp_creation_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table web_returns add constraint ${DB}_wr_i foreign key (wr_item_sk) references item (i_item_sk) disable novalidate rely; alter table web_returns add constraint ${DB}_wr_r foreign key (wr_reason_sk) references reason (r_reason_sk) disable novalidate rely; alter table web_returns add constraint ${DB}_wr_ref_a foreign key (wr_refunded_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; alter table web_returns add constraint ${DB}_wr_ref_cd foreign key (wr_refunded_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; alter table web_returns add constraint ${DB}_wr_ref_c foreign key (wr_refunded_customer_sk) references customer (c_customer_sk) disable novalidate rely; alter table web_returns add constraint ${DB}_wr_ref_hd foreign key (wr_refunded_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; -- partition_col case alter table web_returns add constraint ${DB}_wr_ret_d foreign key (wr_returned_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table web_returns add constraint ${DB}_wr_ret_t foreign key (wr_returned_time_sk) references time_dim (t_time_sk) disable novalidate rely; alter table web_returns add constraint ${DB}_wr_ret_a foreign key (wr_returning_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; alter table web_returns add constraint ${DB}_wr_ret_cd foreign key 
(wr_returning_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; alter table web_returns add constraint ${DB}_wr_ret_c foreign key (wr_returning_customer_sk) references customer (c_customer_sk) disable novalidate rely; alter table web_returns add constraint ${DB}_wr_ret_hd foreign key (wr_returning_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; alter table web_returns add constraint ${DB}_wr_ws foreign key (wr_item_sk, wr_order_number) references web_sales (ws_item_sk, ws_order_number) disable novalidate rely; alter table web_returns add constraint ${DB}_wr_wp foreign key (wr_web_page_sk) references web_page (wp_web_page_sk) disable novalidate rely; alter table web_sales add constraint ${DB}_ws_b_a foreign key (ws_bill_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; alter table web_sales add constraint ${DB}_ws_b_cd foreign key (ws_bill_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; alter table web_sales add constraint ${DB}_ws_b_c foreign key (ws_bill_customer_sk) references customer (c_customer_sk) disable novalidate rely; alter table web_sales add constraint ${DB}_ws_b_hd foreign key (ws_bill_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; alter table web_sales add constraint ${DB}_ws_i foreign key (ws_item_sk) references item (i_item_sk) disable novalidate rely; alter table web_sales add constraint ${DB}_ws_p foreign key (ws_promo_sk) references promotion (p_promo_sk) disable novalidate rely; alter table web_sales add constraint ${DB}_ws_s_a foreign key (ws_ship_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; alter table web_sales add constraint ${DB}_ws_s_cd foreign key (ws_ship_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; alter table web_sales add constraint ${DB}_ws_s_c foreign key (ws_ship_customer_sk) references customer 
(c_customer_sk) disable novalidate rely; alter table web_sales add constraint ${DB}_ws_s_d foreign key (ws_ship_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table web_sales add constraint ${DB}_ws_s_hd foreign key (ws_ship_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; alter table web_sales add constraint ${DB}_ws_sm foreign key (ws_ship_mode_sk) references ship_mode (sm_ship_mode_sk) disable novalidate rely; -- partition_col case alter table web_sales add constraint ${DB}_ws_d2 foreign key (ws_sold_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table web_sales add constraint ${DB}_ws_t foreign key (ws_sold_time_sk) references time_dim (t_time_sk) disable novalidate rely; alter table web_sales add constraint ${DB}_ws_w2 foreign key (ws_warehouse_sk) references warehouse (w_warehouse_sk) disable novalidate rely; alter table web_sales add constraint ${DB}_ws_wp foreign key (ws_web_page_sk) references web_page (wp_web_page_sk) disable novalidate rely; alter table web_sales add constraint ${DB}_ws_ws foreign key (ws_web_site_sk) references web_site (web_site_sk) disable novalidate rely; alter table web_site add constraint ${DB}_web_d1 foreign key (web_close_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table web_site add constraint ${DB}_web_d2 foreign key (web_open_date_sk) references date_dim (d_date_sk) disable novalidate rely; alter table store change column s_store_id s_store_id string constraint ${DB}_strid_nn not null disable novalidate rely; alter table call_center change column cc_call_center_id cc_call_center_id string constraint ${DB}_ccid_nn not null disable novalidate rely; alter table catalog_page change column cp_catalog_page_id cp_catalog_page_id string constraint ${DB}_cpid_nn not null disable novalidate rely; alter table web_site change column web_site_id web_site_id string constraint ${DB}_wsid_nn not null disable novalidate rely; alter 
table web_page change column wp_web_page_id wp_web_page_id string constraint ${DB}_wpid_nn not null disable novalidate rely;
alter table warehouse change column w_warehouse_id w_warehouse_id string constraint ${DB}_wid_nn not null disable novalidate rely;
alter table customer change column c_customer_id c_customer_id string constraint ${DB}_cid_nn not null disable novalidate rely;
alter table customer_address change column ca_address_id ca_address_id string constraint ${DB}_caid_nn not null disable novalidate rely;
alter table date_dim change column d_date_id d_date_id string constraint ${DB}_did_nn not null disable novalidate rely;
alter table item change column i_item_id i_item_id string constraint ${DB}_itid_nn not null disable novalidate rely;
alter table promotion change column p_promo_id p_promo_id string constraint ${DB}_pid_nn not null disable novalidate rely;
alter table reason change column r_reason_id r_reason_id string constraint ${DB}_rid_nn not null disable novalidate rely;
alter table ship_mode change column sm_ship_mode_id sm_ship_mode_id string constraint ${DB}_smid_nn not null disable novalidate rely;
alter table time_dim change column t_time_id t_time_id string constraint ${DB}_tid_nn not null disable novalidate rely;

-- customer.c_customer_id was given a NOT NULL constraint above; this second
-- "change column" on the same column declares a UNIQUE constraint as well.
alter table customer change column c_customer_id c_customer_id string constraint ${DB}_cid_uq unique disable novalidate rely;

================================================
FILE: ddl-tpcds/bin_partitioned/analyze.sql
================================================
-- Computes column-level statistics for each of the 24 tables listed below.
-- This script contains no "use" statement, so it operates on whichever
-- database is current when it is run.
analyze table call_center compute statistics for columns;
analyze table catalog_page compute statistics for columns;
analyze table catalog_returns compute statistics for columns;
analyze table catalog_sales compute statistics for columns;
analyze table customer compute statistics for columns;
analyze table customer_address compute statistics for columns;
analyze table customer_demographics compute statistics for columns;
analyze table date_dim compute statistics for columns;
analyze table household_demographics compute statistics for columns;
analyze table income_band compute statistics for columns;
analyze table inventory compute statistics for columns;
analyze table item compute statistics for columns;
analyze table promotion compute statistics for columns;
analyze table reason compute statistics for columns;
analyze table ship_mode compute statistics for columns;
analyze table store compute statistics for columns;
analyze table store_returns compute statistics for columns;
analyze table store_sales compute statistics for columns;
analyze table time_dim compute statistics for columns;
analyze table warehouse compute statistics for columns;
analyze table web_page compute statistics for columns;
analyze table web_returns compute statistics for columns;
analyze table web_sales compute statistics for columns;
analyze table web_site compute statistics for columns;

================================================
FILE: ddl-tpcds/bin_partitioned/call_center.sql
================================================
-- Rebuilds call_center in database ${DB} (created if it does not exist) as a
-- full copy of ${SOURCE}.call_center, stored in the ${FILE} format.
-- Any existing ${DB}.call_center table is dropped first.
create database if not exists ${DB};
use ${DB};

drop table if exists call_center;

create table call_center
stored as ${FILE}
as select * from ${SOURCE}.call_center;

================================================
FILE: ddl-tpcds/bin_partitioned/catalog_page.sql
================================================
-- Rebuilds catalog_page in database ${DB} (created if it does not exist) as a
-- full copy of ${SOURCE}.catalog_page, stored in the ${FILE} format.
-- Any existing ${DB}.catalog_page table is dropped first.
create database if not exists ${DB};
use ${DB};

drop table if exists catalog_page;

create table catalog_page
stored as ${FILE}
as select * from ${SOURCE}.catalog_page;

================================================
FILE: ddl-tpcds/bin_partitioned/catalog_returns.sql
================================================
-- Rebuilds catalog_returns in database ${DB} (created if it does not exist)
-- as a table partitioned by cr_returned_date_sk and stored in the ${FILE}
-- format, then loads it from ${SOURCE}.catalog_returns.
-- Any existing ${DB}.catalog_returns table is dropped first.
create database if not exists ${DB};
use ${DB};

drop table if exists catalog_returns;

-- cr_returned_date_sk is declared only in "partitioned by", not in the
-- regular column list.
create table catalog_returns
(
    cr_returned_time_sk bigint
,   cr_item_sk bigint
,   cr_refunded_customer_sk bigint
,   cr_refunded_cdemo_sk bigint
,   cr_refunded_hdemo_sk bigint
,   cr_refunded_addr_sk bigint
,   cr_returning_customer_sk bigint
,   cr_returning_cdemo_sk bigint
,   cr_returning_hdemo_sk bigint
,   cr_returning_addr_sk bigint
,   cr_call_center_sk bigint
,   cr_catalog_page_sk bigint
,   cr_ship_mode_sk bigint
,   cr_warehouse_sk bigint
,   cr_reason_sk bigint
,   cr_order_number bigint
,   cr_return_quantity int
,   cr_return_amount decimal(7,2)
,   cr_return_tax decimal(7,2)
,   cr_return_amt_inc_tax decimal(7,2)
,   cr_fee decimal(7,2)
,   cr_return_ship_cost decimal(7,2)
,   cr_refunded_cash decimal(7,2)
,   cr_reversed_charge decimal(7,2)
,   cr_store_credit decimal(7,2)
,   cr_net_loss decimal(7,2)
)
partitioned by (cr_returned_date_sk bigint)
stored as ${FILE};

-- Multi-insert: one "from" clause over the source table feeds two
-- "insert overwrite" clauses into the same target. Both name the partition
-- column without a value and select cr.cr_returned_date_sk as the last column.
--   1) rows whose cr_returned_date_sk is not null;
--   2) rows whose cr_returned_date_sk is null, with "sort by" on that column.
-- NOTE(review): rows with a NULL key presumably end up in Hive's default
-- partition, and the split into two inserts looks like a load optimization;
-- neither is stated in this file -- confirm before changing it.
from ${SOURCE}.catalog_returns cr
insert overwrite table catalog_returns partition(cr_returned_date_sk)
select
        cr.cr_returned_time_sk,
        cr.cr_item_sk,
        cr.cr_refunded_customer_sk,
        cr.cr_refunded_cdemo_sk,
        cr.cr_refunded_hdemo_sk,
        cr.cr_refunded_addr_sk,
        cr.cr_returning_customer_sk,
        cr.cr_returning_cdemo_sk,
        cr.cr_returning_hdemo_sk,
        cr.cr_returning_addr_sk,
        cr.cr_call_center_sk,
        cr.cr_catalog_page_sk,
        cr.cr_ship_mode_sk,
        cr.cr_warehouse_sk,
        cr.cr_reason_sk,
        cr.cr_order_number,
        cr.cr_return_quantity,
        cr.cr_return_amount,
        cr.cr_return_tax,
        cr.cr_return_amt_inc_tax,
        cr.cr_fee,
        cr.cr_return_ship_cost,
        cr.cr_refunded_cash,
        cr.cr_reversed_charge,
        cr.cr_store_credit,
        cr.cr_net_loss,
        cr.cr_returned_date_sk
        where cr.cr_returned_date_sk is not null
insert overwrite table catalog_returns partition (cr_returned_date_sk)
select
        cr.cr_returned_time_sk,
        cr.cr_item_sk,
        cr.cr_refunded_customer_sk,
        cr.cr_refunded_cdemo_sk,
        cr.cr_refunded_hdemo_sk,
        cr.cr_refunded_addr_sk,
        cr.cr_returning_customer_sk,
        cr.cr_returning_cdemo_sk,
        cr.cr_returning_hdemo_sk,
        cr.cr_returning_addr_sk,
        cr.cr_call_center_sk,
        cr.cr_catalog_page_sk,
        cr.cr_ship_mode_sk,
        cr.cr_warehouse_sk,
        cr.cr_reason_sk,
        cr.cr_order_number,
        cr.cr_return_quantity,
        cr.cr_return_amount,
        cr.cr_return_tax,
        cr.cr_return_amt_inc_tax,
        cr.cr_fee,
        cr.cr_return_ship_cost,
        cr.cr_refunded_cash,
        cr.cr_reversed_charge,
        cr.cr_store_credit,
        cr.cr_net_loss,
        cr.cr_returned_date_sk
        where cr.cr_returned_date_sk is null
        sort by cr_returned_date_sk
;

================================================ FILE: ddl-tpcds/bin_partitioned/catalog_sales.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists catalog_sales; create table catalog_sales ( cs_sold_time_sk bigint , cs_ship_date_sk bigint , cs_bill_customer_sk bigint , cs_bill_cdemo_sk bigint , cs_bill_hdemo_sk bigint , cs_bill_addr_sk bigint , cs_ship_customer_sk bigint , cs_ship_cdemo_sk bigint , cs_ship_hdemo_sk bigint , cs_ship_addr_sk bigint , cs_call_center_sk bigint , cs_catalog_page_sk bigint , cs_ship_mode_sk bigint , cs_warehouse_sk bigint , cs_item_sk bigint , cs_promo_sk bigint , cs_order_number bigint , cs_quantity int , cs_wholesale_cost decimal(7,2) , cs_list_price decimal(7,2) , cs_sales_price decimal(7,2) , cs_ext_discount_amt decimal(7,2) , cs_ext_sales_price decimal(7,2) , cs_ext_wholesale_cost decimal(7,2) , cs_ext_list_price decimal(7,2) , cs_ext_tax decimal(7,2) , cs_coupon_amt decimal(7,2) , cs_ext_ship_cost decimal(7,2) , cs_net_paid decimal(7,2) , cs_net_paid_inc_tax decimal(7,2) , cs_net_paid_inc_ship decimal(7,2) , cs_net_paid_inc_ship_tax decimal(7,2) , cs_net_profit decimal(7,2) ) partitioned by (cs_sold_date_sk bigint) stored as ${FILE}; from ${SOURCE}.catalog_sales cs insert overwrite table catalog_sales partition (cs_sold_date_sk) select cs.cs_sold_time_sk, cs.cs_ship_date_sk, cs.cs_bill_customer_sk, cs.cs_bill_cdemo_sk, cs.cs_bill_hdemo_sk, cs.cs_bill_addr_sk, cs.cs_ship_customer_sk, cs.cs_ship_cdemo_sk, cs.cs_ship_hdemo_sk, cs.cs_ship_addr_sk, cs.cs_call_center_sk, cs.cs_catalog_page_sk, cs.cs_ship_mode_sk, cs.cs_warehouse_sk, cs.cs_item_sk, cs.cs_promo_sk, cs.cs_order_number, cs.cs_quantity, cs.cs_wholesale_cost, cs.cs_list_price, cs.cs_sales_price, cs.cs_ext_discount_amt, cs.cs_ext_sales_price, 
cs.cs_ext_wholesale_cost, cs.cs_ext_list_price, cs.cs_ext_tax, cs.cs_coupon_amt, cs.cs_ext_ship_cost, cs.cs_net_paid, cs.cs_net_paid_inc_tax, cs.cs_net_paid_inc_ship, cs.cs_net_paid_inc_ship_tax, cs.cs_net_profit, cs.cs_sold_date_sk where cs.cs_sold_date_sk is not null insert overwrite table catalog_sales partition (cs_sold_date_sk) select cs.cs_sold_time_sk, cs.cs_ship_date_sk, cs.cs_bill_customer_sk, cs.cs_bill_cdemo_sk, cs.cs_bill_hdemo_sk, cs.cs_bill_addr_sk, cs.cs_ship_customer_sk, cs.cs_ship_cdemo_sk, cs.cs_ship_hdemo_sk, cs.cs_ship_addr_sk, cs.cs_call_center_sk, cs.cs_catalog_page_sk, cs.cs_ship_mode_sk, cs.cs_warehouse_sk, cs.cs_item_sk, cs.cs_promo_sk, cs.cs_order_number, cs.cs_quantity, cs.cs_wholesale_cost, cs.cs_list_price, cs.cs_sales_price, cs.cs_ext_discount_amt, cs.cs_ext_sales_price, cs.cs_ext_wholesale_cost, cs.cs_ext_list_price, cs.cs_ext_tax, cs.cs_coupon_amt, cs.cs_ext_ship_cost, cs.cs_net_paid, cs.cs_net_paid_inc_tax, cs.cs_net_paid_inc_ship, cs.cs_net_paid_inc_ship_tax, cs.cs_net_profit, cs.cs_sold_date_sk where cs.cs_sold_date_sk is null sort by cs.cs_sold_date_sk ; ================================================ FILE: ddl-tpcds/bin_partitioned/customer.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists customer; create table customer stored as ${FILE} as select * from ${SOURCE}.customer CLUSTER BY c_customer_sk ; ================================================ FILE: ddl-tpcds/bin_partitioned/customer_address.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists customer_address; create table customer_address stored as ${FILE} as select * from ${SOURCE}.customer_address CLUSTER BY ca_address_sk ; ================================================ FILE: ddl-tpcds/bin_partitioned/customer_demographics.sql ================================================ create database if not exists ${DB}; use ${DB}; drop 
table if exists customer_demographics; create table customer_demographics stored as ${FILE} as select * from ${SOURCE}.customer_demographics; ================================================ FILE: ddl-tpcds/bin_partitioned/date_dim.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists date_dim; create table date_dim stored as ${FILE} as select * from ${SOURCE}.date_dim; ================================================ FILE: ddl-tpcds/bin_partitioned/household_demographics.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists household_demographics; create table household_demographics stored as ${FILE} as select * from ${SOURCE}.household_demographics; ================================================ FILE: ddl-tpcds/bin_partitioned/income_band.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists income_band; create table income_band stored as ${FILE} as select * from ${SOURCE}.income_band; ================================================ FILE: ddl-tpcds/bin_partitioned/inventory.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists inventory; create table inventory stored as ${FILE} as select * from ${SOURCE}.inventory CLUSTER BY inv_date_sk ; ================================================ FILE: ddl-tpcds/bin_partitioned/item.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists item; create table item stored as ${FILE} as select * from ${SOURCE}.item CLUSTER BY i_item_sk ; ================================================ FILE: ddl-tpcds/bin_partitioned/promotion.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists promotion; create table promotion stored as 
${FILE} as select * from ${SOURCE}.promotion; ================================================ FILE: ddl-tpcds/bin_partitioned/reason.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists reason; create table reason stored as ${FILE} as select * from ${SOURCE}.reason; ================================================ FILE: ddl-tpcds/bin_partitioned/ship_mode.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists ship_mode; create table ship_mode stored as ${FILE} as select * from ${SOURCE}.ship_mode; ================================================ FILE: ddl-tpcds/bin_partitioned/store.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists store; create table store stored as ${FILE} as select * from ${SOURCE}.store CLUSTER BY s_store_sk ; ================================================ FILE: ddl-tpcds/bin_partitioned/store_returns.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists store_returns; create table store_returns ( sr_return_time_sk bigint , sr_item_sk bigint , sr_customer_sk bigint , sr_cdemo_sk bigint , sr_hdemo_sk bigint , sr_addr_sk bigint , sr_store_sk bigint , sr_reason_sk bigint , sr_ticket_number bigint , sr_return_quantity int , sr_return_amt decimal(7,2) , sr_return_tax decimal(7,2) , sr_return_amt_inc_tax decimal(7,2) , sr_fee decimal(7,2) , sr_return_ship_cost decimal(7,2) , sr_refunded_cash decimal(7,2) , sr_reversed_charge decimal(7,2) , sr_store_credit decimal(7,2) , sr_net_loss decimal(7,2) ) partitioned by (sr_returned_date_sk bigint) stored as ${FILE}; from ${SOURCE}.store_returns sr insert overwrite table store_returns partition (sr_returned_date_sk) select sr.sr_return_time_sk, sr.sr_item_sk, sr.sr_customer_sk, sr.sr_cdemo_sk, sr.sr_hdemo_sk, sr.sr_addr_sk, 
sr.sr_store_sk, sr.sr_reason_sk, sr.sr_ticket_number, sr.sr_return_quantity, sr.sr_return_amt, sr.sr_return_tax, sr.sr_return_amt_inc_tax, sr.sr_fee, sr.sr_return_ship_cost, sr.sr_refunded_cash, sr.sr_reversed_charge, sr.sr_store_credit, sr.sr_net_loss, sr.sr_returned_date_sk where sr.sr_returned_date_sk is not null insert overwrite table store_returns partition (sr_returned_date_sk) select sr.sr_return_time_sk, sr.sr_item_sk, sr.sr_customer_sk, sr.sr_cdemo_sk, sr.sr_hdemo_sk, sr.sr_addr_sk, sr.sr_store_sk, sr.sr_reason_sk, sr.sr_ticket_number, sr.sr_return_quantity, sr.sr_return_amt, sr.sr_return_tax, sr.sr_return_amt_inc_tax, sr.sr_fee, sr.sr_return_ship_cost, sr.sr_refunded_cash, sr.sr_reversed_charge, sr.sr_store_credit, sr.sr_net_loss, sr.sr_returned_date_sk where sr.sr_returned_date_sk is null sort by sr.sr_returned_date_sk ================================================ FILE: ddl-tpcds/bin_partitioned/store_sales.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists store_sales; create table store_sales ( ss_sold_time_sk bigint , ss_item_sk bigint , ss_customer_sk bigint , ss_cdemo_sk bigint , ss_hdemo_sk bigint , ss_addr_sk bigint , ss_store_sk bigint , ss_promo_sk bigint , ss_ticket_number bigint , ss_quantity int , ss_wholesale_cost decimal(7,2) , ss_list_price decimal(7,2) , ss_sales_price decimal(7,2) , ss_ext_discount_amt decimal(7,2) , ss_ext_sales_price decimal(7,2) , ss_ext_wholesale_cost decimal(7,2) , ss_ext_list_price decimal(7,2) , ss_ext_tax decimal(7,2) , ss_coupon_amt decimal(7,2) , ss_net_paid decimal(7,2) , ss_net_paid_inc_tax decimal(7,2) , ss_net_profit decimal(7,2) ) partitioned by (ss_sold_date_sk bigint) stored as ${FILE}; from ${SOURCE}.store_sales ss insert overwrite table store_sales partition (ss_sold_date_sk) select ss.ss_sold_time_sk, ss.ss_item_sk, ss.ss_customer_sk, ss.ss_cdemo_sk, ss.ss_hdemo_sk, ss.ss_addr_sk, ss.ss_store_sk, ss.ss_promo_sk, 
ss.ss_ticket_number, ss.ss_quantity, ss.ss_wholesale_cost, ss.ss_list_price, ss.ss_sales_price, ss.ss_ext_discount_amt, ss.ss_ext_sales_price, ss.ss_ext_wholesale_cost, ss.ss_ext_list_price, ss.ss_ext_tax, ss.ss_coupon_amt, ss.ss_net_paid, ss.ss_net_paid_inc_tax, ss.ss_net_profit, ss.ss_sold_date_sk where ss.ss_sold_date_sk is not null insert overwrite table store_sales partition (ss_sold_date_sk) select ss.ss_sold_time_sk, ss.ss_item_sk, ss.ss_customer_sk, ss.ss_cdemo_sk, ss.ss_hdemo_sk, ss.ss_addr_sk, ss.ss_store_sk, ss.ss_promo_sk, ss.ss_ticket_number, ss.ss_quantity, ss.ss_wholesale_cost, ss.ss_list_price, ss.ss_sales_price, ss.ss_ext_discount_amt, ss.ss_ext_sales_price, ss.ss_ext_wholesale_cost, ss.ss_ext_list_price, ss.ss_ext_tax, ss.ss_coupon_amt, ss.ss_net_paid, ss.ss_net_paid_inc_tax, ss.ss_net_profit, ss.ss_sold_date_sk where ss.ss_sold_date_sk is null sort by ss.ss_sold_date_sk ; ================================================ FILE: ddl-tpcds/bin_partitioned/time_dim.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists time_dim; create table time_dim stored as ${FILE} as select * from ${SOURCE}.time_dim; ================================================ FILE: ddl-tpcds/bin_partitioned/warehouse.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists warehouse; create table warehouse stored as ${FILE} as select * from ${SOURCE}.warehouse; ================================================ FILE: ddl-tpcds/bin_partitioned/web_page.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists web_page; create table web_page stored as ${FILE} as select * from ${SOURCE}.web_page; ================================================ FILE: ddl-tpcds/bin_partitioned/web_returns.sql ================================================ create database if not exists ${DB}; use ${DB}; 
drop table if exists web_returns; create table web_returns ( wr_returned_time_sk bigint , wr_item_sk bigint , wr_refunded_customer_sk bigint , wr_refunded_cdemo_sk bigint , wr_refunded_hdemo_sk bigint , wr_refunded_addr_sk bigint , wr_returning_customer_sk bigint , wr_returning_cdemo_sk bigint , wr_returning_hdemo_sk bigint , wr_returning_addr_sk bigint , wr_web_page_sk bigint , wr_reason_sk bigint , wr_order_number bigint , wr_return_quantity int , wr_return_amt decimal(7,2) , wr_return_tax decimal(7,2) , wr_return_amt_inc_tax decimal(7,2) , wr_fee decimal(7,2) , wr_return_ship_cost decimal(7,2) , wr_refunded_cash decimal(7,2) , wr_reversed_charge decimal(7,2) , wr_account_credit decimal(7,2) , wr_net_loss decimal(7,2) ) partitioned by (wr_returned_date_sk bigint) stored as ${FILE}; from ${SOURCE}.web_returns wr insert overwrite table web_returns partition (wr_returned_date_sk) select wr.wr_returned_time_sk, wr.wr_item_sk, wr.wr_refunded_customer_sk, wr.wr_refunded_cdemo_sk, wr.wr_refunded_hdemo_sk, wr.wr_refunded_addr_sk, wr.wr_returning_customer_sk, wr.wr_returning_cdemo_sk, wr.wr_returning_hdemo_sk, wr.wr_returning_addr_sk, wr.wr_web_page_sk, wr.wr_reason_sk, wr.wr_order_number, wr.wr_return_quantity, wr.wr_return_amt, wr.wr_return_tax, wr.wr_return_amt_inc_tax, wr.wr_fee, wr.wr_return_ship_cost, wr.wr_refunded_cash, wr.wr_reversed_charge, wr.wr_account_credit, wr.wr_net_loss, wr.wr_returned_date_sk where wr.wr_returned_date_sk is not null insert overwrite table web_returns partition (wr_returned_date_sk) select wr.wr_returned_time_sk, wr.wr_item_sk, wr.wr_refunded_customer_sk, wr.wr_refunded_cdemo_sk, wr.wr_refunded_hdemo_sk, wr.wr_refunded_addr_sk, wr.wr_returning_customer_sk, wr.wr_returning_cdemo_sk, wr.wr_returning_hdemo_sk, wr.wr_returning_addr_sk, wr.wr_web_page_sk, wr.wr_reason_sk, wr.wr_order_number, wr.wr_return_quantity, wr.wr_return_amt, wr.wr_return_tax, wr.wr_return_amt_inc_tax, wr.wr_fee, wr.wr_return_ship_cost, wr.wr_refunded_cash, 
wr.wr_reversed_charge, wr.wr_account_credit, wr.wr_net_loss, wr.wr_returned_date_sk where wr.wr_returned_date_sk is null sort by wr.wr_returned_date_sk ; ================================================ FILE: ddl-tpcds/bin_partitioned/web_sales.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists web_sales; create table web_sales ( ws_sold_time_sk bigint, ws_ship_date_sk bigint, ws_item_sk bigint, ws_bill_customer_sk bigint, ws_bill_cdemo_sk bigint, ws_bill_hdemo_sk bigint, ws_bill_addr_sk bigint, ws_ship_customer_sk bigint, ws_ship_cdemo_sk bigint, ws_ship_hdemo_sk bigint, ws_ship_addr_sk bigint, ws_web_page_sk bigint, ws_web_site_sk bigint, ws_ship_mode_sk bigint, ws_warehouse_sk bigint, ws_promo_sk bigint, ws_order_number bigint, ws_quantity int, ws_wholesale_cost decimal(7,2), ws_list_price decimal(7,2), ws_sales_price decimal(7,2), ws_ext_discount_amt decimal(7,2), ws_ext_sales_price decimal(7,2), ws_ext_wholesale_cost decimal(7,2), ws_ext_list_price decimal(7,2), ws_ext_tax decimal(7,2), ws_coupon_amt decimal(7,2), ws_ext_ship_cost decimal(7,2), ws_net_paid decimal(7,2), ws_net_paid_inc_tax decimal(7,2), ws_net_paid_inc_ship decimal(7,2), ws_net_paid_inc_ship_tax decimal(7,2), ws_net_profit decimal(7,2) ) partitioned by (ws_sold_date_sk bigint) stored as ${FILE}; from ${SOURCE}.web_sales ws insert overwrite table web_sales partition (ws_sold_date_sk) select ws.ws_sold_time_sk, ws.ws_ship_date_sk, ws.ws_item_sk, ws.ws_bill_customer_sk, ws.ws_bill_cdemo_sk, ws.ws_bill_hdemo_sk, ws.ws_bill_addr_sk, ws.ws_ship_customer_sk, ws.ws_ship_cdemo_sk, ws.ws_ship_hdemo_sk, ws.ws_ship_addr_sk, ws.ws_web_page_sk, ws.ws_web_site_sk, ws.ws_ship_mode_sk, ws.ws_warehouse_sk, ws.ws_promo_sk, ws.ws_order_number, ws.ws_quantity, ws.ws_wholesale_cost, ws.ws_list_price, ws.ws_sales_price, ws.ws_ext_discount_amt, ws.ws_ext_sales_price, ws.ws_ext_wholesale_cost, ws.ws_ext_list_price, ws.ws_ext_tax, ws.ws_coupon_amt, 
ws.ws_ext_ship_cost, ws.ws_net_paid, ws.ws_net_paid_inc_tax, ws.ws_net_paid_inc_ship, ws.ws_net_paid_inc_ship_tax, ws.ws_net_profit, ws.ws_sold_date_sk where ws.ws_sold_date_sk is not null insert overwrite table web_sales partition (ws_sold_date_sk) select ws.ws_sold_time_sk, ws.ws_ship_date_sk, ws.ws_item_sk, ws.ws_bill_customer_sk, ws.ws_bill_cdemo_sk, ws.ws_bill_hdemo_sk, ws.ws_bill_addr_sk, ws.ws_ship_customer_sk, ws.ws_ship_cdemo_sk, ws.ws_ship_hdemo_sk, ws.ws_ship_addr_sk, ws.ws_web_page_sk, ws.ws_web_site_sk, ws.ws_ship_mode_sk, ws.ws_warehouse_sk, ws.ws_promo_sk, ws.ws_order_number, ws.ws_quantity, ws.ws_wholesale_cost, ws.ws_list_price, ws.ws_sales_price, ws.ws_ext_discount_amt, ws.ws_ext_sales_price, ws.ws_ext_wholesale_cost, ws.ws_ext_list_price, ws.ws_ext_tax, ws.ws_coupon_amt, ws.ws_ext_ship_cost, ws.ws_net_paid, ws.ws_net_paid_inc_tax, ws.ws_net_paid_inc_ship, ws.ws_net_paid_inc_ship_tax, ws.ws_net_profit, ws.ws_sold_date_sk where ws.ws_sold_date_sk is null sort by ws.ws_sold_date_sk ; ================================================ FILE: ddl-tpcds/bin_partitioned/web_site.sql ================================================ create database if not exists ${DB}; use ${DB}; drop table if exists web_site; create table web_site stored as ${FILE} as select * from ${SOURCE}.web_site; ================================================ FILE: ddl-tpcds/text/alltables.sql ================================================ create database if not exists ${DB}; use ${DB}; -- Table drop table if exists store_sales; create external table if not exists store_sales( ss_sold_date_sk bigint , ss_sold_time_sk bigint , ss_item_sk bigint , ss_customer_sk bigint , ss_cdemo_sk bigint , ss_hdemo_sk bigint , ss_addr_sk bigint , ss_store_sk bigint , ss_promo_sk bigint , ss_ticket_number bigint , ss_quantity int , ss_wholesale_cost decimal(7,2) , ss_list_price decimal(7,2) , ss_sales_price decimal(7,2) , ss_ext_discount_amt decimal(7,2) , ss_ext_sales_price decimal(7,2) , 
ss_ext_wholesale_cost decimal(7,2) , ss_ext_list_price decimal(7,2) , ss_ext_tax decimal(7,2) , ss_coupon_amt decimal(7,2) , ss_net_paid decimal(7,2) , ss_net_paid_inc_tax decimal(7,2) , ss_net_profit decimal(7,2) ) row format delimited fields terminated by '|' location '${LOCATION}/store_sales' ; -- Table drop table if exists store_returns; create external table if not exists store_returns( sr_returned_date_sk bigint , sr_return_time_sk bigint , sr_item_sk bigint , sr_customer_sk bigint , sr_cdemo_sk bigint , sr_hdemo_sk bigint , sr_addr_sk bigint , sr_store_sk bigint , sr_reason_sk bigint , sr_ticket_number bigint , sr_return_quantity int , sr_return_amt decimal(7,2) , sr_return_tax decimal(7,2) , sr_return_amt_inc_tax decimal(7,2) , sr_fee decimal(7,2) , sr_return_ship_cost decimal(7,2) , sr_refunded_cash decimal(7,2) , sr_reversed_charge decimal(7,2) , sr_store_credit decimal(7,2) , sr_net_loss decimal(7,2) ) row format delimited fields terminated by '|' location '${LOCATION}/store_returns' ; -- Table drop table if exists catalog_sales; create external table if not exists catalog_sales( cs_sold_date_sk bigint , cs_sold_time_sk bigint , cs_ship_date_sk bigint , cs_bill_customer_sk bigint , cs_bill_cdemo_sk bigint , cs_bill_hdemo_sk bigint , cs_bill_addr_sk bigint , cs_ship_customer_sk bigint , cs_ship_cdemo_sk bigint , cs_ship_hdemo_sk bigint , cs_ship_addr_sk bigint , cs_call_center_sk bigint , cs_catalog_page_sk bigint , cs_ship_mode_sk bigint , cs_warehouse_sk bigint , cs_item_sk bigint , cs_promo_sk bigint , cs_order_number bigint , cs_quantity int , cs_wholesale_cost decimal(7,2) , cs_list_price decimal(7,2) , cs_sales_price decimal(7,2) , cs_ext_discount_amt decimal(7,2) , cs_ext_sales_price decimal(7,2) , cs_ext_wholesale_cost decimal(7,2) , cs_ext_list_price decimal(7,2) , cs_ext_tax decimal(7,2) , cs_coupon_amt decimal(7,2) , cs_ext_ship_cost decimal(7,2) , cs_net_paid decimal(7,2) , cs_net_paid_inc_tax decimal(7,2) , cs_net_paid_inc_ship decimal(7,2) , 
cs_net_paid_inc_ship_tax decimal(7,2) , cs_net_profit decimal(7,2) ) row format delimited fields terminated by '|' location '${LOCATION}/catalog_sales' ; -- Table drop table if exists catalog_returns; create external table if not exists catalog_returns( cr_returned_date_sk bigint , cr_returned_time_sk bigint , cr_item_sk bigint , cr_refunded_customer_sk bigint , cr_refunded_cdemo_sk bigint , cr_refunded_hdemo_sk bigint , cr_refunded_addr_sk bigint , cr_returning_customer_sk bigint , cr_returning_cdemo_sk bigint , cr_returning_hdemo_sk bigint , cr_returning_addr_sk bigint , cr_call_center_sk bigint , cr_catalog_page_sk bigint , cr_ship_mode_sk bigint , cr_warehouse_sk bigint , cr_reason_sk bigint , cr_order_number bigint , cr_return_quantity int , cr_return_amount decimal(7,2) , cr_return_tax decimal(7,2) , cr_return_amt_inc_tax decimal(7,2) , cr_fee decimal(7,2) , cr_return_ship_cost decimal(7,2) , cr_refunded_cash decimal(7,2) , cr_reversed_charge decimal(7,2) , cr_store_credit decimal(7,2) , cr_net_loss decimal(7,2) ) row format delimited fields terminated by '|' location '${LOCATION}/catalog_returns' ; -- Table drop table if exists web_sales; create external table if not exists web_sales( ws_sold_date_sk bigint , ws_sold_time_sk bigint , ws_ship_date_sk bigint , ws_item_sk bigint , ws_bill_customer_sk bigint , ws_bill_cdemo_sk bigint , ws_bill_hdemo_sk bigint , ws_bill_addr_sk bigint , ws_ship_customer_sk bigint , ws_ship_cdemo_sk bigint , ws_ship_hdemo_sk bigint , ws_ship_addr_sk bigint , ws_web_page_sk bigint , ws_web_site_sk bigint , ws_ship_mode_sk bigint , ws_warehouse_sk bigint , ws_promo_sk bigint , ws_order_number bigint , ws_quantity int , ws_wholesale_cost decimal(7,2) , ws_list_price decimal(7,2) , ws_sales_price decimal(7,2) , ws_ext_discount_amt decimal(7,2) , ws_ext_sales_price decimal(7,2) , ws_ext_wholesale_cost decimal(7,2) , ws_ext_list_price decimal(7,2) , ws_ext_tax decimal(7,2) , ws_coupon_amt decimal(7,2) , ws_ext_ship_cost decimal(7,2) , 
ws_net_paid decimal(7,2) , ws_net_paid_inc_tax decimal(7,2) , ws_net_paid_inc_ship decimal(7,2) , ws_net_paid_inc_ship_tax decimal(7,2) , ws_net_profit decimal(7,2) ) row format delimited fields terminated by '|' location '${LOCATION}/web_sales' ; -- Table drop table if exists web_returns; create external table if not exists web_returns( wr_returned_date_sk bigint , wr_returned_time_sk bigint , wr_item_sk bigint , wr_refunded_customer_sk bigint , wr_refunded_cdemo_sk bigint , wr_refunded_hdemo_sk bigint , wr_refunded_addr_sk bigint , wr_returning_customer_sk bigint , wr_returning_cdemo_sk bigint , wr_returning_hdemo_sk bigint , wr_returning_addr_sk bigint , wr_web_page_sk bigint , wr_reason_sk bigint , wr_order_number bigint , wr_return_quantity int , wr_return_amt decimal(7,2) , wr_return_tax decimal(7,2) , wr_return_amt_inc_tax decimal(7,2) , wr_fee decimal(7,2) , wr_return_ship_cost decimal(7,2) , wr_refunded_cash decimal(7,2) , wr_reversed_charge decimal(7,2) , wr_account_credit decimal(7,2) , wr_net_loss decimal(7,2) ) row format delimited fields terminated by '|' location '${LOCATION}/web_returns' ; -- Table drop table if exists inventory; create external table if not exists inventory( inv_date_sk bigint , inv_item_sk bigint , inv_warehouse_sk bigint , inv_quantity_on_hand int ) row format delimited fields terminated by '|' location '${LOCATION}/inventory'; -- Table drop table if exists store; create external table if not exists store( s_store_sk bigint , s_store_id char(16) , s_rec_start_date date , s_rec_end_date date , s_closed_date_sk bigint , s_store_name varchar(50) , s_number_employees int , s_floor_space int , s_hours char(20) , S_manager varchar(40) , S_market_id int , S_geography_class varchar(100) , S_market_desc varchar(100) , s_market_manager varchar(40) , s_division_id int , s_division_name varchar(50) , s_company_id int , s_company_name varchar(50) , s_street_number varchar(10) , s_street_name varchar(60) , s_street_type char(15) , 
s_suite_number char(10) , s_city varchar(60) , s_county varchar(30) , s_state char(2) , s_zip char(10) , s_country varchar(20) , s_gmt_offset decimal(5,2) , s_tax_percentage decimal(5,2) ) row format delimited fields terminated by '|' location '${LOCATION}/store' tblproperties ('serialization.null.format'=''); -- Table drop table if exists call_center; create external table if not exists call_center( cc_call_center_sk bigint , cc_call_center_id char(16) , cc_rec_start_date date , cc_rec_end_date date , cc_closed_date_sk bigint , cc_open_date_sk bigint , cc_name varchar(50) , cc_class varchar(50) , cc_employees int , cc_sq_ft int , cc_hours char(20) , cc_manager varchar(40) , cc_mkt_id int , cc_mkt_class char(50) , cc_mkt_desc varchar(100) , cc_market_manager varchar(40) , cc_division int , cc_division_name varchar(50) , cc_company int , cc_company_name char(50) , cc_street_number char(10) , cc_street_name varchar(60) , cc_street_type char(15) , cc_suite_number char(10) , cc_city varchar(60) , cc_county varchar(30) , cc_state char(2) , cc_zip char(10) , cc_country varchar(20) , cc_gmt_offset decimal(5,2) , cc_tax_percentage decimal(5,2) ) row format delimited fields terminated by '|' location '${LOCATION}/call_center' tblproperties ('serialization.null.format'=''); -- Table drop table if exists catalog_page; create external table if not exists catalog_page( cp_catalog_page_sk bigint , cp_catalog_page_id char(16) , cp_start_date_sk bigint , cp_end_date_sk bigint , cp_department varchar(50) , cp_catalog_number int , cp_catalog_page_number int , cp_description varchar(100) , cp_type varchar(100) ) row format delimited fields terminated by '|' location '${LOCATION}/catalog_page' tblproperties ('serialization.null.format'=''); -- Table drop table if exists web_site; create external table if not exists web_site( web_site_sk bigint , web_site_id char(16) , web_rec_start_date date , web_rec_end_date date , web_name varchar(50) , web_open_date_sk bigint , web_close_date_sk 
bigint , web_class varchar(50) , web_manager varchar(40) , web_mkt_id int , web_mkt_class varchar(50) , web_mkt_desc varchar(100) , web_market_manager varchar(40) , web_company_id int , web_company_name char(50) , web_street_number char(10) , web_street_name varchar(60) , web_street_type char(15) , web_suite_number char(10) , web_city varchar(60) , web_county varchar(30) , web_state char(2) , web_zip char(10) , web_country varchar(20) , web_gmt_offset decimal(5,2) , web_tax_percentage decimal(5,2) ) row format delimited fields terminated by '|' location '${LOCATION}/web_site' tblproperties ('serialization.null.format'=''); -- Table drop table if exists web_page; create external table if not exists web_page( wp_web_page_sk bigint , wp_web_page_id char(16) , wp_rec_start_date date , wp_rec_end_date date , wp_creation_date_sk bigint , wp_access_date_sk bigint , wp_autogen_flag char(1) , wp_customer_sk bigint , wp_url varchar(100) , wp_type char(50) , wp_char_count int , wp_link_count int , wp_image_count int , wp_max_ad_count int ) row format delimited fields terminated by '|' location '${LOCATION}/web_page' tblproperties ('serialization.null.format'=''); -- Table drop table if exists warehouse; create external table if not exists warehouse( w_warehouse_sk bigint , w_warehouse_id char(16) , w_warehouse_name varchar(20) , w_warehouse_sq_ft int , w_street_number char(10) , w_street_name varchar(60) , w_street_type char(15) , w_suite_number char(10) , w_city varchar(60) , w_county varchar(30) , w_state char(2) , w_zip char(10) , w_country varchar(20) , w_gmt_offset decimal(5,2) ) row format delimited fields terminated by '|' location '${LOCATION}/warehouse' tblproperties ('serialization.null.format'=''); -- Table drop table if exists customer; create external table if not exists customer( c_customer_sk bigint , c_customer_id char(16) , c_current_cdemo_sk bigint , c_current_hdemo_sk bigint , c_current_addr_sk bigint , c_first_shipto_date_sk bigint , c_first_sales_date_sk 
bigint , c_salutation char(10) , c_first_name char(20) , c_last_name char(30) , c_preferred_cust_flag char(1) , c_birth_day int , c_birth_month int , c_birth_year int , c_birth_country varchar(20) , c_login char(13) , c_email_address char(50) , c_last_review_date_sk bigint ) row format delimited fields terminated by '|' location '${LOCATION}/customer' tblproperties ('serialization.null.format'=''); -- Table drop table if exists customer_address; create external table if not exists customer_address( ca_address_sk bigint , ca_address_id char(16) , ca_street_number char(10) , ca_street_name varchar(60) , ca_street_type char(15) , ca_suite_number char(10) , ca_city varchar(60) , ca_county varchar(30) , ca_state char(2) , ca_zip char(10) , ca_country varchar(20) , ca_gmt_offset decimal(5,2) , ca_location_type char(20) ) row format delimited fields terminated by '|' location '${LOCATION}/customer_address' tblproperties ('serialization.null.format'=''); -- Table drop table if exists customer_demographics; create external table if not exists customer_demographics( cd_demo_sk bigint , cd_gender char(1) , cd_marital_status char(1) , cd_education_status char(20) , cd_purchase_estimate int , cd_credit_rating char(10) , cd_dep_count int , cd_dep_employed_count int , cd_dep_college_count int ) row format delimited fields terminated by '|' location '${LOCATION}/customer_demographics' tblproperties ('serialization.null.format'=''); -- Table drop table if exists date_dim; create external table if not exists date_dim( d_date_sk bigint , d_date_id char(16) , d_date date , d_month_seq int , d_week_seq int , d_quarter_seq int , d_year int , d_dow int , d_moy int , d_dom int , d_qoy int , d_fy_year int , d_fy_quarter_seq int , d_fy_week_seq int , d_day_name char(9) , d_quarter_name char(6) , d_holiday char(1) , d_weekend char(1) , d_following_holiday char(1) , d_first_dom int , d_last_dom int , d_same_day_ly int , d_same_day_lq int , d_current_day char(1) , d_current_week char(1) , 
d_current_month char(1) , d_current_quarter char(1) , d_current_year char(1) ) row format delimited fields terminated by '|' location '${LOCATION}/date_dim' tblproperties ('serialization.null.format'=''); -- Table drop table if exists household_demographics; create external table if not exists household_demographics( hd_demo_sk bigint , hd_income_band_sk bigint , hd_buy_potential char(15) , hd_dep_count int , hd_vehicle_count int ) row format delimited fields terminated by '|' location '${LOCATION}/household_demographics' tblproperties ('serialization.null.format'=''); -- Table drop table if exists item; create external table if not exists item( i_item_sk bigint , i_item_id char(16) , i_rec_start_date date , i_rec_end_date date , i_item_desc varchar(200) , i_current_price decimal(7,2) , i_wholesale_cost decimal(7,2) , i_brand_id int , i_brand char(50) , i_class_id int , i_class char(50) , i_category_id int , i_category char(50) , i_manufact_id int , i_manufact char(50) , i_size char(20) , i_formulation char(20) , i_color char(20) , i_units char(10) , i_container char(10) , i_manager_id int , i_product_name char(50) ) row format delimited fields terminated by '|' location '${LOCATION}/item' tblproperties ('serialization.null.format'=''); -- Table drop table if exists income_band; create external table if not exists income_band( ib_income_band_sk bigint , ib_lower_bound int , ib_upper_bound int ) row format delimited fields terminated by '|' location '${LOCATION}/income_band'; -- Table drop table if exists promotion; create external table if not exists promotion( p_promo_sk bigint , p_promo_id char(16) , p_start_date_sk bigint , p_end_date_sk bigint , p_item_sk bigint , p_cost decimal(15,2) , p_response_target int , p_promo_name char(50) , p_channel_dmail char(1) , p_channel_email char(1) , p_channel_catalog char(1) , p_channel_tv char(1) , p_channel_radio char(1) , p_channel_press char(1) , p_channel_event char(1) , p_channel_demo char(1) , p_channel_details 
varchar(100)
, p_purpose char(15)
, p_discount_active char(1)
)
row format delimited fields terminated by '|'
location '${LOCATION}/promotion'
tblproperties ('serialization.null.format'='');

-- Table: reason
-- External table over the '|'-delimited text files under ${LOCATION}/reason.
-- The comment sits on its own line so it cannot comment out the statements below.
drop table if exists reason;
create external table if not exists reason(
  r_reason_sk bigint
, r_reason_id char(16)
, r_reason_desc char(100)
)
row format delimited fields terminated by '|'
location '${LOCATION}/reason'
tblproperties ('serialization.null.format'='');

-- Table: ship_mode
-- External table over the '|'-delimited text files under ${LOCATION}/ship_mode.
drop table if exists ship_mode;
create external table if not exists ship_mode(
  sm_ship_mode_sk bigint
, sm_ship_mode_id char(16)
, sm_type char(30)
, sm_code char(10)
, sm_carrier char(20)
, sm_contract char(20)
)
row format delimited fields terminated by '|'
location '${LOCATION}/ship_mode'
tblproperties ('serialization.null.format'='');

-- Table: time_dim
-- External table over the '|'-delimited text files under ${LOCATION}/time_dim.
drop table if exists time_dim;
create external table if not exists time_dim(
  t_time_sk bigint
, t_time_id char(16)
, t_time int
, t_hour int
, t_minute int
, t_second int
, t_am_pm char(2)
, t_shift char(20)
, t_sub_shift char(20)
, t_meal_time char(20)
)
row format delimited fields terminated by '|'
location '${LOCATION}/time_dim'
tblproperties ('serialization.null.format'='');

================================================
FILE: ddl-tpcds/text/analyze_everything.sql
================================================
analyze table call_center compute statistics for columns;
analyze table catalog_page compute statistics for columns;
analyze table catalog_returns compute statistics for columns;
analyze table catalog_sales compute statistics for columns;
analyze table customer compute statistics for columns;
analyze table customer_address compute statistics for columns;
analyze table customer_demographics compute statistics for columns;
analyze table date_dim compute statistics for columns;
analyze table household_demographics compute statistics for columns;
analyze table income_band compute statistics for columns;
analyze table inventory compute statistics
for columns;
analyze table item compute statistics for columns;
analyze table promotion compute statistics for columns;
analyze table reason compute statistics for columns;
analyze table ship_mode compute statistics for columns;
analyze table store compute statistics for columns;
analyze table store_returns compute statistics for columns;
analyze table store_sales compute statistics for columns;
analyze table time_dim compute statistics for columns;
analyze table warehouse compute statistics for columns;
analyze table web_page compute statistics for columns;
analyze table web_returns compute statistics for columns;
analyze table web_sales compute statistics for columns;
analyze table web_site compute statistics for columns;

================================================
FILE: ddl-tpch/bin_flat/alltables.sql
================================================
-- External text tables over the '|'-delimited TPC-H files under ${LOCATION}.
-- Each table is dropped first, so the script can be re-run.
create database if not exists ${DB};
use ${DB};

drop table if exists lineitem;
create external table lineitem
(L_ORDERKEY BIGINT,
 L_PARTKEY BIGINT,
 L_SUPPKEY BIGINT,
 L_LINENUMBER INT,
 L_QUANTITY DOUBLE,
 L_EXTENDEDPRICE DOUBLE,
 L_DISCOUNT DOUBLE,
 L_TAX DOUBLE,
 L_RETURNFLAG STRING,
 L_LINESTATUS STRING,
 L_SHIPDATE STRING,
 L_COMMITDATE STRING,
 L_RECEIPTDATE STRING,
 L_SHIPINSTRUCT STRING,
 L_SHIPMODE STRING,
 L_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION '${LOCATION}/lineitem';

drop table if exists part;
create external table part
(P_PARTKEY BIGINT,
 P_NAME STRING,
 P_MFGR STRING,
 P_BRAND STRING,
 P_TYPE STRING,
 P_SIZE INT,
 P_CONTAINER STRING,
 P_RETAILPRICE DOUBLE,
 P_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION '${LOCATION}/part/';

drop table if exists supplier;
create external table supplier
(S_SUPPKEY BIGINT,
 S_NAME STRING,
 S_ADDRESS STRING,
 S_NATIONKEY BIGINT,
 S_PHONE STRING,
 S_ACCTBAL DOUBLE,
 S_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION '${LOCATION}/supplier/';

drop table if exists
partsupp;
create external table partsupp
(PS_PARTKEY BIGINT,
 PS_SUPPKEY BIGINT,
 PS_AVAILQTY INT,
 PS_SUPPLYCOST DOUBLE,
 PS_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION '${LOCATION}/partsupp';

drop table if exists nation;
create external table nation
(N_NATIONKEY BIGINT,
 N_NAME STRING,
 N_REGIONKEY BIGINT,
 N_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION '${LOCATION}/nation';

drop table if exists region;
create external table region
(R_REGIONKEY BIGINT,
 R_NAME STRING,
 R_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION '${LOCATION}/region';

drop table if exists customer;
create external table customer
(C_CUSTKEY BIGINT,
 C_NAME STRING,
 C_ADDRESS STRING,
 C_NATIONKEY BIGINT,
 C_PHONE STRING,
 C_ACCTBAL DOUBLE,
 C_MKTSEGMENT STRING,
 C_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION '${LOCATION}/customer';

drop table if exists orders;
create external table orders
(O_ORDERKEY BIGINT,
 O_CUSTKEY BIGINT,
 O_ORDERSTATUS STRING,
 O_TOTALPRICE DOUBLE,
 O_ORDERDATE STRING,
 O_ORDERPRIORITY STRING,
 O_CLERK STRING,
 O_SHIPPRIORITY INT,
 O_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION '${LOCATION}/orders';

================================================
FILE: ddl-tpch/bin_flat/analyze.sql
================================================
-- Column statistics for the eight TPC-H tables.
analyze table nation compute statistics for columns;
analyze table region compute statistics for columns;
analyze table supplier compute statistics for columns;
analyze table part compute statistics for columns;
analyze table partsupp compute statistics for columns;
analyze table customer compute statistics for columns;
analyze table orders compute statistics for columns;
analyze table lineitem compute statistics for columns;

================================================
FILE: ddl-tpch/bin_flat/customer.sql
================================================
-- Rebuild ${DB}.customer in ${FILE} format from ${SOURCE}.customer.
create database if not exists ${DB};
use ${DB};

drop table if exists customer;

create table customer
stored as ${FILE}
as select * from ${SOURCE}.customer
cluster by C_MKTSEGMENT
;

================================================
FILE: ddl-tpch/bin_flat/lineitem.sql
================================================
-- Rebuild ${DB}.lineitem in ${FILE} format from ${SOURCE}.lineitem.
create database if not exists ${DB};
use ${DB};

drop table if exists lineitem;

create table lineitem
stored as ${FILE}
as select * from ${SOURCE}.lineitem
cluster by L_SHIPDATE
;

================================================
FILE: ddl-tpch/bin_flat/nation.sql
================================================
-- Rebuild ${DB}.nation in ${FILE} format from the distinct rows of ${SOURCE}.nation.
create database if not exists ${DB};
use ${DB};

drop table if exists nation;

create table nation
stored as ${FILE}
as select distinct * from ${SOURCE}.nation;

================================================
FILE: ddl-tpch/bin_flat/orders.sql
================================================
-- Rebuild ${DB}.orders in ${FILE} format from ${SOURCE}.orders.
create database if not exists ${DB};
use ${DB};

drop table if exists orders;

create table orders
stored as ${FILE}
as select * from ${SOURCE}.orders
cluster by o_orderdate
;

================================================
FILE: ddl-tpch/bin_flat/part.sql
================================================
-- Rebuild ${DB}.part in ${FILE} format from ${SOURCE}.part.
create database if not exists ${DB};
use ${DB};

drop table if exists part;

create table part
stored as ${FILE}
as select * from ${SOURCE}.part
cluster by p_brand
;

================================================
FILE: ddl-tpch/bin_flat/partsupp.sql
================================================
-- Rebuild ${DB}.partsupp in ${FILE} format from ${SOURCE}.partsupp.
create database if not exists ${DB};
use ${DB};

drop table if exists partsupp;

create table partsupp
stored as ${FILE}
as select * from ${SOURCE}.partsupp
cluster by PS_SUPPKEY
;

================================================
FILE: ddl-tpch/bin_flat/region.sql
================================================
-- Rebuild ${DB}.region in ${FILE} format from the distinct rows of ${SOURCE}.region.
create database if not exists ${DB};
use ${DB};

drop table if exists region;

create table region
stored as ${FILE}
as select distinct * from ${SOURCE}.region;

================================================
FILE: ddl-tpch/bin_flat/supplier.sql
================================================
-- Rebuild ${DB}.supplier in ${FILE} format from ${SOURCE}.supplier.
create database if not exists ${DB};
use ${DB};

drop table if exists supplier;

create table supplier
stored as ${FILE}
as select * from ${SOURCE}.supplier
cluster by s_nationkey, s_suppkey
;

================================================
FILE: ddl-tpch/bin_partitioned/analyze.sql
================================================
-- Column statistics for the eight TPC-H tables.
analyze table nation compute statistics for columns;
analyze table region compute statistics for columns;
analyze table supplier compute statistics for columns;
analyze table part compute statistics for columns;
analyze table partsupp compute statistics for columns;
analyze table customer compute statistics for columns;
analyze table orders compute statistics for columns;
analyze table lineitem compute statistics for columns;

================================================
FILE: ddl-tpch/bin_partitioned/customer.sql
================================================
-- Rebuild ${DB}.customer in ${FILE} format from ${SOURCE}.customer,
-- with the ORC bloom-filter and ZLIB compression table properties set.
create database if not exists ${DB};
use ${DB};

drop table if exists customer;

create table customer
stored as ${FILE}
TBLPROPERTIES('orc.bloom.filter.columns'='*','orc.compress'='ZLIB')
as select * from ${SOURCE}.customer
cluster by C_MKTSEGMENT
;

================================================
FILE: ddl-tpch/bin_partitioned/lineitem.sql
================================================
-- Rebuild ${DB}.lineitem partitioned by L_SHIPDATE; the partition column is
-- declared in "partitioned by" and therefore absent from the column list.
create database if not exists ${DB};
use ${DB};

drop table if exists lineitem;

create table lineitem
(L_ORDERKEY BIGINT,
 L_PARTKEY BIGINT,
 L_SUPPKEY BIGINT,
 L_LINENUMBER INT,
 L_QUANTITY DOUBLE,
 L_EXTENDEDPRICE DOUBLE,
 L_DISCOUNT DOUBLE,
 L_TAX DOUBLE,
 L_RETURNFLAG STRING,
 L_LINESTATUS STRING,
 L_COMMITDATE STRING,
 L_RECEIPTDATE STRING,
 L_SHIPINSTRUCT STRING,
 L_SHIPMODE STRING,
 L_COMMENT STRING)
partitioned by (L_SHIPDATE STRING)
stored as ${FILE}
;

ALTER TABLE lineitem SET
TBLPROPERTIES('orc.bloom.filter.columns'='*','orc.compress'='ZLIB');

-- The partition column (L_SHIPDATE) is selected last, as the dynamic
-- partition insert below requires.
INSERT OVERWRITE TABLE lineitem Partition(L_SHIPDATE)
select
  L_ORDERKEY ,
  L_PARTKEY ,
  L_SUPPKEY ,
  L_LINENUMBER ,
  L_QUANTITY ,
  L_EXTENDEDPRICE ,
  L_DISCOUNT ,
  L_TAX ,
  L_RETURNFLAG ,
  L_LINESTATUS ,
  L_COMMITDATE ,
  L_RECEIPTDATE ,
  L_SHIPINSTRUCT ,
  L_SHIPMODE ,
  L_COMMENT ,
  L_SHIPDATE
from ${SOURCE}.lineitem
;

================================================
FILE: ddl-tpch/bin_partitioned/nation.sql
================================================
-- Rebuild ${DB}.nation in ${FILE} format from the distinct rows of ${SOURCE}.nation.
create database if not exists ${DB};
use ${DB};

drop table if exists nation;

create table nation
stored as ${FILE}
TBLPROPERTIES('orc.bloom.filter.columns'='*','orc.compress'='ZLIB')
as select distinct * from ${SOURCE}.nation;

================================================
FILE: ddl-tpch/bin_partitioned/orders.sql
================================================
-- Rebuild ${DB}.orders partitioned by O_ORDERDATE; the partition column is
-- declared in "partitioned by" and selected last in the insert.
create database if not exists ${DB};
use ${DB};

drop table if exists orders;

create table orders
(O_ORDERKEY BIGINT,
 O_CUSTKEY BIGINT,
 O_ORDERSTATUS STRING,
 O_TOTALPRICE DOUBLE,
 O_ORDERPRIORITY STRING,
 O_CLERK STRING,
 O_SHIPPRIORITY INT,
 O_COMMENT STRING)
partitioned by (O_ORDERDATE STRING)
stored as ${FILE}
;

ALTER TABLE orders SET TBLPROPERTIES('orc.bloom.filter.columns'='*','orc.compress'='ZLIB');

INSERT OVERWRITE TABLE orders partition(O_ORDERDATE)
select
  O_ORDERKEY ,
  O_CUSTKEY ,
  O_ORDERSTATUS ,
  O_TOTALPRICE ,
  O_ORDERPRIORITY ,
  O_CLERK ,
  O_SHIPPRIORITY ,
  O_COMMENT,
  O_ORDERDATE
from ${SOURCE}.orders
;

================================================
FILE: ddl-tpch/bin_partitioned/part.sql
================================================
-- Rebuild ${DB}.part in ${FILE} format from ${SOURCE}.part.
create database if not exists ${DB};
use ${DB};

drop table if exists part;

create table part
stored as ${FILE}
TBLPROPERTIES('orc.bloom.filter.columns'='*','orc.compress'='ZLIB')
as select * from ${SOURCE}.part
cluster by p_brand
;

================================================
FILE: ddl-tpch/bin_partitioned/partsupp.sql
================================================
create database if not exists ${DB};
use ${DB};

drop table if exists partsupp;

create table partsupp
stored as ${FILE}
TBLPROPERTIES('orc.bloom.filter.columns'='*','orc.compress'='ZLIB')
as select * from ${SOURCE}.partsupp
cluster by PS_SUPPKEY
;

================================================
FILE: ddl-tpch/bin_partitioned/region.sql
================================================
create database if not exists ${DB};
use ${DB};

drop table if exists region;

create table region
stored as ${FILE}
TBLPROPERTIES('orc.bloom.filter.columns'='*','orc.compress'='ZLIB')
as select distinct * from ${SOURCE}.region;

================================================
FILE: ddl-tpch/bin_partitioned/supplier.sql
================================================
create database if not exists ${DB};
use ${DB};

drop table if exists supplier;

create table supplier
stored as ${FILE}
TBLPROPERTIES('orc.bloom.filter.columns'='*','orc.compress'='ZLIB')
as select * from ${SOURCE}.supplier
cluster by s_nationkey, s_suppkey
;

================================================
FILE: runSuite.pl
================================================
#!/usr/bin/perl
#
# Runs every *.sql file of one sample-query suite through the hive CLI and
# prints a CSV report (filename,status,time,rows) on stdout.  The hive output
# of each query is also written to "<query file>.log" next to the query.
#
# Arguments:
#   suite  - "tpcds" or "tpch" (required)
#   scale  - scale factor used in the database name (optional, default 2)

use strict;
use warnings;
use File::Basename;

# PROTOTYPES
sub dieWithUsage(;$);

# GLOBALS
my $SCRIPT_NAME = basename( __FILE__ );
my $SCRIPT_PATH = dirname( __FILE__ );

# MAIN
dieWithUsage("one or more parameters not defined") unless @ARGV >= 1;
my $suite = shift;
my $scale = shift || 2;
dieWithUsage("suite name required") unless $suite eq "tpcds" or $suite eq "tpch";
# The scale ends up inside a database name and a shell command line, so only
# plain digits are accepted.
dieWithUsage("scale must be a positive integer") unless $scale =~ /\A\d+\z/;

# The queries are globbed relative to the suite directory.  Stop if either
# chdir fails rather than globbing whatever directory we happen to be in.
my $queryDir = $suite eq 'tpcds' ? 'sample-queries-tpcds' : 'sample-queries-tpch';
chdir $SCRIPT_PATH
    or die "${SCRIPT_NAME}:: ERROR: cannot chdir to '$SCRIPT_PATH': $!\n";
chdir $queryDir
    or die "${SCRIPT_NAME}:: ERROR: cannot chdir to '$queryDir': $!\n";
my @queries = glob '*.sql';

# Database that holds the generated data for each suite at the chosen scale.
my $db = {
    'tpcds' => "tpcds_bin_partitioned_orc_$scale",
    'tpch'  => "tpch_flat_orc_$scale"
};

print "filename,status,time,rows\n";
for my $query ( @queries ) {
    my $logname = "$query.log";
    my $cmd="echo 'use $db->{$suite}; source $query;' | hive -i testbench.settings 2>&1 | tee $logname";
#   my $cmd="cat $query.log";
    #print $cmd ; exit;

    # Wall-clock time of the whole hive invocation, start-up included.
    my $hiveStart = time();

    my @hiveoutput=`$cmd`;
    # NOTE: $? is the status the shell reports for the pipeline, i.e. that of
    # its last stage (tee), not of hive itself.  Query failures are detected
    # from the hive output below.
    die "${SCRIPT_NAME}:: ERROR: hive command unexpectedly exited \$? = '$?', \$! = '$!'" if $?;

    my $hiveEnd = time();
    my $hiveTime = $hiveEnd - $hiveStart;
    # One CSV row is printed per matching output line, so a query file with
    # several statements can produce several rows.
    foreach my $line ( @hiveoutput ) {
        if( $line =~ /Time taken:\s+([\d\.]+)\s+seconds,\s+Fetched:\s+(\d+)\s+row/ ) {
            print "$query,success,$hiveTime,$2\n";
        } elsif(
            $line =~ /^FAILED: /
            # || /Task failed!/
            ) {
            print "$query,failed,$hiveTime\n";
        } # end if
    } # end foreach
} # end for

# dieWithUsage( [$err] )
#   Prints the optional error message followed by the usage text on STDERR and
#   exits with status 1.
#
# NOTE(review): in this extract everything between the here-doc operator and
# the tail of sample-queries-tpcds/query1.sql was lost, including the original
# usage text and the end of this sub.  The text below is a minimal
# reconstruction based on the argument handling above; confirm it against the
# upstream file.  The SQL fragment that follows the sub is the surviving tail
# of query1.sql.
sub dieWithUsage(;$) {
    my $err = shift || '';
    if( $err ne '' ) {
        chomp $err;
        $err = "ERROR: $err\n\n";
    } # end if

    print STDERR <<USAGE;
${err}Usage:
    perl ${SCRIPT_NAME} [tpcds|tpch] [scale]

Description:
    Runs the sample queries of the given suite and prints a CSV line
    (filename,status,time,rows) per result.  The hive output of each query is
    kept in '<query file>.log'.  Scale defaults to 2.
USAGE
    exit 1;
}

(select avg(ctr_total_return)*1.2
 from customer_total_return ctr2
 where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
and s_store_sk = ctr1.ctr_store_sk
and s_state = 'NM'
and ctr1.ctr_customer_sk = c_customer_sk
order by c_customer_id
limit 100;

-- end query 1 in stream 0 using template query1.tpl

================================================
FILE: sample-queries-tpcds/query10.sql
================================================
-- start query 1 in stream 0 using template query10.tpl and seed 797269820
select
  cd_gender,
  cd_marital_status,
  cd_education_status,
  count(*) cnt1,
  cd_purchase_estimate,
  count(*) cnt2,
  cd_credit_rating,
  count(*) cnt3,
  cd_dep_count,
  count(*) cnt4,
  cd_dep_employed_count,
  count(*) cnt5,
  cd_dep_college_count,
  count(*) cnt6
 from
  customer c,customer_address ca,customer_demographics
 where
  c.c_current_addr_sk = ca.ca_address_sk and
  ca_county in ('Fillmore County','McPherson County','Bonneville County','Boone County','Brown County') and
  cd_demo_sk = c.c_current_cdemo_sk and
  exists (select *
          from store_sales,date_dim
          where c.c_customer_sk = ss_customer_sk and
                ss_sold_date_sk = d_date_sk and
                d_year = 2000 and
                d_moy between 3 and 3+3) and
   (exists (select *
            from web_sales,date_dim
            where c.c_customer_sk = ws_bill_customer_sk and
                  ws_sold_date_sk = d_date_sk and
                  d_year = 2000 and
                  d_moy between 3 ANd
3+3) or exists (select * from catalog_sales,date_dim where c.c_customer_sk = cs_ship_customer_sk and cs_sold_date_sk = d_date_sk and d_year = 2000 and d_moy between 3 and 3+3)) group by cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating, cd_dep_count, cd_dep_employed_count, cd_dep_college_count order by cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating, cd_dep_count, cd_dep_employed_count, cd_dep_college_count limit 100; -- end query 1 in stream 0 using template query10.tpl ================================================ FILE: sample-queries-tpcds/query11.sql ================================================ -- start query 1 in stream 0 using template query11.tpl and seed 1819994127 with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address ,d_year dyear ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total ,'s' sale_type from customer ,store_sales ,date_dim where c_customer_sk = ss_customer_sk and ss_sold_date_sk = d_date_sk group by c_customer_id ,c_first_name ,c_last_name ,c_preferred_cust_flag ,c_birth_country ,c_login ,c_email_address ,d_year union all select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address ,d_year dyear ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total ,'w' sale_type from customer ,web_sales ,date_dim where c_customer_sk = ws_bill_customer_sk and ws_sold_date_sk = d_date_sk group by c_customer_id ,c_first_name ,c_last_name ,c_preferred_cust_flag ,c_birth_country ,c_login ,c_email_address ,d_year ) select t_s_secyear.customer_id 
,t_s_secyear.customer_first_name ,t_s_secyear.customer_last_name ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_w_firstyear ,year_total t_w_secyear where t_s_secyear.customer_id = t_s_firstyear.customer_id and t_s_firstyear.customer_id = t_w_secyear.customer_id and t_s_firstyear.customer_id = t_w_firstyear.customer_id and t_s_firstyear.sale_type = 's' and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' and t_s_firstyear.dyear = 1999 and t_s_secyear.dyear = 1999+1 and t_w_firstyear.dyear = 1999 and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end order by t_s_secyear.customer_id ,t_s_secyear.customer_first_name ,t_s_secyear.customer_last_name ,t_s_secyear.customer_birth_country limit 100; -- end query 1 in stream 0 using template query11.tpl ================================================ FILE: sample-queries-tpcds/query12.sql ================================================ -- start query 1 in stream 0 using template query12.tpl and seed 345591136 select i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price ,sum(ws_ext_sales_price) as itemrevenue ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over (partition by i_class) as revenueratio from web_sales ,item ,date_dim where ws_item_sk = i_item_sk and i_category in ('Electronics', 'Books', 'Women') and ws_sold_date_sk = d_date_sk and d_date between cast('1998-01-06' as date) and (cast('1998-01-06' as date) + 30 days) group by i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price order by i_category ,i_class ,i_item_id ,i_item_desc ,revenueratio limit 100; -- end query 1 in stream 0 using template query12.tpl 
================================================ FILE: sample-queries-tpcds/query13.sql ================================================ -- start query 1 in stream 0 using template query13.tpl and seed 622697896 select avg(ss_quantity) ,avg(ss_ext_sales_price) ,avg(ss_ext_wholesale_cost) ,sum(ss_ext_wholesale_cost) from store_sales ,store ,customer_demographics ,household_demographics ,customer_address ,date_dim where s_store_sk = ss_store_sk and ss_sold_date_sk = d_date_sk and d_year = 2001 and((ss_hdemo_sk=hd_demo_sk and cd_demo_sk = ss_cdemo_sk and cd_marital_status = 'U' and cd_education_status = 'Secondary' and ss_sales_price between 100.00 and 150.00 and hd_dep_count = 3 )or (ss_hdemo_sk=hd_demo_sk and cd_demo_sk = ss_cdemo_sk and cd_marital_status = 'W' and cd_education_status = 'College' and ss_sales_price between 50.00 and 100.00 and hd_dep_count = 1 ) or (ss_hdemo_sk=hd_demo_sk and cd_demo_sk = ss_cdemo_sk and cd_marital_status = 'D' and cd_education_status = 'Primary' and ss_sales_price between 150.00 and 200.00 and hd_dep_count = 1 )) and((ss_addr_sk = ca_address_sk and ca_country = 'United States' and ca_state in ('TX', 'OK', 'MI') and ss_net_profit between 100 and 200 ) or (ss_addr_sk = ca_address_sk and ca_country = 'United States' and ca_state in ('WA', 'NC', 'OH') and ss_net_profit between 150 and 300 ) or (ss_addr_sk = ca_address_sk and ca_country = 'United States' and ca_state in ('MT', 'FL', 'GA') and ss_net_profit between 50 and 250 )) ; -- end query 1 in stream 0 using template query13.tpl ================================================ FILE: sample-queries-tpcds/query14.sql ================================================ -- start query 1 in stream 0 using template query14.tpl and seed 1819994127 with cross_items as (select i_item_sk ss_item_sk from item, (select iss.i_brand_id brand_id ,iss.i_class_id class_id ,iss.i_category_id category_id from store_sales ,item iss ,date_dim d1 where ss_item_sk = iss.i_item_sk and ss_sold_date_sk = 
d1.d_date_sk and d1.d_year between 2000 AND 2000 + 2 intersect select ics.i_brand_id ,ics.i_class_id ,ics.i_category_id from catalog_sales ,item ics ,date_dim d2 where cs_item_sk = ics.i_item_sk and cs_sold_date_sk = d2.d_date_sk and d2.d_year between 2000 AND 2000 + 2 intersect select iws.i_brand_id ,iws.i_class_id ,iws.i_category_id from web_sales ,item iws ,date_dim d3 where ws_item_sk = iws.i_item_sk and ws_sold_date_sk = d3.d_date_sk and d3.d_year between 2000 AND 2000 + 2) x where i_brand_id = brand_id and i_class_id = class_id and i_category_id = category_id ), avg_sales as (select avg(quantity*list_price) average_sales from (select ss_quantity quantity ,ss_list_price list_price from store_sales ,date_dim where ss_sold_date_sk = d_date_sk and d_year between 2000 and 2000 + 2 union all select cs_quantity quantity ,cs_list_price list_price from catalog_sales ,date_dim where cs_sold_date_sk = d_date_sk and d_year between 2000 and 2000 + 2 union all select ws_quantity quantity ,ws_list_price list_price from web_sales ,date_dim where ws_sold_date_sk = d_date_sk and d_year between 2000 and 2000 + 2) x) select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) from( select 'store' channel, i_brand_id,i_class_id ,i_category_id,sum(ss_quantity*ss_list_price) sales , count(*) number_sales from store_sales ,item ,date_dim where ss_item_sk in (select ss_item_sk from cross_items) and ss_item_sk = i_item_sk and ss_sold_date_sk = d_date_sk and d_year = 2000+2 and d_moy = 11 group by i_brand_id,i_class_id,i_category_id having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) union all select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales from catalog_sales ,item ,date_dim where cs_item_sk in (select ss_item_sk from cross_items) and cs_item_sk = i_item_sk and cs_sold_date_sk = d_date_sk and d_year = 2000+2 and d_moy = 11 group by 
i_brand_id,i_class_id,i_category_id having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) union all select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales from web_sales ,item ,date_dim where ws_item_sk in (select ss_item_sk from cross_items) and ws_item_sk = i_item_sk and ws_sold_date_sk = d_date_sk and d_year = 2000+2 and d_moy = 11 group by i_brand_id,i_class_id,i_category_id having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) ) y group by rollup (channel, i_brand_id,i_class_id,i_category_id) order by channel,i_brand_id,i_class_id,i_category_id limit 100; with cross_items as (select i_item_sk ss_item_sk from item, (select iss.i_brand_id brand_id ,iss.i_class_id class_id ,iss.i_category_id category_id from store_sales ,item iss ,date_dim d1 where ss_item_sk = iss.i_item_sk and ss_sold_date_sk = d1.d_date_sk and d1.d_year between 2000 AND 2000 + 2 intersect select ics.i_brand_id ,ics.i_class_id ,ics.i_category_id from catalog_sales ,item ics ,date_dim d2 where cs_item_sk = ics.i_item_sk and cs_sold_date_sk = d2.d_date_sk and d2.d_year between 2000 AND 2000 + 2 intersect select iws.i_brand_id ,iws.i_class_id ,iws.i_category_id from web_sales ,item iws ,date_dim d3 where ws_item_sk = iws.i_item_sk and ws_sold_date_sk = d3.d_date_sk and d3.d_year between 2000 AND 2000 + 2) x where i_brand_id = brand_id and i_class_id = class_id and i_category_id = category_id ), avg_sales as (select avg(quantity*list_price) average_sales from (select ss_quantity quantity ,ss_list_price list_price from store_sales ,date_dim where ss_sold_date_sk = d_date_sk and d_year between 2000 and 2000 + 2 union all select cs_quantity quantity ,cs_list_price list_price from catalog_sales ,date_dim where cs_sold_date_sk = d_date_sk and d_year between 2000 and 2000 + 2 union all select ws_quantity quantity ,ws_list_price list_price from web_sales ,date_dim where ws_sold_date_sk = 
d_date_sk and d_year between 2000 and 2000 + 2) x) select this_year.channel ty_channel ,this_year.i_brand_id ty_brand ,this_year.i_class_id ty_class ,this_year.i_category_id ty_category ,this_year.sales ty_sales ,this_year.number_sales ty_number_sales ,last_year.channel ly_channel ,last_year.i_brand_id ly_brand ,last_year.i_class_id ly_class ,last_year.i_category_id ly_category ,last_year.sales ly_sales ,last_year.number_sales ly_number_sales from (select 'store' channel, i_brand_id,i_class_id,i_category_id ,sum(ss_quantity*ss_list_price) sales, count(*) number_sales from store_sales ,item ,date_dim where ss_item_sk in (select ss_item_sk from cross_items) and ss_item_sk = i_item_sk and ss_sold_date_sk = d_date_sk and d_week_seq = (select d_week_seq from date_dim where d_year = 2000 + 1 and d_moy = 12 and d_dom = 15) group by i_brand_id,i_class_id,i_category_id having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) this_year, (select 'store' channel, i_brand_id,i_class_id ,i_category_id, sum(ss_quantity*ss_list_price) sales, count(*) number_sales from store_sales ,item ,date_dim where ss_item_sk in (select ss_item_sk from cross_items) and ss_item_sk = i_item_sk and ss_sold_date_sk = d_date_sk and d_week_seq = (select d_week_seq from date_dim where d_year = 2000 and d_moy = 12 and d_dom = 15) group by i_brand_id,i_class_id,i_category_id having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) last_year where this_year.i_brand_id= last_year.i_brand_id and this_year.i_class_id = last_year.i_class_id and this_year.i_category_id = last_year.i_category_id order by this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id limit 100; -- end query 1 in stream 0 using template query14.tpl ================================================ FILE: sample-queries-tpcds/query15.sql ================================================ -- start query 1 in stream 0 using template query15.tpl and seed 1819994127 
select ca_zip ,sum(cs_sales_price) from catalog_sales ,customer ,customer_address ,date_dim where cs_bill_customer_sk = c_customer_sk and c_current_addr_sk = ca_address_sk and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') or ca_state in ('CA','WA','GA') or cs_sales_price > 500) and cs_sold_date_sk = d_date_sk and d_qoy = 2 and d_year = 1998 group by ca_zip order by ca_zip limit 100; -- end query 1 in stream 0 using template query15.tpl ================================================ FILE: sample-queries-tpcds/query16.sql ================================================ -- start query 1 in stream 0 using template query16.tpl and seed 171719422 select count(distinct cs_order_number) as `order count` ,sum(cs_ext_ship_cost) as `total shipping cost` ,sum(cs_net_profit) as `total net profit` from catalog_sales cs1 ,date_dim ,customer_address ,call_center where d_date between '1999-4-01' and (cast('1999-4-01' as date) + 60 days) and cs1.cs_ship_date_sk = d_date_sk and cs1.cs_ship_addr_sk = ca_address_sk and ca_state = 'IL' and cs1.cs_call_center_sk = cc_call_center_sk and cc_county in ('Richland County','Bronx County','Maverick County','Mesa County', 'Raleigh County' ) and exists (select * from catalog_sales cs2 where cs1.cs_order_number = cs2.cs_order_number and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) and not exists(select * from catalog_returns cr1 where cs1.cs_order_number = cr1.cr_order_number) order by count(distinct cs_order_number) limit 100; -- end query 1 in stream 0 using template query16.tpl ================================================ FILE: sample-queries-tpcds/query17.sql ================================================ -- start query 1 in stream 0 using template query17.tpl and seed 1819994127 select i_item_id ,i_item_desc ,s_state ,count(ss_quantity) as store_sales_quantitycount ,avg(ss_quantity) as store_sales_quantityave ,stddev_samp(ss_quantity) as store_sales_quantitystdev 
,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov
,count(sr_return_quantity) as store_returns_quantitycount
,avg(sr_return_quantity) as store_returns_quantityave
,stddev_samp(sr_return_quantity) as store_returns_quantitystdev
,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov
,count(cs_quantity) as catalog_sales_quantitycount
,avg(cs_quantity) as catalog_sales_quantityave
,stddev_samp(cs_quantity) as catalog_sales_quantitystdev
,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov
from store_sales ,store_returns ,catalog_sales ,date_dim d1 ,date_dim d2 ,date_dim d3 ,store ,item
where d1.d_quarter_name = '2000Q1'
and d1.d_date_sk = ss_sold_date_sk
and i_item_sk = ss_item_sk
and s_store_sk = ss_store_sk
and ss_customer_sk = sr_customer_sk
and ss_item_sk = sr_item_sk
and ss_ticket_number = sr_ticket_number
and sr_returned_date_sk = d2.d_date_sk
and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3')
and sr_customer_sk = cs_bill_customer_sk
and sr_item_sk = cs_item_sk
and cs_sold_date_sk = d3.d_date_sk
and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3')
group by i_item_id ,i_item_desc ,s_state
order by i_item_id ,i_item_desc ,s_state
limit 100;
-- end query 1 in stream 0 using template query17.tpl
================================================
FILE: sample-queries-tpcds/query18.sql
================================================
-- start query 1 in stream 0 using template query18.tpl and seed 1978355063
-- Averages seven catalog-sales / customer measures for 2002 sales whose billing
-- demographics are male with 'Unknown' education, restricted by birth month and
-- current-address state; rolled up over item, country, state and county.
SELECT
  i_item_id,
  ca_country,
  ca_state,
  ca_county,
  avg(CAST(cs_quantity AS DECIMAL(12,2))) AS agg1,
  avg(CAST(cs_list_price AS DECIMAL(12,2))) AS agg2,
  avg(CAST(cs_coupon_amt AS DECIMAL(12,2))) AS agg3,
  avg(CAST(cs_sales_price AS DECIMAL(12,2))) AS agg4,
  avg(CAST(cs_net_profit AS DECIMAL(12,2))) AS agg5,
  avg(CAST(c_birth_year AS DECIMAL(12,2))) AS agg6,
  avg(CAST(cd1.cd_dep_count AS DECIMAL(12,2))) AS agg7
FROM
  catalog_sales,
  customer_demographics cd1,
  customer_demographics cd2,
  customer,
  customer_address,
  date_dim,
  item
WHERE cs_sold_date_sk = d_date_sk
  AND cs_item_sk = i_item_sk
  AND cs_bill_cdemo_sk = cd1.cd_demo_sk
  AND cs_bill_customer_sk = c_customer_sk
  AND cd1.cd_gender = 'M'
  AND cd1.cd_education_status = 'Unknown'
  AND c_current_cdemo_sk = cd2.cd_demo_sk
  AND c_current_addr_sk = ca_address_sk
  AND c_birth_month IN (5, 1, 4, 7, 8, 9)
  AND d_year = 2002
  AND ca_state IN ('AR', 'TX', 'NC', 'GA', 'MS', 'WV', 'AL')
GROUP BY ROLLUP (i_item_id, ca_country, ca_state, ca_county)
ORDER BY ca_country, ca_state, ca_county, i_item_id
LIMIT 100;
-- end query 1 in stream 0 using template query18.tpl
================================================
FILE: sample-queries-tpcds/query19.sql
================================================
-- start query 1 in stream 0 using template query19.tpl and seed 1930872976
-- Sums extended store-sales price per brand and manufacturer for items of
-- manager 16 sold in December 1998, keeping only sales where the first five
-- characters of the customer's zip differ from the store's.
SELECT
  i_brand_id AS brand_id,
  i_brand AS brand,
  i_manufact_id,
  i_manufact,
  sum(ss_ext_sales_price) AS ext_price
FROM
  date_dim,
  store_sales,
  item,
  customer,
  customer_address,
  store
WHERE d_date_sk = ss_sold_date_sk
  AND ss_item_sk = i_item_sk
  AND i_manager_id = 16
  AND d_moy = 12
  AND d_year = 1998
  AND ss_customer_sk = c_customer_sk
  AND c_current_addr_sk = ca_address_sk
  AND substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5)
  AND ss_store_sk = s_store_sk
GROUP BY i_brand, i_brand_id, i_manufact_id, i_manufact
ORDER BY ext_price DESC, i_brand, i_brand_id, i_manufact_id, i_manufact
LIMIT 100;
-- end query 1 in stream 0 using template query19.tpl
================================================
FILE: sample-queries-tpcds/query2.sql
================================================
-- start query 1 in stream 0 using template query2.tpl and seed 1819994127
-- wscs unions web and catalog sales; wswscs totals them per week and weekday.
-- The final select divides each 1998 week's weekday totals by those of the
-- week whose sequence number is 53 higher (taken from year 1998+1).
WITH wscs AS (
  SELECT sold_date_sk, sales_price
  FROM (
    SELECT
      ws_sold_date_sk AS sold_date_sk,
      ws_ext_sales_price AS sales_price
    FROM web_sales
  ) x
  UNION ALL
  (
    SELECT
      cs_sold_date_sk AS sold_date_sk,
      cs_ext_sales_price AS sales_price
    FROM catalog_sales
  )
),
wswscs AS (
  SELECT
    d_week_seq,
    sum(CASE WHEN (d_day_name = 'Sunday') THEN sales_price ELSE NULL END) AS sun_sales,
    sum(CASE WHEN (d_day_name = 'Monday') THEN sales_price ELSE NULL END) AS mon_sales,
    sum(CASE WHEN (d_day_name = 'Tuesday') THEN sales_price ELSE NULL END) AS tue_sales,
    sum(CASE WHEN (d_day_name = 'Wednesday') THEN sales_price ELSE NULL END) AS wed_sales,
    sum(CASE WHEN (d_day_name = 'Thursday') THEN sales_price ELSE NULL END) AS thu_sales,
    sum(CASE WHEN (d_day_name = 'Friday') THEN sales_price ELSE NULL END) AS fri_sales,
    sum(CASE WHEN (d_day_name = 'Saturday') THEN sales_price ELSE NULL END) AS sat_sales
  FROM wscs, date_dim
  WHERE d_date_sk = sold_date_sk
  GROUP BY d_week_seq
)
SELECT
  d_week_seq1,
  round(sun_sales1 / sun_sales2, 2),
  round(mon_sales1 / mon_sales2, 2),
  round(tue_sales1 / tue_sales2, 2),
  round(wed_sales1 / wed_sales2, 2),
  round(thu_sales1 / thu_sales2, 2),
  round(fri_sales1 / fri_sales2, 2),
  round(sat_sales1 / sat_sales2, 2)
FROM
  (
    SELECT
      wswscs.d_week_seq AS d_week_seq1,
      sun_sales AS sun_sales1,
      mon_sales AS mon_sales1,
      tue_sales AS tue_sales1,
      wed_sales AS wed_sales1,
      thu_sales AS thu_sales1,
      fri_sales AS fri_sales1,
      sat_sales AS sat_sales1
    FROM wswscs, date_dim
    WHERE date_dim.d_week_seq = wswscs.d_week_seq
      AND d_year = 1998
  ) y,
  (
    SELECT
      wswscs.d_week_seq AS d_week_seq2,
      sun_sales AS sun_sales2,
      mon_sales AS mon_sales2,
      tue_sales AS tue_sales2,
      wed_sales AS wed_sales2,
      thu_sales AS thu_sales2,
      fri_sales AS fri_sales2,
      sat_sales AS sat_sales2
    FROM wswscs, date_dim
    WHERE date_dim.d_week_seq = wswscs.d_week_seq
      AND d_year = 1998 + 1
  ) z
WHERE d_week_seq1 = d_week_seq2 - 53
ORDER BY d_week_seq1;
-- end query 1 in stream 0 using template query2.tpl
================================================
FILE: sample-queries-tpcds/query20.sql
================================================
-- start query 1 in stream 0 using template query20.tpl and seed 345591136
-- Catalog-sales revenue per item in three categories for the 30 days starting
-- 2001-03-14, plus each item's percentage of its class's revenue.
SELECT
  i_item_id,
  i_item_desc,
  i_category,
  i_class,
  i_current_price,
  sum(cs_ext_sales_price) AS itemrevenue,
  sum(cs_ext_sales_price) * 100 / sum(sum(cs_ext_sales_price)) OVER (PARTITION BY i_class) AS revenueratio
FROM
  catalog_sales,
  item,
  date_dim
WHERE cs_item_sk = i_item_sk
  AND i_category IN ('Shoes', 'Electronics', 'Children')
  AND cs_sold_date_sk = d_date_sk
  AND d_date BETWEEN CAST('2001-03-14' AS DATE)
                 AND (CAST('2001-03-14' AS DATE) + 30 DAYS)
GROUP BY i_item_id, i_item_desc, i_category, i_class, i_current_price
ORDER BY i_category, i_class, i_item_id, i_item_desc, revenueratio
LIMIT 100;
-- end query 1 in stream 0 using template query20.tpl
================================================
FILE: sample-queries-tpcds/query21.sql
================================================
-- start query 1 in stream 0 using template query21.tpl and seed 1819994127
-- Per warehouse and item (price 0.99-1.49): on-hand inventory summed before
-- and from 1999-03-20 within a +/-30 day window, keeping rows whose
-- after/before ratio lies between 2/3 and 3/2.
SELECT *
FROM (
  SELECT
    w_warehouse_name,
    i_item_id,
    sum(CASE WHEN (CAST(d_date AS DATE) < CAST('1999-03-20' AS DATE))
             THEN inv_quantity_on_hand ELSE 0 END) AS inv_before,
    sum(CASE WHEN (CAST(d_date AS DATE) >= CAST('1999-03-20' AS DATE))
             THEN inv_quantity_on_hand ELSE 0 END) AS inv_after
  FROM
    inventory,
    warehouse,
    item,
    date_dim
  WHERE i_current_price BETWEEN 0.99 AND 1.49
    AND i_item_sk = inv_item_sk
    AND inv_warehouse_sk = w_warehouse_sk
    AND inv_date_sk = d_date_sk
    AND d_date BETWEEN (CAST('1999-03-20' AS DATE) - 30 DAYS)
                   AND (CAST('1999-03-20' AS DATE) + 30 DAYS)
  GROUP BY w_warehouse_name, i_item_id
) x
WHERE (CASE WHEN inv_before > 0 THEN inv_after / inv_before ELSE NULL END)
      BETWEEN 2.0 / 3.0 AND 3.0 / 2.0
ORDER BY w_warehouse_name, i_item_id
LIMIT 100;
-- end query 1 in stream 0 using template query21.tpl
================================================
FILE: sample-queries-tpcds/query22.sql
================================================
-- start query 1 in stream 0 using template query22.tpl and seed 1819994127
-- Average on-hand inventory for month sequences 1186 through 1186+11, rolled
-- up over product name, brand, class and category.
SELECT
  i_product_name,
  i_brand,
  i_class,
  i_category,
  avg(inv_quantity_on_hand) AS qoh
FROM
  inventory,
  date_dim,
  item
WHERE inv_date_sk = d_date_sk
  AND inv_item_sk = i_item_sk
  AND d_month_seq BETWEEN 1186 AND 1186 + 11
GROUP BY ROLLUP (i_product_name, i_brand, i_class, i_category)
ORDER BY qoh, i_product_name, i_brand, i_class, i_category
LIMIT 100;
-- end query 1 in stream 0 using template query22.tpl
================================================
FILE: sample-queries-tpcds/query23.sql
================================================
-- start query 1 in stream 0 using template query23.tpl and seed 2031708268
-- Statement 1 of 2.
-- frequent_ss_items: items sold in store more than 4 times on one date in 2000-2003.
-- max_store_sales:   largest per-customer store spend over 2000-2003.
-- best_ss_customer:  customers whose total store spend exceeds 95% of that maximum.
-- Result: total March-2000 catalog + web sales (quantity * list price) of those
-- items billed to those customers.
WITH frequent_ss_items AS (
  SELECT
    substr(i_item_desc, 1, 30) AS itemdesc,
    i_item_sk AS item_sk,
    d_date AS solddate,
    count(*) AS cnt
  FROM store_sales, date_dim, item
  WHERE ss_sold_date_sk = d_date_sk
    AND ss_item_sk = i_item_sk
    AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
  GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date
  HAVING count(*) > 4
),
max_store_sales AS (
  SELECT max(csales) AS tpcds_cmax
  FROM (
    SELECT
      c_customer_sk,
      sum(ss_quantity * ss_sales_price) AS csales
    FROM store_sales, customer, date_dim
    WHERE ss_customer_sk = c_customer_sk
      AND ss_sold_date_sk = d_date_sk
      AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
    GROUP BY c_customer_sk
  ) x
),
best_ss_customer AS (
  SELECT
    c_customer_sk,
    sum(ss_quantity * ss_sales_price) AS ssales
  FROM store_sales, customer
  WHERE ss_customer_sk = c_customer_sk
  GROUP BY c_customer_sk
  HAVING sum(ss_quantity * ss_sales_price) > (95 / 100.0) * (SELECT * FROM max_store_sales)
)
SELECT sum(sales)
FROM (
  SELECT cs_quantity * cs_list_price AS sales
  FROM catalog_sales, date_dim
  WHERE d_year = 2000
    AND d_moy = 3
    AND cs_sold_date_sk = d_date_sk
    AND cs_item_sk IN (SELECT item_sk FROM frequent_ss_items)
    AND cs_bill_customer_sk IN (SELECT c_customer_sk FROM best_ss_customer)
  UNION ALL
  SELECT ws_quantity * ws_list_price AS sales
  FROM web_sales, date_dim
  WHERE d_year = 2000
    AND d_moy = 3
    AND ws_sold_date_sk = d_date_sk
    AND ws_item_sk IN (SELECT item_sk FROM frequent_ss_items)
    AND ws_bill_customer_sk IN (SELECT c_customer_sk FROM best_ss_customer)
) y
LIMIT 100;

-- Statement 2 of 2: same three CTEs, but the March-2000 catalog and web sales
-- are summed per customer last/first name instead of into one total.
WITH frequent_ss_items AS (
  SELECT
    substr(i_item_desc, 1, 30) AS itemdesc,
    i_item_sk AS item_sk,
    d_date AS solddate,
    count(*) AS cnt
  FROM store_sales, date_dim, item
  WHERE ss_sold_date_sk = d_date_sk
    AND ss_item_sk = i_item_sk
    AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
  GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date
  HAVING count(*) > 4
),
max_store_sales AS (
  SELECT max(csales) AS tpcds_cmax
  FROM (
    SELECT
      c_customer_sk,
      sum(ss_quantity * ss_sales_price) AS csales
    FROM store_sales, customer, date_dim
    WHERE ss_customer_sk = c_customer_sk
      AND ss_sold_date_sk = d_date_sk
      AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
    GROUP BY c_customer_sk
  ) x
),
best_ss_customer AS (
  SELECT
    c_customer_sk,
    sum(ss_quantity * ss_sales_price) AS ssales
  FROM store_sales, customer
  WHERE ss_customer_sk = c_customer_sk
  GROUP BY c_customer_sk
  HAVING sum(ss_quantity * ss_sales_price) > (95 / 100.0) * (SELECT * FROM max_store_sales)
)
SELECT c_last_name, c_first_name, sales
FROM (
  SELECT
    c_last_name,
    c_first_name,
    sum(cs_quantity * cs_list_price) AS sales
  FROM catalog_sales, customer, date_dim
  WHERE d_year = 2000
    AND d_moy = 3
    AND cs_sold_date_sk = d_date_sk
    AND cs_item_sk IN (SELECT item_sk FROM frequent_ss_items)
    AND cs_bill_customer_sk IN (SELECT c_customer_sk FROM best_ss_customer)
    AND cs_bill_customer_sk = c_customer_sk
  GROUP BY c_last_name, c_first_name
  UNION ALL
  SELECT
    c_last_name,
    c_first_name,
    sum(ws_quantity * ws_list_price) AS sales
  FROM web_sales, customer, date_dim
  WHERE d_year = 2000
    AND d_moy = 3
    AND ws_sold_date_sk = d_date_sk
    AND ws_item_sk IN (SELECT item_sk FROM frequent_ss_items)
    AND ws_bill_customer_sk IN (SELECT c_customer_sk FROM best_ss_customer)
    AND ws_bill_customer_sk = c_customer_sk
  GROUP BY c_last_name, c_first_name
) y
ORDER BY c_last_name, c_first_name, sales
LIMIT 100;
-- end query 1 in stream 0 using template query23.tpl
================================================
FILE: sample-queries-tpcds/query24.sql
================================================
-- start query 1 in stream 0 using template query24.tpl and seed 1220860970
-- Statement 1 of 2.
-- ssales: summed ss_sales_price of returned store sales in market 10 where the
-- store zip equals the customer's zip and the customer's birth country differs
-- from the upper-cased address country. The result keeps 'snow' items whose
-- total per customer/store exceeds 5% of the average netpaid in ssales.
WITH ssales AS (
  SELECT
    c_last_name,
    c_first_name,
    s_store_name,
    ca_state,
    s_state,
    i_color,
    i_current_price,
    i_manager_id,
    i_units,
    i_size,
    sum(ss_sales_price) AS netpaid
  FROM
    store_sales,
    store_returns,
    store,
    item,
    customer,
    customer_address
  WHERE ss_ticket_number = sr_ticket_number
    AND ss_item_sk = sr_item_sk
    AND ss_customer_sk = c_customer_sk
    AND ss_item_sk = i_item_sk
    AND ss_store_sk = s_store_sk
    AND c_current_addr_sk = ca_address_sk
    AND c_birth_country <> upper(ca_country)
    AND s_zip = ca_zip
    AND s_market_id = 10
  GROUP BY
    c_last_name,
    c_first_name,
    s_store_name,
    ca_state,
    s_state,
    i_color,
    i_current_price,
    i_manager_id,
    i_units,
    i_size
)
SELECT
  c_last_name,
  c_first_name,
  s_store_name,
  sum(netpaid) AS paid
FROM ssales
WHERE i_color = 'snow'
GROUP BY c_last_name, c_first_name, s_store_name
HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid) FROM ssales)
ORDER BY c_last_name, c_first_name, s_store_name;

-- Statement 2 of 2: identical, but for color 'chiffon'.
WITH ssales AS (
  SELECT
    c_last_name,
    c_first_name,
    s_store_name,
    ca_state,
    s_state,
    i_color,
    i_current_price,
    i_manager_id,
    i_units,
    i_size,
    sum(ss_sales_price) AS netpaid
  FROM
    store_sales,
    store_returns,
    store,
    item,
    customer,
    customer_address
  WHERE ss_ticket_number = sr_ticket_number
    AND ss_item_sk = sr_item_sk
    AND ss_customer_sk = c_customer_sk
    AND ss_item_sk = i_item_sk
    AND ss_store_sk = s_store_sk
    AND c_current_addr_sk = ca_address_sk
    AND c_birth_country <> upper(ca_country)
    AND s_zip = ca_zip
    AND s_market_id = 10
  GROUP BY
    c_last_name,
    c_first_name,
    s_store_name,
    ca_state,
    s_state,
    i_color,
    i_current_price,
    i_manager_id,
    i_units,
    i_size
)
SELECT
  c_last_name,
  c_first_name,
  s_store_name,
  sum(netpaid) AS paid
FROM ssales
WHERE i_color = 'chiffon'
GROUP BY c_last_name, c_first_name, s_store_name
HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid) FROM ssales)
ORDER BY c_last_name, c_first_name, s_store_name;
-- end query 1 in stream 0 using template query24.tpl
================================================
FILE: sample-queries-tpcds/query25.sql
================================================
-- start query 1 in stream 0 using template query25.tpl and seed 1819994127
-- Per item and store: store-sales profit, store-return loss and catalog-sales
-- profit for items sold in store in April 2000 (d1), returned in months 4-10
-- of 2000 (d2) and bought via catalog by the same customer in months 4-10 of
-- 2000 (d3).
SELECT
  i_item_id,
  i_item_desc,
  s_store_id,
  s_store_name,
  sum(ss_net_profit) AS store_sales_profit,
  sum(sr_net_loss) AS store_returns_loss,
  sum(cs_net_profit) AS catalog_sales_profit
FROM
  store_sales,
  store_returns,
  catalog_sales,
  date_dim d1,
  date_dim d2,
  date_dim d3,
  store,
  item
WHERE d1.d_moy = 4
  AND d1.d_year = 2000
  AND d1.d_date_sk = ss_sold_date_sk
  AND i_item_sk = ss_item_sk
  AND s_store_sk = ss_store_sk
  AND ss_customer_sk = sr_customer_sk
  AND ss_item_sk = sr_item_sk
  AND ss_ticket_number = sr_ticket_number
  AND sr_returned_date_sk = d2.d_date_sk
  AND d2.d_moy BETWEEN 4 AND 10
  AND d2.d_year = 2000
  AND sr_customer_sk = cs_bill_customer_sk
  AND sr_item_sk = cs_item_sk
  AND cs_sold_date_sk = d3.d_date_sk
  AND d3.d_moy BETWEEN 4 AND 10
  AND d3.d_year = 2000
GROUP BY i_item_id, i_item_desc, s_store_id, s_store_name
ORDER BY i_item_id, i_item_desc, s_store_id, s_store_name
LIMIT 100;
-- end query 1 in stream 0 using template query25.tpl
================================================
FILE: sample-queries-tpcds/query26.sql
================================================
-- start query 1 in stream 0 using template query26.tpl and seed 1930872976
-- Per-item averages of four catalog-sales measures in 1998 for billing
-- demographics F / 'S' / 'College', under promotions with email or event
-- channel flagged 'N'.
SELECT
  i_item_id,
  avg(cs_quantity) AS agg1,
  avg(cs_list_price) AS agg2,
  avg(cs_coupon_amt) AS agg3,
  avg(cs_sales_price) AS agg4
FROM
  catalog_sales,
  customer_demographics,
  date_dim,
  item,
  promotion
WHERE cs_sold_date_sk = d_date_sk
  AND cs_item_sk = i_item_sk
  AND cs_bill_cdemo_sk = cd_demo_sk
  AND cs_promo_sk = p_promo_sk
  AND cd_gender = 'F'
  AND cd_marital_status = 'S'
  AND cd_education_status = 'College'
  AND (p_channel_email = 'N' OR p_channel_event = 'N')
  AND d_year = 1998
GROUP BY i_item_id
ORDER BY i_item_id
LIMIT 100;
-- end query 1 in stream 0 using template query26.tpl
================================================
FILE: sample-queries-tpcds/query27.sql
================================================
-- start query 1 in stream 0 using template query27.tpl and seed 2017787633
-- Averages of four store-sales measures in 2000 for demographics
-- F / 'U' / '2 yr Degree' in six states, rolled up over item and state;
-- g_state exposes grouping(s_state).
SELECT
  i_item_id,
  s_state,
  grouping(s_state) AS g_state,
  avg(ss_quantity) AS agg1,
  avg(ss_list_price) AS agg2,
  avg(ss_coupon_amt) AS agg3,
  avg(ss_sales_price) AS agg4
FROM
  store_sales,
  customer_demographics,
  date_dim,
  store,
  item
WHERE ss_sold_date_sk = d_date_sk
  AND ss_item_sk = i_item_sk
  AND ss_store_sk = s_store_sk
  AND ss_cdemo_sk = cd_demo_sk
  AND cd_gender = 'F'
  AND cd_marital_status = 'U'
  AND cd_education_status = '2 yr Degree'
  AND d_year = 2000
  AND s_state IN ('AL', 'IN', 'SC', 'NY', 'OH', 'FL')
GROUP BY ROLLUP (i_item_id, s_state)
ORDER BY i_item_id, s_state
LIMIT 100;
-- end query 1 in stream 0 using template query27.tpl
================================================
FILE: sample-queries-tpcds/query28.sql
================================================
-- start query 1 in stream 0 using template query28.tpl and seed 444293455
-- Six single-row subqueries, one per quantity bucket (0-5, 6-10, ..., 26-30),
-- each giving average, count and distinct count of ss_list_price for rows
-- matching a list-price, coupon-amount or wholesale-cost range; cross-joined
-- into one row.
SELECT *
FROM
  (
    SELECT
      avg(ss_list_price) AS B1_LP,
      count(ss_list_price) AS B1_CNT,
      count(DISTINCT ss_list_price) AS B1_CNTD
    FROM store_sales
    WHERE ss_quantity BETWEEN 0 AND 5
      AND (ss_list_price BETWEEN 73 AND 73 + 10
        OR ss_coupon_amt BETWEEN 7826 AND 7826 + 1000
        OR ss_wholesale_cost BETWEEN 70 AND 70 + 20)
  ) B1,
  (
    SELECT
      avg(ss_list_price) AS B2_LP,
      count(ss_list_price) AS B2_CNT,
      count(DISTINCT ss_list_price) AS B2_CNTD
    FROM store_sales
    WHERE ss_quantity BETWEEN 6 AND 10
      AND (ss_list_price BETWEEN 152 AND 152 + 10
        OR ss_coupon_amt BETWEEN 2196 AND 2196 + 1000
        OR ss_wholesale_cost BETWEEN 56 AND 56 + 20)
  ) B2,
  (
    SELECT
      avg(ss_list_price) AS B3_LP,
      count(ss_list_price) AS B3_CNT,
      count(DISTINCT ss_list_price) AS B3_CNTD
    FROM store_sales
    WHERE ss_quantity BETWEEN 11 AND 15
      AND (ss_list_price BETWEEN 53 AND 53 + 10
        OR ss_coupon_amt BETWEEN 3430 AND 3430 + 1000
        OR ss_wholesale_cost BETWEEN 13 AND 13 + 20)
  ) B3,
  (
    SELECT
      avg(ss_list_price) AS B4_LP,
      count(ss_list_price) AS B4_CNT,
      count(DISTINCT ss_list_price) AS B4_CNTD
    FROM store_sales
    WHERE ss_quantity BETWEEN 16 AND 20
      AND (ss_list_price BETWEEN 182 AND 182 + 10
        OR ss_coupon_amt BETWEEN 3262 AND 3262 + 1000
        OR ss_wholesale_cost BETWEEN 20 AND 20 + 20)
  ) B4,
  (
    SELECT
      avg(ss_list_price) AS B5_LP,
      count(ss_list_price) AS B5_CNT,
      count(DISTINCT ss_list_price) AS B5_CNTD
    FROM store_sales
    WHERE ss_quantity BETWEEN 21 AND 25
      AND (ss_list_price BETWEEN 85 AND 85 + 10
        OR ss_coupon_amt BETWEEN 3310 AND 3310 + 1000
        OR ss_wholesale_cost BETWEEN 37 AND 37 + 20)
  ) B5,
  (
    SELECT
      avg(ss_list_price) AS B6_LP,
      count(ss_list_price) AS B6_CNT,
      count(DISTINCT ss_list_price) AS B6_CNTD
    FROM store_sales
    WHERE ss_quantity BETWEEN 26 AND 30
      AND (ss_list_price BETWEEN 180 AND 180 + 10
        OR ss_coupon_amt BETWEEN 12592 AND 12592 + 1000
        OR ss_wholesale_cost BETWEEN 22 AND 22 + 20)
  ) B6
LIMIT 100;
-- end query 1 in stream 0 using template query28.tpl
================================================
FILE: sample-queries-tpcds/query29.sql
================================================
-- start query 1 in stream 0 using template query29.tpl and seed 2031708268
-- Per item and store: sample standard deviation of store-sale, store-return
-- and catalog-sale quantities for items sold in April 1998 (d1), returned in
-- months 4 to 4+3 of 1998 (d2) and bought via catalog by the same customer in
-- 1998 through 1998+2 (d3).
SELECT
  i_item_id,
  i_item_desc,
  s_store_id,
  s_store_name,
  stddev_samp(ss_quantity) AS store_sales_quantity,
  stddev_samp(sr_return_quantity) AS store_returns_quantity,
  stddev_samp(cs_quantity) AS catalog_sales_quantity
FROM
  store_sales,
  store_returns,
  catalog_sales,
  date_dim d1,
  date_dim d2,
  date_dim d3,
  store,
  item
WHERE d1.d_moy = 4
  AND d1.d_year = 1998
  AND d1.d_date_sk = ss_sold_date_sk
  AND i_item_sk = ss_item_sk
  AND s_store_sk = ss_store_sk
  AND ss_customer_sk = sr_customer_sk
  AND ss_item_sk = sr_item_sk
  AND ss_ticket_number = sr_ticket_number
  AND sr_returned_date_sk = d2.d_date_sk
  AND d2.d_moy BETWEEN 4 AND 4 + 3
  AND d2.d_year = 1998
  AND sr_customer_sk = cs_bill_customer_sk
  AND sr_item_sk = cs_item_sk
  AND cs_sold_date_sk = d3.d_date_sk
  AND d3.d_year IN (1998, 1998 + 1, 1998 + 2)
GROUP BY i_item_id, i_item_desc, s_store_id, s_store_name
ORDER BY i_item_id, i_item_desc, s_store_id, s_store_name
LIMIT 100;
-- end query 1 in stream 0 using template query29.tpl
================================================
FILE: sample-queries-tpcds/query3.sql
================================================
-- start query 1 in stream 0 using template query3.tpl and seed 2031708268
-- Store sales price summed per year and brand for manufacturer 816 in
-- November (d_moy = 11).
SELECT
  dt.d_year,
  item.i_brand_id AS brand_id,
  item.i_brand AS brand,
  sum(ss_sales_price) AS sum_agg
FROM
  date_dim dt,
  store_sales,
  item
WHERE dt.d_date_sk = store_sales.ss_sold_date_sk
  AND store_sales.ss_item_sk = item.i_item_sk
  AND item.i_manufact_id = 816
  AND dt.d_moy = 11
GROUP BY dt.d_year, item.i_brand, item.i_brand_id
ORDER BY dt.d_year, sum_agg DESC, brand_id
LIMIT 100;
-- end query 1 in stream 0 using template query3.tpl
================================================
FILE: sample-queries-tpcds/query30.sql
================================================
-- start query 1 in stream 0 using template query30.tpl and seed 1819994127
-- customer_total_return: web-return amount per returning customer and return
-- address state for 2000. Lists customers currently living in 'GA' whose
-- total exceeds 1.2 times the average total for the same return state.
WITH customer_total_return AS (
  SELECT
    wr_returning_customer_sk AS ctr_customer_sk,
    ca_state AS ctr_state,
    sum(wr_return_amt) AS ctr_total_return
  FROM web_returns, date_dim, customer_address
  WHERE wr_returned_date_sk = d_date_sk
    AND d_year = 2000
    AND wr_returning_addr_sk = ca_address_sk
  GROUP BY wr_returning_customer_sk, ca_state
)
SELECT
  c_customer_id,
  c_salutation,
  c_first_name,
  c_last_name,
  c_preferred_cust_flag,
  c_birth_day,
  c_birth_month,
  c_birth_year,
  c_birth_country,
  c_login,
  c_email_address,
  c_last_review_date_sk,
  ctr_total_return
FROM
  customer_total_return ctr1,
  customer_address,
  customer
WHERE ctr1.ctr_total_return > (
        SELECT avg(ctr_total_return) * 1.2
        FROM customer_total_return ctr2
        WHERE ctr1.ctr_state = ctr2.ctr_state
      )
  AND ca_address_sk = c_current_addr_sk
  AND ca_state = 'GA'
  AND ctr1.ctr_customer_sk = c_customer_sk
ORDER BY
  c_customer_id,
  c_salutation,
  c_first_name,
  c_last_name,
  c_preferred_cust_flag,
  c_birth_day,
  c_birth_month,
  c_birth_year,
  c_birth_country,
  c_login,
  c_email_address,
  c_last_review_date_sk,
  ctr_total_return
LIMIT 100;
-- end query 1 in stream 0 using template query30.tpl
================================================
FILE: sample-queries-tpcds/query31.sql
================================================
-- start query 1 in stream 0 using template query31.tpl and seed 1819994127
-- ss / ws: store and web extended sales per county, quarter and year.
-- Reports 1999 counties where the web quarter-over-quarter ratio beats the
-- store ratio for both Q1->Q2 and Q2->Q3.
WITH ss AS (
  SELECT
    ca_county,
    d_qoy,
    d_year,
    sum(ss_ext_sales_price) AS store_sales
  FROM store_sales, date_dim, customer_address
  WHERE ss_sold_date_sk = d_date_sk
    AND ss_addr_sk = ca_address_sk
  GROUP BY ca_county, d_qoy, d_year
),
ws AS (
  SELECT
    ca_county,
    d_qoy,
    d_year,
    sum(ws_ext_sales_price) AS web_sales
  FROM web_sales, date_dim, customer_address
  WHERE ws_sold_date_sk = d_date_sk
    AND ws_bill_addr_sk = ca_address_sk
  GROUP BY ca_county, d_qoy, d_year
)
SELECT
  ss1.ca_county,
  ss1.d_year,
  ws2.web_sales / ws1.web_sales AS web_q1_q2_increase,
  ss2.store_sales / ss1.store_sales AS store_q1_q2_increase,
  ws3.web_sales / ws2.web_sales AS web_q2_q3_increase,
  ss3.store_sales / ss2.store_sales AS store_q2_q3_increase
FROM
  ss ss1,
  ss ss2,
  ss ss3,
  ws ws1,
  ws ws2,
  ws ws3
WHERE ss1.d_qoy = 1
  AND ss1.d_year = 1999
  AND ss1.ca_county = ss2.ca_county
  AND ss2.d_qoy = 2
  AND ss2.d_year = 1999
  AND ss2.ca_county = ss3.ca_county
  AND ss3.d_qoy = 3
  AND ss3.d_year = 1999
  AND ss1.ca_county = ws1.ca_county
  AND ws1.d_qoy = 1
  AND ws1.d_year = 1999
  AND ws1.ca_county = ws2.ca_county
  AND ws2.d_qoy = 2
  AND ws2.d_year = 1999
  AND ws1.ca_county = ws3.ca_county
  AND ws3.d_qoy = 3
  AND ws3.d_year = 1999
  AND CASE WHEN ws1.web_sales > 0 THEN ws2.web_sales / ws1.web_sales ELSE NULL END
      > CASE WHEN ss1.store_sales > 0 THEN ss2.store_sales / ss1.store_sales ELSE NULL END
  AND CASE WHEN ws2.web_sales > 0 THEN ws3.web_sales / ws2.web_sales ELSE NULL END
      > CASE WHEN ss2.store_sales > 0 THEN ss3.store_sales / ss2.store_sales ELSE NULL END
ORDER BY ss1.d_year;
-- end query 1 in stream 0 using template query31.tpl
================================================
FILE: sample-queries-tpcds/query32.sql
================================================
-- start query 1 in stream 0 using template query32.tpl and seed 2031708268
-- Sums catalog-sales discount amounts for manufacturer 66 in the 90 days from
-- 2002-03-29 where the discount exceeds 1.3 times the item's average discount
-- over the same period.
SELECT sum(cs_ext_discount_amt) AS `excess discount amount`
FROM
  catalog_sales,
  item,
  date_dim
WHERE i_manufact_id = 66
  AND i_item_sk = cs_item_sk
  AND d_date BETWEEN '2002-03-29' AND (CAST('2002-03-29' AS DATE) + 90 DAYS)
  AND d_date_sk = cs_sold_date_sk
  AND cs_ext_discount_amt > (
        SELECT 1.3 * avg(cs_ext_discount_amt)
        FROM catalog_sales, date_dim
        WHERE cs_item_sk = i_item_sk
          AND d_date BETWEEN '2002-03-29' AND (CAST('2002-03-29' AS DATE) + 90 DAYS)
          AND d_date_sk = cs_sold_date_sk
      )
LIMIT 100;
-- end query 1 in stream 0 using template
query32.tpl
================================================
FILE: sample-queries-tpcds/query33.sql
================================================
-- start query 1 in stream 0 using template query33.tpl and seed 1930872976
-- ss / cs / ws: extended sales per manufacturer in May 1998 at GMT offset -6
-- for manufacturers that have an item in category 'Home'. The three channels
-- are unioned and summed per manufacturer.
WITH ss AS (
  SELECT
    i_manufact_id,
    sum(ss_ext_sales_price) AS total_sales
  FROM store_sales, date_dim, customer_address, item
  WHERE i_manufact_id IN (
          SELECT i_manufact_id
          FROM item
          WHERE i_category IN ('Home')
        )
    AND ss_item_sk = i_item_sk
    AND ss_sold_date_sk = d_date_sk
    AND d_year = 1998
    AND d_moy = 5
    AND ss_addr_sk = ca_address_sk
    AND ca_gmt_offset = -6
  GROUP BY i_manufact_id
),
cs AS (
  SELECT
    i_manufact_id,
    sum(cs_ext_sales_price) AS total_sales
  FROM catalog_sales, date_dim, customer_address, item
  WHERE i_manufact_id IN (
          SELECT i_manufact_id
          FROM item
          WHERE i_category IN ('Home')
        )
    AND cs_item_sk = i_item_sk
    AND cs_sold_date_sk = d_date_sk
    AND d_year = 1998
    AND d_moy = 5
    AND cs_bill_addr_sk = ca_address_sk
    AND ca_gmt_offset = -6
  GROUP BY i_manufact_id
),
ws AS (
  SELECT
    i_manufact_id,
    sum(ws_ext_sales_price) AS total_sales
  FROM web_sales, date_dim, customer_address, item
  WHERE i_manufact_id IN (
          SELECT i_manufact_id
          FROM item
          WHERE i_category IN ('Home')
        )
    AND ws_item_sk = i_item_sk
    AND ws_sold_date_sk = d_date_sk
    AND d_year = 1998
    AND d_moy = 5
    AND ws_bill_addr_sk = ca_address_sk
    AND ca_gmt_offset = -6
  GROUP BY i_manufact_id
)
SELECT
  i_manufact_id,
  sum(total_sales) AS total_sales
FROM (
  SELECT * FROM ss
  UNION ALL
  SELECT * FROM cs
  UNION ALL
  SELECT * FROM ws
) tmp1
GROUP BY i_manufact_id
ORDER BY total_sales
LIMIT 100;
-- end query 1 in stream 0 using template query33.tpl
================================================
FILE: sample-queries-tpcds/query34.sql
================================================
-- start query 1 in stream 0 using template query34.tpl and seed 1971067816
-- Customers with store tickets of 15 to 20 line items, bought on days 1-3 or
-- 25-28 of a month in 2000 through 2000+2, in eight counties, by households
-- with buy potential '>10000' or 'Unknown' and a dependents-per-vehicle ratio
-- above 1.2.
SELECT
  c_last_name,
  c_first_name,
  c_salutation,
  c_preferred_cust_flag,
  ss_ticket_number,
  cnt
FROM
  (
    SELECT
      ss_ticket_number,
      ss_customer_sk,
      count(*) AS cnt
    FROM
      store_sales,
      date_dim,
      store,
      household_demographics
    WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
      AND store_sales.ss_store_sk = store.s_store_sk
      AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
      AND (date_dim.d_dom BETWEEN 1 AND 3 OR date_dim.d_dom BETWEEN 25 AND 28)
      AND (household_demographics.hd_buy_potential = '>10000'
        OR household_demographics.hd_buy_potential = 'Unknown')
      AND household_demographics.hd_vehicle_count > 0
      AND (CASE WHEN household_demographics.hd_vehicle_count > 0
                THEN household_demographics.hd_dep_count / household_demographics.hd_vehicle_count
                ELSE NULL END) > 1.2
      AND date_dim.d_year IN (2000, 2000 + 1, 2000 + 2)
      AND store.s_county IN ('Salem County', 'Terrell County', 'Arthur County', 'Oglethorpe County',
                             'Lunenburg County', 'Perry County', 'Halifax County', 'Sumner County')
    GROUP BY ss_ticket_number, ss_customer_sk
  ) dn,
  customer
WHERE ss_customer_sk = c_customer_sk
  AND cnt BETWEEN 15 AND 20
ORDER BY c_last_name, c_first_name, c_salutation, c_preferred_cust_flag DESC, ss_ticket_number;
-- end query 1 in stream 0 using template query34.tpl
================================================
FILE: sample-queries-tpcds/query35.sql
================================================
-- start query 1 in stream 0 using template query35.tpl and seed 1930872976
-- Dependent-count statistics per state and demographic group for customers
-- with a store sale in the first three quarters of 2001 and also a web or
-- catalog sale in that period.
SELECT
  ca_state,
  cd_gender,
  cd_marital_status,
  cd_dep_count,
  count(*) AS cnt1,
  avg(cd_dep_count),
  min(cd_dep_count),
  stddev_samp(cd_dep_count),
  cd_dep_employed_count,
  count(*) AS cnt2,
  avg(cd_dep_employed_count),
  min(cd_dep_employed_count),
  stddev_samp(cd_dep_employed_count),
  cd_dep_college_count,
  count(*) AS cnt3,
  avg(cd_dep_college_count),
  min(cd_dep_college_count),
  stddev_samp(cd_dep_college_count)
FROM
  customer c,
  customer_address ca,
  customer_demographics
WHERE c.c_current_addr_sk = ca.ca_address_sk
  AND cd_demo_sk = c.c_current_cdemo_sk
  AND EXISTS (
        SELECT *
        FROM store_sales, date_dim
        WHERE c.c_customer_sk = ss_customer_sk
          AND ss_sold_date_sk = d_date_sk
          AND d_year = 2001
          AND d_qoy < 4
      )
  AND (
        EXISTS (
          SELECT *
          FROM web_sales, date_dim
          WHERE c.c_customer_sk = ws_bill_customer_sk
            AND ws_sold_date_sk = d_date_sk
            AND d_year = 2001
            AND d_qoy < 4
        )
        OR EXISTS (
          SELECT *
          FROM catalog_sales, date_dim
          WHERE c.c_customer_sk = cs_ship_customer_sk
            AND cs_sold_date_sk = d_date_sk
            AND d_year = 2001
            AND d_qoy < 4
        )
      )
GROUP BY
  ca_state,
  cd_gender,
  cd_marital_status,
  cd_dep_count,
  cd_dep_employed_count,
  cd_dep_college_count
ORDER BY
  ca_state,
  cd_gender,
  cd_marital_status,
  cd_dep_count,
  cd_dep_employed_count,
  cd_dep_college_count
LIMIT 100;
-- end query 1 in stream 0 using template query35.tpl
================================================
FILE: sample-queries-tpcds/query36.sql
================================================
-- start query 1 in stream 0 using template query36.tpl and seed 1544728811
-- Gross margin (net profit / extended sales price) of 1999 store sales in
-- eight states, rolled up over category and class and ranked within each
-- rollup level and parent category.
SELECT
  sum(ss_net_profit) / sum(ss_ext_sales_price) AS gross_margin,
  i_category,
  i_class,
  grouping(i_category) + grouping(i_class) AS lochierarchy,
  rank() OVER (
    PARTITION BY
      grouping(i_category) + grouping(i_class),
      CASE WHEN grouping(i_class) = 0 THEN i_category END
    ORDER BY sum(ss_net_profit) / sum(ss_ext_sales_price) ASC
  ) AS rank_within_parent
FROM
  store_sales,
  date_dim d1,
  item,
  store
WHERE d1.d_year = 1999
  AND d1.d_date_sk = ss_sold_date_sk
  AND i_item_sk = ss_item_sk
  AND s_store_sk = ss_store_sk
  AND s_state IN ('IN', 'AL', 'MI', 'MN', 'TN', 'LA', 'FL', 'NM')
GROUP BY ROLLUP (i_category, i_class)
ORDER BY
  lochierarchy DESC,
  CASE WHEN lochierarchy = 0 THEN i_category END,
  rank_within_parent
LIMIT 100;
-- end query 1 in stream 0 using template query36.tpl
================================================
FILE: sample-queries-tpcds/query37.sql
================================================
-- start query 1 in stream 0 using template query37.tpl and seed 301843662
-- Items priced 39 to 39+30 from four manufacturers that have on-hand
-- inventory of 100-500 within 60 days from 2001-01-16 and appear in
-- catalog sales.
SELECT
  i_item_id,
  i_item_desc,
  i_current_price
FROM
  item,
  inventory,
  date_dim,
  catalog_sales
WHERE i_current_price BETWEEN 39 AND 39 + 30
  AND inv_item_sk = i_item_sk
  AND d_date_sk = inv_date_sk
  AND d_date BETWEEN CAST('2001-01-16' AS DATE)
                 AND (CAST('2001-01-16' AS DATE) + 60 DAYS)
  AND i_manufact_id IN (765, 886, 889, 728)
  AND inv_quantity_on_hand BETWEEN 100 AND 500
  AND cs_item_sk = i_item_sk
GROUP BY i_item_id, i_item_desc, i_current_price
ORDER BY i_item_id
LIMIT 100;
-- end query 1 in stream 0 using template query37.tpl
================================================
FILE: sample-queries-tpcds/query38.sql
================================================
-- start query 1 in stream 0 using template query38.tpl and seed 1819994127
-- Counts distinct (last name, first name, date) combinations present in
-- store, catalog and web sales alike for month sequences 1186 through 1186+11.
SELECT count(*)
FROM (
  SELECT DISTINCT c_last_name, c_first_name, d_date
  FROM store_sales, date_dim, customer
  WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
    AND store_sales.ss_customer_sk = customer.c_customer_sk
    AND d_month_seq BETWEEN 1186 AND 1186 + 11
  INTERSECT
  SELECT DISTINCT c_last_name, c_first_name, d_date
  FROM catalog_sales, date_dim, customer
  WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
    AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
    AND d_month_seq BETWEEN 1186 AND 1186 + 11
  INTERSECT
  SELECT DISTINCT c_last_name, c_first_name, d_date
  FROM web_sales, date_dim, customer
  WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk
    AND web_sales.ws_bill_customer_sk = customer.c_customer_sk
    AND d_month_seq BETWEEN 1186 AND 1186 + 11
) hot_cust
LIMIT 100;
-- end query 1 in stream 0 using template query38.tpl
================================================
FILE: sample-queries-tpcds/query39.sql
================================================
-- start query 1 in stream 0 using template query39.tpl and seed 1327317894
-- Statement 1 of 2.
-- inv: per warehouse, item and month of 2000, the sample stddev and mean of
-- on-hand quantity, kept when stdev/mean > 1. The result pairs month 2 with
-- month 2+1 for the same warehouse and item.
WITH inv AS (
  SELECT
    w_warehouse_name,
    w_warehouse_sk,
    i_item_sk,
    d_moy,
    stdev,
    mean,
    CASE mean WHEN 0 THEN NULL ELSE stdev / mean END AS cov
  FROM (
    SELECT
      w_warehouse_name,
      w_warehouse_sk,
      i_item_sk,
      d_moy,
      stddev_samp(inv_quantity_on_hand) AS stdev,
      avg(inv_quantity_on_hand) AS mean
    FROM
      inventory,
      item,
      warehouse,
      date_dim
    WHERE inv_item_sk = i_item_sk
      AND inv_warehouse_sk = w_warehouse_sk
      AND inv_date_sk = d_date_sk
      AND d_year = 2000
    GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy
  ) foo
  WHERE CASE mean WHEN 0 THEN 0 ELSE stdev / mean END > 1
)
SELECT
  inv1.w_warehouse_sk,
  inv1.i_item_sk,
  inv1.d_moy,
  inv1.mean,
  inv1.cov,
  inv2.w_warehouse_sk,
  inv2.i_item_sk,
  inv2.d_moy,
  inv2.mean,
  inv2.cov
FROM
  inv inv1,
  inv inv2
WHERE inv1.i_item_sk = inv2.i_item_sk
  AND inv1.w_warehouse_sk = inv2.w_warehouse_sk
  AND inv1.d_moy = 2
  AND inv2.d_moy = 2 + 1
ORDER BY
  inv1.w_warehouse_sk,
  inv1.i_item_sk,
  inv1.d_moy,
  inv1.mean,
  inv1.cov,
  inv2.d_moy,
  inv2.mean,
  inv2.cov;

-- Statement 2 of 2: identical, with the extra filter inv1.cov > 1.5.
WITH inv AS (
  SELECT
    w_warehouse_name,
    w_warehouse_sk,
    i_item_sk,
    d_moy,
    stdev,
    mean,
    CASE mean WHEN 0 THEN NULL ELSE stdev / mean END AS cov
  FROM (
    SELECT
      w_warehouse_name,
      w_warehouse_sk,
      i_item_sk,
      d_moy,
      stddev_samp(inv_quantity_on_hand) AS stdev,
      avg(inv_quantity_on_hand) AS mean
    FROM
      inventory,
      item,
      warehouse,
      date_dim
    WHERE inv_item_sk = i_item_sk
      AND inv_warehouse_sk = w_warehouse_sk
      AND inv_date_sk = d_date_sk
      AND d_year = 2000
    GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy
  ) foo
  WHERE CASE mean WHEN 0 THEN 0 ELSE stdev / mean END > 1
)
SELECT
  inv1.w_warehouse_sk,
  inv1.i_item_sk,
  inv1.d_moy,
  inv1.mean,
  inv1.cov,
  inv2.w_warehouse_sk,
  inv2.i_item_sk,
  inv2.d_moy,
  inv2.mean,
  inv2.cov
FROM
  inv inv1,
  inv inv2
WHERE inv1.i_item_sk = inv2.i_item_sk
  AND inv1.w_warehouse_sk = inv2.w_warehouse_sk
  AND inv1.d_moy = 2
  AND inv2.d_moy = 2 + 1
  AND inv1.cov > 1.5
ORDER BY
  inv1.w_warehouse_sk,
  inv1.i_item_sk,
  inv1.d_moy,
  inv1.mean,
  inv1.cov,
  inv2.d_moy,
  inv2.mean,
  inv2.cov;
-- end query 1 in stream 0 using template query39.tpl
================================================
FILE: sample-queries-tpcds/query4.sql
================================================
-- start query 1 in stream 0 using template query4.tpl and seed 1819994127
-- year_total: per customer and year, a sales figure for store ('s'), catalog
-- ('c') and web ('w') channels. Lists customers whose 1999 -> 1999+1 catalog
-- growth ratio exceeds both their store and their web growth ratios.
WITH year_total AS (
  SELECT
    c_customer_id AS customer_id,
    c_first_name AS customer_first_name,
    c_last_name AS customer_last_name,
    c_preferred_cust_flag AS customer_preferred_cust_flag,
    c_birth_country AS customer_birth_country,
    c_login AS customer_login,
    c_email_address AS customer_email_address,
    d_year AS dyear,
    sum(((ss_ext_list_price - ss_ext_wholesale_cost - ss_ext_discount_amt) + ss_ext_sales_price) / 2) AS year_total,
    's' AS sale_type
  FROM customer, store_sales, date_dim
  WHERE c_customer_sk = ss_customer_sk
    AND ss_sold_date_sk = d_date_sk
  GROUP BY
    c_customer_id,
    c_first_name,
    c_last_name,
    c_preferred_cust_flag,
    c_birth_country,
    c_login,
    c_email_address,
    d_year
  UNION ALL
  SELECT
    c_customer_id AS customer_id,
    c_first_name AS customer_first_name,
    c_last_name AS customer_last_name,
    c_preferred_cust_flag AS customer_preferred_cust_flag,
    c_birth_country AS customer_birth_country,
    c_login AS customer_login,
    c_email_address AS customer_email_address,
    d_year AS dyear,
    sum((((cs_ext_list_price - cs_ext_wholesale_cost - cs_ext_discount_amt) + cs_ext_sales_price) / 2)) AS year_total,
    'c' AS sale_type
  FROM customer, catalog_sales, date_dim
  WHERE c_customer_sk = cs_bill_customer_sk
    AND cs_sold_date_sk = d_date_sk
  GROUP BY
    c_customer_id,
    c_first_name,
    c_last_name,
    c_preferred_cust_flag,
    c_birth_country,
    c_login,
    c_email_address,
    d_year
  UNION ALL
  SELECT
    c_customer_id AS customer_id,
    c_first_name AS customer_first_name,
    c_last_name AS customer_last_name,
    c_preferred_cust_flag AS customer_preferred_cust_flag,
    c_birth_country AS customer_birth_country,
    c_login AS customer_login,
    c_email_address AS customer_email_address,
    d_year AS dyear,
    sum((((ws_ext_list_price - ws_ext_wholesale_cost - ws_ext_discount_amt) + ws_ext_sales_price) / 2)) AS year_total,
    'w' AS sale_type
  FROM customer, web_sales, date_dim
  WHERE c_customer_sk = ws_bill_customer_sk
    AND ws_sold_date_sk = d_date_sk
  GROUP BY
    c_customer_id,
    c_first_name,
    c_last_name,
    c_preferred_cust_flag,
    c_birth_country,
    c_login,
    c_email_address,
    d_year
)
SELECT
  t_s_secyear.customer_id,
  t_s_secyear.customer_first_name,
  t_s_secyear.customer_last_name,
  t_s_secyear.customer_birth_country
FROM
  year_total t_s_firstyear,
  year_total t_s_secyear,
  year_total t_c_firstyear,
  year_total t_c_secyear,
  year_total t_w_firstyear,
  year_total t_w_secyear
WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id
  AND t_s_firstyear.customer_id = t_c_secyear.customer_id
  AND t_s_firstyear.customer_id = t_c_firstyear.customer_id
  AND t_s_firstyear.customer_id = t_w_firstyear.customer_id
  AND t_s_firstyear.customer_id = t_w_secyear.customer_id
  AND t_s_firstyear.sale_type = 's'
  AND t_c_firstyear.sale_type = 'c'
  AND t_w_firstyear.sale_type = 'w'
  AND t_s_secyear.sale_type = 's'
  AND t_c_secyear.sale_type = 'c'
  AND t_w_secyear.sale_type = 'w'
  AND t_s_firstyear.dyear = 1999
  AND t_s_secyear.dyear = 1999 + 1
  AND t_c_firstyear.dyear = 1999
  AND t_c_secyear.dyear = 1999 + 1
  AND t_w_firstyear.dyear = 1999
  AND t_w_secyear.dyear = 1999 + 1
  AND t_s_firstyear.year_total > 0
  AND t_c_firstyear.year_total > 0
  AND t_w_firstyear.year_total > 0
  AND CASE WHEN t_c_firstyear.year_total > 0 THEN t_c_secyear.year_total / t_c_firstyear.year_total ELSE NULL END
      > CASE WHEN t_s_firstyear.year_total > 0 THEN t_s_secyear.year_total / t_s_firstyear.year_total ELSE NULL END
  AND CASE WHEN t_c_firstyear.year_total > 0 THEN t_c_secyear.year_total / t_c_firstyear.year_total ELSE NULL END
      > CASE WHEN t_w_firstyear.year_total > 0 THEN t_w_secyear.year_total / t_w_firstyear.year_total ELSE NULL END
ORDER BY
  t_s_secyear.customer_id,
  t_s_secyear.customer_first_name,
  t_s_secyear.customer_last_name,
  t_s_secyear.customer_birth_country
LIMIT 100;
-- end query 1 in stream 0 using template query4.tpl
================================================
FILE: sample-queries-tpcds/query40.sql
================================================
-- start query 1 in stream 0 using template query40.tpl and seed 1819994127
-- Per warehouse state and item (price 0.99-1.49): catalog sales price net of
-- refunded cash, summed before and from 2000-03-18 within a +/-30 day window.
SELECT
  w_state,
  i_item_id,
  sum(CASE WHEN (CAST(d_date AS DATE) < CAST('2000-03-18' AS DATE))
           THEN cs_sales_price - coalesce(cr_refunded_cash, 0) ELSE 0 END) AS sales_before,
  sum(CASE WHEN (CAST(d_date AS DATE) >= CAST('2000-03-18' AS DATE))
           THEN cs_sales_price - coalesce(cr_refunded_cash, 0) ELSE 0 END) AS sales_after
FROM
  catalog_sales
  LEFT OUTER JOIN catalog_returns
    ON (cs_order_number = cr_order_number AND cs_item_sk = cr_item_sk),
  warehouse,
  item,
  date_dim
WHERE i_current_price BETWEEN 0.99 AND 1.49
  AND i_item_sk = cs_item_sk
  AND cs_warehouse_sk = w_warehouse_sk
  AND cs_sold_date_sk = d_date_sk
  AND d_date BETWEEN (CAST('2000-03-18' AS DATE) - 30 DAYS)
                 AND (CAST('2000-03-18' AS DATE) + 30 DAYS)
GROUP BY w_state, i_item_id
ORDER BY w_state, i_item_id
LIMIT 100;
-- end query 1 in stream 0 using template query40.tpl
================================================
FILE: sample-queries-tpcds/query41.sql
================================================
-- start query 1 in stream 0 using template query41.tpl and seed 1581015815
-- Distinct product names for manufacturer ids 970 to 970+40 where the same
-- manufacturer has at least one item matching any of the listed
-- category / color / unit / size combinations.
SELECT DISTINCT (i_product_name)
FROM item i1
WHERE i_manufact_id BETWEEN 970 AND 970 + 40
  AND (
        SELECT count(*) AS item_cnt
        FROM item
        WHERE (i_manufact = i1.i_manufact
               AND ((i_category = 'Women'
                     AND (i_color = 'frosted' OR i_color = 'rose')
                     AND (i_units = 'Lb' OR i_units = 'Gross')
                     AND (i_size = 'medium' OR i_size = 'large'))
                 OR (i_category = 'Women'
                     AND (i_color = 'chocolate' OR i_color = 'black')
                     AND (i_units = 'Box' OR i_units = 'Dram')
                     AND (i_size = 'economy' OR i_size = 'petite'))
                 OR (i_category = 'Men'
                     AND (i_color = 'slate' OR i_color = 'magenta')
                     AND (i_units = 'Carton' OR i_units = 'Bundle')
                     AND (i_size = 'N/A' OR i_size = 'small'))
                 OR (i_category = 'Men'
                     AND (i_color = 'cornflower' OR i_color = 'firebrick')
                     AND (i_units = 'Pound' OR i_units = 'Oz')
                     AND (i_size = 'medium' OR i_size = 'large'))))
           OR (i_manufact = i1.i_manufact
               AND ((i_category = 'Women'
                     AND (i_color = 'almond' OR i_color = 'steel')
                     AND (i_units = 'Tsp' OR i_units = 'Case')
                     AND (i_size = 'medium' OR i_size = 'large'))
                 OR (i_category = 'Women'
                     AND (i_color = 'purple' OR i_color = 'aquamarine')
                     AND (i_units = 'Bunch' OR i_units = 'Gram')
                     AND (i_size = 'economy' OR i_size = 'petite'))
                 OR (i_category = 'Men'
                     AND (i_color = 'lavender' OR i_color = 'papaya')
                     AND (i_units = 'Pallet' OR i_units = 'Cup')
                     AND (i_size = 'N/A' OR i_size = 'small'))
                 OR (i_category = 'Men'
                     AND (i_color = 'maroon' OR i_color = 'cyan')
                     AND (i_units = 'Each' OR i_units = 'N/A')
                     AND (i_size = 'medium' OR i_size = 'large'))))
      ) > 0
ORDER BY i_product_name
LIMIT 100;
-- end query 1 in stream 0 using template query41.tpl
================================================
FILE: sample-queries-tpcds/query42.sql
================================================
-- start query 1 in stream 0 using template query42.tpl and seed 1819994127
-- Extended store sales price summed per year and category for items of
-- manager 1 sold in December 1998.
SELECT
  dt.d_year,
  item.i_category_id,
  item.i_category,
  sum(ss_ext_sales_price)
FROM
  date_dim dt,
  store_sales,
  item
WHERE dt.d_date_sk = store_sales.ss_sold_date_sk
  AND store_sales.ss_item_sk = item.i_item_sk
  AND item.i_manager_id = 1
  AND dt.d_moy = 12
  AND dt.d_year = 1998
GROUP BY dt.d_year, item.i_category_id, item.i_category
ORDER BY sum(ss_ext_sales_price) DESC, dt.d_year, item.i_category_id, item.i_category
LIMIT 100;
-- end query 1 in stream 0 using template query42.tpl
================================================
FILE: sample-queries-tpcds/query43.sql
================================================
-- start query 1 in stream 0 using template query43.tpl and seed 1819994127
-- Store sales price per store split by weekday name, for stores at GMT
-- offset -6 in 2001.
SELECT
  s_store_name,
  s_store_id,
  sum(CASE WHEN (d_day_name = 'Sunday') THEN ss_sales_price ELSE NULL END) AS sun_sales,
  sum(CASE WHEN (d_day_name = 'Monday') THEN ss_sales_price ELSE NULL END) AS mon_sales,
  sum(CASE WHEN (d_day_name = 'Tuesday') THEN ss_sales_price ELSE NULL END) AS tue_sales,
  sum(CASE WHEN (d_day_name = 'Wednesday') THEN ss_sales_price ELSE NULL END) AS wed_sales,
  sum(CASE WHEN (d_day_name = 'Thursday') THEN ss_sales_price ELSE NULL END) AS thu_sales,
  sum(CASE WHEN (d_day_name = 'Friday') THEN ss_sales_price ELSE NULL END) AS fri_sales,
  sum(CASE WHEN (d_day_name = 'Saturday') THEN ss_sales_price ELSE NULL END) AS sat_sales
FROM
  date_dim,
  store_sales,
  store
WHERE d_date_sk = ss_sold_date_sk
  AND s_store_sk = ss_store_sk
  AND s_gmt_offset = -6
  AND d_year = 2001
GROUP BY s_store_name, s_store_id
ORDER BY
  s_store_name,
  s_store_id,
  sun_sales,
  mon_sales,
  tue_sales,
  wed_sales,
  thu_sales,
  fri_sales,
  sat_sales
LIMIT 100;
-- end query 1 in stream 0
using template query43.tpl
================================================ FILE: sample-queries-tpcds/query44.sql ================================================
-- start query 1 in stream 0 using template query44.tpl and seed 1819994127
-- Joins the ascending and descending rankings (rnk < 11) of store 366 items by
-- average ss_net_profit, listing the product names of equal rank side by side.
SELECT asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing
FROM (SELECT *
      FROM (SELECT item_sk,rank() OVER (ORDER BY rank_col ASC) rnk
            FROM (SELECT ss_item_sk item_sk,avg(ss_net_profit) rank_col
                  FROM store_sales ss1
                  WHERE ss_store_sk = 366
                  GROUP BY ss_item_sk
                  HAVING avg(ss_net_profit) > 0.9*(SELECT avg(ss_net_profit) rank_col
                                                   FROM store_sales
                                                   WHERE ss_store_sk = 366
                                                     AND ss_cdemo_sk IS NULL
                                                   GROUP BY ss_store_sk))V1)V11
      WHERE rnk < 11) asceding,
     (SELECT *
      FROM (SELECT item_sk,rank() OVER (ORDER BY rank_col DESC) rnk
            FROM (SELECT ss_item_sk item_sk,avg(ss_net_profit) rank_col
                  FROM store_sales ss1
                  WHERE ss_store_sk = 366
                  GROUP BY ss_item_sk
                  HAVING avg(ss_net_profit) > 0.9*(SELECT avg(ss_net_profit) rank_col
                                                   FROM store_sales
                                                   WHERE ss_store_sk = 366
                                                     AND ss_cdemo_sk IS NULL
                                                   GROUP BY ss_store_sk))V2)V21
      WHERE rnk < 11) descending,
     item i1,
     item i2
WHERE asceding.rnk = descending.rnk
  AND i1.i_item_sk=asceding.item_sk
  AND i2.i_item_sk=descending.item_sk
ORDER BY asceding.rnk
LIMIT 100;
-- end query 1 in stream 0 using template query44.tpl
================================================ FILE: sample-queries-tpcds/query45.sql ================================================
-- start query 1 in stream 0 using template query45.tpl and seed 2031708268
-- Sum of ws_sales_price by zip and county for quarter 1 of 1998, limited to the
-- listed zip prefixes or to item ids whose i_item_sk is in the listed set.
SELECT ca_zip, ca_county, sum(ws_sales_price)
FROM web_sales, customer, customer_address, date_dim, item
WHERE ws_bill_customer_sk = c_customer_sk
  AND c_current_addr_sk = ca_address_sk
  AND ws_item_sk = i_item_sk
  AND ( substr(ca_zip,1,5) IN ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792')
        OR
        i_item_id IN (SELECT i_item_id
                      FROM item
                      WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)
                     )
      )
  AND ws_sold_date_sk = d_date_sk
  AND d_qoy = 1
  AND d_year = 1998
GROUP BY ca_zip, ca_county
ORDER BY ca_zip, ca_county
LIMIT 100;
-- end query 1 in stream 0 using template query45.tpl
================================================ FILE: sample-queries-tpcds/query46.sql ================================================
-- start query 1 in stream 0 using template query46.tpl and seed 803547492
-- Coupon amount and net profit per ticket for the listed store cities,
-- d_dow in (6,0) and years 2000 through 2000+2, keeping customers whose
-- current ca_city differs from bought_city.
SELECT c_last_name
      ,c_first_name
      ,ca_city
      ,bought_city
      ,ss_ticket_number
      ,amt,profit
FROM (SELECT ss_ticket_number
            ,ss_customer_sk
            ,ca_city bought_city
            ,sum(ss_coupon_amt) amt
            ,sum(ss_net_profit) profit
      FROM store_sales,date_dim,store,household_demographics,customer_address
      WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
        AND store_sales.ss_store_sk = store.s_store_sk
        AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
        AND store_sales.ss_addr_sk = customer_address.ca_address_sk
        AND (household_demographics.hd_dep_count = 0 OR
             household_demographics.hd_vehicle_count= 1)
        AND date_dim.d_dow IN (6,0)
        AND date_dim.d_year IN (2000,2000+1,2000+2)
        AND store.s_city IN ('Five Forks','Oakland','Fairview','Winchester','Farmington')
      GROUP BY ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr
WHERE ss_customer_sk = c_customer_sk
  AND customer.c_current_addr_sk = current_addr.ca_address_sk
  AND current_addr.ca_city <> bought_city
ORDER BY c_last_name
        ,c_first_name
        ,ca_city
        ,bought_city
        ,ss_ticket_number
LIMIT 100;
-- end query 1 in stream 0 using template query46.tpl
================================================ FILE: sample-queries-tpcds/query47.sql ================================================
-- start query 1 in stream 0 using template query47.tpl and seed 2031708268
-- Monthly store sales per category/brand/store/company for 1999 whose relative
-- deviation from the yearly average exceeds 0.1, together with the sums of the
-- rows at the adjacent ranks (psum, nsum).
WITH v1 AS (
  SELECT i_category, i_brand,
         s_store_name, s_company_name,
         d_year, d_moy,
         sum(ss_sales_price) sum_sales,
         avg(sum(ss_sales_price)) OVER
           (PARTITION BY i_category, i_brand,
                         s_store_name, s_company_name, d_year)
           avg_monthly_sales,
         rank() OVER
           (PARTITION BY i_category, i_brand,
                         s_store_name, s_company_name
            ORDER BY d_year, d_moy) rn
  FROM item, store_sales, date_dim, store
  WHERE ss_item_sk = i_item_sk
    AND ss_sold_date_sk = d_date_sk
    AND ss_store_sk = s_store_sk
    AND (
          d_year = 1999 OR
          ( d_year = 1999-1 AND d_moy =12) OR
          ( d_year = 1999+1 AND d_moy =1)
        )
  GROUP BY i_category, i_brand,
           s_store_name, s_company_name,
           d_year, d_moy),
v2 AS (
  SELECT v1.s_store_name
        ,v1.d_year, v1.d_moy
        ,v1.avg_monthly_sales
        ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum
  FROM v1, v1 v1_lag, v1 v1_lead
  WHERE v1.i_category = v1_lag.i_category
    AND v1.i_category = v1_lead.i_category
    AND v1.i_brand = v1_lag.i_brand
    AND v1.i_brand = v1_lead.i_brand
    AND v1.s_store_name = v1_lag.s_store_name
    AND v1.s_store_name = v1_lead.s_store_name
    AND v1.s_company_name = v1_lag.s_company_name
    AND v1.s_company_name = v1_lead.s_company_name
    AND v1.rn = v1_lag.rn + 1
    AND v1.rn = v1_lead.rn - 1)
SELECT *
FROM v2
WHERE d_year = 1999
  AND avg_monthly_sales > 0
  AND CASE WHEN avg_monthly_sales > 0
           THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales
           ELSE NULL END > 0.1
ORDER BY sum_sales - avg_monthly_sales, sum_sales
LIMIT 100;
-- end query 1 in stream 0 using template query47.tpl
================================================ FILE: sample-queries-tpcds/query48.sql ================================================
-- start query 1 in stream 0 using template query48.tpl and seed 622697896
-- Total ss_quantity for 1998 store sales matching one of three
-- marital-status/education/price bands and one of three state/net-profit bands.
SELECT sum(ss_quantity)
FROM store_sales, store, customer_demographics, customer_address, date_dim
WHERE s_store_sk = ss_store_sk
  AND ss_sold_date_sk = d_date_sk AND d_year = 1998
  AND
  (
    (
      cd_demo_sk = ss_cdemo_sk
      AND cd_marital_status = 'M'
      AND cd_education_status = 'Unknown'
      AND ss_sales_price BETWEEN 100.00 AND 150.00
    )
    OR
    (
      cd_demo_sk = ss_cdemo_sk
      AND cd_marital_status = 'W'
      AND cd_education_status = 'College'
      AND ss_sales_price BETWEEN 50.00 AND 100.00
    )
    OR
    (
      cd_demo_sk = ss_cdemo_sk
      AND cd_marital_status = 'D'
      AND cd_education_status = 'Primary'
      AND ss_sales_price BETWEEN 150.00 AND 200.00
    )
  )
  AND
  (
    (
      ss_addr_sk = ca_address_sk
      AND ca_country = 'United States'
      AND ca_state IN ('MI', 'GA', 'NH')
      AND ss_net_profit BETWEEN 0 AND 2000
    )
    OR
    (ss_addr_sk = ca_address_sk
      AND ca_country = 'United States'
      AND ca_state IN ('TX', 'KY', 'SD')
      AND ss_net_profit BETWEEN 150 AND 3000
    )
    OR
    (ss_addr_sk = ca_address_sk
      AND ca_country = 'United States'
      AND ca_state IN ('NY', 'OH', 'FL')
      AND ss_net_profit BETWEEN 50 AND 25000
    )
  )
;
-- end query 1 in stream 0 using template query48.tpl
================================================ FILE: sample-queries-tpcds/query49.sql ================================================
-- start query 1 in stream 0 using template query49.tpl and seed 1819994127
-- For each channel (web, catalog, store): items ranked by return-quantity
-- ratio and by return-amount ratio for month 12 of 2000, keeping rows whose
-- rank is <= 10 on either ranking.
SELECT channel, item, return_ratio, return_rank, currency_rank
FROM
 (SELECT
    'web' AS channel
   ,web.item AS item
   ,web.return_ratio AS return_ratio
   ,web.return_rank AS return_rank
   ,web.currency_rank AS currency_rank
  FROM (
    SELECT
      item
     ,return_ratio
     ,currency_ratio
     ,rank() OVER (ORDER BY return_ratio) AS return_rank
     ,rank() OVER (ORDER BY currency_ratio) AS currency_rank
    FROM
    ( SELECT ws.ws_item_sk AS item
            ,(cast(sum(coalesce(wr.wr_return_quantity,0)) AS decimal(15,4))/
              cast(sum(coalesce(ws.ws_quantity,0)) AS decimal(15,4) )) AS return_ratio
            ,(cast(sum(coalesce(wr.wr_return_amt,0)) AS decimal(15,4))/
              cast(sum(coalesce(ws.ws_net_paid,0)) AS decimal(15,4) )) AS currency_ratio
      FROM
        web_sales ws LEFT OUTER JOIN web_returns wr
          ON (ws.ws_order_number = wr.wr_order_number AND
              ws.ws_item_sk = wr.wr_item_sk)
       ,date_dim
      WHERE wr.wr_return_amt > 10000
        AND ws.ws_net_profit > 1
        AND ws.ws_net_paid > 0
        AND ws.ws_quantity > 0
        AND ws_sold_date_sk = d_date_sk
        AND d_year = 2000
        AND d_moy = 12
      GROUP BY ws.ws_item_sk
    ) in_web
  ) web
  WHERE
  (
    web.return_rank <= 10
    OR
    web.currency_rank <= 10
  )
  UNION
  SELECT
    'catalog' AS channel
   ,catalog.item AS item
   ,catalog.return_ratio AS return_ratio
   ,catalog.return_rank AS return_rank
   ,catalog.currency_rank AS currency_rank
  FROM (
    SELECT
      item
     ,return_ratio
     ,currency_ratio
     ,rank() OVER (ORDER BY return_ratio) AS return_rank
     ,rank() OVER (ORDER BY currency_ratio) AS currency_rank
    FROM
    ( SELECT cs.cs_item_sk AS item
            ,(cast(sum(coalesce(cr.cr_return_quantity,0)) AS decimal(15,4))/
              cast(sum(coalesce(cs.cs_quantity,0)) AS decimal(15,4) )) AS return_ratio
            ,(cast(sum(coalesce(cr.cr_return_amount,0)) AS decimal(15,4))/
              cast(sum(coalesce(cs.cs_net_paid,0)) AS decimal(15,4) )) AS currency_ratio
      FROM
        catalog_sales cs LEFT OUTER JOIN catalog_returns cr
          ON (cs.cs_order_number = cr.cr_order_number AND
              cs.cs_item_sk = cr.cr_item_sk)
       ,date_dim
      WHERE cr.cr_return_amount > 10000
        AND cs.cs_net_profit > 1
        AND cs.cs_net_paid > 0
        AND cs.cs_quantity > 0
        AND cs_sold_date_sk = d_date_sk
        AND d_year = 2000
        AND d_moy = 12
      GROUP BY cs.cs_item_sk
    ) in_cat
  ) catalog
  WHERE
  (
    catalog.return_rank <= 10
    OR
    catalog.currency_rank <=10
  )
  UNION
  SELECT
    'store' AS channel
   ,store.item AS item
   ,store.return_ratio AS return_ratio
   ,store.return_rank AS return_rank
   ,store.currency_rank AS currency_rank
  FROM (
    SELECT
      item
     ,return_ratio
     ,currency_ratio
     ,rank() OVER (ORDER BY return_ratio) AS return_rank
     ,rank() OVER (ORDER BY currency_ratio) AS currency_rank
    FROM
    ( SELECT sts.ss_item_sk AS item
            ,(cast(sum(coalesce(sr.sr_return_quantity,0)) AS decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) AS decimal(15,4) )) AS return_ratio
            ,(cast(sum(coalesce(sr.sr_return_amt,0)) AS decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) AS decimal(15,4) )) AS currency_ratio
      FROM
        store_sales sts LEFT OUTER JOIN store_returns sr
          ON (sts.ss_ticket_number = sr.sr_ticket_number AND sts.ss_item_sk = sr.sr_item_sk)
       ,date_dim
      WHERE sr.sr_return_amt > 10000
        AND sts.ss_net_profit > 1
        AND sts.ss_net_paid > 0
        AND sts.ss_quantity > 0
        AND ss_sold_date_sk = d_date_sk
        AND d_year = 2000
        AND d_moy = 12
      GROUP BY sts.ss_item_sk
    ) in_store
  ) store
  WHERE (
    store.return_rank <= 10
    OR
    store.currency_rank <= 10
  )
 ) y
ORDER BY 1,4,5,2
LIMIT 100;
-- end query 1 in stream 0 using template query49.tpl
================================================ FILE: sample-queries-tpcds/query5.sql ================================================ -- start query 1 in stream 0 using template query5.tpl and seed 1819994127 with ssr as (select s_store_id, sum(sales_price) as sales, sum(profit) as profit, sum(return_amt) as returns, sum(net_loss) as profit_loss from ( select ss_store_sk as store_sk, ss_sold_date_sk as date_sk, ss_ext_sales_price as sales_price, ss_net_profit as profit, cast(0 as decimal(7,2)) as return_amt, cast(0 as decimal(7,2)) as net_loss from store_sales union all select sr_store_sk as store_sk, sr_returned_date_sk as date_sk, cast(0 as decimal(7,2)) as sales_price, cast(0 as decimal(7,2)) as profit, sr_return_amt as return_amt, sr_net_loss as net_loss from store_returns ) salesreturns, date_dim, store where date_sk = d_date_sk and d_date between cast('2000-08-19' as date) and (cast('2000-08-19' as date) + 14 days) and store_sk = s_store_sk group by s_store_id) , csr as (select cp_catalog_page_id, sum(sales_price) as sales, sum(profit) as profit, sum(return_amt) as returns, sum(net_loss) as profit_loss from ( select cs_catalog_page_sk as page_sk, cs_sold_date_sk as date_sk, cs_ext_sales_price as sales_price, cs_net_profit as profit, cast(0 as decimal(7,2)) as return_amt, cast(0 as decimal(7,2)) as net_loss from catalog_sales union all select cr_catalog_page_sk as page_sk, cr_returned_date_sk as date_sk, cast(0 as decimal(7,2)) as sales_price, cast(0 as decimal(7,2)) as profit, cr_return_amount as return_amt, cr_net_loss as net_loss from catalog_returns ) salesreturns, date_dim, catalog_page where date_sk = d_date_sk and d_date between cast('2000-08-19' as date) and (cast('2000-08-19' as date) + 14 days) and page_sk = cp_catalog_page_sk group by cp_catalog_page_id) , wsr as (select web_site_id, sum(sales_price) as sales, sum(profit) as profit, sum(return_amt) as returns, sum(net_loss) as profit_loss from ( select ws_web_site_sk as wsr_web_site_sk, 
ws_sold_date_sk as date_sk, ws_ext_sales_price as sales_price, ws_net_profit as profit, cast(0 as decimal(7,2)) as return_amt, cast(0 as decimal(7,2)) as net_loss from web_sales union all select ws_web_site_sk as wsr_web_site_sk, wr_returned_date_sk as date_sk, cast(0 as decimal(7,2)) as sales_price, cast(0 as decimal(7,2)) as profit, wr_return_amt as return_amt, wr_net_loss as net_loss from web_returns left outer join web_sales on ( wr_item_sk = ws_item_sk and wr_order_number = ws_order_number) ) salesreturns, date_dim, web_site where date_sk = d_date_sk and d_date between cast('2000-08-19' as date) and (cast('2000-08-19' as date) + 14 days) and wsr_web_site_sk = web_site_sk group by web_site_id) select channel , id , sum(sales) as sales , sum(returns) as returns , sum(profit) as profit from (select 'store channel' as channel , 'store' || s_store_id as id , sales , returns , (profit - profit_loss) as profit from ssr union all select 'catalog channel' as channel , 'catalog_page' || cp_catalog_page_id as id , sales , returns , (profit - profit_loss) as profit from csr union all select 'web channel' as channel , 'web_site' || web_site_id as id , sales , returns , (profit - profit_loss) as profit from wsr ) x group by rollup (channel, id) order by channel ,id limit 100; -- end query 1 in stream 0 using template query5.tpl ================================================ FILE: sample-queries-tpcds/query50.sql ================================================ -- start query 1 in stream 0 using template query50.tpl and seed 1819994127 select s_store_name ,s_company_id ,s_street_number ,s_street_name ,s_street_type ,s_suite_number ,s_city ,s_county ,s_state ,s_zip ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and 
(sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` from store_sales ,store_returns ,store ,date_dim d1 ,date_dim d2 where d2.d_year = 1998 and d2.d_moy = 9 and ss_ticket_number = sr_ticket_number and ss_item_sk = sr_item_sk and ss_sold_date_sk = d1.d_date_sk and sr_returned_date_sk = d2.d_date_sk and ss_customer_sk = sr_customer_sk and ss_store_sk = s_store_sk group by s_store_name ,s_company_id ,s_street_number ,s_street_name ,s_street_type ,s_suite_number ,s_city ,s_county ,s_state ,s_zip order by s_store_name ,s_company_id ,s_street_number ,s_street_name ,s_street_type ,s_suite_number ,s_city ,s_county ,s_state ,s_zip limit 100; -- end query 1 in stream 0 using template query50.tpl ================================================ FILE: sample-queries-tpcds/query51.sql ================================================ -- start query 1 in stream 0 using template query51.tpl and seed 1819994127 WITH web_v1 as ( select ws_item_sk item_sk, d_date, sum(sum(ws_sales_price)) over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales from web_sales ,date_dim where ws_sold_date_sk=d_date_sk and d_month_seq between 1214 and 1214+11 and ws_item_sk is not NULL group by ws_item_sk, d_date), store_v1 as ( select ss_item_sk item_sk, d_date, sum(sum(ss_sales_price)) over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales from store_sales ,date_dim where ss_sold_date_sk=d_date_sk and d_month_seq between 1214 and 1214+11 and ss_item_sk is not NULL group by ss_item_sk, d_date) select * from (select item_sk ,d_date ,web_sales ,store_sales ,max(web_sales) over (partition by item_sk order by d_date rows between unbounded 
preceding and current row) web_cumulative ,max(store_sales) over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk ,case when web.d_date is not null then web.d_date else store.d_date end d_date ,web.cume_sales web_sales ,store.cume_sales store_sales from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk and web.d_date = store.d_date) )x )y where web_cumulative > store_cumulative order by item_sk ,d_date limit 100; -- end query 1 in stream 0 using template query51.tpl ================================================ FILE: sample-queries-tpcds/query52.sql ================================================ -- start query 1 in stream 0 using template query52.tpl and seed 1819994127 select dt.d_year ,item.i_brand_id brand_id ,item.i_brand brand ,sum(ss_ext_sales_price) ext_price from date_dim dt ,store_sales ,item where dt.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and item.i_manager_id = 1 and dt.d_moy=12 and dt.d_year=2000 group by dt.d_year ,item.i_brand ,item.i_brand_id order by dt.d_year ,ext_price desc ,brand_id limit 100 ; -- end query 1 in stream 0 using template query52.tpl ================================================ FILE: sample-queries-tpcds/query53.sql ================================================ -- start query 1 in stream 0 using template query53.tpl and seed 1819994127 select * from (select i_manufact_id, sum(ss_sales_price) sum_sales, avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales from item, store_sales, date_dim, store where ss_item_sk = i_item_sk and ss_sold_date_sk = d_date_sk and ss_store_sk = s_store_sk and d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) and ((i_category in ('Books','Children','Electronics') and i_class in 
('personal','portable','reference','self-help') and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', 'exportiunivamalg #9','scholaramalgamalg #9')) or(i_category in ('Women','Music','Men') and i_class in ('accessories','classical','fragrances','pants') and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', 'importoamalg #1'))) group by i_manufact_id, d_qoy ) tmp1 where case when avg_quarterly_sales > 0 then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales else null end > 0.1 order by avg_quarterly_sales, sum_sales, i_manufact_id limit 100; -- end query 1 in stream 0 using template query53.tpl ================================================ FILE: sample-queries-tpcds/query54.sql ================================================ -- start query 1 in stream 0 using template query54.tpl and seed 1930872976 with my_customers as ( select distinct c_customer_sk , c_current_addr_sk from ( select cs_sold_date_sk sold_date_sk, cs_bill_customer_sk customer_sk, cs_item_sk item_sk from catalog_sales union all select ws_sold_date_sk sold_date_sk, ws_bill_customer_sk customer_sk, ws_item_sk item_sk from web_sales ) cs_or_ws_sales, item, date_dim, customer where sold_date_sk = d_date_sk and item_sk = i_item_sk and i_category = 'Books' and i_class = 'business' and c_customer_sk = cs_or_ws_sales.customer_sk and d_moy = 2 and d_year = 2000 ) , my_revenue as ( select c_customer_sk, sum(ss_ext_sales_price) as revenue from my_customers, store_sales, customer_address, store, date_dim where c_current_addr_sk = ca_address_sk and ca_county = s_county and ca_state = s_state and ss_sold_date_sk = d_date_sk and c_customer_sk = ss_customer_sk and d_month_seq between (select distinct d_month_seq+1 from date_dim where d_year = 2000 and d_moy = 2) and (select distinct d_month_seq+3 from date_dim where d_year = 2000 and d_moy = 2) group by c_customer_sk ) , segments as (select cast((revenue/50) as int) as segment from my_revenue ) select segment, count(*) as 
num_customers, segment*50 as segment_base from segments group by segment order by segment, num_customers limit 100; -- end query 1 in stream 0 using template query54.tpl ================================================ FILE: sample-queries-tpcds/query55.sql ================================================ -- start query 1 in stream 0 using template query55.tpl and seed 2031708268 select i_brand_id brand_id, i_brand brand, sum(ss_ext_sales_price) ext_price from date_dim, store_sales, item where d_date_sk = ss_sold_date_sk and ss_item_sk = i_item_sk and i_manager_id=13 and d_moy=11 and d_year=1999 group by i_brand, i_brand_id order by ext_price desc, i_brand_id limit 100 ; -- end query 1 in stream 0 using template query55.tpl ================================================ FILE: sample-queries-tpcds/query56.sql ================================================ -- start query 1 in stream 0 using template query56.tpl and seed 1951559352 with ss as ( select i_item_id,sum(ss_ext_sales_price) total_sales from store_sales, date_dim, customer_address, item where i_item_id in (select i_item_id from item where i_color in ('chiffon','smoke','lace')) and ss_item_sk = i_item_sk and ss_sold_date_sk = d_date_sk and d_year = 2001 and d_moy = 5 and ss_addr_sk = ca_address_sk and ca_gmt_offset = -6 group by i_item_id), cs as ( select i_item_id,sum(cs_ext_sales_price) total_sales from catalog_sales, date_dim, customer_address, item where i_item_id in (select i_item_id from item where i_color in ('chiffon','smoke','lace')) and cs_item_sk = i_item_sk and cs_sold_date_sk = d_date_sk and d_year = 2001 and d_moy = 5 and cs_bill_addr_sk = ca_address_sk and ca_gmt_offset = -6 group by i_item_id), ws as ( select i_item_id,sum(ws_ext_sales_price) total_sales from web_sales, date_dim, customer_address, item where i_item_id in (select i_item_id from item where i_color in ('chiffon','smoke','lace')) and ws_item_sk = i_item_sk and ws_sold_date_sk = d_date_sk and d_year = 2001 and d_moy = 5 and 
ws_bill_addr_sk = ca_address_sk and ca_gmt_offset = -6 group by i_item_id) select i_item_id ,sum(total_sales) total_sales from (select * from ss union all select * from cs union all select * from ws) tmp1 group by i_item_id order by total_sales, i_item_id limit 100; -- end query 1 in stream 0 using template query56.tpl ================================================ FILE: sample-queries-tpcds/query57.sql ================================================ -- start query 1 in stream 0 using template query57.tpl and seed 2031708268 with v1 as( select i_category, i_brand, cc_name, d_year, d_moy, sum(cs_sales_price) sum_sales, avg(sum(cs_sales_price)) over (partition by i_category, i_brand, cc_name, d_year) avg_monthly_sales, rank() over (partition by i_category, i_brand, cc_name order by d_year, d_moy) rn from item, catalog_sales, date_dim, call_center where cs_item_sk = i_item_sk and cs_sold_date_sk = d_date_sk and cc_call_center_sk= cs_call_center_sk and ( d_year = 1999 or ( d_year = 1999-1 and d_moy =12) or ( d_year = 1999+1 and d_moy =1) ) group by i_category, i_brand, cc_name , d_year, d_moy), v2 as( select v1.i_category, v1.i_brand ,v1.d_year, v1.d_moy ,v1.avg_monthly_sales ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum from v1, v1 v1_lag, v1 v1_lead where v1.i_category = v1_lag.i_category and v1.i_category = v1_lead.i_category and v1.i_brand = v1_lag.i_brand and v1.i_brand = v1_lead.i_brand and v1. cc_name = v1_lag. cc_name and v1. cc_name = v1_lead. 
cc_name and v1.rn = v1_lag.rn + 1 and v1.rn = v1_lead.rn - 1) select * from v2 where d_year = 1999 and avg_monthly_sales > 0 and case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 order by sum_sales - avg_monthly_sales, avg_monthly_sales limit 100; -- end query 1 in stream 0 using template query57.tpl ================================================ FILE: sample-queries-tpcds/query58.sql ================================================ -- start query 1 in stream 0 using template query58.tpl and seed 1819994127 with ss_items as (select i_item_id item_id ,sum(ss_ext_sales_price) ss_item_rev from store_sales ,item ,date_dim where ss_item_sk = i_item_sk and d_date in (select d_date from date_dim where d_week_seq = (select d_week_seq from date_dim where d_date = '1998-02-21')) and ss_sold_date_sk = d_date_sk group by i_item_id), cs_items as (select i_item_id item_id ,sum(cs_ext_sales_price) cs_item_rev from catalog_sales ,item ,date_dim where cs_item_sk = i_item_sk and d_date in (select d_date from date_dim where d_week_seq = (select d_week_seq from date_dim where d_date = '1998-02-21')) and cs_sold_date_sk = d_date_sk group by i_item_id), ws_items as (select i_item_id item_id ,sum(ws_ext_sales_price) ws_item_rev from web_sales ,item ,date_dim where ws_item_sk = i_item_sk and d_date in (select d_date from date_dim where d_week_seq =(select d_week_seq from date_dim where d_date = '1998-02-21')) and ws_sold_date_sk = d_date_sk group by i_item_id) select ss_items.item_id ,ss_item_rev ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev ,cs_item_rev ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev ,ws_item_rev ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average from ss_items,cs_items,ws_items where ss_items.item_id=cs_items.item_id and ss_items.item_id=ws_items.item_id and ss_item_rev between 0.9 * cs_item_rev and 
1.1 * cs_item_rev and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev order by item_id ,ss_item_rev limit 100; -- end query 1 in stream 0 using template query58.tpl ================================================ FILE: sample-queries-tpcds/query59.sql ================================================ -- start query 1 in stream 0 using template query59.tpl and seed 1819994127 with wss as (select d_week_seq, ss_store_sk, sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales from store_sales,date_dim where d_date_sk = ss_sold_date_sk group by d_week_seq,ss_store_sk ) select s_store_name1,s_store_id1,d_week_seq1 ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 from (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 ,s_store_id s_store_id1,sun_sales sun_sales1 ,mon_sales mon_sales1,tue_sales tue_sales1 ,wed_sales wed_sales1,thu_sales thu_sales1 ,fri_sales fri_sales1,sat_sales sat_sales1 from wss,store,date_dim d where d.d_week_seq = wss.d_week_seq and ss_store_sk = s_store_sk and d_month_seq between 1205 and 1205 + 11) y, (select s_store_name s_store_name2,wss.d_week_seq 
d_week_seq2 ,s_store_id s_store_id2,sun_sales sun_sales2 ,mon_sales mon_sales2,tue_sales tue_sales2 ,wed_sales wed_sales2,thu_sales thu_sales2 ,fri_sales fri_sales2,sat_sales sat_sales2 from wss,store,date_dim d where d.d_week_seq = wss.d_week_seq and ss_store_sk = s_store_sk and d_month_seq between 1205+ 12 and 1205 + 23) x where s_store_id1=s_store_id2 and d_week_seq1=d_week_seq2-52 order by s_store_name1,s_store_id1,d_week_seq1 limit 100; -- end query 1 in stream 0 using template query59.tpl ================================================ FILE: sample-queries-tpcds/query6.sql ================================================ -- start query 1 in stream 0 using template query6.tpl and seed 1819994127 select a.ca_state state, count(*) cnt from customer_address a ,customer c ,store_sales s ,date_dim d ,item i where a.ca_address_sk = c.c_current_addr_sk and c.c_customer_sk = s.ss_customer_sk and s.ss_sold_date_sk = d.d_date_sk and s.ss_item_sk = i.i_item_sk and d.d_month_seq = (select distinct (d_month_seq) from date_dim where d_year = 2002 and d_moy = 3 ) and i.i_current_price > 1.2 * (select avg(j.i_current_price) from item j where j.i_category = i.i_category) group by a.ca_state having count(*) >= 10 order by cnt, a.ca_state limit 100; -- end query 1 in stream 0 using template query6.tpl ================================================ FILE: sample-queries-tpcds/query60.sql ================================================ -- start query 1 in stream 0 using template query60.tpl and seed 1930872976 with ss as ( select i_item_id,sum(ss_ext_sales_price) total_sales from store_sales, date_dim, customer_address, item where i_item_id in (select i_item_id from item where i_category in ('Children')) and ss_item_sk = i_item_sk and ss_sold_date_sk = d_date_sk and d_year = 1998 and d_moy = 10 and ss_addr_sk = ca_address_sk and ca_gmt_offset = -5 group by i_item_id), cs as ( select i_item_id,sum(cs_ext_sales_price) total_sales from catalog_sales, date_dim, customer_address, 
item where i_item_id in (select i_item_id from item where i_category in ('Children')) and cs_item_sk = i_item_sk and cs_sold_date_sk = d_date_sk and d_year = 1998 and d_moy = 10 and cs_bill_addr_sk = ca_address_sk and ca_gmt_offset = -5 group by i_item_id), ws as ( select i_item_id,sum(ws_ext_sales_price) total_sales from web_sales, date_dim, customer_address, item where i_item_id in (select i_item_id from item where i_category in ('Children')) and ws_item_sk = i_item_sk and ws_sold_date_sk = d_date_sk and d_year = 1998 and d_moy = 10 and ws_bill_addr_sk = ca_address_sk and ca_gmt_offset = -5 group by i_item_id) select i_item_id ,sum(total_sales) total_sales from (select * from ss union all select * from cs union all select * from ws) tmp1 group by i_item_id order by i_item_id ,total_sales limit 100; -- end query 1 in stream 0 using template query60.tpl ================================================ FILE: sample-queries-tpcds/query61.sql ================================================ -- start query 1 in stream 0 using template query61.tpl and seed 1930872976 -- Purpose: returns two sums of ss_ext_sales_price and their ratio times 100 for i_category 'Sports', d_year 2001 / d_moy 12, ca_gmt_offset = -6 and s_gmt_offset = -6: "promotions" additionally joins promotion and requires p_channel_dmail, p_channel_email or p_channel_tv = 'Y', while "total" has no promotion join or filter. select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 from (select sum(ss_ext_sales_price) promotions from store_sales ,store ,promotion ,date_dim ,customer ,customer_address ,item where ss_sold_date_sk = d_date_sk and ss_store_sk = s_store_sk and ss_promo_sk = p_promo_sk and ss_customer_sk= c_customer_sk and ca_address_sk = c_current_addr_sk and ss_item_sk = i_item_sk and ca_gmt_offset = -6 and i_category = 'Sports' and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') and s_gmt_offset = -6 and d_year = 2001 and d_moy = 12) promotional_sales, (select sum(ss_ext_sales_price) total from store_sales ,store ,date_dim ,customer ,customer_address ,item where ss_sold_date_sk = d_date_sk and ss_store_sk = s_store_sk and ss_customer_sk= c_customer_sk and ca_address_sk = c_current_addr_sk and ss_item_sk = i_item_sk and ca_gmt_offset = -6 and i_category = 
'Sports' and s_gmt_offset = -6 and d_year = 2001 and d_moy = 12) all_sales order by promotions, total limit 100; -- end query 1 in stream 0 using template query61.tpl ================================================ FILE: sample-queries-tpcds/query62.sql ================================================ -- start query 1 in stream 0 using template query62.tpl and seed 1819994127 -- Purpose: counts web_sales rows in five buckets of (ws_ship_date_sk - ws_sold_date_sk): <= 30, 31-60, 61-90, 91-120 and > 120, grouped by the first 20 characters of w_warehouse_name, sm_type and web_name, for ship dates whose d_month_seq is in 1215..1215+11; first 100 rows. select substr(w_warehouse_name,1,20) ,sm_type ,web_name ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 60) and (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as `>120 days` from web_sales ,warehouse ,ship_mode ,web_site ,date_dim where d_month_seq between 1215 and 1215 + 11 and ws_ship_date_sk = d_date_sk and ws_warehouse_sk = w_warehouse_sk and ws_ship_mode_sk = sm_ship_mode_sk and ws_web_site_sk = web_site_sk group by substr(w_warehouse_name,1,20) ,sm_type ,web_name order by substr(w_warehouse_name,1,20) ,sm_type ,web_name limit 100; -- end query 1 in stream 0 using template query62.tpl ================================================ FILE: sample-queries-tpcds/query63.sql ================================================ -- start query 1 in stream 0 using template query63.tpl and seed 1819994127 -- Purpose: sums ss_sales_price per i_manager_id and d_moy for two sets of category/class/brand combinations over d_month_seq 1211..1211+11, computes the average of those sums per i_manager_id with a window function, and keeps rows whose sum differs from that average by more than 10 percent of it; first 100 rows. select * from (select i_manager_id ,sum(ss_sales_price) sum_sales ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales from item ,store_sales ,date_dim ,store where ss_item_sk = i_item_sk and ss_sold_date_sk = d_date_sk and ss_store_sk = s_store_sk and d_month_seq in 
(1211,1211+1,1211+2,1211+3,1211+4,1211+5,1211+6,1211+7,1211+8,1211+9,1211+10,1211+11) and (( i_category in ('Books','Children','Electronics') and i_class in ('personal','portable','reference','self-help') and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', 'exportiunivamalg #9','scholaramalgamalg #9')) or( i_category in ('Women','Music','Men') and i_class in ('accessories','classical','fragrances','pants') and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', 'importoamalg #1'))) group by i_manager_id, d_moy) tmp1 where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 order by i_manager_id ,avg_monthly_sales ,sum_sales limit 100; -- end query 1 in stream 0 using template query63.tpl ================================================ FILE: sample-queries-tpcds/query64.sql ================================================ -- start query 1 in stream 0 using template query64.tpl and seed 1220860970 -- Purpose: CTE cs_ui keeps catalog items whose summed cs_ext_list_price exceeds twice the summed refund (cr_refunded_cash + cr_reversed_charge + cr_store_credit). CTE cross_sales aggregates returned store sales (joined to store_returns on item and ticket) of those items, filtered by i_color and i_current_price and by differing sale-time/current marital status, grouped by product, item, store, both addresses and three years. The final select self-joins cross_sales on item, store name and zip to pair syear 1999 with syear 1999 + 1 where the later cnt is <= the earlier cnt. NOTE(review): the two i_current_price ranges (80..80+10 and 80+1..80+15) overlap only partially; presumably this is generated from the benchmark template, confirm before changing. with cs_ui as (select cs_item_sk ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund from catalog_sales ,catalog_returns where cs_item_sk = cr_item_sk and cs_order_number = cr_order_number group by cs_item_sk having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), cross_sales as (select i_product_name product_name ,i_item_sk item_sk ,s_store_name store_name ,s_zip store_zip ,ad1.ca_street_number b_street_number ,ad1.ca_street_name b_street_name ,ad1.ca_city b_city ,ad1.ca_zip b_zip ,ad2.ca_street_number c_street_number ,ad2.ca_street_name c_street_name ,ad2.ca_city c_city ,ad2.ca_zip c_zip ,d1.d_year as syear ,d2.d_year as fsyear ,d3.d_year s2year ,count(*) cnt ,sum(ss_wholesale_cost) s1 ,sum(ss_list_price) s2 ,sum(ss_coupon_amt) s3 FROM store_sales ,store_returns ,cs_ui ,date_dim d1 ,date_dim d2 ,date_dim d3 ,store ,customer ,customer_demographics cd1 ,customer_demographics cd2 ,promotion 
,household_demographics hd1 ,household_demographics hd2 ,customer_address ad1 ,customer_address ad2 ,income_band ib1 ,income_band ib2 ,item WHERE ss_store_sk = s_store_sk AND ss_sold_date_sk = d1.d_date_sk AND ss_customer_sk = c_customer_sk AND ss_cdemo_sk= cd1.cd_demo_sk AND ss_hdemo_sk = hd1.hd_demo_sk AND ss_addr_sk = ad1.ca_address_sk and ss_item_sk = i_item_sk and ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number and ss_item_sk = cs_ui.cs_item_sk and c_current_cdemo_sk = cd2.cd_demo_sk AND c_current_hdemo_sk = hd2.hd_demo_sk AND c_current_addr_sk = ad2.ca_address_sk and c_first_sales_date_sk = d2.d_date_sk and c_first_shipto_date_sk = d3.d_date_sk and ss_promo_sk = p_promo_sk and hd1.hd_income_band_sk = ib1.ib_income_band_sk and hd2.hd_income_band_sk = ib2.ib_income_band_sk and cd1.cd_marital_status <> cd2.cd_marital_status and i_color in ('azure','gainsboro','misty','blush','hot','lemon') and i_current_price between 80 and 80 + 10 and i_current_price between 80 + 1 and 80 + 15 group by i_product_name ,i_item_sk ,s_store_name ,s_zip ,ad1.ca_street_number ,ad1.ca_street_name ,ad1.ca_city ,ad1.ca_zip ,ad2.ca_street_number ,ad2.ca_street_name ,ad2.ca_city ,ad2.ca_zip ,d1.d_year ,d2.d_year ,d3.d_year ) select cs1.product_name ,cs1.store_name ,cs1.store_zip ,cs1.b_street_number ,cs1.b_street_name ,cs1.b_city ,cs1.b_zip ,cs1.c_street_number ,cs1.c_street_name ,cs1.c_city ,cs1.c_zip ,cs1.syear ,cs1.cnt ,cs1.s1 as s11 ,cs1.s2 as s21 ,cs1.s3 as s31 ,cs2.s1 as s12 ,cs2.s2 as s22 ,cs2.s3 as s32 ,cs2.syear ,cs2.cnt from cross_sales cs1,cross_sales cs2 where cs1.item_sk=cs2.item_sk and cs1.syear = 1999 and cs2.syear = 1999 + 1 and cs2.cnt <= cs1.cnt and cs1.store_name = cs2.store_name and cs1.store_zip = cs2.store_zip order by cs1.product_name ,cs1.store_name ,cs2.cnt ,cs1.s1 ,cs2.s1; -- end query 1 in stream 0 using template query64.tpl ================================================ FILE: sample-queries-tpcds/query65.sql 
================================================ -- start query 1 in stream 0 using template query65.tpl and seed 1819994127 -- Purpose: subquery sc sums ss_sales_price per store and item for d_month_seq 1186..1186+11, subquery sb averages that revenue per store, and the outer query lists the store/item pairs whose revenue is at most 0.1 times the store average, with item description, price, wholesale cost and brand; first 100 rows. select s_store_name, i_item_desc, sc.revenue, i_current_price, i_wholesale_cost, i_brand from store, item, (select ss_store_sk, avg(revenue) as ave from (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue from store_sales, date_dim where ss_sold_date_sk = d_date_sk and d_month_seq between 1186 and 1186+11 group by ss_store_sk, ss_item_sk) sa group by ss_store_sk) sb, (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue from store_sales, date_dim where ss_sold_date_sk = d_date_sk and d_month_seq between 1186 and 1186+11 group by ss_store_sk, ss_item_sk) sc where sb.ss_store_sk = sc.ss_store_sk and sc.revenue <= 0.1 * sb.ave and s_store_sk = sc.ss_store_sk and i_item_sk = sc.ss_item_sk order by s_store_name, i_item_desc limit 100; -- end query 1 in stream 0 using template query65.tpl ================================================ FILE: sample-queries-tpcds/query66.sql ================================================ -- start query 1 in stream 0 using template query66.tpl and seed 2042478054 -- Purpose: per warehouse and year, totals one sales column and one net column for each month (d_moy 1..12), plus each month's sales divided by w_warehouse_sq_ft, over the union of web_sales and catalog_sales rows with d_year = 2001, t_time between 9453 and 9453+28800 and sm_carrier in ('MSC','GERMA'); first 100 rows by warehouse name. select w_warehouse_name ,w_warehouse_sq_ft ,w_city ,w_county ,w_state ,w_country ,ship_carriers ,year ,sum(jan_sales) as jan_sales ,sum(feb_sales) as feb_sales ,sum(mar_sales) as mar_sales ,sum(apr_sales) as apr_sales ,sum(may_sales) as may_sales ,sum(jun_sales) as jun_sales ,sum(jul_sales) as jul_sales ,sum(aug_sales) as aug_sales ,sum(sep_sales) as sep_sales ,sum(oct_sales) as oct_sales ,sum(nov_sales) as nov_sales ,sum(dec_sales) as dec_sales ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot ,sum(jul_sales/w_warehouse_sq_ft) as 
jul_sales_per_sq_foot ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot ,sum(jan_net) as jan_net ,sum(feb_net) as feb_net ,sum(mar_net) as mar_net ,sum(apr_net) as apr_net ,sum(may_net) as may_net ,sum(jun_net) as jun_net ,sum(jul_net) as jul_net ,sum(aug_net) as aug_net ,sum(sep_net) as sep_net ,sum(oct_net) as oct_net ,sum(nov_net) as nov_net ,sum(dec_net) as dec_net from ( select w_warehouse_name ,w_warehouse_sq_ft ,w_city ,w_county ,w_state ,w_country ,'MSC' || ',' || 'GERMA' as ship_carriers ,d_year as year ,sum(case when d_moy = 1 then ws_sales_price* ws_quantity else 0 end) as jan_sales ,sum(case when d_moy = 2 then ws_sales_price* ws_quantity else 0 end) as feb_sales ,sum(case when d_moy = 3 then ws_sales_price* ws_quantity else 0 end) as mar_sales ,sum(case when d_moy = 4 then ws_sales_price* ws_quantity else 0 end) as apr_sales ,sum(case when d_moy = 5 then ws_sales_price* ws_quantity else 0 end) as may_sales ,sum(case when d_moy = 6 then ws_sales_price* ws_quantity else 0 end) as jun_sales ,sum(case when d_moy = 7 then ws_sales_price* ws_quantity else 0 end) as jul_sales ,sum(case when d_moy = 8 then ws_sales_price* ws_quantity else 0 end) as aug_sales ,sum(case when d_moy = 9 then ws_sales_price* ws_quantity else 0 end) as sep_sales ,sum(case when d_moy = 10 then ws_sales_price* ws_quantity else 0 end) as oct_sales ,sum(case when d_moy = 11 then ws_sales_price* ws_quantity else 0 end) as nov_sales ,sum(case when d_moy = 12 then ws_sales_price* ws_quantity else 0 end) as dec_sales ,sum(case when d_moy = 1 then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as jan_net ,sum(case when d_moy = 2 then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as feb_net ,sum(case when d_moy = 3 then 
ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as mar_net ,sum(case when d_moy = 4 then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as apr_net ,sum(case when d_moy = 5 then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as may_net ,sum(case when d_moy = 6 then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as jun_net ,sum(case when d_moy = 7 then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as jul_net ,sum(case when d_moy = 8 then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as aug_net ,sum(case when d_moy = 9 then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as sep_net ,sum(case when d_moy = 10 then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as oct_net ,sum(case when d_moy = 11 then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as nov_net ,sum(case when d_moy = 12 then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as dec_net from web_sales ,warehouse ,date_dim ,time_dim ,ship_mode where ws_warehouse_sk = w_warehouse_sk and ws_sold_date_sk = d_date_sk and ws_sold_time_sk = t_time_sk and ws_ship_mode_sk = sm_ship_mode_sk and d_year = 2001 and t_time between 9453 and 9453+28800 and sm_carrier in ('MSC','GERMA') group by w_warehouse_name ,w_warehouse_sq_ft ,w_city ,w_county ,w_state ,w_country ,d_year union all select w_warehouse_name ,w_warehouse_sq_ft ,w_city ,w_county ,w_state ,w_country ,'MSC' || ',' || 'GERMA' as ship_carriers ,d_year as year ,sum(case when d_moy = 1 then cs_ext_list_price* cs_quantity else 0 end) as jan_sales ,sum(case when d_moy = 2 then cs_ext_list_price* cs_quantity else 0 end) as feb_sales ,sum(case when d_moy = 3 then cs_ext_list_price* cs_quantity else 0 end) as mar_sales ,sum(case when d_moy = 4 then cs_ext_list_price* cs_quantity else 0 end) as apr_sales ,sum(case when d_moy = 5 then cs_ext_list_price* cs_quantity else 0 end) as may_sales ,sum(case when d_moy = 6 then cs_ext_list_price* cs_quantity else 0 end) as jun_sales ,sum(case when d_moy = 7 then cs_ext_list_price* cs_quantity 
else 0 end) as jul_sales ,sum(case when d_moy = 8 then cs_ext_list_price* cs_quantity else 0 end) as aug_sales ,sum(case when d_moy = 9 then cs_ext_list_price* cs_quantity else 0 end) as sep_sales ,sum(case when d_moy = 10 then cs_ext_list_price* cs_quantity else 0 end) as oct_sales ,sum(case when d_moy = 11 then cs_ext_list_price* cs_quantity else 0 end) as nov_sales ,sum(case when d_moy = 12 then cs_ext_list_price* cs_quantity else 0 end) as dec_sales ,sum(case when d_moy = 1 then cs_net_paid_inc_ship * cs_quantity else 0 end) as jan_net ,sum(case when d_moy = 2 then cs_net_paid_inc_ship * cs_quantity else 0 end) as feb_net ,sum(case when d_moy = 3 then cs_net_paid_inc_ship * cs_quantity else 0 end) as mar_net ,sum(case when d_moy = 4 then cs_net_paid_inc_ship * cs_quantity else 0 end) as apr_net ,sum(case when d_moy = 5 then cs_net_paid_inc_ship * cs_quantity else 0 end) as may_net ,sum(case when d_moy = 6 then cs_net_paid_inc_ship * cs_quantity else 0 end) as jun_net ,sum(case when d_moy = 7 then cs_net_paid_inc_ship * cs_quantity else 0 end) as jul_net ,sum(case when d_moy = 8 then cs_net_paid_inc_ship * cs_quantity else 0 end) as aug_net ,sum(case when d_moy = 9 then cs_net_paid_inc_ship * cs_quantity else 0 end) as sep_net ,sum(case when d_moy = 10 then cs_net_paid_inc_ship * cs_quantity else 0 end) as oct_net ,sum(case when d_moy = 11 then cs_net_paid_inc_ship * cs_quantity else 0 end) as nov_net ,sum(case when d_moy = 12 then cs_net_paid_inc_ship * cs_quantity else 0 end) as dec_net from catalog_sales ,warehouse ,date_dim ,time_dim ,ship_mode where cs_warehouse_sk = w_warehouse_sk and cs_sold_date_sk = d_date_sk and cs_sold_time_sk = t_time_sk and cs_ship_mode_sk = sm_ship_mode_sk and d_year = 2001 and t_time between 9453 AND 9453+28800 and sm_carrier in ('MSC','GERMA') group by w_warehouse_name ,w_warehouse_sq_ft ,w_city ,w_county ,w_state ,w_country ,d_year ) x group by w_warehouse_name ,w_warehouse_sq_ft ,w_city ,w_county ,w_state ,w_country 
,ship_carriers ,year order by w_warehouse_name limit 100; -- end query 1 in stream 0 using template query66.tpl ================================================ FILE: sample-queries-tpcds/query67.sql ================================================ -- start query 1 in stream 0 using template query67.tpl and seed 1819994127 -- Purpose: sums coalesce(ss_sales_price*ss_quantity,0) with a rollup over category, class, brand, product name, year, quarter, month and store id for d_month_seq 1185..1185+11, ranks the rollup rows within each i_category by that sum descending, and returns rows with rank <= 100; first 100 rows in the listed order. select * from (select i_category ,i_class ,i_brand ,i_product_name ,d_year ,d_qoy ,d_moy ,s_store_id ,sumsales ,rank() over (partition by i_category order by sumsales desc) rk from (select i_category ,i_class ,i_brand ,i_product_name ,d_year ,d_qoy ,d_moy ,s_store_id ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales from store_sales ,date_dim ,store ,item where ss_sold_date_sk=d_date_sk and ss_item_sk=i_item_sk and ss_store_sk = s_store_sk and d_month_seq between 1185 and 1185+11 group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 where rk <= 100 order by i_category ,i_class ,i_brand ,i_product_name ,d_year ,d_qoy ,d_moy ,s_store_id ,sumsales ,rk limit 100; -- end query 1 in stream 0 using template query67.tpl ================================================ FILE: sample-queries-tpcds/query68.sql ================================================ -- start query 1 in stream 0 using template query68.tpl and seed 803547492 -- Purpose: subquery dn sums ext sales price, ext list price and ext tax per ticket, customer, sale address and its city (bought_city) for store sales on d_dom 1..2 in years 1999..1999+2 at stores in 'Pleasant Hill' or 'Bethel' by households with hd_dep_count = 4 or hd_vehicle_count = 0; the outer query keeps tickets where the customer's current address city differs from bought_city; first 100 rows. select c_last_name ,c_first_name ,ca_city ,bought_city ,ss_ticket_number ,extended_price ,extended_tax ,list_price from (select ss_ticket_number ,ss_customer_sk ,ca_city bought_city ,sum(ss_ext_sales_price) extended_price ,sum(ss_ext_list_price) list_price ,sum(ss_ext_tax) extended_tax from store_sales ,date_dim ,store ,household_demographics ,customer_address where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and store_sales.ss_addr_sk = customer_address.ca_address_sk and date_dim.d_dom between 1 and 2 and (household_demographics.hd_dep_count = 4 or 
household_demographics.hd_vehicle_count= 0) and date_dim.d_year in (1999,1999+1,1999+2) and store.s_city in ('Pleasant Hill','Bethel') group by ss_ticket_number ,ss_customer_sk ,ss_addr_sk,ca_city) dn ,customer ,customer_address current_addr where ss_customer_sk = c_customer_sk and customer.c_current_addr_sk = current_addr.ca_address_sk and current_addr.ca_city <> bought_city order by c_last_name ,ss_ticket_number limit 100; -- end query 1 in stream 0 using template query68.tpl ================================================ FILE: sample-queries-tpcds/query69.sql ================================================ -- start query 1 in stream 0 using template query69.tpl and seed 797269820 -- Purpose: counts customers per gender, marital status, education status, purchase estimate and credit rating whose current address has ca_state in ('MO','MN','AZ') and who have a store_sales row in d_year 2003, d_moy 2..2+2 but neither a web_sales row (by ws_bill_customer_sk) nor a catalog_sales row (by cs_ship_customer_sk) in that same period; first 100 rows. select cd_gender, cd_marital_status, cd_education_status, count(*) cnt1, cd_purchase_estimate, count(*) cnt2, cd_credit_rating, count(*) cnt3 from customer c,customer_address ca,customer_demographics where c.c_current_addr_sk = ca.ca_address_sk and ca_state in ('MO','MN','AZ') and cd_demo_sk = c.c_current_cdemo_sk and exists (select * from store_sales,date_dim where c.c_customer_sk = ss_customer_sk and ss_sold_date_sk = d_date_sk and d_year = 2003 and d_moy between 2 and 2+2) and (not exists (select * from web_sales,date_dim where c.c_customer_sk = ws_bill_customer_sk and ws_sold_date_sk = d_date_sk and d_year = 2003 and d_moy between 2 and 2+2) and not exists (select * from catalog_sales,date_dim where c.c_customer_sk = cs_ship_customer_sk and cs_sold_date_sk = d_date_sk and d_year = 2003 and d_moy between 2 and 2+2)) group by cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating order by cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating limit 100; -- end query 1 in stream 0 using template query69.tpl ================================================ FILE: sample-queries-tpcds/query7.sql ================================================ -- start query 1 in stream 0 using template query7.tpl and seed 1930872976 -- Purpose: averages ss_quantity, ss_list_price, ss_coupon_amt and ss_sales_price per i_item_id for store sales in d_year 2001 to customers with cd_gender 'F', cd_marital_status 'W' and cd_education_status 'College', under promotions with p_channel_email = 'N' or p_channel_event = 'N'; first 100 rows by item id. 
select i_item_id, avg(ss_quantity) agg1, avg(ss_list_price) agg2, avg(ss_coupon_amt) agg3, avg(ss_sales_price) agg4 from store_sales, customer_demographics, date_dim, item, promotion where ss_sold_date_sk = d_date_sk and ss_item_sk = i_item_sk and ss_cdemo_sk = cd_demo_sk and ss_promo_sk = p_promo_sk and cd_gender = 'F' and cd_marital_status = 'W' and cd_education_status = 'College' and (p_channel_email = 'N' or p_channel_event = 'N') and d_year = 2001 group by i_item_id order by i_item_id limit 100; -- end query 1 in stream 0 using template query7.tpl ================================================ FILE: sample-queries-tpcds/query70.sql ================================================ -- start query 1 in stream 0 using template query70.tpl and seed 1819994127 -- Purpose: sums ss_net_profit with a rollup over s_state and s_county for d_month_seq 1218..1218+11, limited to the s_state values the inner subquery returns with ranking <= 5; reports the rollup level (lochierarchy) and each row's rank by profit within its level and parent state; first 100 rows. NOTE(review): the inner rank() partitions by s_state while also grouping by s_state, so each partition appears to hold a single row; confirm the intended filter before relying on "ranking <= 5". select sum(ss_net_profit) as total_sum ,s_state ,s_county ,grouping(s_state)+grouping(s_county) as lochierarchy ,rank() over ( partition by grouping(s_state)+grouping(s_county), case when grouping(s_county) = 0 then s_state end order by sum(ss_net_profit) desc) as rank_within_parent from store_sales ,date_dim d1 ,store where d1.d_month_seq between 1218 and 1218+11 and d1.d_date_sk = ss_sold_date_sk and s_store_sk = ss_store_sk and s_state in ( select s_state from (select s_state as s_state, rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking from store_sales, store, date_dim where d_month_seq between 1218 and 1218+11 and d_date_sk = ss_sold_date_sk and s_store_sk = ss_store_sk group by s_state ) tmp1 where ranking <= 5 ) group by rollup(s_state,s_county) order by lochierarchy desc ,case when lochierarchy = 0 then s_state end ,rank_within_parent limit 100; -- end query 1 in stream 0 using template query70.tpl ================================================ FILE: sample-queries-tpcds/query71.sql ================================================ -- start query 1 in stream 0 using template query71.tpl and seed 2031708268 -- Purpose: sums ext sales price per brand id, brand, t_hour and t_minute over the union of web, catalog and store sales dated d_moy = 12, d_year = 2000, for items with i_manager_id = 1 and sale times whose t_meal_time is 'breakfast' or 'dinner'; ordered by the summed price descending, then brand id, with no row limit. select i_brand_id brand_id, i_brand brand,t_hour,t_minute, 
sum(ext_price) ext_price from item, (select ws_ext_sales_price as ext_price, ws_sold_date_sk as sold_date_sk, ws_item_sk as sold_item_sk, ws_sold_time_sk as time_sk from web_sales,date_dim where d_date_sk = ws_sold_date_sk and d_moy=12 and d_year=2000 union all select cs_ext_sales_price as ext_price, cs_sold_date_sk as sold_date_sk, cs_item_sk as sold_item_sk, cs_sold_time_sk as time_sk from catalog_sales,date_dim where d_date_sk = cs_sold_date_sk and d_moy=12 and d_year=2000 union all select ss_ext_sales_price as ext_price, ss_sold_date_sk as sold_date_sk, ss_item_sk as sold_item_sk, ss_sold_time_sk as time_sk from store_sales,date_dim where d_date_sk = ss_sold_date_sk and d_moy=12 and d_year=2000 ) tmp,time_dim where sold_item_sk = i_item_sk and i_manager_id=1 and time_sk = t_time_sk and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') group by i_brand, i_brand_id,t_hour,t_minute order by ext_price desc, i_brand_id ; -- end query 1 in stream 0 using template query71.tpl ================================================ FILE: sample-queries-tpcds/query72.sql ================================================ -- start query 1 in stream 0 using template query72.tpl and seed 2031708268 -- Purpose: per item description, warehouse name and sale week (d1.d_week_seq), counts catalog_sales/inventory row pairs with and without a matching promotion where the inventory date falls in the same d_week_seq as the sale, inv_quantity_on_hand < cs_quantity, the ship date is more than 5 days after the sale date, hd_buy_potential = '1001-5000', d1.d_year = 2000 and cd_marital_status = 'D'; catalog_returns is left-joined but no returns column is selected or filtered; first 100 rows by total count descending. select i_item_desc ,w_warehouse_name ,d1.d_week_seq ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo ,sum(case when p_promo_sk is not null then 1 else 0 end) promo ,count(*) total_cnt from catalog_sales join inventory on (cs_item_sk = inv_item_sk) join warehouse on (w_warehouse_sk=inv_warehouse_sk) join item on (i_item_sk = cs_item_sk) join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) join date_dim d2 on (inv_date_sk = d2.d_date_sk) join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) left outer join promotion on (cs_promo_sk=p_promo_sk) left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) where 
d1.d_week_seq = d2.d_week_seq and inv_quantity_on_hand < cs_quantity and d3.d_date > d1.d_date + INTERVAL(5) DAY and hd_buy_potential = '1001-5000' and d1.d_year = 2000 and cd_marital_status = 'D' group by i_item_desc,w_warehouse_name,d1.d_week_seq order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq limit 100; -- end query 1 in stream 0 using template query72.tpl ================================================ FILE: sample-queries-tpcds/query73.sql ================================================ -- start query 1 in stream 0 using template query73.tpl and seed 1971067816 -- Purpose: subquery dj counts store_sales rows per ticket and customer for sales on d_dom 1..2 in years 2000..2000+2 at stores in the four listed counties, by households with hd_buy_potential '>10000' or '5001-10000', hd_vehicle_count > 0 and hd_dep_count/hd_vehicle_count > 1; the outer query returns the customers of tickets with a count between 1 and 5, ordered by count descending then last name, with no row limit. select c_last_name ,c_first_name ,c_salutation ,c_preferred_cust_flag ,ss_ticket_number ,cnt from (select ss_ticket_number ,ss_customer_sk ,count(*) cnt from store_sales,date_dim,store,household_demographics where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and date_dim.d_dom between 1 and 2 and (household_demographics.hd_buy_potential = '>10000' or household_demographics.hd_buy_potential = '5001-10000') and household_demographics.hd_vehicle_count > 0 and case when household_demographics.hd_vehicle_count > 0 then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 and date_dim.d_year in (2000,2000+1,2000+2) and store.s_county in ('Lea County','Furnas County','Pennington County','Bronx County') group by ss_ticket_number,ss_customer_sk) dj,customer where ss_customer_sk = c_customer_sk and cnt between 1 and 5 order by cnt desc, c_last_name asc; -- end query 1 in stream 0 using template query73.tpl ================================================ FILE: sample-queries-tpcds/query74.sql ================================================ -- start query 1 in stream 0 using template query74.tpl and seed 1556717815 -- Purpose: CTE year_total sums net paid per customer and year for d_year in (1998,1998+1), tagged 's' for store_sales (ss_net_paid) and 'w' for web_sales (ws_net_paid). The final select joins four aliases of it per customer and returns customers with positive 1998 totals in both channels whose web ratio (1998+1 total / 1998 total) is greater than the store ratio; first 100 rows. with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name 
,d_year as year ,sum(ss_net_paid) year_total ,'s' sale_type from customer ,store_sales ,date_dim where c_customer_sk = ss_customer_sk and ss_sold_date_sk = d_date_sk and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name ,d_year union all select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year ,sum(ws_net_paid) year_total ,'w' sale_type from customer ,web_sales ,date_dim where c_customer_sk = ws_bill_customer_sk and ws_sold_date_sk = d_date_sk and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name ,d_year ) select t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_w_firstyear ,year_total t_w_secyear where t_s_secyear.customer_id = t_s_firstyear.customer_id and t_s_firstyear.customer_id = t_w_secyear.customer_id and t_s_firstyear.customer_id = t_w_firstyear.customer_id and t_s_firstyear.sale_type = 's' and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' and t_s_firstyear.year = 1998 and t_s_secyear.year = 1998+1 and t_w_firstyear.year = 1998 and t_w_secyear.year = 1998+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end order by 3,1,2 limit 100; -- end query 1 in stream 0 using template query74.tpl ================================================ FILE: sample-queries-tpcds/query75.sql ================================================ -- start query 1 in stream 0 using template query75.tpl and seed 1819994127 -- Purpose: CTE all_sales sums, per year, brand id, class id, category id and manufacturer id, the quantity and ext sales price net of returns (COALESCE of the left-joined return columns) across catalog, store and web sales for i_category 'Sports', combined with UNION (not UNION ALL). The final select joins d_year 2001 rows to d_year 2001-1 rows on the four ids and keeps those whose sales_cnt ratio (current / previous, as DECIMAL(17,2)) is below 0.9; first 100 rows by count and amount difference. WITH all_sales AS ( SELECT d_year ,i_brand_id ,i_class_id ,i_category_id ,i_manufact_id ,SUM(sales_cnt) AS sales_cnt ,SUM(sales_amt) AS sales_amt FROM (SELECT d_year 
,i_brand_id ,i_class_id ,i_category_id ,i_manufact_id ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk JOIN date_dim ON d_date_sk=cs_sold_date_sk LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number AND cs_item_sk=cr_item_sk) WHERE i_category='Sports' UNION SELECT d_year ,i_brand_id ,i_class_id ,i_category_id ,i_manufact_id ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt FROM store_sales JOIN item ON i_item_sk=ss_item_sk JOIN date_dim ON d_date_sk=ss_sold_date_sk LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number AND ss_item_sk=sr_item_sk) WHERE i_category='Sports' UNION SELECT d_year ,i_brand_id ,i_class_id ,i_category_id ,i_manufact_id ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt FROM web_sales JOIN item ON i_item_sk=ws_item_sk JOIN date_dim ON d_date_sk=ws_sold_date_sk LEFT JOIN web_returns ON (ws_order_number=wr_order_number AND ws_item_sk=wr_item_sk) WHERE i_category='Sports') sales_detail GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) SELECT prev_yr.d_year AS prev_year ,curr_yr.d_year AS year ,curr_yr.i_brand_id ,curr_yr.i_class_id ,curr_yr.i_category_id ,curr_yr.i_manufact_id ,prev_yr.sales_cnt AS prev_yr_cnt ,curr_yr.sales_cnt AS curr_yr_cnt ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff FROM all_sales curr_yr, all_sales prev_yr WHERE curr_yr.i_brand_id=prev_yr.i_brand_id AND curr_yr.i_class_id=prev_yr.i_class_id AND curr_yr.i_category_id=prev_yr.i_category_id AND curr_yr.i_manufact_id=prev_yr.i_manufact_id AND curr_yr.d_year=2001 AND prev_yr.d_year=2001-1 AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 ORDER BY 
sales_cnt_diff,sales_amt_diff limit 100; -- end query 1 in stream 0 using template query75.tpl ================================================ FILE: sample-queries-tpcds/query76.sql ================================================ -- start query 1 in stream 0 using template query76.tpl and seed 2031708268 -- Purpose: counts rows and sums ext sales price by channel, col_name, d_year, d_qoy and i_category over the union of sales rows in which one column is NULL: ss_customer_sk for 'store', ws_ship_addr_sk for 'web' and cs_ship_mode_sk for 'catalog'; first 100 rows. select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( SELECT 'store' as channel, 'ss_customer_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price FROM store_sales, item, date_dim WHERE ss_customer_sk IS NULL AND ss_sold_date_sk=d_date_sk AND ss_item_sk=i_item_sk UNION ALL SELECT 'web' as channel, 'ws_ship_addr_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price FROM web_sales, item, date_dim WHERE ws_ship_addr_sk IS NULL AND ws_sold_date_sk=d_date_sk AND ws_item_sk=i_item_sk UNION ALL SELECT 'catalog' as channel, 'cs_ship_mode_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price FROM catalog_sales, item, date_dim WHERE cs_ship_mode_sk IS NULL AND cs_sold_date_sk=d_date_sk AND cs_item_sk=i_item_sk) foo GROUP BY channel, col_name, d_year, d_qoy, i_category ORDER BY channel, col_name, d_year, d_qoy, i_category limit 100; -- end query 1 in stream 0 using template query76.tpl ================================================ FILE: sample-queries-tpcds/query77.sql ================================================ -- start query 1 in stream 0 using template query77.tpl and seed 1819994127 -- Purpose: six CTEs sum sales/profit (ss, cs, ws) and returns/net loss (sr, cr, wr) for dates from 2000-08-16 through 30 days later, keyed by store, call center or web page; the final select combines them into 'store channel', 'catalog channel' and 'web channel' rows and rolls up sales, returns and profit by channel and id; first 100 rows. NOTE(review): the catalog branch reads "from cs , cr" with no join predicate, unlike the store and web branches which left-join on their key; presumably inherited from the benchmark template, confirm before changing. with ss as (select s_store_sk, sum(ss_ext_sales_price) as sales, sum(ss_net_profit) as profit from store_sales, date_dim, store where ss_sold_date_sk = d_date_sk and d_date between cast('2000-08-16' as date) and (cast('2000-08-16' as date) + 30 days) and ss_store_sk = s_store_sk group by s_store_sk) , sr as (select s_store_sk, sum(sr_return_amt) as returns, sum(sr_net_loss) as profit_loss from store_returns, date_dim, store where sr_returned_date_sk = d_date_sk 
and d_date between cast('2000-08-16' as date) and (cast('2000-08-16' as date) + 30 days) and sr_store_sk = s_store_sk group by s_store_sk), cs as (select cs_call_center_sk, sum(cs_ext_sales_price) as sales, sum(cs_net_profit) as profit from catalog_sales, date_dim where cs_sold_date_sk = d_date_sk and d_date between cast('2000-08-16' as date) and (cast('2000-08-16' as date) + 30 days) group by cs_call_center_sk ), cr as (select cr_call_center_sk, sum(cr_return_amount) as returns, sum(cr_net_loss) as profit_loss from catalog_returns, date_dim where cr_returned_date_sk = d_date_sk and d_date between cast('2000-08-16' as date) and (cast('2000-08-16' as date) + 30 days) group by cr_call_center_sk ), ws as ( select wp_web_page_sk, sum(ws_ext_sales_price) as sales, sum(ws_net_profit) as profit from web_sales, date_dim, web_page where ws_sold_date_sk = d_date_sk and d_date between cast('2000-08-16' as date) and (cast('2000-08-16' as date) + 30 days) and ws_web_page_sk = wp_web_page_sk group by wp_web_page_sk), wr as (select wp_web_page_sk, sum(wr_return_amt) as returns, sum(wr_net_loss) as profit_loss from web_returns, date_dim, web_page where wr_returned_date_sk = d_date_sk and d_date between cast('2000-08-16' as date) and (cast('2000-08-16' as date) + 30 days) and wr_web_page_sk = wp_web_page_sk group by wp_web_page_sk) select channel , id , sum(sales) as sales , sum(returns) as returns , sum(profit) as profit from (select 'store channel' as channel , ss.s_store_sk as id , sales , coalesce(returns, 0) as returns , (profit - coalesce(profit_loss,0)) as profit from ss left join sr on ss.s_store_sk = sr.s_store_sk union all select 'catalog channel' as channel , cs_call_center_sk as id , sales , returns , (profit - profit_loss) as profit from cs , cr union all select 'web channel' as channel , ws.wp_web_page_sk as id , sales , coalesce(returns, 0) returns , (profit - coalesce(profit_loss,0)) as profit from ws left join wr on ws.wp_web_page_sk = wr.wp_web_page_sk ) x group 
-- [segment] end of query77 (ROLLUP over channel, id; ORDER BY channel, id; LIMIT 100) and start of query78.
-- query78: CTEs ws, cs and ss each LEFT JOIN a sales table to its returns table and keep only rows with
--   no matching return (returns key IS NULL), summing quantity, wholesale cost and sales price per
--   (d_year, item, customer). The main SELECT left-joins ws and cs onto ss by year / item / customer and
--   reports store figures next to the combined web + catalog figures; its join and filter clauses
--   continue in the next segment.
by rollup (channel, id) order by channel ,id limit 100; -- end query 1 in stream 0 using template query77.tpl ================================================ FILE: sample-queries-tpcds/query78.sql ================================================ -- start query 1 in stream 0 using template query78.tpl and seed 1819994127 with ws as (select d_year AS ws_sold_year, ws_item_sk, ws_bill_customer_sk ws_customer_sk, sum(ws_quantity) ws_qty, sum(ws_wholesale_cost) ws_wc, sum(ws_sales_price) ws_sp from web_sales left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk join date_dim on ws_sold_date_sk = d_date_sk where wr_order_number is null group by d_year, ws_item_sk, ws_bill_customer_sk ), cs as (select d_year AS cs_sold_year, cs_item_sk, cs_bill_customer_sk cs_customer_sk, sum(cs_quantity) cs_qty, sum(cs_wholesale_cost) cs_wc, sum(cs_sales_price) cs_sp from catalog_sales left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk join date_dim on cs_sold_date_sk = d_date_sk where cr_order_number is null group by d_year, cs_item_sk, cs_bill_customer_sk ), ss as (select d_year AS ss_sold_year, ss_item_sk, ss_customer_sk, sum(ss_quantity) ss_qty, sum(ss_wholesale_cost) ss_wc, sum(ss_sales_price) ss_sp from store_sales left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk join date_dim on ss_sold_date_sk = d_date_sk where sr_ticket_number is null group by d_year, ss_item_sk, ss_customer_sk ) select ss_customer_sk, round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio, ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price from ss left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) left join cs on (cs_sold_year=ss_sold_year and 
-- [segment] end of query78, all of query79, start of query8.
-- query78 tail: keeps rows where the web or catalog quantity is > 0 and ss_sold_year = 2001; LIMIT 100.
-- query79: sums ss_coupon_amt and ss_net_profit per (ss_ticket_number, ss_customer_sk, ss_addr_sk,
--   s_city) for d_dow = 1, d_year in 1998..2000, stores with 200-295 employees and households with
--   hd_dep_count = 0 or hd_vehicle_count > 3; joined to customer for the name columns; LIMIT 100.
-- query8 (begins): sum(ss_net_profit) per s_store_name; the derived table V1 opens with an IN list of
--   5-character zip prefixes that continues over the following segments.
cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk) where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=2001 order by ss_customer_sk, ss_qty desc, ss_wc desc, ss_sp desc, other_chan_qty, other_chan_wholesale_cost, other_chan_sales_price, ratio limit 100; -- end query 1 in stream 0 using template query78.tpl ================================================ FILE: sample-queries-tpcds/query79.sql ================================================ -- start query 1 in stream 0 using template query79.tpl and seed 2031708268 select c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit from (select ss_ticket_number ,ss_customer_sk ,store.s_city ,sum(ss_coupon_amt) amt ,sum(ss_net_profit) profit from store_sales,date_dim,store,household_demographics where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and (household_demographics.hd_dep_count = 0 or household_demographics.hd_vehicle_count > 3) and date_dim.d_dow = 1 and date_dim.d_year in (1998,1998+1,1998+2) and store.s_number_employees between 200 and 295 group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer where ss_customer_sk = c_customer_sk order by c_last_name,c_first_name,substr(s_city,1,30), profit limit 100; -- end query 1 in stream 0 using template query79.tpl ================================================ FILE: sample-queries-tpcds/query8.sql ================================================ -- start query 1 in stream 0 using template query8.tpl and seed 1766988859 select s_store_name ,sum(ss_net_profit) from store_sales ,date_dim ,store, (select ca_zip from ( SELECT substr(ca_zip,1,5) ca_zip FROM customer_address WHERE substr(ca_zip,1,5) IN ( '47602','16704','35863','28577','83910','36201', '58412','48162','28055','41419','80332', '38607','77817','24891','16226','18410', '21231','59345','13918','51089','20317', 
-- [segment] query8, continued: this segment holds only string literals of the
--   substr(ca_zip,1,5) IN ( ... ) list opened in the previous segment; the list closes in the next one.
'17167','54585','67881','78366','47770', '18360','51717','73108','14440','21800', '89338','45859','65501','34948','25973', '73219','25333','17291','10374','18829', '60736','82620','41351','52094','19326', '25214','54207','40936','21814','79077', '25178','75742','77454','30621','89193', '27369','41232','48567','83041','71948', '37119','68341','14073','16891','62878', '49130','19833','24286','27700','40979', '50412','81504','94835','84844','71954', '39503','57649','18434','24987','12350', '86379','27413','44529','98569','16515', '27287','24255','21094','16005','56436', '91110','68293','56455','54558','10298', '83647','32754','27052','51766','19444', '13869','45645','94791','57631','20712', '37788','41807','46507','21727','71836', '81070','50632','88086','63991','20244', '31655','51782','29818','63792','68605', '94898','36430','57025','20601','82080', '33869','22728','35834','29086','92645', '98584','98072','11652','78093','57553', '43830','71144','53565','18700','90209', '71256','38353','54364','28571','96560', '57839','56355','50679','45266','84680', '34306','34972','48530','30106','15371', '92380','84247','92292','68852','13338', '34594','82602','70073','98069','85066', '47289','11686','98862','26217','47529', '63294','51793','35926','24227','14196', '24594','32489','99060','49472','43432', '49211','14312','88137','47369','56877', '20534','81755','15794','12318','21060', '73134','41255','63073','81003','73873', '66057','51184','51195','45676','92696', '70450','90669','98338','25264','38919', '59226','58581','60298','17895','19489', '52301','80846','95464','68770','51634', '19988','18367','18421','11618','67975', '25494','41352','95430','15734','62585', '97173','33773','10425','75675','53535', '17879','41967','12197','67998','79658', '59130','72592','14851','43933','68101', '50636','25717','71286','24660','58058', '72991','95042','15543','33122','69280', '11912','59386','27642','65177','17672', '33467','64592','36335','54010','18767', 
-- [segment] end of query8 and start of query80.
-- query8 tail: closes the zip IN list and INTERSECTs it with zips from customer_address joined to
--   customer (c_preferred_cust_flag='Y') grouped by ca_zip having count(*) > 10; stores are matched on
--   substr(s_zip,1,2) = substr(V1.ca_zip,1,2) for d_qoy = 2 and d_year = 1998; LIMIT 100.
-- query80 (begins): CTE ssr sums store sales, returns and profit per s_store_id; it continues below.
'63193','42361','49254','33113','33159', '36479','59080','11855','81963','31016', '49140','29392','41836','32958','53163', '13844','73146','23952','65148','93498', '14530','46131','58454','13376','13378', '83986','12320','17193','59852','46081', '98533','52389','13086','68843','31013', '13261','60560','13443','45533','83583', '11489','58218','19753','22911','25115', '86709','27156','32669','13123','51933', '39214','41331','66943','14155','69998', '49101','70070','35076','14242','73021', '59494','15782','29752','37914','74686', '83086','34473','15751','81084','49230', '91894','60624','17819','28810','63180', '56224','39459','55233','75752','43639', '55349','86057','62361','50788','31830', '58062','18218','85761','60083','45484', '21204','90229','70041','41162','35390', '16364','39500','68908','26689','52868', '81335','40146','11340','61527','61794', '71997','30415','59004','29450','58117', '69952','33562','83833','27385','61860', '96435','48333','23065','32961','84919', '61997','99132','22815','56600','68730', '48017','95694','32919','88217','27116', '28239','58032','18884','16791','21343', '97462','18569','75660','15475') intersect select ca_zip from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt FROM customer_address, customer WHERE ca_address_sk = c_current_addr_sk and c_preferred_cust_flag='Y' group by ca_zip having count(*) > 10)A1)A2) V1 where ss_store_sk = s_store_sk and ss_sold_date_sk = d_date_sk and d_qoy = 2 and d_year = 1998 and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) group by s_store_name order by s_store_name limit 100; -- end query 1 in stream 0 using template query8.tpl ================================================ FILE: sample-queries-tpcds/query80.sql ================================================ -- start query 1 in stream 0 using template query80.tpl and seed 1819994127 with ssr as (select s_store_id as store_id, sum(ss_ext_sales_price) as sales, sum(coalesce(sr_return_amt, 0)) as returns, sum(ss_net_profit - coalesce(sr_net_loss, 0)) as 
-- [segment] middle of query80: finishes CTE ssr and defines csr (per cp_catalog_page_id) and wsr (per
--   web_site_id). Each CTE LEFT OUTER JOINs sales to returns on item + ticket/order number and restricts
--   to d_date 2002-08-06 .. +30 days, i_current_price > 50 and p_channel_tv = 'N'.
--   The main SELECT then starts a three-way UNION ALL (store and catalog branches are in this segment).
profit from store_sales left outer join store_returns on (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), date_dim, store, item, promotion where ss_sold_date_sk = d_date_sk and d_date between cast('2002-08-06' as date) and (cast('2002-08-06' as date) + 30 days) and ss_store_sk = s_store_sk and ss_item_sk = i_item_sk and i_current_price > 50 and ss_promo_sk = p_promo_sk and p_channel_tv = 'N' group by s_store_id) , csr as (select cp_catalog_page_id as catalog_page_id, sum(cs_ext_sales_price) as sales, sum(coalesce(cr_return_amount, 0)) as returns, sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit from catalog_sales left outer join catalog_returns on (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), date_dim, catalog_page, item, promotion where cs_sold_date_sk = d_date_sk and d_date between cast('2002-08-06' as date) and (cast('2002-08-06' as date) + 30 days) and cs_catalog_page_sk = cp_catalog_page_sk and cs_item_sk = i_item_sk and i_current_price > 50 and cs_promo_sk = p_promo_sk and p_channel_tv = 'N' group by cp_catalog_page_id) , wsr as (select web_site_id, sum(ws_ext_sales_price) as sales, sum(coalesce(wr_return_amt, 0)) as returns, sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit from web_sales left outer join web_returns on (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), date_dim, web_site, item, promotion where ws_sold_date_sk = d_date_sk and d_date between cast('2002-08-06' as date) and (cast('2002-08-06' as date) + 30 days) and ws_web_site_sk = web_site_sk and ws_item_sk = i_item_sk and i_current_price > 50 and ws_promo_sk = p_promo_sk and p_channel_tv = 'N' group by web_site_id) select channel , id , sum(sales) as sales , sum(returns) as returns , sum(profit) as profit from (select 'store channel' as channel , 'store' || store_id as id , sales , returns , profit from ssr union all select 'catalog channel' as channel , 'catalog_page' || catalog_page_id as id , sales , returns , profit from csr 
-- [segment] end of query80, all of query81, start of query82.
-- query80 tail: adds the 'web channel' branch, then GROUP BY ROLLUP (channel, id), ORDER BY, LIMIT 100.
-- query81: CTE customer_total_return sums cr_return_amt_inc_tax per (cr_returning_customer_sk, ca_state)
--   for d_year = 1998; the main query lists customers whose current address has ca_state = 'TX' and whose
--   total exceeds 1.2 x the average total for the same ctr_state (correlated subquery); LIMIT 100.
-- query82 (begins): items with i_current_price between 49 and 49+30 joined to inventory, date_dim and
--   store_sales; the remaining predicates are in the next segment.
union all select 'web channel' as channel , 'web_site' || web_site_id as id , sales , returns , profit from wsr ) x group by rollup (channel, id) order by channel ,id limit 100; -- end query 1 in stream 0 using template query80.tpl ================================================ FILE: sample-queries-tpcds/query81.sql ================================================ -- start query 1 in stream 0 using template query81.tpl and seed 1819994127 with customer_total_return as (select cr_returning_customer_sk as ctr_customer_sk ,ca_state as ctr_state, sum(cr_return_amt_inc_tax) as ctr_total_return from catalog_returns ,date_dim ,customer_address where cr_returned_date_sk = d_date_sk and d_year =1998 and cr_returning_addr_sk = ca_address_sk group by cr_returning_customer_sk ,ca_state ) select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset ,ca_location_type,ctr_total_return from customer_total_return ctr1 ,customer_address ,customer where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 from customer_total_return ctr2 where ctr1.ctr_state = ctr2.ctr_state) and ca_address_sk = c_current_addr_sk and ca_state = 'TX' and ctr1.ctr_customer_sk = c_customer_sk order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset ,ca_location_type,ctr_total_return limit 100; -- end query 1 in stream 0 using template query81.tpl ================================================ FILE: sample-queries-tpcds/query82.sql ================================================ -- start query 1 in stream 0 using template query82.tpl and seed 55585014 select i_item_id ,i_item_desc ,i_current_price from item, inventory, date_dim, store_sales where i_current_price between 49 and 49+30 and inv_item_sk = i_item_sk and d_date_sk=inv_date_sk and d_date 
-- [segment] end of query82 and start of query83.
-- query82 tail: inventory dates 2001-01-28 .. +60 days, i_manufact_id in (80,675,292,17),
--   inv_quantity_on_hand between 100 and 500; grouped by item id / desc / price; LIMIT 100.
-- query83: CTEs sr_items, cr_items and wr_items sum return quantities per i_item_id for dates whose
--   d_week_seq matches that of 2000-06-17, 2000-08-22 or 2000-11-17; the main SELECT joins the three on
--   item_id and derives the *_dev columns and the three-way average; its WHERE continues below.
between cast('2001-01-28' as date) and (cast('2001-01-28' as date) + 60 days) and i_manufact_id in (80,675,292,17) and inv_quantity_on_hand between 100 and 500 and ss_item_sk = i_item_sk group by i_item_id,i_item_desc,i_current_price order by i_item_id limit 100; -- end query 1 in stream 0 using template query82.tpl ================================================ FILE: sample-queries-tpcds/query83.sql ================================================ -- start query 1 in stream 0 using template query83.tpl and seed 1930872976 with sr_items as (select i_item_id item_id, sum(sr_return_quantity) sr_item_qty from store_returns, item, date_dim where sr_item_sk = i_item_sk and d_date in (select d_date from date_dim where d_week_seq in (select d_week_seq from date_dim where d_date in ('2000-06-17','2000-08-22','2000-11-17'))) and sr_returned_date_sk = d_date_sk group by i_item_id), cr_items as (select i_item_id item_id, sum(cr_return_quantity) cr_item_qty from catalog_returns, item, date_dim where cr_item_sk = i_item_sk and d_date in (select d_date from date_dim where d_week_seq in (select d_week_seq from date_dim where d_date in ('2000-06-17','2000-08-22','2000-11-17'))) and cr_returned_date_sk = d_date_sk group by i_item_id), wr_items as (select i_item_id item_id, sum(wr_return_quantity) wr_item_qty from web_returns, item, date_dim where wr_item_sk = i_item_sk and d_date in (select d_date from date_dim where d_week_seq in (select d_week_seq from date_dim where d_date in ('2000-06-17','2000-08-22','2000-11-17'))) and wr_returned_date_sk = d_date_sk group by i_item_id) select sr_items.item_id ,sr_item_qty ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev ,cr_item_qty ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev ,wr_item_qty ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average from sr_items ,cr_items ,wr_items where sr_items.item_id=cr_items.item_id and 
-- [segment] end of query83, all of query84, start of query85.
-- query83 tail: second join predicate (sr_items vs wr_items), ORDER BY item_id, sr_item_qty; LIMIT 100.
-- query84: customer id and a "last, first" name string for customers whose current address has
--   ca_city = 'Hopewell' and whose household income band satisfies ib_lower_bound >= 37855 and
--   ib_upper_bound <= 37855 + 50000; joined to store_returns through sr_cdemo_sk; LIMIT 100.
-- query85 (begins): averages ws_quantity, wr_refunded_cash and wr_fee per r_reason_desc for web sales
--   with matching returns in d_year = 2001, under OR-ed marital / education / price alternatives that
--   continue in the next segment.
sr_items.item_id=wr_items.item_id order by sr_items.item_id ,sr_item_qty limit 100; -- end query 1 in stream 0 using template query83.tpl ================================================ FILE: sample-queries-tpcds/query84.sql ================================================ -- start query 1 in stream 0 using template query84.tpl and seed 1819994127 select c_customer_id as customer_id , coalesce(c_last_name,'') || ', ' || coalesce(c_first_name,'') as customername from customer ,customer_address ,customer_demographics ,household_demographics ,income_band ,store_returns where ca_city = 'Hopewell' and c_current_addr_sk = ca_address_sk and ib_lower_bound >= 37855 and ib_upper_bound <= 37855 + 50000 and ib_income_band_sk = hd_income_band_sk and cd_demo_sk = c_current_cdemo_sk and hd_demo_sk = c_current_hdemo_sk and sr_cdemo_sk = cd_demo_sk order by c_customer_id limit 100; -- end query 1 in stream 0 using template query84.tpl ================================================ FILE: sample-queries-tpcds/query85.sql ================================================ -- start query 1 in stream 0 using template query85.tpl and seed 622697896 select substr(r_reason_desc,1,20) ,avg(ws_quantity) ,avg(wr_refunded_cash) ,avg(wr_fee) from web_sales, web_returns, web_page, customer_demographics cd1, customer_demographics cd2, customer_address, date_dim, reason where ws_web_page_sk = wp_web_page_sk and ws_item_sk = wr_item_sk and ws_order_number = wr_order_number and ws_sold_date_sk = d_date_sk and d_year = 2001 and cd1.cd_demo_sk = wr_refunded_cdemo_sk and cd2.cd_demo_sk = wr_returning_cdemo_sk and ca_address_sk = wr_refunded_addr_sk and r_reason_sk = wr_reason_sk and ( ( cd1.cd_marital_status = 'M' and cd1.cd_marital_status = cd2.cd_marital_status and cd1.cd_education_status = '4 yr Degree' and cd1.cd_education_status = cd2.cd_education_status and ws_sales_price between 100.00 and 150.00 ) or ( cd1.cd_marital_status = 'S' and cd1.cd_marital_status = cd2.cd_marital_status and 
-- [segment] end of query85, all of query86, start of query87.
-- query85 tail: remaining marital / education / price alternatives, then OR-ed ca_state / ws_net_profit
--   alternatives (all with ca_country = 'United States'); grouped by r_reason_desc; LIMIT 100.
-- query86: sum(ws_net_paid) by ROLLUP(i_category, i_class) for d_month_seq 1215 .. 1215+11, with
--   lochierarchy = grouping(i_category)+grouping(i_class) and a rank by that sum within each
--   (lochierarchy, parent category) partition; LIMIT 100.
-- query87 (begins): count(*) over distinct (c_last_name, c_first_name, d_date) tuples; the first
--   (store_sales) branch starts here and continues in the next segment.
cd1.cd_education_status = 'College' and cd1.cd_education_status = cd2.cd_education_status and ws_sales_price between 50.00 and 100.00 ) or ( cd1.cd_marital_status = 'D' and cd1.cd_marital_status = cd2.cd_marital_status and cd1.cd_education_status = 'Secondary' and cd1.cd_education_status = cd2.cd_education_status and ws_sales_price between 150.00 and 200.00 ) ) and ( ( ca_country = 'United States' and ca_state in ('TX', 'VA', 'CA') and ws_net_profit between 100 and 200 ) or ( ca_country = 'United States' and ca_state in ('AR', 'NE', 'MO') and ws_net_profit between 150 and 300 ) or ( ca_country = 'United States' and ca_state in ('IA', 'MS', 'WA') and ws_net_profit between 50 and 250 ) ) group by r_reason_desc order by substr(r_reason_desc,1,20) ,avg(ws_quantity) ,avg(wr_refunded_cash) ,avg(wr_fee) limit 100; -- end query 1 in stream 0 using template query85.tpl ================================================ FILE: sample-queries-tpcds/query86.sql ================================================ -- start query 1 in stream 0 using template query86.tpl and seed 1819994127 select sum(ws_net_paid) as total_sum ,i_category ,i_class ,grouping(i_category)+grouping(i_class) as lochierarchy ,rank() over ( partition by grouping(i_category)+grouping(i_class), case when grouping(i_class) = 0 then i_category end order by sum(ws_net_paid) desc) as rank_within_parent from web_sales ,date_dim d1 ,item where d1.d_month_seq between 1215 and 1215+11 and d1.d_date_sk = ws_sold_date_sk and i_item_sk = ws_item_sk group by rollup(i_category,i_class) order by lochierarchy desc, case when lochierarchy = 0 then i_category end, rank_within_parent limit 100; -- end query 1 in stream 0 using template query86.tpl ================================================ FILE: sample-queries-tpcds/query87.sql ================================================ -- start query 1 in stream 0 using template query87.tpl and seed 1819994127 select count(*) from ((select distinct c_last_name, c_first_name, d_date 
-- [segment] end of query87 and start of query88.
-- query87 tail: distinct store-sales tuples EXCEPT catalog-sales tuples EXCEPT web-sales tuples, each
--   restricted to d_month_seq 1221 .. 1221+11; the outer query counts what remains (alias cool_cust).
-- query88: "select * from" a comma-joined list of single-row COUNT(*) subqueries (s1, s2, ...), one per
--   half-hour slot starting at 8:30, over store_sales for s_store_name = 'ese' with the same three
--   hd_dep_count / hd_vehicle_count alternatives; this segment holds s1 and most of s2.
from store_sales, date_dim, customer where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_customer_sk = customer.c_customer_sk and d_month_seq between 1221 and 1221+11) except (select distinct c_last_name, c_first_name, d_date from catalog_sales, date_dim, customer where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk and d_month_seq between 1221 and 1221+11) except (select distinct c_last_name, c_first_name, d_date from web_sales, date_dim, customer where web_sales.ws_sold_date_sk = date_dim.d_date_sk and web_sales.ws_bill_customer_sk = customer.c_customer_sk and d_month_seq between 1221 and 1221+11) ) cool_cust ; -- end query 1 in stream 0 using template query87.tpl ================================================ FILE: sample-queries-tpcds/query88.sql ================================================ -- start query 1 in stream 0 using template query88.tpl and seed 318176889 select * from (select count(*) h8_30_to_9 from store_sales, household_demographics , time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 8 and time_dim.t_minute >= 30 and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) and store.s_store_name = 'ese') s1, (select count(*) h9_to_9_30 from store_sales, household_demographics , time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 9 and time_dim.t_minute < 30 and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or (household_demographics.hd_dep_count = 4 and 
-- [segment] middle of query88: finishes subquery s2 and holds s3 (h9_30_to_10), s4 (h10_to_10_30),
--   s5 (h10_30_to_11) and the opening of s6 (h11_to_11_30). The subqueries differ only in the
--   t_hour / t_minute predicate and the output column name.
household_demographics.hd_vehicle_count<=4+2) or (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) and store.s_store_name = 'ese') s2, (select count(*) h9_30_to_10 from store_sales, household_demographics , time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 9 and time_dim.t_minute >= 30 and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) and store.s_store_name = 'ese') s3, (select count(*) h10_to_10_30 from store_sales, household_demographics , time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 10 and time_dim.t_minute < 30 and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) and store.s_store_name = 'ese') s4, (select count(*) h10_30_to_11 from store_sales, household_demographics , time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 10 and time_dim.t_minute >= 30 and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) and store.s_store_name = 'ese') s5, (select count(*) h11_to_11_30 from store_sales, 
-- [segment] end of query88 and start of query89.
-- query88 tail: rest of s6 (h11_to_11_30), then s7 (h11_30_to_12) and s8 (h12_to_12_30), which closes
--   the comma-joined list of count subqueries.
-- query89 (begins): only the "select * from(" wrapper is here; the inner query is in the next segment.
household_demographics , time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 11 and time_dim.t_minute < 30 and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) and store.s_store_name = 'ese') s6, (select count(*) h11_30_to_12 from store_sales, household_demographics , time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 11 and time_dim.t_minute >= 30 and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) and store.s_store_name = 'ese') s7, (select count(*) h12_to_12_30 from store_sales, household_demographics , time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 12 and time_dim.t_minute < 30 and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) and store.s_store_name = 'ese') s8 ; -- end query 1 in stream 0 using template query88.tpl ================================================ FILE: sample-queries-tpcds/query89.sql ================================================ -- start query 1 in stream 0 using template query89.tpl and seed 1719819282 select * from( 
-- [segment] body of query89 and start of query9.
-- query89: sum(ss_sales_price) per (i_category, i_class, i_brand, s_store_name, s_company_name, d_moy)
--   for d_year in (2000) and two category/class sets, alongside a window average of that sum over
--   (i_category, i_brand, s_store_name, s_company_name); keeps rows where
--   abs(sum_sales - avg_monthly_sales) / avg_monthly_sales > 0.1 (NULL when the average is 0); LIMIT 100.
-- query9 (begins): five CASE "buckets" over ss_quantity ranges; each returns avg(ss_ext_discount_amt)
--   when the range's row count exceeds a hard-coded threshold, otherwise avg(ss_net_profit).
select i_category, i_class, i_brand, s_store_name, s_company_name, d_moy, sum(ss_sales_price) sum_sales, avg(sum(ss_sales_price)) over (partition by i_category, i_brand, s_store_name, s_company_name) avg_monthly_sales from item, store_sales, date_dim, store where ss_item_sk = i_item_sk and ss_sold_date_sk = d_date_sk and ss_store_sk = s_store_sk and d_year in (2000) and ((i_category in ('Home','Music','Books') and i_class in ('glassware','classical','fiction') ) or (i_category in ('Jewelry','Sports','Women') and i_class in ('semi-precious','baseball','dresses') )) group by i_category, i_class, i_brand, s_store_name, s_company_name, d_moy) tmp1 where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 order by sum_sales - avg_monthly_sales, s_store_name limit 100; -- end query 1 in stream 0 using template query89.tpl ================================================ FILE: sample-queries-tpcds/query9.sql ================================================ -- start query 1 in stream 0 using template query9.tpl and seed 1490436826 select case when (select count(*) from store_sales where ss_quantity between 1 and 20) > 98972190 then (select avg(ss_ext_discount_amt) from store_sales where ss_quantity between 1 and 20) else (select avg(ss_net_profit) from store_sales where ss_quantity between 1 and 20) end bucket1 , case when (select count(*) from store_sales where ss_quantity between 21 and 40) > 160856845 then (select avg(ss_ext_discount_amt) from store_sales where ss_quantity between 21 and 40) else (select avg(ss_net_profit) from store_sales where ss_quantity between 21 and 40) end bucket2, case when (select count(*) from store_sales where ss_quantity between 41 and 60) > 12733327 then (select avg(ss_ext_discount_amt) from store_sales where ss_quantity between 41 and 60) else (select avg(ss_net_profit) from store_sales where ss_quantity between 41 and 60) end bucket3, case when (select count(*) from 
-- [segment] end of query9, all of query90, start of query91.
-- query9 tail: buckets 4 and 5 (ss_quantity 61-80 and 81-100); the outer FROM is
--   "reason where r_reason_sk = 1" - presumably just to yield a single output row; confirm.
-- query90: am_pm_ratio = amc / pmc (both cast to decimal(15,4)), where amc counts web_sales with t_hour
--   between 9 and 9+1 and pmc counts those with t_hour between 16 and 16+1, both for hd_dep_count = 3
--   and wp_char_count between 5000 and 5200. NOTE(review): no guard against pmc = 0 is visible here.
-- query91 (begins): selects call center id / name / manager; the sum(cr_net_loss) column and the rest
--   of the query are in the next segment.
store_sales where ss_quantity between 61 and 80) > 96251173 then (select avg(ss_ext_discount_amt) from store_sales where ss_quantity between 61 and 80) else (select avg(ss_net_profit) from store_sales where ss_quantity between 61 and 80) end bucket4, case when (select count(*) from store_sales where ss_quantity between 81 and 100) > 80049606 then (select avg(ss_ext_discount_amt) from store_sales where ss_quantity between 81 and 100) else (select avg(ss_net_profit) from store_sales where ss_quantity between 81 and 100) end bucket5 from reason where r_reason_sk = 1 ; -- end query 1 in stream 0 using template query9.tpl ================================================ FILE: sample-queries-tpcds/query90.sql ================================================ -- start query 1 in stream 0 using template query90.tpl and seed 2031708268 select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio from ( select count(*) amc from web_sales, household_demographics , time_dim, web_page where ws_sold_time_sk = time_dim.t_time_sk and ws_ship_hdemo_sk = household_demographics.hd_demo_sk and ws_web_page_sk = web_page.wp_web_page_sk and time_dim.t_hour between 9 and 9+1 and household_demographics.hd_dep_count = 3 and web_page.wp_char_count between 5000 and 5200) at, ( select count(*) pmc from web_sales, household_demographics , time_dim, web_page where ws_sold_time_sk = time_dim.t_time_sk and ws_ship_hdemo_sk = household_demographics.hd_demo_sk and ws_web_page_sk = web_page.wp_web_page_sk and time_dim.t_hour between 16 and 16+1 and household_demographics.hd_dep_count = 3 and web_page.wp_char_count between 5000 and 5200) pt order by am_pm_ratio limit 100; -- end query 1 in stream 0 using template query90.tpl ================================================ FILE: sample-queries-tpcds/query91.sql ================================================ -- start query 1 in stream 0 using template query91.tpl and seed 1930872976 select cc_call_center_id Call_Center, cc_name 
-- [segment] end of query91, all of query92, start of query93.
-- query91 tail: sum(cr_net_loss) per call center for returns in d_year = 2000, d_moy = 12, by customers
--   with (marital 'M', education 'Unknown') or (marital 'W', education 'Advanced Degree'),
--   hd_buy_potential like 'Unknown%' and ca_gmt_offset = -7; ordered by the sum descending, no LIMIT.
-- query92: sum(ws_ext_discount_amt) for i_manufact_id = 356 within 2001-03-12 .. +90 days, counting only
--   rows whose discount exceeds 1.3 x the same item's average discount over that window (correlated
--   subquery on ws_item_sk = i_item_sk); LIMIT 100.
-- query93 (begins): sum(act_sales) per ss_customer_sk; the CASE defining act_sales continues below.
Call_Center_Name, cc_manager Manager, sum(cr_net_loss) Returns_Loss from call_center, catalog_returns, date_dim, customer, customer_address, customer_demographics, household_demographics where cr_call_center_sk = cc_call_center_sk and cr_returned_date_sk = d_date_sk and cr_returning_customer_sk= c_customer_sk and cd_demo_sk = c_current_cdemo_sk and hd_demo_sk = c_current_hdemo_sk and ca_address_sk = c_current_addr_sk and d_year = 2000 and d_moy = 12 and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) and hd_buy_potential like 'Unknown%' and ca_gmt_offset = -7 group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status order by sum(cr_net_loss) desc; -- end query 1 in stream 0 using template query91.tpl ================================================ FILE: sample-queries-tpcds/query92.sql ================================================ -- start query 1 in stream 0 using template query92.tpl and seed 2031708268 select sum(ws_ext_discount_amt) as `Excess Discount Amount` from web_sales ,item ,date_dim where i_manufact_id = 356 and i_item_sk = ws_item_sk and d_date between '2001-03-12' and (cast('2001-03-12' as date) + 90 days) and d_date_sk = ws_sold_date_sk and ws_ext_discount_amt > ( SELECT 1.3 * avg(ws_ext_discount_amt) FROM web_sales ,date_dim WHERE ws_item_sk = i_item_sk and d_date between '2001-03-12' and (cast('2001-03-12' as date) + 90 days) and d_date_sk = ws_sold_date_sk ) order by sum(ws_ext_discount_amt) limit 100; -- end query 1 in stream 0 using template query92.tpl ================================================ FILE: sample-queries-tpcds/query93.sql ================================================ -- start query 1 in stream 0 using template query93.tpl and seed 1200409435 select ss_customer_sk ,sum(act_sales) sumsales from (select ss_item_sk ,ss_ticket_number ,ss_customer_sk ,case when sr_return_quantity is not null then 
-- [segment] end of query93, all of query94, start of query95.
-- query93 tail: act_sales is (ss_quantity - sr_return_quantity) * ss_sales_price when a return quantity
--   exists, else ss_quantity * ss_sales_price; rows are restricted to r_reason_desc = 'reason 66'; LIMIT 100.
-- NOTE(review): "sr_reason_sk = r_reason_sk" in WHERE filters on the outer-joined store_returns side, so
--   the LEFT OUTER JOIN appears to keep only sales that have a return - presumably as in the template.
-- query94: distinct order count, total ws_ext_ship_cost and total ws_net_profit for web sales shipped
--   between '1999-4-01' and that date + 60 days to ca_state = 'NE' with web_company_name = 'pri', where
--   another row of the same order has a different warehouse (EXISTS) and no web_returns row exists for
--   the order (NOT EXISTS); LIMIT 100.
-- query95 (begins): CTE ws_wh pairs web_sales rows of the same order that have different warehouses.
(ss_quantity-sr_return_quantity)*ss_sales_price else (ss_quantity*ss_sales_price) end act_sales from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk and sr_ticket_number = ss_ticket_number) ,reason where sr_reason_sk = r_reason_sk and r_reason_desc = 'reason 66') t group by ss_customer_sk order by sumsales, ss_customer_sk limit 100; -- end query 1 in stream 0 using template query93.tpl ================================================ FILE: sample-queries-tpcds/query94.sql ================================================ -- start query 1 in stream 0 using template query94.tpl and seed 2031708268 select count(distinct ws_order_number) as `order count` ,sum(ws_ext_ship_cost) as `total shipping cost` ,sum(ws_net_profit) as `total net profit` from web_sales ws1 ,date_dim ,customer_address ,web_site where d_date between '1999-4-01' and (cast('1999-4-01' as date) + 60 days) and ws1.ws_ship_date_sk = d_date_sk and ws1.ws_ship_addr_sk = ca_address_sk and ca_state = 'NE' and ws1.ws_web_site_sk = web_site_sk and web_company_name = 'pri' and exists (select * from web_sales ws2 where ws1.ws_order_number = ws2.ws_order_number and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) and not exists(select * from web_returns wr1 where ws1.ws_order_number = wr1.wr_order_number) order by count(distinct ws_order_number) limit 100; -- end query 1 in stream 0 using template query94.tpl ================================================ FILE: sample-queries-tpcds/query95.sql ================================================ -- start query 1 in stream 0 using template query95.tpl and seed 2031708268 with ws_wh as (select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 from web_sales ws1,web_sales ws2 where ws1.ws_order_number = ws2.ws_order_number and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) select count(distinct ws_order_number) as `order count` ,sum(ws_ext_ship_cost) as `total shipping cost` ,sum(ws_net_profit) as `total net profit` from web_sales ws1 
-- [segment] end of query95, all of query96, start of query97.
-- query95 tail: same aggregates as its SELECT list above, for ship dates between '2002-4-01' and that
--   date + 60 days, ca_state = 'AL', web_company_name = 'pri', where the order number is in ws_wh and
--   also among web_returns order numbers matched to ws_wh; LIMIT 100.
-- query96: count(*) of store_sales for s_store_name = 'ese', t_hour = 16, t_minute >= 30 and
--   hd_dep_count = 6; LIMIT 100.
-- query97 (begins): CTEs ssci and csci group store / catalog sales to (customer_sk, item_sk) pairs for
--   d_month_seq 1190 .. 1190 + 11; the main SELECT starts with the store_only counter and continues
--   in the next segment.
,date_dim ,customer_address ,web_site where d_date between '2002-4-01' and (cast('2002-4-01' as date) + 60 days) and ws1.ws_ship_date_sk = d_date_sk and ws1.ws_ship_addr_sk = ca_address_sk and ca_state = 'AL' and ws1.ws_web_site_sk = web_site_sk and web_company_name = 'pri' and ws1.ws_order_number in (select ws_order_number from ws_wh) and ws1.ws_order_number in (select wr_order_number from web_returns,ws_wh where wr_order_number = ws_wh.ws_order_number) order by count(distinct ws_order_number) limit 100; -- end query 1 in stream 0 using template query95.tpl ================================================ FILE: sample-queries-tpcds/query96.sql ================================================ -- start query 1 in stream 0 using template query96.tpl and seed 1819994127 select count(*) from store_sales ,household_demographics ,time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 16 and time_dim.t_minute >= 30 and household_demographics.hd_dep_count = 6 and store.s_store_name = 'ese' order by count(*) limit 100; -- end query 1 in stream 0 using template query96.tpl ================================================ FILE: sample-queries-tpcds/query97.sql ================================================ -- start query 1 in stream 0 using template query97.tpl and seed 1819994127 with ssci as ( select ss_customer_sk customer_sk ,ss_item_sk item_sk from store_sales,date_dim where ss_sold_date_sk = d_date_sk and d_month_seq between 1190 and 1190 + 11 group by ss_customer_sk ,ss_item_sk), csci as( select cs_bill_customer_sk customer_sk ,cs_item_sk item_sk from catalog_sales,date_dim where cs_sold_date_sk = d_date_sk and d_month_seq between 1190 and 1190 + 11 group by cs_bill_customer_sk ,cs_item_sk) select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only ,sum(case when ssci.customer_sk is null and csci.customer_sk is 
not null then 1 else 0 end) catalog_only ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk and ssci.item_sk = csci.item_sk) limit 100; -- end query 1 in stream 0 using template query97.tpl ================================================ FILE: sample-queries-tpcds/query98.sql ================================================ -- start query 1 in stream 0 using template query98.tpl and seed 345591136 select i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price ,sum(ss_ext_sales_price) as itemrevenue ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over (partition by i_class) as revenueratio from store_sales ,item ,date_dim where ss_item_sk = i_item_sk and i_category in ('Home', 'Sports', 'Men') and ss_sold_date_sk = d_date_sk and d_date between cast('2002-01-05' as date) and (cast('2002-01-05' as date) + 30 days) group by i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price order by i_category ,i_class ,i_item_id ,i_item_desc ,revenueratio; -- end query 1 in stream 0 using template query98.tpl ================================================ FILE: sample-queries-tpcds/query99.sql ================================================ -- start query 1 in stream 0 using template query99.tpl and seed 1819994127 select substr(w_warehouse_name,1,20) ,sm_type ,cc_name ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 
else 0 end) as `>120 days` from catalog_sales ,warehouse ,ship_mode ,call_center ,date_dim where d_month_seq between 1178 and 1178 + 11 and cs_ship_date_sk = d_date_sk and cs_warehouse_sk = w_warehouse_sk and cs_ship_mode_sk = sm_ship_mode_sk and cs_call_center_sk = cc_call_center_sk group by substr(w_warehouse_name,1,20) ,sm_type ,cc_name order by substr(w_warehouse_name,1,20) ,sm_type ,cc_name limit 100; -- end query 1 in stream 0 using template query99.tpl ================================================ FILE: sample-queries-tpch/README.md ================================================ Sample TPC-H Queries ==================== This directory contains sample TPC-H queries you can run once you have generated your data. Queries are compatible with Apache Hive 13 and up. ================================================ FILE: sample-queries-tpch/testbench-withATS.settings ================================================ set ambari.hive.db.schema.name=hive; set fs.file.impl.disable.cache=true; set fs.hdfs.impl.disable.cache=true; set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join=true; set hive.auto.convert.sortmerge.join=true; set hive.compactor.abortedtxn.threshold=1000; set hive.compactor.check.interval=300; set hive.compactor.delta.num.threshold=10; set hive.compactor.delta.pct.threshold=0.1f; set hive.compactor.initiator.on=false; set hive.compactor.worker.threads=0; set hive.compactor.worker.timeout=86400; set hive.compute.query.using.stats=true; set hive.enforce.bucketing=true; set hive.enforce.sorting=true; set hive.enforce.sortmergebucketmapjoin=true; set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.ATSHook; set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.ATSHook; set hive.exec.pre.hooks=org.apache.hadoop.hive.ql.hooks.ATSHook; set hive.execution.engine=mr; set hive.limit.pushdown.memory.usage=0.04; set hive.map.aggr=true; set hive.mapjoin.bucket.cache.size=10000; set 
hive.mapred.reduce.tasks.speculative.execution=false; set hive.metastore.cache.pinobjtypes=Table,Database,Type,FieldSchema,Order; set hive.metastore.client.socket.timeout=60; set hive.metastore.execute.setugi=true; set hive.metastore.warehouse.dir=/apps/hive/warehouse; set hive.optimize.bucketmapjoin.sortedmerge=false; set hive.optimize.bucketmapjoin=true; set hive.optimize.index.filter=true; set hive.optimize.reducededuplication.min.reducer=4; set hive.optimize.reducededuplication=true; set hive.orc.splits.include.file.footer=false; set hive.security.authorization.enabled=false; set hive.security.metastore.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider; set hive.server2.enable.doAs=false; set hive.server2.tez.default.queues=default; set hive.server2.tez.initialize.default.sessions=false; set hive.server2.tez.sessions.per.default.queue=1; set hive.stats.autogather=true; set hive.tez.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager; set hive.txn.max.open.batch=1000; set hive.txn.timeout=300; set hive.vectorized.execution.enabled=true; set hive.vectorized.groupby.checkinterval=1024; set hive.vectorized.groupby.flush.percent=1; set hive.vectorized.groupby.maxentries=1024; -- These values need to be tuned appropriately to your cluster. These examples are for reference. 
-- set hive.tez.container.size=4096; -- set hive.tez.java.opts=-Xmx3800m; -- set hive.auto.convert.join.noconditionaltask.size=1252698795; ================================================ FILE: sample-queries-tpch/testbench.settings ================================================ set ambari.hive.db.schema.name=hive; set fs.file.impl.disable.cache=true; set fs.hdfs.impl.disable.cache=true; set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join=true; set hive.auto.convert.sortmerge.join=true; set hive.compactor.abortedtxn.threshold=1000; set hive.compactor.check.interval=300; set hive.compactor.delta.num.threshold=10; set hive.compactor.delta.pct.threshold=0.1f; set hive.compactor.initiator.on=false; set hive.compactor.worker.threads=0; set hive.compactor.worker.timeout=86400; set hive.compute.query.using.stats=true; set hive.enforce.bucketing=true; set hive.enforce.sorting=true; set hive.enforce.sortmergebucketmapjoin=true; set hive.execution.engine=mr; set hive.limit.pushdown.memory.usage=0.04; set hive.map.aggr=true; set hive.mapjoin.bucket.cache.size=10000; set hive.mapred.reduce.tasks.speculative.execution=false; set hive.metastore.cache.pinobjtypes=Table,Database,Type,FieldSchema,Order; set hive.metastore.client.socket.timeout=60; set hive.metastore.execute.setugi=true; set hive.metastore.warehouse.dir=/apps/hive/warehouse; set hive.optimize.bucketmapjoin.sortedmerge=false; set hive.optimize.bucketmapjoin=true; set hive.optimize.index.filter=true; set hive.optimize.reducededuplication.min.reducer=4; set hive.optimize.reducededuplication=true; set hive.orc.splits.include.file.footer=false; set hive.security.authorization.enabled=false; set hive.security.metastore.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider; set hive.server2.enable.doAs=false; set hive.server2.tez.default.queues=default; set hive.server2.tez.initialize.default.sessions=false; set 
hive.server2.tez.sessions.per.default.queue=1; set hive.stats.autogather=true; set hive.tez.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager; set hive.txn.max.open.batch=1000; set hive.txn.timeout=300; set hive.vectorized.execution.enabled=true; set hive.vectorized.groupby.checkinterval=1024; set hive.vectorized.groupby.flush.percent=1; set hive.vectorized.groupby.maxentries=1024; -- These values need to be tuned appropriately to your cluster. These examples are for reference. -- set hive.tez.container.size=4096; -- set hive.tez.java.opts=-Xmx3800m; -- set hive.auto.convert.join.noconditionaltask.size=1252698795; ================================================ FILE: sample-queries-tpch/tpch_query1.sql ================================================ select l_returnflag, l_linestatus, sum(l_quantity) as sum_qty, sum(l_extendedprice) as sum_base_price, sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, avg(l_quantity) as avg_qty, avg(l_extendedprice) as avg_price, avg(l_discount) as avg_disc, count(*) as count_order from lineitem where l_shipdate <= '1998-09-16' group by l_returnflag, l_linestatus order by l_returnflag, l_linestatus; ================================================ FILE: sample-queries-tpch/tpch_query10.sql ================================================ select c_custkey, c_name, sum(l_extendedprice * (1 - l_discount)) as revenue, c_acctbal, n_name, c_address, c_phone, c_comment from customer, orders, lineitem, nation where c_custkey = o_custkey and l_orderkey = o_orderkey and o_orderdate >= '1993-07-01' and o_orderdate < '1993-10-01' and l_returnflag = 'R' and c_nationkey = n_nationkey group by c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment order by revenue desc limit 20; ================================================ FILE: sample-queries-tpch/tpch_query11.sql 
================================================ drop view q11_part_tmp_cached; drop view q11_sum_tmp_cached; create view q11_part_tmp_cached as select ps_partkey, sum(ps_supplycost * ps_availqty) as part_value from partsupp, supplier, nation where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'GERMANY' group by ps_partkey; create view q11_sum_tmp_cached as select sum(part_value) as total_value from q11_part_tmp_cached; select ps_partkey, part_value as value from ( select ps_partkey, part_value, total_value from q11_part_tmp_cached join q11_sum_tmp_cached ) a where part_value > total_value * 0.0001 order by value desc; ================================================ FILE: sample-queries-tpch/tpch_query12.sql ================================================ select l_shipmode, sum(case when o_orderpriority = '1-URGENT' or o_orderpriority = '2-HIGH' then 1 else 0 end) as high_line_count, sum(case when o_orderpriority <> '1-URGENT' and o_orderpriority <> '2-HIGH' then 1 else 0 end) as low_line_count from orders, lineitem where o_orderkey = l_orderkey and l_shipmode in ('REG AIR', 'MAIL') and l_commitdate < l_receiptdate and l_shipdate < l_commitdate and l_receiptdate >= '1995-01-01' and l_receiptdate < '1996-01-01' group by l_shipmode order by l_shipmode; ================================================ FILE: sample-queries-tpch/tpch_query13.sql ================================================ select c_count, count(*) as custdist from ( select c_custkey, count(o_orderkey) as c_count from customer left outer join orders on c_custkey = o_custkey and o_comment not like '%unusual%accounts%' group by c_custkey ) c_orders group by c_count order by custdist desc, c_count desc; ================================================ FILE: sample-queries-tpch/tpch_query14.sql ================================================ select 100.00 * sum(case when p_type like 'PROMO%' then l_extendedprice * (1 - l_discount) else 0 end) / sum(l_extendedprice * (1 - 
l_discount)) as promo_revenue from lineitem, part where l_partkey = p_partkey and l_shipdate >= '1995-08-01' and l_shipdate < '1995-09-01'; ================================================ FILE: sample-queries-tpch/tpch_query15.sql ================================================ drop view revenue_cached; drop view max_revenue_cached; create view revenue_cached as select l_suppkey as supplier_no, sum(l_extendedprice * (1 - l_discount)) as total_revenue from lineitem where l_shipdate >= '1996-01-01' and l_shipdate < '1996-04-01' group by l_suppkey; create view max_revenue_cached as select max(total_revenue) as max_revenue from revenue_cached; select s_suppkey, s_name, s_address, s_phone, total_revenue from supplier, revenue_cached, max_revenue_cached where s_suppkey = supplier_no and total_revenue = max_revenue order by s_suppkey; ================================================ FILE: sample-queries-tpch/tpch_query16.sql ================================================ select p_brand, p_type, p_size, count(distinct ps_suppkey) as supplier_cnt from partsupp, part where p_partkey = ps_partkey and p_brand <> 'Brand#34' and p_type not like 'ECONOMY BRUSHED%' and p_size in (22, 14, 27, 49, 21, 33, 35, 28) and partsupp.ps_suppkey not in ( select s_suppkey from supplier where s_comment like '%Customer%Complaints%' ) group by p_brand, p_type, p_size order by supplier_cnt desc, p_brand, p_type, p_size; ================================================ FILE: sample-queries-tpch/tpch_query17.sql ================================================ with q17_part as ( select p_partkey from part where p_brand = 'Brand#23' and p_container = 'MED BOX' ), q17_avg as ( select l_partkey as t_partkey, 0.2 * avg(l_quantity) as t_avg_quantity from lineitem where l_partkey IN (select p_partkey from q17_part) group by l_partkey ), q17_price as ( select l_quantity, l_partkey, l_extendedprice from lineitem where l_partkey IN (select p_partkey from q17_part) ) select cast(sum(l_extendedprice) / 
7.0 as decimal(32,2)) as avg_yearly from q17_avg, q17_price where t_partkey = l_partkey and l_quantity < t_avg_quantity; ================================================ FILE: sample-queries-tpch/tpch_query18.sql ================================================ drop view q18_tmp_cached; drop table q18_large_volume_customer_cached; create view q18_tmp_cached as select l_orderkey, sum(l_quantity) as t_sum_quantity from lineitem where l_orderkey is not null group by l_orderkey; create table q18_large_volume_customer_cached as select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity) from customer, orders, q18_tmp_cached t, lineitem l where c_custkey = o_custkey and o_orderkey = t.l_orderkey and o_orderkey is not null and t.t_sum_quantity > 300 and o_orderkey = l.l_orderkey and l.l_orderkey is not null group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice order by o_totalprice desc, o_orderdate limit 100; ================================================ FILE: sample-queries-tpch/tpch_query19.sql ================================================ select sum(l_extendedprice* (1 - l_discount)) as revenue from lineitem, part where ( p_partkey = l_partkey and p_brand = 'Brand#32' and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') and l_quantity >= 7 and l_quantity <= 7 + 10 and p_size between 1 and 5 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON' ) or ( p_partkey = l_partkey and p_brand = 'Brand#35' and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') and l_quantity >= 15 and l_quantity <= 15 + 10 and p_size between 1 and 10 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON' ) or ( p_partkey = l_partkey and p_brand = 'Brand#24' and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') and l_quantity >= 26 and l_quantity <= 26 + 10 and p_size between 1 and 15 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON' ); 
================================================
FILE: sample-queries-tpch/tpch_query2.sql
================================================
-- Recreate the helper view holding, per part, the lowest supply cost offered
-- by any supplier located in the EUROPE region.
drop view q2_min_ps_supplycost;

create view q2_min_ps_supplycost as
select
    p_partkey as min_p_partkey,
    min(ps_supplycost) as min_ps_supplycost
from
    part,
    partsupp,
    supplier,
    nation,
    region
where
    p_partkey = ps_partkey
    and s_suppkey = ps_suppkey
    and s_nationkey = n_nationkey
    and n_regionkey = r_regionkey
    and r_name = 'EUROPE'
group by
    p_partkey;

-- For size-37 parts whose type ends in COPPER, list the EUROPE suppliers whose
-- supply cost equals the per-part minimum from the view above (top 100 rows).
select
    s_acctbal,
    s_name,
    n_name,
    p_partkey,
    p_mfgr,
    s_address,
    s_phone,
    s_comment
from
    part,
    supplier,
    partsupp,
    nation,
    region,
    q2_min_ps_supplycost
where
    p_partkey = ps_partkey
    and s_suppkey = ps_suppkey
    and p_size = 37
    and p_type like '%COPPER'
    and s_nationkey = n_nationkey
    and n_regionkey = r_regionkey
    and r_name = 'EUROPE'
    and ps_supplycost = min_ps_supplycost
    and p_partkey = min_p_partkey
order by
    s_acctbal desc,
    n_name,
    s_name,
    p_partkey
limit 100;
================================================
FILE: sample-queries-tpch/tpch_query20.sql
================================================
-- Suppliers in CANADA holding more stock of a 'forest%' part than half of the
-- quantity they shipped of that part during the ship-date window.
--   tmp1: candidate parts (name starts with 'forest')
--   tmp2: suppliers located in CANADA
--   tmp3: half of the quantity shipped per (part, supplier) inside the window
--   tmp4: partsupp rows restricted to the candidate parts
--   tmp5: suppliers whose available quantity exceeds that half-quantity
-- The ship-date window is half-open, [1994-01-01, 1995-01-01): TPC-H Q20
-- defines the upper bound as "l_shipdate < DATE + 1 year", so shipments dated
-- exactly 1995-01-01 must not be counted.
-- explain formatted
with tmp1 as (
    select p_partkey
    from part
    where p_name like 'forest%'
),
tmp2 as (
    select s_name, s_address, s_suppkey
    from supplier, nation
    where s_nationkey = n_nationkey
    and n_name = 'CANADA'
),
tmp3 as (
    select l_partkey, 0.5 * sum(l_quantity) as sum_quantity, l_suppkey
    from lineitem, tmp2
    where l_shipdate >= '1994-01-01' and l_shipdate < '1995-01-01'
    and l_suppkey = s_suppkey
    group by l_partkey, l_suppkey
),
tmp4 as (
    select ps_partkey, ps_suppkey, ps_availqty
    from partsupp
    where ps_partkey IN (select p_partkey from tmp1)
),
tmp5 as (
    select ps_suppkey
    from tmp4, tmp3
    where ps_partkey = l_partkey
    and ps_suppkey = l_suppkey
    and ps_availqty > sum_quantity
)
select s_name, s_address
from supplier
where s_suppkey IN (select ps_suppkey from tmp5)
order by s_name;
================================================
FILE: sample-queries-tpch/tpch_query21.sql
================================================ -- explain create temporary table l3 stored as orc as select l_orderkey, count(distinct l_suppkey) as cntSupp from lineitem where l_receiptdate > l_commitdate and l_orderkey is not null group by l_orderkey having cntSupp = 1 ; with location as ( select supplier.* from supplier, nation where s_nationkey = n_nationkey and n_name = 'SAUDI ARABIA' ) select s_name, count(*) as numwait from ( select li.l_suppkey, li.l_orderkey from lineitem li join orders o on li.l_orderkey = o.o_orderkey and o.o_orderstatus = 'F' join ( select l_orderkey, count(distinct l_suppkey) as cntSupp from lineitem group by l_orderkey ) l2 on li.l_orderkey = l2.l_orderkey and li.l_receiptdate > li.l_commitdate and l2.cntSupp > 1 ) l1 join l3 on l1.l_orderkey = l3.l_orderkey join location s on l1.l_suppkey = s.s_suppkey group by s_name order by numwait desc, s_name limit 100; ================================================ FILE: sample-queries-tpch/tpch_query22.sql ================================================ drop view q22_customer_tmp_cached; drop view q22_customer_tmp1_cached; drop view q22_orders_tmp_cached; create view if not exists q22_customer_tmp_cached as select c_acctbal, c_custkey, substr(c_phone, 1, 2) as cntrycode from customer where substr(c_phone, 1, 2) = '13' or substr(c_phone, 1, 2) = '31' or substr(c_phone, 1, 2) = '23' or substr(c_phone, 1, 2) = '29' or substr(c_phone, 1, 2) = '30' or substr(c_phone, 1, 2) = '18' or substr(c_phone, 1, 2) = '17'; create view if not exists q22_customer_tmp1_cached as select avg(c_acctbal) as avg_acctbal from q22_customer_tmp_cached where c_acctbal > 0.00; create view if not exists q22_orders_tmp_cached as select o_custkey from orders group by o_custkey; select cntrycode, count(1) as numcust, sum(c_acctbal) as totacctbal from ( select cntrycode, c_acctbal, avg_acctbal from q22_customer_tmp1_cached ct1 join ( select cntrycode, c_acctbal from q22_orders_tmp_cached ot right outer join 
q22_customer_tmp_cached ct on ct.c_custkey = ot.o_custkey where o_custkey is null ) ct2 ) a where c_acctbal > avg_acctbal group by cntrycode order by cntrycode; ================================================ FILE: sample-queries-tpch/tpch_query3.sql ================================================ select l_orderkey, sum(l_extendedprice * (1 - l_discount)) as revenue, o_orderdate, o_shippriority from customer, orders, lineitem where c_mktsegment = 'BUILDING' and c_custkey = o_custkey and l_orderkey = o_orderkey and o_orderdate < '1995-03-22' and l_shipdate > '1995-03-22' group by l_orderkey, o_orderdate, o_shippriority order by revenue desc, o_orderdate limit 10; ================================================ FILE: sample-queries-tpch/tpch_query4.sql ================================================ select o_orderpriority, count(*) as order_count from orders as o where o_orderdate >= '1996-05-01' and o_orderdate < '1996-08-01' and exists ( select * from lineitem where l_orderkey = o.o_orderkey and l_commitdate < l_receiptdate ) group by o_orderpriority order by o_orderpriority; ================================================ FILE: sample-queries-tpch/tpch_query5.sql ================================================ select n_name, sum(l_extendedprice * (1 - l_discount)) as revenue from customer, orders, lineitem, supplier, nation, region where c_custkey = o_custkey and l_orderkey = o_orderkey and l_suppkey = s_suppkey and c_nationkey = s_nationkey and s_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'AFRICA' and o_orderdate >= '1993-01-01' and o_orderdate < '1994-01-01' group by n_name order by revenue desc; ================================================ FILE: sample-queries-tpch/tpch_query6.sql ================================================ select sum(l_extendedprice * l_discount) as revenue from lineitem where l_shipdate >= '1993-01-01' and l_shipdate < '1994-01-01' and l_discount between 0.06 - 0.01 and 0.06 + 0.01 and l_quantity < 25; 
================================================ FILE: sample-queries-tpch/tpch_query7.sql ================================================ select supp_nation, cust_nation, l_year, sum(volume) as revenue from ( select n1.n_name as supp_nation, n2.n_name as cust_nation, year(l_shipdate) as l_year, l_extendedprice * (1 - l_discount) as volume from supplier, lineitem, orders, customer, nation n1, nation n2 where s_suppkey = l_suppkey and o_orderkey = l_orderkey and c_custkey = o_custkey and s_nationkey = n1.n_nationkey and c_nationkey = n2.n_nationkey and ( (n1.n_name = 'KENYA' and n2.n_name = 'PERU') or (n1.n_name = 'PERU' and n2.n_name = 'KENYA') ) and l_shipdate between '1995-01-01' and '1996-12-31' ) as shipping group by supp_nation, cust_nation, l_year order by supp_nation, cust_nation, l_year; ================================================ FILE: sample-queries-tpch/tpch_query8.sql ================================================ select o_year, sum(case when nation = 'PERU' then volume else 0 end) / sum(volume) as mkt_share from ( select year(o_orderdate) as o_year, l_extendedprice * (1 - l_discount) as volume, n2.n_name as nation from part, supplier, lineitem, orders, customer, nation n1, nation n2, region where p_partkey = l_partkey and s_suppkey = l_suppkey and l_orderkey = o_orderkey and o_custkey = c_custkey and c_nationkey = n1.n_nationkey and n1.n_regionkey = r_regionkey and r_name = 'AMERICA' and s_nationkey = n2.n_nationkey and o_orderdate between '1995-01-01' and '1996-12-31' and p_type = 'ECONOMY BURNISHED NICKEL' ) as all_nations group by o_year order by o_year; ================================================ FILE: sample-queries-tpch/tpch_query9.sql ================================================ select nation, o_year, sum(amount) as sum_profit from ( select n_name as nation, year(o_orderdate) as o_year, l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount from part, supplier, lineitem, partsupp, orders, nation where 
s_suppkey = l_suppkey and ps_suppkey = l_suppkey and ps_partkey = l_partkey and p_partkey = l_partkey and o_orderkey = l_orderkey and s_nationkey = n_nationkey and p_name like '%plum%' ) as profit group by nation, o_year order by nation, o_year desc; ================================================ FILE: settings/init.sql ================================================ set hive.map.aggr=true; set mapreduce.reduce.speculative=false; set hive.auto.convert.join=true; set hive.optimize.reducededuplication.min.reducer=1; set hive.optimize.mapjoin.mapreduce=true; set hive.stats.autogather=true; set mapred.reduce.parallel.copies=30; -- set mapred.job.shuffle.input.buffer.percent=0.5; -- set mapred.job.reduce.input.buffer.percent=0.2; set mapred.map.child.java.opts=-server -Xmx2800m -Djava.net.preferIPv4Stack=true; set mapred.reduce.child.java.opts=-server -Xmx3800m -Djava.net.preferIPv4Stack=true; set mapreduce.map.memory.mb=3072; set mapreduce.reduce.memory.mb=4096; set hive.llap.memory.oversubscription.max.executors.per.query=8; set hive.llap.mapjoin.memory.oversubscribe.factor=0.3; set hive.auto.convert.join.hashtable.max.entries=-1; set hive.optimize.bucketmapjoin=false; set hive.convert.join.bucket.mapjoin.tez=false; set hive.auto.convert.join.shuffle.max.size=10000000000; set hive.tez.llap.min.reducer.per.executor=0.33; set hive.map.aggr.hash.min.reduction=0.99; set hive.optimize.sort.dynamic.partition.threshold=0; ================================================ FILE: settings/load-flat.sql ================================================ --set hive.enforce.bucketing=true; --set hive.enforce.sorting=true; set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.max.dynamic.partitions.pernode=1000000; set hive.exec.max.dynamic.partitions=1000000; set hive.exec.max.created.files=1000000; -- set mapreduce.input.fileinputformat.split.minsize=240000000; -- set mapreduce.input.fileinputformat.split.maxsize=240000000; -- set 
mapreduce.input.fileinputformat.split.minsize.per.node=240000000;
-- set mapreduce.input.fileinputformat.split.minsize.per.rack=240000000;
--set hive.exec.parallel=true;
set hive.stats.autogather=true;
-- set hive.support.concurrency=false;
-- set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
set hive.optimize.sort.dynamic.partition.threshold=0;
================================================
FILE: settings/load-partitioned.sql
================================================
-- Session settings for the partitioned load. ${REDUCERS} is a substitution
-- variable and must be supplied by whoever runs this file.
-- set hive.enforce.bucketing=true;
-- set hive.enforce.sorting=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=100000;
set hive.exec.max.dynamic.partitions=100000;
set hive.exec.max.created.files=1000000;
set hive.exec.parallel=true;
set hive.exec.reducers.max=${REDUCERS};
set hive.stats.autogather=true;
-- Set once only; this file previously repeated the same statement further down.
set hive.optimize.sort.dynamic.partition=true;

-- set mapred.job.reduce.input.buffer.percent=0.0;
-- set mapreduce.input.fileinputformat.split.minsize=240000000;
-- set mapreduce.input.fileinputformat.split.minsize.per.node=240000000;
-- set mapreduce.input.fileinputformat.split.minsize.per.rack=240000000;
-- set hive.tez.java.opts=-XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps -XX:+UseNUMA -XX:+UseG1GC -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/;
set tez.runtime.empty.partitions.info-via-events.enabled=true;
set tez.runtime.report.partition.stats=true;
-- fewer files for the NULL partition
set hive.tez.auto.reducer.parallelism=true;
set hive.tez.min.partition.factor=0.01;

-- set mapred.map.child.java.opts=-server -Xmx2800m -Djava.net.preferIPv4Stack=true;
-- set mapred.reduce.child.java.opts=-server -Xms1024m -Xmx3800m -Djava.net.preferIPv4Stack=true;
-- set mapreduce.map.memory.mb=3072;
-- set mapreduce.reduce.memory.mb=4096;
-- set io.sort.mb=800;
set hive.optimize.sort.dynamic.partition.threshold=0;
================================================
FILE: spark-queries-tpcds/LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ======================================================================= Apache Spark Subcomponents: The Apache Spark project contains subcomponents with separate copyright notices and license terms. Your use of the source code for these subcomponents is subject to the terms and conditions of the following licenses. 
======================================================================== For heapq (pyspark/heapq3.py): ======================================================================== See license/LICENSE-heapq.txt ======================================================================== For SnapTree: ======================================================================== See license/LICENSE-SnapTree.txt ======================================================================== For jbcrypt: ======================================================================== See license/LICENSE-jbcrypt.txt ======================================================================== BSD-style licenses ======================================================================== The following components are provided under a BSD-style license. See project link for details. The text of each license is also included at licenses/LICENSE-[project].txt. (BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core) (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model) (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/) (BSD License) ANTLR 4.5.2-1 (org.antlr:antlr4:4.5.2-1 - http://www.antlr.org/) (BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org) (BSD licence) ANTLR StringTemplate (org.antlr:stringtemplate:3.2.1 - http://www.stringtemplate.org) (BSD License) Javolution (javolution:javolution:5.5.1 - http://javolution.org) (BSD) JLine (jline:jline:0.9.94 - http://jline.sourceforge.net) (BSD) ParaNamer Core (com.thoughtworks.paranamer:paranamer:2.3 - http://paranamer.codehaus.org/paranamer) (BSD) ParaNamer Core (com.thoughtworks.paranamer:paranamer:2.6 - http://paranamer.codehaus.org/paranamer) (BSD 3 Clause) Scala (http://www.scala-lang.org/download/#License) (Interpreter classes (all .scala files in repl/src/main/scala except for Main.Scala, 
SparkHelper.scala and ExecutorClassLoader.scala), and for SerializableMapWrapper in JavaUtils.scala) (BSD-like) Scala Actors library (org.scala-lang:scala-actors:2.11.8 - http://www.scala-lang.org/) (BSD-like) Scala Compiler (org.scala-lang:scala-compiler:2.11.8 - http://www.scala-lang.org/) (BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.11.8 - http://www.scala-lang.org/) (BSD-like) Scala Library (org.scala-lang:scala-library:2.11.8 - http://www.scala-lang.org/) (BSD-like) Scalap (org.scala-lang:scalap:2.11.8 - http://www.scala-lang.org/) (BSD-style) scalacheck (org.scalacheck:scalacheck_2.11:1.10.0 - http://www.scalacheck.org) (BSD-style) spire (org.spire-math:spire_2.11:0.7.1 - http://spire-math.org) (BSD-style) spire-macros (org.spire-math:spire-macros_2.11:0.7.1 - http://spire-math.org) (New BSD License) Kryo (com.esotericsoftware:kryo:3.0.3 - https://github.com/EsotericSoftware/kryo) (New BSD License) MinLog (com.esotericsoftware:minlog:1.3.0 - https://github.com/EsotericSoftware/minlog) (New BSD license) Protocol Buffer Java API (com.google.protobuf:protobuf-java:2.5.0 - http://code.google.com/p/protobuf) (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf) (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net) (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net) (The New BSD License) Py4J (net.sf.py4j:py4j:0.10.6 - http://py4j.sourceforge.net/) (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/) (BSD licence) sbt and sbt-launch-lib.bash (BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE) (BSD 3 Clause) DPark (https://github.com/douban/dpark/blob/master/LICENSE) (BSD 3 Clause) CloudPickle (https://github.com/cloudpipe/cloudpickle/blob/master/LICENSE) 
======================================================================== MIT licenses ======================================================================== The following components are provided under the MIT License. See project link for details. The text of each license is also included at licenses/LICENSE-[project].txt. (MIT License) JCL 1.1.1 implemented over SLF4J (org.slf4j:jcl-over-slf4j:1.7.5 - http://www.slf4j.org) (MIT License) JUL to SLF4J bridge (org.slf4j:jul-to-slf4j:1.7.5 - http://www.slf4j.org) (MIT License) SLF4J API Module (org.slf4j:slf4j-api:1.7.5 - http://www.slf4j.org) (MIT License) SLF4J LOG4J-12 Binding (org.slf4j:slf4j-log4j12:1.7.5 - http://www.slf4j.org) (MIT License) pyrolite (org.spark-project:pyrolite:2.0.1 - http://pythonhosted.org/Pyro4/) (MIT License) scopt (com.github.scopt:scopt_2.11:3.2.0 - https://github.com/scopt/scopt) (The MIT License) Mockito (org.mockito:mockito-core:1.9.5 - http://www.mockito.org) (MIT License) jquery (https://jquery.org/license/) (MIT License) AnchorJS (https://github.com/bryanbraun/anchorjs) (MIT License) graphlib-dot (https://github.com/cpettitt/graphlib-dot) (MIT License) dagre-d3 (https://github.com/cpettitt/dagre-d3) (MIT License) sorttable (https://github.com/stuartlangridge/sorttable) (MIT License) boto (https://github.com/boto/boto/blob/develop/LICENSE) (MIT License) datatables (http://datatables.net/license) (MIT License) mustache (https://github.com/mustache/mustache/blob/master/LICENSE) (MIT License) cookies (http://code.google.com/p/cookies/wiki/License) (MIT License) blockUI (http://jquery.malsup.com/block/) (MIT License) RowsGroup (http://datatables.net/license/mit) (MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html) (MIT License) modernizr (https://github.com/Modernizr/Modernizr/blob/master/LICENSE) (MIT License) machinist (https://github.com/typelevel/machinist) ================================================ FILE: 
spark-queries-tpcds/README.md ================================================ These are the full 99 TPC-DS queries from Apache Spark 2.2. - https://github.com/apache/spark/tree/master/sql/core/src/test/resources/tpcds ================================================ FILE: spark-queries-tpcds/q1.sql ================================================ WITH customer_total_return AS ( SELECT sr_customer_sk AS ctr_customer_sk, sr_store_sk AS ctr_store_sk, sum(sr_return_amt) AS ctr_total_return FROM store_returns, date_dim WHERE sr_returned_date_sk = d_date_sk AND d_year = 2000 GROUP BY sr_customer_sk, sr_store_sk) SELECT c_customer_id FROM customer_total_return ctr1, store, customer WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2 FROM customer_total_return ctr2 WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk) AND s_store_sk = ctr1.ctr_store_sk AND s_state = 'TN' AND ctr1.ctr_customer_sk = c_customer_sk ORDER BY c_customer_id LIMIT 100 ================================================ FILE: spark-queries-tpcds/q10.sql ================================================ SELECT cd_gender, cd_marital_status, cd_education_status, count(*) cnt1, cd_purchase_estimate, count(*) cnt2, cd_credit_rating, count(*) cnt3, cd_dep_count, count(*) cnt4, cd_dep_employed_count, count(*) cnt5, cd_dep_college_count, count(*) cnt6 FROM customer c, customer_address ca, customer_demographics WHERE c.c_current_addr_sk = ca.ca_address_sk AND ca_county IN ('Rush County', 'Toole County', 'Jefferson County', 'Dona Ana County', 'La Porte County') AND cd_demo_sk = c.c_current_cdemo_sk AND exists(SELECT * FROM store_sales, date_dim WHERE c.c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk AND d_year = 2002 AND d_moy BETWEEN 1 AND 1 + 3) AND (exists(SELECT * FROM web_sales, date_dim WHERE c.c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk AND d_year = 2002 AND d_moy BETWEEN 1 AND 1 + 3) OR exists(SELECT * FROM catalog_sales, date_dim WHERE c.c_customer_sk = 
cs_ship_customer_sk AND cs_sold_date_sk = d_date_sk AND d_year = 2002 AND d_moy BETWEEN 1 AND 1 + 3)) GROUP BY cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating, cd_dep_count, cd_dep_employed_count, cd_dep_college_count ORDER BY cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating, cd_dep_count, cd_dep_employed_count, cd_dep_college_count LIMIT 100 ================================================ FILE: spark-queries-tpcds/q11.sql ================================================ WITH year_total AS ( SELECT c_customer_id customer_id, c_first_name customer_first_name, c_last_name customer_last_name, c_preferred_cust_flag customer_preferred_cust_flag, c_birth_country customer_birth_country, c_login customer_login, c_email_address customer_email_address, d_year dyear, sum(ss_ext_list_price - ss_ext_discount_amt) year_total, 's' sale_type FROM customer, store_sales, date_dim WHERE c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk GROUP BY c_customer_id , c_first_name , c_last_name , d_year , c_preferred_cust_flag , c_birth_country , c_login , c_email_address , d_year UNION ALL SELECT c_customer_id customer_id, c_first_name customer_first_name, c_last_name customer_last_name, c_preferred_cust_flag customer_preferred_cust_flag, c_birth_country customer_birth_country, c_login customer_login, c_email_address customer_email_address, d_year dyear, sum(ws_ext_list_price - ws_ext_discount_amt) year_total, 'w' sale_type FROM customer, web_sales, date_dim WHERE c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk GROUP BY c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country, c_login, c_email_address, d_year) SELECT t_s_secyear.customer_preferred_cust_flag FROM year_total t_s_firstyear , year_total t_s_secyear , year_total t_w_firstyear , year_total t_w_secyear WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id AND t_s_firstyear.customer_id = 
t_w_secyear.customer_id AND t_s_firstyear.customer_id = t_w_firstyear.customer_id AND t_s_firstyear.sale_type = 's' AND t_w_firstyear.sale_type = 'w' AND t_s_secyear.sale_type = 's' AND t_w_secyear.sale_type = 'w' AND t_s_firstyear.dyear = 2001 AND t_s_secyear.dyear = 2001 + 1 AND t_w_firstyear.dyear = 2001 AND t_w_secyear.dyear = 2001 + 1 AND t_s_firstyear.year_total > 0 AND t_w_firstyear.year_total > 0 AND CASE WHEN t_w_firstyear.year_total > 0 THEN t_w_secyear.year_total / t_w_firstyear.year_total ELSE NULL END > CASE WHEN t_s_firstyear.year_total > 0 THEN t_s_secyear.year_total / t_s_firstyear.year_total ELSE NULL END ORDER BY t_s_secyear.customer_preferred_cust_flag LIMIT 100 ================================================ FILE: spark-queries-tpcds/q12.sql ================================================ SELECT i_item_desc, i_category, i_class, i_current_price, sum(ws_ext_sales_price) AS itemrevenue, sum(ws_ext_sales_price) * 100 / sum(sum(ws_ext_sales_price)) OVER (PARTITION BY i_class) AS revenueratio FROM web_sales, item, date_dim WHERE ws_item_sk = i_item_sk AND i_category IN ('Sports', 'Books', 'Home') AND ws_sold_date_sk = d_date_sk AND d_date BETWEEN cast('1999-02-22' AS DATE) AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) GROUP BY i_item_id, i_item_desc, i_category, i_class, i_current_price ORDER BY i_category, i_class, i_item_id, i_item_desc, revenueratio LIMIT 100 ================================================ FILE: spark-queries-tpcds/q13.sql ================================================ SELECT avg(ss_quantity), avg(ss_ext_sales_price), avg(ss_ext_wholesale_cost), sum(ss_ext_wholesale_cost) FROM store_sales , store , customer_demographics , household_demographics , customer_address , date_dim WHERE s_store_sk = ss_store_sk AND ss_sold_date_sk = d_date_sk AND d_year = 2001 AND ((ss_hdemo_sk = hd_demo_sk AND cd_demo_sk = ss_cdemo_sk AND cd_marital_status = 'M' AND cd_education_status = 'Advanced Degree' AND ss_sales_price BETWEEN 100.00 AND 
150.00 AND hd_dep_count = 3 ) OR (ss_hdemo_sk = hd_demo_sk AND cd_demo_sk = ss_cdemo_sk AND cd_marital_status = 'S' AND cd_education_status = 'College' AND ss_sales_price BETWEEN 50.00 AND 100.00 AND hd_dep_count = 1 ) OR (ss_hdemo_sk = hd_demo_sk AND cd_demo_sk = ss_cdemo_sk AND cd_marital_status = 'W' AND cd_education_status = '2 yr Degree' AND ss_sales_price BETWEEN 150.00 AND 200.00 AND hd_dep_count = 1 )) AND ((ss_addr_sk = ca_address_sk AND ca_country = 'United States' AND ca_state IN ('TX', 'OH', 'TX') AND ss_net_profit BETWEEN 100 AND 200 ) OR (ss_addr_sk = ca_address_sk AND ca_country = 'United States' AND ca_state IN ('OR', 'NM', 'KY') AND ss_net_profit BETWEEN 150 AND 300 ) OR (ss_addr_sk = ca_address_sk AND ca_country = 'United States' AND ca_state IN ('VA', 'TX', 'MS') AND ss_net_profit BETWEEN 50 AND 250 )) ================================================ FILE: spark-queries-tpcds/q14a.sql ================================================ WITH cross_items AS (SELECT i_item_sk ss_item_sk FROM item, (SELECT iss.i_brand_id brand_id, iss.i_class_id class_id, iss.i_category_id category_id FROM store_sales, item iss, date_dim d1 WHERE ss_item_sk = iss.i_item_sk AND ss_sold_date_sk = d1.d_date_sk AND d1.d_year BETWEEN 1999 AND 1999 + 2 INTERSECT SELECT ics.i_brand_id, ics.i_class_id, ics.i_category_id FROM catalog_sales, item ics, date_dim d2 WHERE cs_item_sk = ics.i_item_sk AND cs_sold_date_sk = d2.d_date_sk AND d2.d_year BETWEEN 1999 AND 1999 + 2 INTERSECT SELECT iws.i_brand_id, iws.i_class_id, iws.i_category_id FROM web_sales, item iws, date_dim d3 WHERE ws_item_sk = iws.i_item_sk AND ws_sold_date_sk = d3.d_date_sk AND d3.d_year BETWEEN 1999 AND 1999 + 2) x WHERE i_brand_id = brand_id AND i_class_id = class_id AND i_category_id = category_id ), avg_sales AS (SELECT avg(quantity * list_price) average_sales FROM ( SELECT ss_quantity quantity, ss_list_price list_price FROM store_sales, date_dim WHERE ss_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 
2001 UNION ALL SELECT cs_quantity quantity, cs_list_price list_price FROM catalog_sales, date_dim WHERE cs_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2 UNION ALL SELECT ws_quantity quantity, ws_list_price list_price FROM web_sales, date_dim WHERE ws_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2) x) SELECT channel, i_brand_id, i_class_id, i_category_id, sum(sales), sum(number_sales) FROM ( SELECT 'store' channel, i_brand_id, i_class_id, i_category_id, sum(ss_quantity * ss_list_price) sales, count(*) number_sales FROM store_sales, item, date_dim WHERE ss_item_sk IN (SELECT ss_item_sk FROM cross_items) AND ss_item_sk = i_item_sk AND ss_sold_date_sk = d_date_sk AND d_year = 1999 + 2 AND d_moy = 11 GROUP BY i_brand_id, i_class_id, i_category_id HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales FROM avg_sales) UNION ALL SELECT 'catalog' channel, i_brand_id, i_class_id, i_category_id, sum(cs_quantity * cs_list_price) sales, count(*) number_sales FROM catalog_sales, item, date_dim WHERE cs_item_sk IN (SELECT ss_item_sk FROM cross_items) AND cs_item_sk = i_item_sk AND cs_sold_date_sk = d_date_sk AND d_year = 1999 + 2 AND d_moy = 11 GROUP BY i_brand_id, i_class_id, i_category_id HAVING sum(cs_quantity * cs_list_price) > (SELECT average_sales FROM avg_sales) UNION ALL SELECT 'web' channel, i_brand_id, i_class_id, i_category_id, sum(ws_quantity * ws_list_price) sales, count(*) number_sales FROM web_sales, item, date_dim WHERE ws_item_sk IN (SELECT ss_item_sk FROM cross_items) AND ws_item_sk = i_item_sk AND ws_sold_date_sk = d_date_sk AND d_year = 1999 + 2 AND d_moy = 11 GROUP BY i_brand_id, i_class_id, i_category_id HAVING sum(ws_quantity * ws_list_price) > (SELECT average_sales FROM avg_sales) ) y GROUP BY ROLLUP (channel, i_brand_id, i_class_id, i_category_id) ORDER BY channel, i_brand_id, i_class_id, i_category_id LIMIT 100 ================================================ FILE: spark-queries-tpcds/q14b.sql 
================================================ WITH cross_items AS (SELECT i_item_sk ss_item_sk FROM item, (SELECT iss.i_brand_id brand_id, iss.i_class_id class_id, iss.i_category_id category_id FROM store_sales, item iss, date_dim d1 WHERE ss_item_sk = iss.i_item_sk AND ss_sold_date_sk = d1.d_date_sk AND d1.d_year BETWEEN 1999 AND 1999 + 2 INTERSECT SELECT ics.i_brand_id, ics.i_class_id, ics.i_category_id FROM catalog_sales, item ics, date_dim d2 WHERE cs_item_sk = ics.i_item_sk AND cs_sold_date_sk = d2.d_date_sk AND d2.d_year BETWEEN 1999 AND 1999 + 2 INTERSECT SELECT iws.i_brand_id, iws.i_class_id, iws.i_category_id FROM web_sales, item iws, date_dim d3 WHERE ws_item_sk = iws.i_item_sk AND ws_sold_date_sk = d3.d_date_sk AND d3.d_year BETWEEN 1999 AND 1999 + 2) x WHERE i_brand_id = brand_id AND i_class_id = class_id AND i_category_id = category_id ), avg_sales AS (SELECT avg(quantity * list_price) average_sales FROM (SELECT ss_quantity quantity, ss_list_price list_price FROM store_sales, date_dim WHERE ss_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2 UNION ALL SELECT cs_quantity quantity, cs_list_price list_price FROM catalog_sales, date_dim WHERE cs_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2 UNION ALL SELECT ws_quantity quantity, ws_list_price list_price FROM web_sales, date_dim WHERE ws_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2) x) SELECT * FROM (SELECT 'store' channel, i_brand_id, i_class_id, i_category_id, sum(ss_quantity * ss_list_price) sales, count(*) number_sales FROM store_sales, item, date_dim WHERE ss_item_sk IN (SELECT ss_item_sk FROM cross_items) AND ss_item_sk = i_item_sk AND ss_sold_date_sk = d_date_sk AND d_week_seq = (SELECT d_week_seq FROM date_dim WHERE d_year = 1999 + 1 AND d_moy = 12 AND d_dom = 11) GROUP BY i_brand_id, i_class_id, i_category_id HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales FROM avg_sales)) this_year, (SELECT 'store' channel, i_brand_id, i_class_id, 
i_category_id, sum(ss_quantity * ss_list_price) sales, count(*) number_sales FROM store_sales, item, date_dim WHERE ss_item_sk IN (SELECT ss_item_sk FROM cross_items) AND ss_item_sk = i_item_sk AND ss_sold_date_sk = d_date_sk AND d_week_seq = (SELECT d_week_seq FROM date_dim WHERE d_year = 1999 AND d_moy = 12 AND d_dom = 11) GROUP BY i_brand_id, i_class_id, i_category_id HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales FROM avg_sales)) last_year WHERE this_year.i_brand_id = last_year.i_brand_id AND this_year.i_class_id = last_year.i_class_id AND this_year.i_category_id = last_year.i_category_id ORDER BY this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id LIMIT 100 ================================================ FILE: spark-queries-tpcds/q15.sql ================================================ SELECT ca_zip, sum(cs_sales_price) FROM catalog_sales, customer, customer_address, date_dim WHERE cs_bill_customer_sk = c_customer_sk AND c_current_addr_sk = ca_address_sk AND (substr(ca_zip, 1, 5) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') OR ca_state IN ('CA', 'WA', 'GA') OR cs_sales_price > 500) AND cs_sold_date_sk = d_date_sk AND d_qoy = 2 AND d_year = 2001 GROUP BY ca_zip ORDER BY ca_zip LIMIT 100 ================================================ FILE: spark-queries-tpcds/q16.sql ================================================ SELECT count(DISTINCT cs_order_number) AS `order count `, sum(cs_ext_ship_cost) AS `total shipping cost `, sum(cs_net_profit) AS `total net profit ` FROM catalog_sales cs1, date_dim, customer_address, call_center WHERE d_date BETWEEN '2002-02-01' AND (CAST('2002-02-01' AS DATE) + INTERVAL 60 days) AND cs1.cs_ship_date_sk = d_date_sk AND cs1.cs_ship_addr_sk = ca_address_sk AND ca_state = 'GA' AND cs1.cs_call_center_sk = cc_call_center_sk AND cc_county IN ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County') 
AND EXISTS(SELECT * FROM catalog_sales cs2 WHERE cs1.cs_order_number = cs2.cs_order_number AND cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) AND NOT EXISTS(SELECT * FROM catalog_returns cr1 WHERE cs1.cs_order_number = cr1.cr_order_number) ORDER BY count(DISTINCT cs_order_number) LIMIT 100 ================================================ FILE: spark-queries-tpcds/q17.sql ================================================ SELECT i_item_id, i_item_desc, s_state, count(ss_quantity) AS store_sales_quantitycount, avg(ss_quantity) AS store_sales_quantityave, stddev_samp(ss_quantity) AS store_sales_quantitystdev, stddev_samp(ss_quantity) / avg(ss_quantity) AS store_sales_quantitycov, count(sr_return_quantity) as_store_returns_quantitycount, avg(sr_return_quantity) as_store_returns_quantityave, stddev_samp(sr_return_quantity) as_store_returns_quantitystdev, stddev_samp(sr_return_quantity) / avg(sr_return_quantity) AS store_returns_quantitycov, count(cs_quantity) AS catalog_sales_quantitycount, avg(cs_quantity) AS catalog_sales_quantityave, stddev_samp(cs_quantity) / avg(cs_quantity) AS catalog_sales_quantitystdev, stddev_samp(cs_quantity) / avg(cs_quantity) AS catalog_sales_quantitycov FROM store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, store, item WHERE d1.d_quarter_name = '2001Q1' AND d1.d_date_sk = ss_sold_date_sk AND i_item_sk = ss_item_sk AND s_store_sk = ss_store_sk AND ss_customer_sk = sr_customer_sk AND ss_item_sk = sr_item_sk AND ss_ticket_number = sr_ticket_number AND sr_returned_date_sk = d2.d_date_sk AND d2.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3') AND sr_customer_sk = cs_bill_customer_sk AND sr_item_sk = cs_item_sk AND cs_sold_date_sk = d3.d_date_sk AND d3.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3') GROUP BY i_item_id, i_item_desc, s_state ORDER BY i_item_id, i_item_desc, s_state LIMIT 100 ================================================ FILE: spark-queries-tpcds/q18.sql ================================================ 
SELECT i_item_id, ca_country, ca_state, ca_county, avg(cast(cs_quantity AS DECIMAL(12, 2))) agg1, avg(cast(cs_list_price AS DECIMAL(12, 2))) agg2, avg(cast(cs_coupon_amt AS DECIMAL(12, 2))) agg3, avg(cast(cs_sales_price AS DECIMAL(12, 2))) agg4, avg(cast(cs_net_profit AS DECIMAL(12, 2))) agg5, avg(cast(c_birth_year AS DECIMAL(12, 2))) agg6, avg(cast(cd1.cd_dep_count AS DECIMAL(12, 2))) agg7 FROM catalog_sales, customer_demographics cd1, customer_demographics cd2, customer, customer_address, date_dim, item WHERE cs_sold_date_sk = d_date_sk AND cs_item_sk = i_item_sk AND cs_bill_cdemo_sk = cd1.cd_demo_sk AND cs_bill_customer_sk = c_customer_sk AND cd1.cd_gender = 'F' AND cd1.cd_education_status = 'Unknown' AND c_current_cdemo_sk = cd2.cd_demo_sk AND c_current_addr_sk = ca_address_sk AND c_birth_month IN (1, 6, 8, 9, 12, 2) AND d_year = 1998 AND ca_state IN ('MS', 'IN', 'ND', 'OK', 'NM', 'VA', 'MS') GROUP BY ROLLUP (i_item_id, ca_country, ca_state, ca_county) ORDER BY ca_country, ca_state, ca_county, i_item_id LIMIT 100 ================================================ FILE: spark-queries-tpcds/q19.sql ================================================ SELECT i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, sum(ss_ext_sales_price) ext_price FROM date_dim, store_sales, item, customer, customer_address, store WHERE d_date_sk = ss_sold_date_sk AND ss_item_sk = i_item_sk AND i_manager_id = 8 AND d_moy = 11 AND d_year = 1998 AND ss_customer_sk = c_customer_sk AND c_current_addr_sk = ca_address_sk AND substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5) AND ss_store_sk = s_store_sk GROUP BY i_brand, i_brand_id, i_manufact_id, i_manufact ORDER BY ext_price DESC, brand, brand_id, i_manufact_id, i_manufact LIMIT 100 ================================================ FILE: spark-queries-tpcds/q2.sql ================================================ WITH wscs AS ( SELECT sold_date_sk, sales_price FROM (SELECT ws_sold_date_sk sold_date_sk, ws_ext_sales_price sales_price FROM 
web_sales) x UNION ALL (SELECT cs_sold_date_sk sold_date_sk, cs_ext_sales_price sales_price FROM catalog_sales)), wswscs AS ( SELECT d_week_seq, sum(CASE WHEN (d_day_name = 'Sunday') THEN sales_price ELSE NULL END) sun_sales, sum(CASE WHEN (d_day_name = 'Monday') THEN sales_price ELSE NULL END) mon_sales, sum(CASE WHEN (d_day_name = 'Tuesday') THEN sales_price ELSE NULL END) tue_sales, sum(CASE WHEN (d_day_name = 'Wednesday') THEN sales_price ELSE NULL END) wed_sales, sum(CASE WHEN (d_day_name = 'Thursday') THEN sales_price ELSE NULL END) thu_sales, sum(CASE WHEN (d_day_name = 'Friday') THEN sales_price ELSE NULL END) fri_sales, sum(CASE WHEN (d_day_name = 'Saturday') THEN sales_price ELSE NULL END) sat_sales FROM wscs, date_dim WHERE d_date_sk = sold_date_sk GROUP BY d_week_seq) SELECT d_week_seq1, round(sun_sales1 / sun_sales2, 2), round(mon_sales1 / mon_sales2, 2), round(tue_sales1 / tue_sales2, 2), round(wed_sales1 / wed_sales2, 2), round(thu_sales1 / thu_sales2, 2), round(fri_sales1 / fri_sales2, 2), round(sat_sales1 / sat_sales2, 2) FROM (SELECT wswscs.d_week_seq d_week_seq1, sun_sales sun_sales1, mon_sales mon_sales1, tue_sales tue_sales1, wed_sales wed_sales1, thu_sales thu_sales1, fri_sales fri_sales1, sat_sales sat_sales1 FROM wswscs, date_dim WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001) y, (SELECT wswscs.d_week_seq d_week_seq2, sun_sales sun_sales2, mon_sales mon_sales2, tue_sales tue_sales2, wed_sales wed_sales2, thu_sales thu_sales2, fri_sales fri_sales2, sat_sales sat_sales2 FROM wswscs, date_dim WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001 + 1) z WHERE d_week_seq1 = d_week_seq2 - 53 ORDER BY d_week_seq1 ================================================ FILE: spark-queries-tpcds/q20.sql ================================================ SELECT i_item_desc, i_category, i_class, i_current_price, sum(cs_ext_sales_price) AS itemrevenue, sum(cs_ext_sales_price) * 100 / sum(sum(cs_ext_sales_price)) OVER (PARTITION BY 
i_class) AS revenueratio FROM catalog_sales, item, date_dim WHERE cs_item_sk = i_item_sk AND i_category IN ('Sports', 'Books', 'Home') AND cs_sold_date_sk = d_date_sk AND d_date BETWEEN cast('1999-02-22' AS DATE) AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) GROUP BY i_item_id, i_item_desc, i_category, i_class, i_current_price ORDER BY i_category, i_class, i_item_id, i_item_desc, revenueratio LIMIT 100 ================================================ FILE: spark-queries-tpcds/q21.sql ================================================ SELECT * FROM ( SELECT w_warehouse_name, i_item_id, sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE)) THEN inv_quantity_on_hand ELSE 0 END) AS inv_before, sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE)) THEN inv_quantity_on_hand ELSE 0 END) AS inv_after FROM inventory, warehouse, item, date_dim WHERE i_current_price BETWEEN 0.99 AND 1.49 AND i_item_sk = inv_item_sk AND inv_warehouse_sk = w_warehouse_sk AND inv_date_sk = d_date_sk AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days) AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days) GROUP BY w_warehouse_name, i_item_id) x WHERE (CASE WHEN inv_before > 0 THEN inv_after / inv_before ELSE NULL END) BETWEEN 2.0 / 3.0 AND 3.0 / 2.0 ORDER BY w_warehouse_name, i_item_id LIMIT 100 ================================================ FILE: spark-queries-tpcds/q22.sql ================================================ SELECT i_product_name, i_brand, i_class, i_category, avg(inv_quantity_on_hand) qoh FROM inventory, date_dim, item, warehouse WHERE inv_date_sk = d_date_sk AND inv_item_sk = i_item_sk AND inv_warehouse_sk = w_warehouse_sk AND d_month_seq BETWEEN 1200 AND 1200 + 11 GROUP BY ROLLUP (i_product_name, i_brand, i_class, i_category) ORDER BY qoh, i_product_name, i_brand, i_class, i_category LIMIT 100 ================================================ FILE: spark-queries-tpcds/q23a.sql ================================================ WITH 
-- Items with more than 4 store sales rows on a single date during 2000-2003.
frequent_ss_items AS
(SELECT
    substr(i_item_desc, 1, 30) itemdesc,
    i_item_sk item_sk,
    d_date solddate,
    count(*) cnt
  FROM store_sales, date_dim, item
  WHERE ss_sold_date_sk = d_date_sk
    AND ss_item_sk = i_item_sk
    AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
  GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date
  HAVING count(*) > 4),
-- Largest per-customer store sales total (quantity * sales price), 2000-2003.
  max_store_sales AS
  (SELECT max(csales) tpcds_cmax
  FROM (SELECT
    c_customer_sk,
    sum(ss_quantity * ss_sales_price) csales
  FROM store_sales, customer, date_dim
  WHERE ss_customer_sk = c_customer_sk
    AND ss_sold_date_sk = d_date_sk
    AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
  GROUP BY c_customer_sk) x),
-- Customers whose store sales total (no date filter) exceeds 50% of that maximum.
  best_ss_customer AS
  (SELECT
    c_customer_sk,
    sum(ss_quantity * ss_sales_price) ssales
  FROM store_sales, customer
  WHERE ss_customer_sk = c_customer_sk
  GROUP BY c_customer_sk
  HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) *
    (SELECT *
    FROM max_store_sales))
SELECT sum(sales)
FROM ((SELECT cs_quantity * cs_list_price sales
FROM catalog_sales, date_dim
WHERE d_year = 2000
  AND d_moy = 2
  AND cs_sold_date_sk = d_date_sk
  AND cs_item_sk IN (SELECT item_sk
FROM frequent_ss_items)
  AND cs_bill_customer_sk IN (SELECT c_customer_sk
FROM best_ss_customer))
      UNION ALL
      (SELECT ws_quantity * ws_list_price sales
      FROM web_sales, date_dim
      WHERE d_year = 2000
        AND d_moy = 2
        AND ws_sold_date_sk = d_date_sk
        AND ws_item_sk IN (SELECT item_sk
      FROM frequent_ss_items)
        AND ws_bill_customer_sk IN (SELECT c_customer_sk
      FROM best_ss_customer))) y
LIMIT 100

================================================
FILE: spark-queries-tpcds/q23b.sql
================================================
-- q23b: defines the same three CTEs as q23a and reports 2000-02 catalog and
-- web sales (quantity * list price) summed per customer last/first name.
WITH frequent_ss_items AS
(SELECT
    substr(i_item_desc, 1, 30) itemdesc,
    i_item_sk item_sk,
    d_date solddate,
    count(*) cnt
  FROM store_sales, date_dim, item
  WHERE ss_sold_date_sk = d_date_sk
    AND ss_item_sk = i_item_sk
    AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
  GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date
  HAVING count(*) > 4),
  max_store_sales AS
  (SELECT max(csales)
tpcds_cmax
  FROM (SELECT
    c_customer_sk,
    sum(ss_quantity * ss_sales_price) csales
  FROM store_sales, customer, date_dim
  WHERE ss_customer_sk = c_customer_sk
    AND ss_sold_date_sk = d_date_sk
    AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
  GROUP BY c_customer_sk) x),
  best_ss_customer AS
  (SELECT
    c_customer_sk,
    sum(ss_quantity * ss_sales_price) ssales
  FROM store_sales, customer
  WHERE ss_customer_sk = c_customer_sk
  GROUP BY c_customer_sk
  HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) *
    (SELECT *
    FROM max_store_sales))
SELECT
  c_last_name,
  c_first_name,
  sales
FROM ((SELECT
  c_last_name,
  c_first_name,
  sum(cs_quantity * cs_list_price) sales
FROM catalog_sales, customer, date_dim
WHERE d_year = 2000
  AND d_moy = 2
  AND cs_sold_date_sk = d_date_sk
  AND cs_item_sk IN (SELECT item_sk
FROM frequent_ss_items)
  AND cs_bill_customer_sk IN (SELECT c_customer_sk
FROM best_ss_customer)
  AND cs_bill_customer_sk = c_customer_sk
GROUP BY c_last_name, c_first_name)
      UNION ALL
      (SELECT
        c_last_name,
        c_first_name,
        sum(ws_quantity * ws_list_price) sales
      FROM web_sales, customer, date_dim
      WHERE d_year = 2000
        AND d_moy = 2
        AND ws_sold_date_sk = d_date_sk
        AND ws_item_sk IN (SELECT item_sk
      FROM frequent_ss_items)
        AND ws_bill_customer_sk IN (SELECT c_customer_sk
      FROM best_ss_customer)
        AND ws_bill_customer_sk = c_customer_sk
      GROUP BY c_last_name, c_first_name)) y
ORDER BY c_last_name, c_first_name, sales
LIMIT 100

================================================
FILE: spark-queries-tpcds/q24a.sql
================================================
-- q24a: net paid on store sales that have a matching store return, for stores
-- in market 8; reports 'pale' items per customer/store whose total exceeds
-- 5% of the average netpaid over the whole ssales CTE.
WITH ssales AS
(SELECT
    c_last_name,
    c_first_name,
    s_store_name,
    ca_state,
    s_state,
    i_color,
    i_current_price,
    i_manager_id,
    i_units,
    i_size,
    sum(ss_net_paid) netpaid
  FROM store_sales, store_returns, store, item, customer, customer_address
  WHERE ss_ticket_number = sr_ticket_number
    AND ss_item_sk = sr_item_sk
    AND ss_customer_sk = c_customer_sk
    AND ss_item_sk = i_item_sk
    AND ss_store_sk = s_store_sk
    AND c_birth_country = upper(ca_country)
    AND s_zip = ca_zip
    AND
s_market_id = 8
  GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color,
    i_current_price, i_manager_id, i_units, i_size)
SELECT
  c_last_name,
  c_first_name,
  s_store_name,
  sum(netpaid) paid
FROM ssales
WHERE i_color = 'pale'
GROUP BY c_last_name, c_first_name, s_store_name
HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid)
FROM ssales)

================================================
FILE: spark-queries-tpcds/q24b.sql
================================================
-- q24b: same ssales CTE and threshold as q24a, filtered on i_color = 'chiffon'.
WITH ssales AS
(SELECT
    c_last_name,
    c_first_name,
    s_store_name,
    ca_state,
    s_state,
    i_color,
    i_current_price,
    i_manager_id,
    i_units,
    i_size,
    sum(ss_net_paid) netpaid
  FROM store_sales, store_returns, store, item, customer, customer_address
  WHERE ss_ticket_number = sr_ticket_number
    AND ss_item_sk = sr_item_sk
    AND ss_customer_sk = c_customer_sk
    AND ss_item_sk = i_item_sk
    AND ss_store_sk = s_store_sk
    AND c_birth_country = upper(ca_country)
    AND s_zip = ca_zip
    AND s_market_id = 8
  GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color,
    i_current_price, i_manager_id, i_units, i_size)
SELECT
  c_last_name,
  c_first_name,
  s_store_name,
  sum(netpaid) paid
FROM ssales
WHERE i_color = 'chiffon'
GROUP BY c_last_name, c_first_name, s_store_name
HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid)
FROM ssales)

================================================
FILE: spark-queries-tpcds/q25.sql
================================================
-- q25: per item and store, store-sale profit, store-return loss and
-- catalog-sale profit for 2001-04 store sales that were returned and have a
-- catalog sale of the same item to the same customer, with the return and the
-- catalog sale both falling in months 4-10 of 2001.
SELECT
  i_item_id,
  i_item_desc,
  s_store_id,
  s_store_name,
  sum(ss_net_profit) AS store_sales_profit,
  sum(sr_net_loss) AS store_returns_loss,
  sum(cs_net_profit) AS catalog_sales_profit
FROM
  store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3,
  store, item
WHERE
  d1.d_moy = 4
    AND d1.d_year = 2001
    AND d1.d_date_sk = ss_sold_date_sk
    AND i_item_sk = ss_item_sk
    AND s_store_sk = ss_store_sk
    AND ss_customer_sk = sr_customer_sk
    AND ss_item_sk = sr_item_sk
    AND ss_ticket_number = sr_ticket_number
    AND sr_returned_date_sk = d2.d_date_sk
    AND
d2.d_moy BETWEEN 4 AND 10
    AND d2.d_year = 2001
    AND sr_customer_sk = cs_bill_customer_sk
    AND sr_item_sk = cs_item_sk
    AND cs_sold_date_sk = d3.d_date_sk
    AND d3.d_moy BETWEEN 4 AND 10
    AND d3.d_year = 2001
GROUP BY
  i_item_id, i_item_desc, s_store_id, s_store_name
ORDER BY
  i_item_id, i_item_desc, s_store_id, s_store_name
LIMIT 100

================================================
FILE: spark-queries-tpcds/q26.sql
================================================
-- q26: per-item averages of quantity, list price, coupon amount and sales
-- price for 2000 catalog sales to customers with gender 'M', marital status
-- 'S' and education 'College', under promotions where p_channel_email = 'N'
-- or p_channel_event = 'N'.
SELECT
  i_item_id,
  avg(cs_quantity) agg1,
  avg(cs_list_price) agg2,
  avg(cs_coupon_amt) agg3,
  avg(cs_sales_price) agg4
FROM catalog_sales, customer_demographics, date_dim, item, promotion
WHERE cs_sold_date_sk = d_date_sk
  AND cs_item_sk = i_item_sk
  AND cs_bill_cdemo_sk = cd_demo_sk
  AND cs_promo_sk = p_promo_sk
  AND cd_gender = 'M'
  AND cd_marital_status = 'S'
  AND cd_education_status = 'College'
  AND (p_channel_email = 'N' OR p_channel_event = 'N')
  AND d_year = 2000
GROUP BY i_item_id
ORDER BY i_item_id
LIMIT 100

================================================
FILE: spark-queries-tpcds/q27.sql
================================================
-- q27: the same four averages for 2002 store sales in state 'TN' with the
-- same demographic filter, rolled up over (i_item_id, s_state).
SELECT
  i_item_id,
  s_state,
  grouping(s_state) g_state,
  avg(ss_quantity) agg1,
  avg(ss_list_price) agg2,
  avg(ss_coupon_amt) agg3,
  avg(ss_sales_price) agg4
FROM store_sales, customer_demographics, date_dim, store, item
WHERE ss_sold_date_sk = d_date_sk
  AND ss_item_sk = i_item_sk
  AND ss_store_sk = s_store_sk
  AND ss_cdemo_sk = cd_demo_sk
  AND cd_gender = 'M'
  AND cd_marital_status = 'S'
  AND cd_education_status = 'College'
  AND d_year = 2002
  AND s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN')
GROUP BY ROLLUP (i_item_id, s_state)
ORDER BY i_item_id, s_state
LIMIT 100

================================================
FILE: spark-queries-tpcds/q28.sql
================================================
-- q28: six independent single-row subqueries (B1..B6), one per ss_quantity
-- range; each reports the average, count and distinct count of ss_list_price
-- over rows matching any of three list-price / coupon / wholesale-cost ranges.
SELECT *
FROM (SELECT
  avg(ss_list_price) B1_LP,
  count(ss_list_price) B1_CNT,
  count(DISTINCT ss_list_price) B1_CNTD
FROM store_sales
WHERE ss_quantity BETWEEN 0 AND 5
  AND (ss_list_price BETWEEN 8 AND 8
+ 10
  OR ss_coupon_amt BETWEEN 459 AND 459 + 1000
  OR ss_wholesale_cost BETWEEN 57 AND 57 + 20)) B1,
  (SELECT
    avg(ss_list_price) B2_LP,
    count(ss_list_price) B2_CNT,
    count(DISTINCT ss_list_price) B2_CNTD
  FROM store_sales
  WHERE ss_quantity BETWEEN 6 AND 10
    AND (ss_list_price BETWEEN 90 AND 90 + 10
    OR ss_coupon_amt BETWEEN 2323 AND 2323 + 1000
    OR ss_wholesale_cost BETWEEN 31 AND 31 + 20)) B2,
  (SELECT
    avg(ss_list_price) B3_LP,
    count(ss_list_price) B3_CNT,
    count(DISTINCT ss_list_price) B3_CNTD
  FROM store_sales
  WHERE ss_quantity BETWEEN 11 AND 15
    AND (ss_list_price BETWEEN 142 AND 142 + 10
    OR ss_coupon_amt BETWEEN 12214 AND 12214 + 1000
    OR ss_wholesale_cost BETWEEN 79 AND 79 + 20)) B3,
  (SELECT
    avg(ss_list_price) B4_LP,
    count(ss_list_price) B4_CNT,
    count(DISTINCT ss_list_price) B4_CNTD
  FROM store_sales
  WHERE ss_quantity BETWEEN 16 AND 20
    AND (ss_list_price BETWEEN 135 AND 135 + 10
    OR ss_coupon_amt BETWEEN 6071 AND 6071 + 1000
    OR ss_wholesale_cost BETWEEN 38 AND 38 + 20)) B4,
  (SELECT
    avg(ss_list_price) B5_LP,
    count(ss_list_price) B5_CNT,
    count(DISTINCT ss_list_price) B5_CNTD
  FROM store_sales
  WHERE ss_quantity BETWEEN 21 AND 25
    AND (ss_list_price BETWEEN 122 AND 122 + 10
    OR ss_coupon_amt BETWEEN 836 AND 836 + 1000
    OR ss_wholesale_cost BETWEEN 17 AND 17 + 20)) B5,
  (SELECT
    avg(ss_list_price) B6_LP,
    count(ss_list_price) B6_CNT,
    count(DISTINCT ss_list_price) B6_CNTD
  FROM store_sales
  WHERE ss_quantity BETWEEN 26 AND 30
    AND (ss_list_price BETWEEN 154 AND 154 + 10
    OR ss_coupon_amt BETWEEN 7326 AND 7326 + 1000
    OR ss_wholesale_cost BETWEEN 7 AND 7 + 20)) B6
LIMIT 100

================================================
FILE: spark-queries-tpcds/q29.sql
================================================
-- q29: per item and store, quantities sold in store, returned to store and
-- sold via catalog, linked through the same customer and item across the
-- three fact tables.
SELECT
  i_item_id,
  i_item_desc,
  s_store_id,
  s_store_name,
  sum(ss_quantity) AS store_sales_quantity,
  sum(sr_return_quantity) AS store_returns_quantity,
  sum(cs_quantity) AS catalog_sales_quantity
FROM
  store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2,
  date_dim d3, store, item
WHERE
  d1.d_moy = 9
    AND d1.d_year = 1999
    AND d1.d_date_sk = ss_sold_date_sk
    AND i_item_sk = ss_item_sk
    AND s_store_sk = ss_store_sk
    AND ss_customer_sk = sr_customer_sk
    AND ss_item_sk = sr_item_sk
    AND ss_ticket_number = sr_ticket_number
    AND sr_returned_date_sk = d2.d_date_sk
    AND d2.d_moy BETWEEN 9 AND 9 + 3
    AND d2.d_year = 1999
    AND sr_customer_sk = cs_bill_customer_sk
    AND sr_item_sk = cs_item_sk
    AND cs_sold_date_sk = d3.d_date_sk
    AND d3.d_year IN (1999, 1999 + 1, 1999 + 2)
GROUP BY
  i_item_id, i_item_desc, s_store_id, s_store_name
ORDER BY
  i_item_id, i_item_desc, s_store_id, s_store_name
LIMIT 100

================================================
FILE: spark-queries-tpcds/q3.sql
================================================
-- q3: store sales revenue by year and brand for manufacturer 128 in month 11,
-- ordered by year and descending revenue.
SELECT
  dt.d_year,
  item.i_brand_id brand_id,
  item.i_brand brand,
  SUM(ss_ext_sales_price) sum_agg
FROM date_dim dt, store_sales, item
WHERE dt.d_date_sk = store_sales.ss_sold_date_sk
  AND store_sales.ss_item_sk = item.i_item_sk
  AND item.i_manufact_id = 128
  AND dt.d_moy = 11
GROUP BY dt.d_year, item.i_brand, item.i_brand_id
ORDER BY dt.d_year, sum_agg DESC, brand_id
LIMIT 100

================================================
FILE: spark-queries-tpcds/q30.sql
================================================
-- q30: customers currently living in 'GA' whose 2002 web-return total for a
-- return state exceeds 1.2x the average total for that state.
WITH customer_total_return AS
(SELECT
    wr_returning_customer_sk AS ctr_customer_sk,
    ca_state AS ctr_state,
    sum(wr_return_amt) AS ctr_total_return
  FROM web_returns, date_dim, customer_address
  WHERE wr_returned_date_sk = d_date_sk
    AND d_year = 2002
    AND wr_returning_addr_sk = ca_address_sk
  GROUP BY wr_returning_customer_sk, ca_state)
SELECT
  c_customer_id,
  c_salutation,
  c_first_name,
  c_last_name,
  c_preferred_cust_flag,
  c_birth_day,
  c_birth_month,
  c_birth_year,
  c_birth_country,
  c_login,
  c_email_address,
  c_last_review_date,
  ctr_total_return
FROM customer_total_return ctr1, customer_address, customer
WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2
FROM customer_total_return ctr2
WHERE ctr1.ctr_state = ctr2.ctr_state)
  AND ca_address_sk =
c_current_addr_sk
  AND ca_state = 'GA'
  AND ctr1.ctr_customer_sk = c_customer_sk
ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag
  , c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address
  , c_last_review_date, ctr_total_return
LIMIT 100

================================================
FILE: spark-queries-tpcds/q31.sql
================================================
-- q31: counties where, in 2000, the web sales ratio exceeded the store sales
-- ratio both from quarter 1 to 2 and from quarter 2 to 3.
WITH ss AS
(SELECT
    ca_county,
    d_qoy,
    d_year,
    sum(ss_ext_sales_price) AS store_sales
  FROM store_sales, date_dim, customer_address
  WHERE ss_sold_date_sk = d_date_sk
    AND ss_addr_sk = ca_address_sk
  GROUP BY ca_county, d_qoy, d_year),
    ws AS
  (SELECT
    ca_county,
    d_qoy,
    d_year,
    sum(ws_ext_sales_price) AS web_sales
  FROM web_sales, date_dim, customer_address
  WHERE ws_sold_date_sk = d_date_sk
    AND ws_bill_addr_sk = ca_address_sk
  GROUP BY ca_county, d_qoy, d_year)
SELECT
  ss1.ca_county,
  ss1.d_year,
  ws2.web_sales / ws1.web_sales web_q1_q2_increase,
  ss2.store_sales / ss1.store_sales store_q1_q2_increase,
  ws3.web_sales / ws2.web_sales web_q2_q3_increase,
  ss3.store_sales / ss2.store_sales store_q2_q3_increase
FROM
  ss ss1, ss ss2, ss ss3, ws ws1, ws ws2, ws ws3
WHERE
  ss1.d_qoy = 1
    AND ss1.d_year = 2000
    AND ss1.ca_county = ss2.ca_county
    AND ss2.d_qoy = 2
    AND ss2.d_year = 2000
    AND ss2.ca_county = ss3.ca_county
    AND ss3.d_qoy = 3
    AND ss3.d_year = 2000
    AND ss1.ca_county = ws1.ca_county
    AND ws1.d_qoy = 1
    AND ws1.d_year = 2000
    AND ws1.ca_county = ws2.ca_county
    AND ws2.d_qoy = 2
    AND ws2.d_year = 2000
    AND ws1.ca_county = ws3.ca_county
    AND ws3.d_qoy = 3
    AND ws3.d_year = 2000
    AND CASE WHEN ws1.web_sales > 0
    THEN ws2.web_sales / ws1.web_sales
        ELSE NULL END
    > CASE WHEN ss1.store_sales > 0
    THEN ss2.store_sales / ss1.store_sales
      ELSE NULL END
    AND CASE WHEN ws2.web_sales > 0
    THEN ws3.web_sales / ws2.web_sales
        ELSE NULL END
    > CASE WHEN ss2.store_sales > 0
    THEN ss3.store_sales / ss2.store_sales
      ELSE NULL END
ORDER BY ss1.ca_county
================================================
FILE: spark-queries-tpcds/q32.sql
================================================
-- q32: catalog sales of manufacturer 977 items sold in the 90 days starting
-- 2000-01-27 whose discount amount exceeds 1.3x the average discount for the
-- same item over the same window. The outer filter and the correlated
-- subquery must use the identical date window.
-- NOTE(review): the projection is the constant 1 under the alias
-- `excess discount amount ` (with a trailing space) rather than an aggregate
-- of cs_ext_discount_amt - confirm this is intended before changing it.
SELECT 1 AS `excess discount amount `
FROM catalog_sales, item, date_dim
WHERE i_manufact_id = 977
  AND i_item_sk = cs_item_sk
  AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days)
  AND d_date_sk = cs_sold_date_sk
  AND cs_ext_discount_amt > (
  SELECT 1.3 * avg(cs_ext_discount_amt)
  FROM catalog_sales, date_dim
  WHERE cs_item_sk = i_item_sk
    AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days)
    AND d_date_sk = cs_sold_date_sk)
LIMIT 100

================================================
FILE: spark-queries-tpcds/q33.sql
================================================
-- q33: total 1998-05 sales per manufacturer across the store, catalog and web
-- channels, for manufacturers having an 'Electronics' item and customer
-- addresses with ca_gmt_offset = -5.
WITH ss AS (
  SELECT
    i_manufact_id,
    sum(ss_ext_sales_price) total_sales
  FROM
    store_sales, date_dim, customer_address, item
  WHERE
    i_manufact_id IN (SELECT i_manufact_id
    FROM item
    WHERE i_category IN ('Electronics'))
      AND ss_item_sk = i_item_sk
      AND ss_sold_date_sk = d_date_sk
      AND d_year = 1998
      AND d_moy = 5
      AND ss_addr_sk = ca_address_sk
      AND ca_gmt_offset = -5
  GROUP BY i_manufact_id), cs AS
(SELECT
    i_manufact_id,
    sum(cs_ext_sales_price) total_sales
  FROM catalog_sales, date_dim, customer_address, item
  WHERE
    i_manufact_id IN (
      SELECT i_manufact_id
      FROM item
      WHERE
        i_category IN ('Electronics'))
      AND cs_item_sk = i_item_sk
      AND cs_sold_date_sk = d_date_sk
      AND d_year = 1998
      AND d_moy = 5
      AND cs_bill_addr_sk = ca_address_sk
      AND ca_gmt_offset = -5
  GROUP BY i_manufact_id),
    ws AS (
    SELECT
      i_manufact_id,
      sum(ws_ext_sales_price) total_sales
    FROM
      web_sales, date_dim, customer_address, item
    WHERE
      i_manufact_id IN (SELECT i_manufact_id
      FROM item
      WHERE i_category IN ('Electronics'))
        AND ws_item_sk = i_item_sk
        AND ws_sold_date_sk = d_date_sk
        AND d_year = 1998
        AND d_moy = 5
        AND ws_bill_addr_sk = ca_address_sk
        AND ca_gmt_offset = -5
    GROUP BY i_manufact_id)
SELECT
  i_manufact_id,
  sum(total_sales) total_sales
FROM (SELECT *
      FROM ss
      UNION ALL
      SELECT
*
      FROM cs
      UNION ALL
      SELECT *
      FROM ws) tmp1
GROUP BY i_manufact_id
ORDER BY total_sales
LIMIT 100

================================================
FILE: spark-queries-tpcds/q34.sql
================================================
-- q34: store tickets with 15 to 20 sales rows, bought on days 1-3 or 25-28 of
-- a month in 1999-2001 at 'Williamson County' stores by households with buy
-- potential '>10000' or 'unknown' and a dependents/vehicles ratio above 1.2.
SELECT
  c_last_name,
  c_first_name,
  c_salutation,
  c_preferred_cust_flag,
  ss_ticket_number,
  cnt
FROM
  (SELECT
    ss_ticket_number,
    ss_customer_sk,
    count(*) cnt
  FROM store_sales, date_dim, store, household_demographics
  WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
    AND store_sales.ss_store_sk = store.s_store_sk
    AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
    AND (date_dim.d_dom BETWEEN 1 AND 3 OR date_dim.d_dom BETWEEN 25 AND 28)
    AND (household_demographics.hd_buy_potential = '>10000' OR
    household_demographics.hd_buy_potential = 'unknown')
    AND household_demographics.hd_vehicle_count > 0
    AND (CASE WHEN household_demographics.hd_vehicle_count > 0
    THEN household_demographics.hd_dep_count / household_demographics.hd_vehicle_count
         ELSE NULL
         END) > 1.2
    AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2)
    AND store.s_county IN
    ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County',
     'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County')
  GROUP BY ss_ticket_number, ss_customer_sk) dn, customer
WHERE ss_customer_sk = c_customer_sk
  AND cnt BETWEEN 15 AND 20
ORDER BY c_last_name, c_first_name, c_salutation, c_preferred_cust_flag DESC

================================================
FILE: spark-queries-tpcds/q35.sql
================================================
-- q35: per state and demographic group, counts and min/max/avg of the three
-- dependent counts for customers who had a store purchase and also a web or
-- catalog purchase in quarters 1-3 of 2002.
SELECT
  ca_state,
  cd_gender,
  cd_marital_status,
  count(*) cnt1,
  min(cd_dep_count),
  max(cd_dep_count),
  avg(cd_dep_count),
  cd_dep_employed_count,
  count(*) cnt2,
  min(cd_dep_employed_count),
  max(cd_dep_employed_count),
  avg(cd_dep_employed_count),
  cd_dep_college_count,
  count(*) cnt3,
  min(cd_dep_college_count),
  max(cd_dep_college_count),
  avg(cd_dep_college_count)
FROM
  customer c, customer_address ca, customer_demographics
WHERE
c.c_current_addr_sk = ca.ca_address_sk
    AND cd_demo_sk = c.c_current_cdemo_sk
    AND exists(SELECT *
               FROM store_sales, date_dim
               WHERE c.c_customer_sk = ss_customer_sk
                 AND ss_sold_date_sk = d_date_sk
                 AND d_year = 2002
                 AND d_qoy < 4)
    AND (exists(SELECT *
                FROM web_sales, date_dim
                WHERE c.c_customer_sk = ws_bill_customer_sk
                  AND ws_sold_date_sk = d_date_sk
                  AND d_year = 2002
                  AND d_qoy < 4)
    OR exists(SELECT *
              FROM catalog_sales, date_dim
              WHERE c.c_customer_sk = cs_ship_customer_sk
                AND cs_sold_date_sk = d_date_sk
                AND d_year = 2002
                AND d_qoy < 4))
GROUP BY ca_state, cd_gender, cd_marital_status, cd_dep_count,
  cd_dep_employed_count, cd_dep_college_count
ORDER BY ca_state, cd_gender, cd_marital_status, cd_dep_count,
  cd_dep_employed_count, cd_dep_college_count
LIMIT 100

================================================
FILE: spark-queries-tpcds/q36.sql
================================================
-- q36: gross margin (net profit / ext sales price) of 2001 store sales in
-- state 'TN', rolled up over (i_category, i_class) and ranked by margin
-- within each rollup level and parent category.
SELECT
  sum(ss_net_profit) / sum(ss_ext_sales_price) AS gross_margin,
  i_category,
  i_class,
  grouping(i_category) + grouping(i_class) AS lochierarchy,
  rank()
  OVER (
    PARTITION BY grouping(i_category) + grouping(i_class),
      CASE WHEN grouping(i_class) = 0
        THEN i_category END
    ORDER BY sum(ss_net_profit) / sum(ss_ext_sales_price) ASC) AS rank_within_parent
FROM
  store_sales, date_dim d1, item, store
WHERE
  d1.d_year = 2001
    AND d1.d_date_sk = ss_sold_date_sk
    AND i_item_sk = ss_item_sk
    AND s_store_sk = ss_store_sk
    AND s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN')
GROUP BY ROLLUP (i_category, i_class)
ORDER BY
  lochierarchy DESC
  , CASE WHEN lochierarchy = 0
  THEN i_category END
  , rank_within_parent
LIMIT 100

================================================
FILE: spark-queries-tpcds/q37.sql
================================================
-- q37: items priced 68-98 from four manufacturers that have 100-500 units on
-- hand on some date in the 60 days from 2000-02-01 and appear in catalog_sales.
SELECT
  i_item_id,
  i_item_desc,
  i_current_price
FROM item, inventory, date_dim, catalog_sales
WHERE i_current_price BETWEEN 68 AND 68 + 30
  AND inv_item_sk = i_item_sk
  AND d_date_sk = inv_date_sk
  AND d_date BETWEEN cast('2000-02-01' AS DATE) AND
(cast('2000-02-01' AS DATE) + INTERVAL 60 days)
  AND i_manufact_id IN (677, 940, 694, 808)
  AND inv_quantity_on_hand BETWEEN 100 AND 500
  AND cs_item_sk = i_item_sk
GROUP BY i_item_id, i_item_desc, i_current_price
ORDER BY i_item_id
LIMIT 100

================================================
FILE: spark-queries-tpcds/q38.sql
================================================
-- q38: number of distinct (last name, first name, date) combinations that
-- occur in store, catalog and web sales alike for d_month_seq 1200..1211.
SELECT count(*)
FROM (
       SELECT DISTINCT
         c_last_name,
         c_first_name,
         d_date
       FROM store_sales, date_dim, customer
       WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
         AND store_sales.ss_customer_sk = customer.c_customer_sk
         AND d_month_seq BETWEEN 1200 AND 1200 + 11
       INTERSECT
       SELECT DISTINCT
         c_last_name,
         c_first_name,
         d_date
       FROM catalog_sales, date_dim, customer
       WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
         AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
         AND d_month_seq BETWEEN 1200 AND 1200 + 11
       INTERSECT
       SELECT DISTINCT
         c_last_name,
         c_first_name,
         d_date
       FROM web_sales, date_dim, customer
       WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk
         AND web_sales.ws_bill_customer_sk = customer.c_customer_sk
         AND d_month_seq BETWEEN 1200 AND 1200 + 11
     ) hot_cust
LIMIT 100

================================================
FILE: spark-queries-tpcds/q39a.sql
================================================
-- q39a: warehouse/item pairs whose 2001 inventory has stdev / mean above 1
-- in both month 1 and month 2; the two months are reported side by side.
WITH inv AS
(SELECT
    w_warehouse_name,
    w_warehouse_sk,
    i_item_sk,
    d_moy,
    stdev,
    mean,
    CASE mean
    WHEN 0
      THEN NULL
    ELSE stdev / mean END cov
  FROM (SELECT
    w_warehouse_name,
    w_warehouse_sk,
    i_item_sk,
    d_moy,
    stddev_samp(inv_quantity_on_hand) stdev,
    avg(inv_quantity_on_hand) mean
  FROM inventory, item, warehouse, date_dim
  WHERE inv_item_sk = i_item_sk
    AND inv_warehouse_sk = w_warehouse_sk
    AND inv_date_sk = d_date_sk
    AND d_year = 2001
  GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo
  WHERE CASE mean
        WHEN 0
          THEN 0
        ELSE stdev / mean END > 1)
SELECT
  inv1.w_warehouse_sk,
  inv1.i_item_sk,
  inv1.d_moy,
  inv1.mean,
  inv1.cov,
  inv2.w_warehouse_sk,
  inv2.i_item_sk,
  inv2.d_moy,
  inv2.mean,
inv2.cov FROM inv inv1, inv inv2 WHERE inv1.i_item_sk = inv2.i_item_sk AND inv1.w_warehouse_sk = inv2.w_warehouse_sk AND inv1.d_moy = 1 AND inv2.d_moy = 1 + 1 ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov , inv2.d_moy, inv2.mean, inv2.cov ================================================ FILE: spark-queries-tpcds/q39b.sql ================================================ WITH inv AS (SELECT w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy, stdev, mean, CASE mean WHEN 0 THEN NULL ELSE stdev / mean END cov FROM (SELECT w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy, stddev_samp(inv_quantity_on_hand) stdev, avg(inv_quantity_on_hand) mean FROM inventory, item, warehouse, date_dim WHERE inv_item_sk = i_item_sk AND inv_warehouse_sk = w_warehouse_sk AND inv_date_sk = d_date_sk AND d_year = 2001 GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo WHERE CASE mean WHEN 0 THEN 0 ELSE stdev / mean END > 1) SELECT inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov, inv2.w_warehouse_sk, inv2.i_item_sk, inv2.d_moy, inv2.mean, inv2.cov FROM inv inv1, inv inv2 WHERE inv1.i_item_sk = inv2.i_item_sk AND inv1.w_warehouse_sk = inv2.w_warehouse_sk AND inv1.d_moy = 1 AND inv2.d_moy = 1 + 1 AND inv1.cov > 1.5 ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov , inv2.d_moy, inv2.mean, inv2.cov ================================================ FILE: spark-queries-tpcds/q4.sql ================================================ WITH year_total AS ( SELECT c_customer_id customer_id, c_first_name customer_first_name, c_last_name customer_last_name, c_preferred_cust_flag customer_preferred_cust_flag, c_birth_country customer_birth_country, c_login customer_login, c_email_address customer_email_address, d_year dyear, sum(((ss_ext_list_price - ss_ext_wholesale_cost - ss_ext_discount_amt) + ss_ext_sales_price) / 2) year_total, 's' sale_type FROM customer, store_sales, date_dim WHERE c_customer_sk = 
ss_customer_sk
    AND ss_sold_date_sk = d_date_sk
  GROUP BY c_customer_id,
    c_first_name,
    c_last_name,
    c_preferred_cust_flag,
    c_birth_country,
    c_login,
    c_email_address,
    d_year
  UNION ALL
  -- Catalog channel ('c'), keyed on the bill-to customer.
  SELECT
    c_customer_id customer_id,
    c_first_name customer_first_name,
    c_last_name customer_last_name,
    c_preferred_cust_flag customer_preferred_cust_flag,
    c_birth_country customer_birth_country,
    c_login customer_login,
    c_email_address customer_email_address,
    d_year dyear,
    sum((((cs_ext_list_price - cs_ext_wholesale_cost - cs_ext_discount_amt) +
      cs_ext_sales_price) / 2)) year_total,
    'c' sale_type
  FROM customer, catalog_sales, date_dim
  WHERE c_customer_sk = cs_bill_customer_sk
    AND cs_sold_date_sk = d_date_sk
  GROUP BY c_customer_id,
    c_first_name,
    c_last_name,
    c_preferred_cust_flag,
    c_birth_country,
    c_login,
    c_email_address,
    d_year
  UNION ALL
  -- Web channel ('w'), keyed on the bill-to customer.
  SELECT
    c_customer_id customer_id,
    c_first_name customer_first_name,
    c_last_name customer_last_name,
    c_preferred_cust_flag customer_preferred_cust_flag,
    c_birth_country customer_birth_country,
    c_login customer_login,
    c_email_address customer_email_address,
    d_year dyear,
    sum((((ws_ext_list_price - ws_ext_wholesale_cost - ws_ext_discount_amt) +
      ws_ext_sales_price) / 2)) year_total,
    'w' sale_type
  FROM customer, web_sales, date_dim
  WHERE c_customer_sk = ws_bill_customer_sk
    AND ws_sold_date_sk = d_date_sk
  GROUP BY c_customer_id,
    c_first_name,
    c_last_name,
    c_preferred_cust_flag,
    c_birth_country,
    c_login,
    c_email_address,
    d_year)
SELECT
  t_s_secyear.customer_id,
  t_s_secyear.customer_first_name,
  t_s_secyear.customer_last_name,
  t_s_secyear.customer_preferred_cust_flag,
  t_s_secyear.customer_birth_country,
  t_s_secyear.customer_login,
  t_s_secyear.customer_email_address
FROM year_total t_s_firstyear, year_total t_s_secyear, year_total t_c_firstyear,
  year_total t_c_secyear, year_total t_w_firstyear, year_total t_w_secyear
WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id
  AND t_s_firstyear.customer_id = t_c_secyear.customer_id
  AND t_s_firstyear.customer_id =
t_c_firstyear.customer_id
  AND t_s_firstyear.customer_id = t_w_firstyear.customer_id
  AND t_s_firstyear.customer_id = t_w_secyear.customer_id
  AND t_s_firstyear.sale_type = 's'
  AND t_c_firstyear.sale_type = 'c'
  AND t_w_firstyear.sale_type = 'w'
  AND t_s_secyear.sale_type = 's'
  AND t_c_secyear.sale_type = 'c'
  AND t_w_secyear.sale_type = 'w'
  AND t_s_firstyear.dyear = 2001
  AND t_s_secyear.dyear = 2001 + 1
  AND t_c_firstyear.dyear = 2001
  AND t_c_secyear.dyear = 2001 + 1
  AND t_w_firstyear.dyear = 2001
  AND t_w_secyear.dyear = 2001 + 1
  AND t_s_firstyear.year_total > 0
  AND t_c_firstyear.year_total > 0
  AND t_w_firstyear.year_total > 0
  AND CASE WHEN t_c_firstyear.year_total > 0
  THEN t_c_secyear.year_total / t_c_firstyear.year_total
      ELSE NULL END
  > CASE WHEN t_s_firstyear.year_total > 0
  THEN t_s_secyear.year_total / t_s_firstyear.year_total
    ELSE NULL END
  AND CASE WHEN t_c_firstyear.year_total > 0
  THEN t_c_secyear.year_total / t_c_firstyear.year_total
      ELSE NULL END
  > CASE WHEN t_w_firstyear.year_total > 0
  THEN t_w_secyear.year_total / t_w_firstyear.year_total
    ELSE NULL END
ORDER BY
  t_s_secyear.customer_id,
  t_s_secyear.customer_first_name,
  t_s_secyear.customer_last_name,
  t_s_secyear.customer_preferred_cust_flag,
  t_s_secyear.customer_birth_country,
  t_s_secyear.customer_login,
  t_s_secyear.customer_email_address
LIMIT 100

================================================
FILE: spark-queries-tpcds/q40.sql
================================================
-- q40: catalog sales price net of refunded cash, per warehouse state and
-- item, before vs. on-or-after 2000-03-11, for items priced 0.99-1.49.
-- Returns are attached with a LEFT OUTER JOIN, so sales without a return
-- contribute with a refund of 0 via coalesce.
SELECT
  w_state,
  i_item_id,
  sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE))
    THEN cs_sales_price - coalesce(cr_refunded_cash, 0)
      ELSE 0 END) AS sales_before,
  sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE))
    THEN cs_sales_price - coalesce(cr_refunded_cash, 0)
      ELSE 0 END) AS sales_after
FROM
  catalog_sales
  LEFT OUTER JOIN catalog_returns ON
                                    (cs_order_number = cr_order_number
                                      AND cs_item_sk = cr_item_sk)
  , warehouse, item, date_dim
WHERE
  i_current_price BETWEEN 0.99 AND 1.49
    AND i_item_sk = cs_item_sk
    AND
cs_warehouse_sk = w_warehouse_sk
    AND cs_sold_date_sk = d_date_sk
    AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days)
  AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days)
GROUP BY
  w_state, i_item_id
ORDER BY w_state, i_item_id
LIMIT 100

================================================
FILE: spark-queries-tpcds/q41.sql
================================================
-- q41: distinct product names for manufacturer ids 738..778 where the same
-- i_manufact has at least one item matching any of the category / color /
-- unit / size combinations listed in the correlated count subquery.
SELECT DISTINCT (i_product_name)
FROM item i1
WHERE i_manufact_id BETWEEN 738 AND 738 + 40
  AND (SELECT count(*) AS item_cnt
FROM item
WHERE (i_manufact = i1.i_manufact AND
  ((i_category = 'Women' AND
    (i_color = 'powder' OR i_color = 'khaki') AND
    (i_units = 'Ounce' OR i_units = 'Oz') AND
    (i_size = 'medium' OR i_size = 'extra large')
  ) OR
    (i_category = 'Women' AND
      (i_color = 'brown' OR i_color = 'honeydew') AND
      (i_units = 'Bunch' OR i_units = 'Ton') AND
      (i_size = 'N/A' OR i_size = 'small')
    ) OR
    (i_category = 'Men' AND
      (i_color = 'floral' OR i_color = 'deep') AND
      (i_units = 'N/A' OR i_units = 'Dozen') AND
      (i_size = 'petite' OR i_size = 'large')
    ) OR
    (i_category = 'Men' AND
      (i_color = 'light' OR i_color = 'cornflower') AND
      (i_units = 'Box' OR i_units = 'Pound') AND
      (i_size = 'medium' OR i_size = 'extra large')
    ))) OR
  (i_manufact = i1.i_manufact AND
    ((i_category = 'Women' AND
      (i_color = 'midnight' OR i_color = 'snow') AND
      (i_units = 'Pallet' OR i_units = 'Gross') AND
      (i_size = 'medium' OR i_size = 'extra large')
    ) OR
      (i_category = 'Women' AND
        (i_color = 'cyan' OR i_color = 'papaya') AND
        (i_units = 'Cup' OR i_units = 'Dram') AND
        (i_size = 'N/A' OR i_size = 'small')
      ) OR
      (i_category = 'Men' AND
        (i_color = 'orange' OR i_color = 'frosted') AND
        (i_units = 'Each' OR i_units = 'Tbl') AND
        (i_size = 'petite' OR i_size = 'large')
      ) OR
      (i_category = 'Men' AND
        (i_color = 'forest' OR i_color = 'ghost') AND
        (i_units = 'Lb' OR i_units = 'Bundle') AND
        (i_size = 'medium' OR i_size = 'extra large')
      )))) > 0
ORDER BY i_product_name
LIMIT 100

================================================
FILE:
spark-queries-tpcds/q42.sql
================================================
-- q42: store sales revenue by item category for manager 1 in 2000-11,
-- ordered by descending revenue.
SELECT
  dt.d_year,
  item.i_category_id,
  item.i_category,
  sum(ss_ext_sales_price)
FROM date_dim dt, store_sales, item
WHERE dt.d_date_sk = store_sales.ss_sold_date_sk
  AND store_sales.ss_item_sk = item.i_item_sk
  AND item.i_manager_id = 1
  AND dt.d_moy = 11
  AND dt.d_year = 2000
GROUP BY dt.d_year
  , item.i_category_id
  , item.i_category
ORDER BY sum(ss_ext_sales_price) DESC, dt.d_year
  , item.i_category_id
  , item.i_category
LIMIT 100

================================================
FILE: spark-queries-tpcds/q43.sql
================================================
-- q43: 2000 store sales price summed per day of week, for each store with
-- s_gmt_offset = -5.
SELECT
  s_store_name,
  s_store_id,
  sum(CASE WHEN (d_day_name = 'Sunday')
    THEN ss_sales_price
      ELSE NULL END) sun_sales,
  sum(CASE WHEN (d_day_name = 'Monday')
    THEN ss_sales_price
      ELSE NULL END) mon_sales,
  sum(CASE WHEN (d_day_name = 'Tuesday')
    THEN ss_sales_price
      ELSE NULL END) tue_sales,
  sum(CASE WHEN (d_day_name = 'Wednesday')
    THEN ss_sales_price
      ELSE NULL END) wed_sales,
  sum(CASE WHEN (d_day_name = 'Thursday')
    THEN ss_sales_price
      ELSE NULL END) thu_sales,
  sum(CASE WHEN (d_day_name = 'Friday')
    THEN ss_sales_price
      ELSE NULL END) fri_sales,
  sum(CASE WHEN (d_day_name = 'Saturday')
    THEN ss_sales_price
      ELSE NULL END) sat_sales
FROM date_dim, store_sales, store
WHERE d_date_sk = ss_sold_date_sk AND
  s_store_sk = ss_store_sk AND
  s_gmt_offset = -5 AND
  d_year = 2000
GROUP BY s_store_name, s_store_id
ORDER BY s_store_name, s_store_id, sun_sales, mon_sales, tue_sales, wed_sales,
  thu_sales, fri_sales, sat_sales
LIMIT 100

================================================
FILE: spark-queries-tpcds/q44.sql
================================================
-- q44: for store 4, pairs items of equal rank (rank below 11) from an
-- ascending and a descending ranking by average net profit; only items whose
-- average exceeds 0.9x the store's average over rows with NULL ss_addr_sk
-- are ranked.
SELECT
  asceding.rnk,
  i1.i_product_name best_performing,
  i2.i_product_name worst_performing
FROM (SELECT *
FROM (SELECT
  item_sk,
  rank()
  OVER (
    ORDER BY rank_col ASC) rnk
FROM (SELECT
  ss_item_sk item_sk,
  avg(ss_net_profit) rank_col
FROM store_sales ss1
WHERE ss_store_sk = 4
GROUP BY ss_item_sk
HAVING
avg(ss_net_profit) > 0.9 * (SELECT avg(ss_net_profit) rank_col
FROM store_sales
WHERE ss_store_sk = 4
  AND ss_addr_sk IS NULL
GROUP BY ss_store_sk)) V1) V11
WHERE rnk < 11) asceding,
  (SELECT *
  FROM (SELECT
    item_sk,
    rank()
    OVER (
      ORDER BY rank_col DESC) rnk
  FROM (SELECT
    ss_item_sk item_sk,
    avg(ss_net_profit) rank_col
  FROM store_sales ss1
  WHERE ss_store_sk = 4
  GROUP BY ss_item_sk
  HAVING avg(ss_net_profit) > 0.9 * (SELECT avg(ss_net_profit) rank_col
  FROM store_sales
  WHERE ss_store_sk = 4
    AND ss_addr_sk IS NULL
  GROUP BY ss_store_sk)) V2) V21
  WHERE rnk < 11) descending,
  item i1, item i2
WHERE asceding.rnk = descending.rnk
  AND i1.i_item_sk = asceding.item_sk
  AND i2.i_item_sk = descending.item_sk
ORDER BY asceding.rnk
LIMIT 100

================================================
FILE: spark-queries-tpcds/q45.sql
================================================
-- q45: 2001 quarter-2 web sales price by customer zip and city, for the
-- listed 5-character zip prefixes or for items whose i_item_id matches one
-- of the listed item keys.
SELECT
  ca_zip,
  ca_city,
  sum(ws_sales_price)
FROM web_sales, customer, customer_address, date_dim, item
WHERE ws_bill_customer_sk = c_customer_sk
  AND c_current_addr_sk = ca_address_sk
  AND ws_item_sk = i_item_sk
  AND (substr(ca_zip, 1, 5) IN
  ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')
  OR
  i_item_id IN (SELECT i_item_id
  FROM item
  WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)
  )
)
  AND ws_sold_date_sk = d_date_sk
  AND d_qoy = 2 AND d_year = 2001
GROUP BY ca_zip, ca_city
ORDER BY ca_zip, ca_city
LIMIT 100

================================================
FILE: spark-queries-tpcds/q46.sql
================================================
-- q46: coupon amount and net profit per store ticket for sales on d_dow 6 or
-- 0 in 1999-2001 at stores in 'Fairview' or 'Midway', by households with 4
-- dependents or 3 vehicles; the outer query joins the buying customer and
-- their current address.
SELECT
  c_last_name,
  c_first_name,
  ca_city,
  bought_city,
  ss_ticket_number,
  amt,
  profit
FROM
  (SELECT
    ss_ticket_number,
    ss_customer_sk,
    ca_city bought_city,
    sum(ss_coupon_amt) amt,
    sum(ss_net_profit) profit
  FROM store_sales, date_dim, store, household_demographics, customer_address
  WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
    AND store_sales.ss_store_sk = store.s_store_sk
    AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
    AND
    store_sales.ss_addr_sk = customer_address.ca_address_sk
    -- (q46, continued) household, day-of-week, year and store-city filters.
    AND (household_demographics.hd_dep_count = 4 OR
    household_demographics.hd_vehicle_count = 3)
    AND date_dim.d_dow IN (6, 0)
    AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2)
    -- The city list repeats 'Fairview'; duplicates do not change an IN test.
    AND store.s_city IN ('Fairview', 'Midway', 'Fairview', 'Fairview', 'Fairview')
  GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, customer,
  customer_address current_addr
WHERE ss_customer_sk = c_customer_sk
  AND customer.c_current_addr_sk = current_addr.ca_address_sk
  AND current_addr.ca_city <> bought_city
ORDER BY c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number
LIMIT 100
================================================
FILE: spark-queries-tpcds/q47.sql
================================================
-- q47: v1 aggregates ss_sales_price per (i_category, i_brand, s_store_name,
-- s_company_name, d_year, d_moy) for d_year = 1999 plus the adjacent
-- December (1999 - 1) and January (1999 + 1). It adds the average of those sums
-- over the same keys per d_year, and a rank "rn" ordered by (d_year, d_moy).
-- v2 self-joins v1 on rn - 1 / rn + 1 to attach the neighbouring rows' sums as
-- psum / nsum.
WITH v1 AS (
  SELECT
    i_category,
    i_brand,
    s_store_name,
    s_company_name,
    d_year,
    d_moy,
    sum(ss_sales_price) sum_sales,
    avg(sum(ss_sales_price))
    OVER
    (PARTITION BY i_category, i_brand,
      s_store_name, s_company_name, d_year)
    avg_monthly_sales,
    rank()
    OVER
    (PARTITION BY i_category, i_brand,
      s_store_name, s_company_name
      ORDER BY d_year, d_moy) rn
  FROM item, store_sales, date_dim, store
  WHERE ss_item_sk = i_item_sk AND
    ss_sold_date_sk = d_date_sk AND
    ss_store_sk = s_store_sk AND
    (
      d_year = 1999 OR
        (d_year = 1999 - 1 AND d_moy = 12) OR
        (d_year = 1999 + 1 AND d_moy = 1)
    )
  GROUP BY i_category, i_brand,
    s_store_name, s_company_name,
    d_year, d_moy),
    v2 AS (
    SELECT
      v1.i_category,
      v1.i_brand,
      v1.s_store_name,
      v1.s_company_name,
      v1.d_year,
      v1.d_moy,
      v1.avg_monthly_sales,
      v1.sum_sales,
      v1_lag.sum_sales psum,
      v1_lead.sum_sales nsum
    FROM v1, v1 v1_lag, v1 v1_lead
    WHERE v1.i_category = v1_lag.i_category AND
      v1.i_category = v1_lead.i_category AND
      v1.i_brand = v1_lag.i_brand AND
      v1.i_brand = v1_lead.i_brand AND
      v1.s_store_name = v1_lag.s_store_name AND
      v1.s_store_name = v1_lead.s_store_name AND
      v1.s_company_name = v1_lag.s_company_name AND
      v1.s_company_name = v1_lead.s_company_name AND
      v1.rn = v1_lag.rn + 1 AND
      v1.rn = v1_lead.rn -
      1)
-- (q47, continued) keep 1999 rows whose sum_sales differs from
-- avg_monthly_sales by more than 10% of that average; ORDER BY position 3 is
-- the third output column of v2.
SELECT *
FROM v2
WHERE d_year = 1999 AND
  avg_monthly_sales > 0 AND
  CASE WHEN avg_monthly_sales > 0
    THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales
  ELSE NULL END > 0.1
ORDER BY sum_sales - avg_monthly_sales, 3
LIMIT 100
================================================
FILE: spark-queries-tpcds/q48.sql
================================================
-- q48: total ss_quantity for d_year = 2001 store sales that satisfy one of
-- three (marital status, education, sales-price range) alternatives and one of
-- three (country, state list, net-profit range) alternatives. The join
-- predicates to customer_demographics and customer_address are repeated inside
-- every OR branch rather than stated once.
SELECT sum(ss_quantity)
FROM store_sales, store, customer_demographics, customer_address, date_dim
WHERE s_store_sk = ss_store_sk
  AND ss_sold_date_sk = d_date_sk AND d_year = 2001
  AND
  (
    (
      cd_demo_sk = ss_cdemo_sk
        AND
        cd_marital_status = 'M'
        AND
        cd_education_status = '4 yr Degree'
        AND
        ss_sales_price BETWEEN 100.00 AND 150.00
    )
      OR
      (
        cd_demo_sk = ss_cdemo_sk
          AND
          cd_marital_status = 'D'
          AND
          cd_education_status = '2 yr Degree'
          AND
          ss_sales_price BETWEEN 50.00 AND 100.00
      )
      OR
      (
        cd_demo_sk = ss_cdemo_sk
          AND
          cd_marital_status = 'S'
          AND
          cd_education_status = 'College'
          AND
          ss_sales_price BETWEEN 150.00 AND 200.00
      )
  )
  AND
  (
    (
      ss_addr_sk = ca_address_sk
        AND
        ca_country = 'United States'
        AND
        ca_state IN ('CO', 'OH', 'TX')
        AND ss_net_profit BETWEEN 0 AND 2000
    )
      OR
      (ss_addr_sk = ca_address_sk
        AND
        ca_country = 'United States'
        AND
        ca_state IN ('OR', 'MN', 'KY')
        AND ss_net_profit BETWEEN 150 AND 3000
      )
      OR
      (ss_addr_sk = ca_address_sk
        AND
        ca_country = 'United States'
        AND
        ca_state IN ('VA', 'CA', 'MS')
        AND ss_net_profit BETWEEN 50 AND 25000
      )
  )
================================================
FILE: spark-queries-tpcds/q49.sql
================================================
-- q49: for each of three channels (web, catalog, store) computes, per item,
-- returned quantity / sold quantity (return_ratio) and returned amount /
-- net paid (currency_ratio), ranks items by each ratio, and keeps items with
-- either rank <= 10. The three channel results are combined with UNION.
SELECT
  'web' AS channel,
  web.item,
  web.return_ratio,
  web.return_rank,
  web.currency_rank
FROM (
       SELECT
         item,
         return_ratio,
         currency_ratio,
         rank()
         OVER (
           ORDER BY return_ratio) AS return_rank,
         rank()
         OVER (
           ORDER BY currency_ratio) AS currency_rank
       FROM
         (SELECT
           ws.ws_item_sk AS item,
           (cast(sum(coalesce(wr.wr_return_quantity, 0)) AS DECIMAL(15, 4)) /
             cast(sum(coalesce(ws.ws_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio,
           (cast(sum(coalesce(wr.wr_return_amt,
                             0)) AS DECIMAL(15, 4)) /
             cast(sum(coalesce(ws.ws_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio
         FROM
           web_sales ws LEFT OUTER JOIN web_returns wr
             ON (ws.ws_order_number = wr.wr_order_number AND
             ws.ws_item_sk = wr.wr_item_sk)
           , date_dim
         -- (q49, web branch) the predicate on wr.wr_return_amt is not satisfied by
         -- NULL, so sales rows without a matching return are filtered out here.
         WHERE
           wr.wr_return_amt > 10000
             AND ws.ws_net_profit > 1
             AND ws.ws_net_paid > 0
             AND ws.ws_quantity > 0
             AND ws_sold_date_sk = d_date_sk
             AND d_year = 2001
             AND d_moy = 12
         GROUP BY ws.ws_item_sk
         ) in_web
     ) web
WHERE (web.return_rank <= 10 OR web.currency_rank <= 10)
UNION
-- (q49, catalog branch) same shape as the web branch, reading catalog_sales
-- and catalog_returns.
SELECT
  'catalog' AS channel,
  catalog.item,
  catalog.return_ratio,
  catalog.return_rank,
  catalog.currency_rank
FROM (
       SELECT
         item,
         return_ratio,
         currency_ratio,
         rank()
         OVER (
           ORDER BY return_ratio) AS return_rank,
         rank()
         OVER (
           ORDER BY currency_ratio) AS currency_rank
       FROM
         (SELECT
           cs.cs_item_sk AS item,
           (cast(sum(coalesce(cr.cr_return_quantity, 0)) AS DECIMAL(15, 4)) /
             cast(sum(coalesce(cs.cs_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio,
           (cast(sum(coalesce(cr.cr_return_amount, 0)) AS DECIMAL(15, 4)) /
             cast(sum(coalesce(cs.cs_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio
         FROM
           catalog_sales cs LEFT OUTER JOIN catalog_returns cr
             ON (cs.cs_order_number = cr.cr_order_number AND
             cs.cs_item_sk = cr.cr_item_sk)
           , date_dim
         WHERE
           cr.cr_return_amount > 10000
             AND cs.cs_net_profit > 1
             AND cs.cs_net_paid > 0
             AND cs.cs_quantity > 0
             AND cs_sold_date_sk = d_date_sk
             AND d_year = 2001
             AND d_moy = 12
         GROUP BY cs.cs_item_sk
         ) in_cat
     ) catalog
WHERE (catalog.return_rank <= 10 OR catalog.currency_rank <= 10)
UNION
-- (q49, store branch) same shape again, reading store_sales and store_returns.
SELECT
  'store' AS channel,
  store.item,
  store.return_ratio,
  store.return_rank,
  store.currency_rank
FROM (
       SELECT
         item,
         return_ratio,
         currency_ratio,
         rank()
         OVER (
           ORDER BY return_ratio) AS return_rank,
         rank()
         OVER (
           ORDER BY currency_ratio) AS currency_rank
       FROM
         (SELECT
           sts.ss_item_sk AS item,
           (cast(sum(coalesce(sr.sr_return_quantity, 0)) AS DECIMAL(15, 4)) /
             cast(sum(coalesce(sts.ss_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio,
           (cast(sum(coalesce(sr.sr_return_amt,
                             0)) AS DECIMAL(15, 4)) /
             cast(sum(coalesce(sts.ss_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio
         FROM
           store_sales sts LEFT OUTER JOIN store_returns sr
             ON (sts.ss_ticket_number = sr.sr_ticket_number AND sts.ss_item_sk = sr.sr_item_sk)
           , date_dim
         WHERE
           sr.sr_return_amt > 10000
             AND sts.ss_net_profit > 1
             AND sts.ss_net_paid > 0
             AND sts.ss_quantity > 0
             AND ss_sold_date_sk = d_date_sk
             AND d_year = 2001
             AND d_moy = 12
         GROUP BY sts.ss_item_sk
         ) in_store
     ) store
WHERE (store.return_rank <= 10 OR store.currency_rank <= 10)
-- (q49, end) ordering is by output position: channel, return_rank, currency_rank.
ORDER BY 1, 4, 5
LIMIT 100
================================================
FILE: spark-queries-tpcds/q5.sql
================================================
-- q5: three CTEs (ssr, csr, wsr) each stack sales rows and return rows with
-- UNION ALL, zero-filling the columns the other side lacks, then sum sales,
-- profit, returns and net loss per store / catalog page / web site for dates
-- between 2000-08-23 and that date plus 14 days.
WITH ssr AS
( SELECT
    s_store_id,
    sum(sales_price) AS sales,
    sum(profit) AS profit,
    sum(return_amt) AS RETURNS,
    sum(net_loss) AS profit_loss
  FROM
    (SELECT
       ss_store_sk AS store_sk,
       ss_sold_date_sk AS date_sk,
       ss_ext_sales_price AS sales_price,
       ss_net_profit AS profit,
       cast(0 AS DECIMAL(7, 2)) AS return_amt,
       cast(0 AS DECIMAL(7, 2)) AS net_loss
     FROM store_sales
     UNION ALL
     SELECT
       sr_store_sk AS store_sk,
       sr_returned_date_sk AS date_sk,
       cast(0 AS DECIMAL(7, 2)) AS sales_price,
       cast(0 AS DECIMAL(7, 2)) AS profit,
       sr_return_amt AS return_amt,
       sr_net_loss AS net_loss
     FROM store_returns)
    salesreturns, date_dim, store
  WHERE date_sk = d_date_sk
    AND d_date BETWEEN cast('2000-08-23' AS DATE)
  AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days))
    AND store_sk = s_store_sk
  GROUP BY s_store_id),
    csr AS
  ( SELECT
    cp_catalog_page_id,
    sum(sales_price) AS sales,
    sum(profit) AS profit,
    sum(return_amt) AS RETURNS,
    sum(net_loss) AS profit_loss
  FROM
    (SELECT
       cs_catalog_page_sk AS page_sk,
       cs_sold_date_sk AS date_sk,
       cs_ext_sales_price AS sales_price,
       cs_net_profit AS profit,
       cast(0 AS DECIMAL(7, 2)) AS return_amt,
       cast(0 AS DECIMAL(7, 2)) AS net_loss
     FROM catalog_sales
     UNION ALL
     SELECT
       cr_catalog_page_sk AS page_sk,
       cr_returned_date_sk AS date_sk,
       cast(0 AS DECIMAL(7, 2)) AS sales_price,
       cast(0 AS DECIMAL(7, 2)) AS profit,
       cr_return_amount AS return_amt,
       cr_net_loss AS net_loss
     FROM catalog_returns
    ) salesreturns, date_dim, catalog_page
  WHERE date_sk = d_date_sk
    AND d_date BETWEEN cast('2000-08-23' AS DATE)
  AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days))
    AND page_sk = cp_catalog_page_sk
  GROUP BY cp_catalog_page_id)
  ,
    -- (q5, continued) web returns carry no web-site key of their own in this
    -- query; it is taken from the matching web_sales row via the outer join.
    wsr AS
  ( SELECT
    web_site_id,
    sum(sales_price) AS sales,
    sum(profit) AS profit,
    sum(return_amt) AS RETURNS,
    sum(net_loss) AS profit_loss
  FROM
    (SELECT
       ws_web_site_sk AS wsr_web_site_sk,
       ws_sold_date_sk AS date_sk,
       ws_ext_sales_price AS sales_price,
       ws_net_profit AS profit,
       cast(0 AS DECIMAL(7, 2)) AS return_amt,
       cast(0 AS DECIMAL(7, 2)) AS net_loss
     FROM web_sales
     UNION ALL
     SELECT
       ws_web_site_sk AS wsr_web_site_sk,
       wr_returned_date_sk AS date_sk,
       cast(0 AS DECIMAL(7, 2)) AS sales_price,
       cast(0 AS DECIMAL(7, 2)) AS profit,
       wr_return_amt AS return_amt,
       wr_net_loss AS net_loss
     FROM web_returns
       LEFT OUTER JOIN web_sales ON
                                   (wr_item_sk = ws_item_sk
                                     AND wr_order_number = ws_order_number)
    ) salesreturns, date_dim, web_site
  WHERE date_sk = d_date_sk
    AND d_date BETWEEN cast('2000-08-23' AS DATE)
  AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days))
    AND wsr_web_site_sk = web_site_sk
  GROUP BY web_site_id)
-- Final projection: one row per (channel, id) plus ROLLUP subtotals; profit is
-- reported as profit minus profit_loss, and ids are prefixed with the channel's
-- entity name via concat.
SELECT
  channel,
  id,
  sum(sales) AS sales,
  sum(returns) AS returns,
  sum(profit) AS profit
FROM
  (SELECT
     'store channel' AS channel,
     concat('store', s_store_id) AS id,
     sales,
     returns,
     (profit - profit_loss) AS profit
   FROM ssr
   UNION ALL
   SELECT
     'catalog channel' AS channel,
     concat('catalog_page', cp_catalog_page_id) AS id,
     sales,
     returns,
     (profit - profit_loss) AS profit
   FROM csr
   UNION ALL
   SELECT
     'web channel' AS channel,
     concat('web_site', web_site_id) AS id,
     sales,
     returns,
     (profit - profit_loss) AS profit
   FROM wsr
  ) x
GROUP BY ROLLUP (channel, id)
ORDER BY channel, id
LIMIT 100
================================================
FILE: spark-queries-tpcds/q50.sql
================================================
-- q50: per store (name, company id and address columns), counts store returns
-- bucketed by sr_returned_date_sk - ss_sold_date_sk, for returns whose return
-- date falls in d_year = 2001, d_moy = 8.
SELECT
  s_store_name,
  s_company_id,
  s_street_number,
  s_street_name,
  s_street_type,
  s_suite_number,
  s_city,
  s_county,
  s_state,
  s_zip,
  -- (q50, continued) each bucket counts rows by summing 1/0 flags; the
  -- back-quoted aliases include a trailing space inside the quotes.
  sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk <= 30)
    THEN 1
      ELSE 0 END) AS `30 days `,
  sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 30) AND
    (sr_returned_date_sk - ss_sold_date_sk <= 60)
    THEN 1
      ELSE 0 END) AS `31 - 60 days `,
  sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 60) AND
    (sr_returned_date_sk - ss_sold_date_sk <= 90)
    THEN 1
      ELSE 0 END) AS `61 - 90 days `,
  sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 90) AND
    (sr_returned_date_sk - ss_sold_date_sk <= 120)
    THEN 1
      ELSE 0 END) AS `91 - 120 days `,
  sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 120)
    THEN 1
      ELSE 0 END) AS `>120 days `
FROM
  store_sales, store_returns, store, date_dim d1, date_dim d2
WHERE
  d2.d_year = 2001
    AND d2.d_moy = 8
    AND ss_ticket_number = sr_ticket_number
    AND ss_item_sk = sr_item_sk
    AND ss_sold_date_sk = d1.d_date_sk
    AND sr_returned_date_sk = d2.d_date_sk
    AND ss_customer_sk = sr_customer_sk
    AND ss_store_sk = s_store_sk
GROUP BY
  s_store_name, s_company_id, s_street_number, s_street_name, s_street_type,
  s_suite_number, s_city, s_county, s_state, s_zip
ORDER BY
  s_store_name, s_company_id, s_street_number, s_street_name, s_street_type,
  s_suite_number, s_city, s_county, s_state, s_zip
LIMIT 100
================================================
FILE: spark-queries-tpcds/q51.sql
================================================
-- q51: web_v1 and store_v1 compute, per item and d_date, a running total of the
-- daily summed sales price (window from the first row up to the current row)
-- for d_month_seq between 1200 and 1200 + 11.
WITH web_v1 AS (
  SELECT
    ws_item_sk item_sk,
    d_date,
    sum(sum(ws_sales_price))
    OVER (PARTITION BY ws_item_sk
      ORDER BY d_date
      ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cume_sales
  FROM web_sales, date_dim
  WHERE ws_sold_date_sk = d_date_sk
    AND d_month_seq BETWEEN 1200 AND 1200 + 11
    AND ws_item_sk IS NOT NULL
  GROUP BY ws_item_sk, d_date),
    store_v1 AS (
    SELECT
      ss_item_sk item_sk,
      d_date,
      sum(sum(ss_sales_price))
      OVER (PARTITION BY ss_item_sk
        ORDER BY d_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cume_sales
    FROM store_sales, date_dim
    WHERE ss_sold_date_sk =
      d_date_sk
      AND d_month_seq BETWEEN 1200 AND 1200 + 11
      AND ss_item_sk IS NOT NULL
    GROUP BY ss_item_sk, d_date)
-- (q51, continued) the two running totals are FULL OUTER JOINed on
-- (item_sk, d_date); a running max fills forward each side's latest value, and
-- only rows where the web value exceeds the store value are returned.
SELECT *
FROM (SELECT
  item_sk,
  d_date,
  web_sales,
  store_sales,
  max(web_sales)
  OVER (PARTITION BY item_sk
    ORDER BY d_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) web_cumulative,
  max(store_sales)
  OVER (PARTITION BY item_sk
    ORDER BY d_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) store_cumulative
FROM (SELECT
  CASE WHEN web.item_sk IS NOT NULL
    THEN web.item_sk
  ELSE store.item_sk END item_sk,
  CASE WHEN web.d_date IS NOT NULL
    THEN web.d_date
  ELSE store.d_date END d_date,
  web.cume_sales web_sales,
  store.cume_sales store_sales
FROM web_v1 web FULL OUTER JOIN store_v1 store ON (web.item_sk = store.item_sk
  AND web.d_date = store.d_date)
     ) x) y
WHERE web_cumulative > store_cumulative
ORDER BY item_sk, d_date
LIMIT 100
================================================
FILE: spark-queries-tpcds/q52.sql
================================================
-- q52: total ss_ext_sales_price per (d_year, i_brand, i_brand_id) for items with
-- i_manager_id = 1 sold in d_moy = 11 of d_year = 2000.
SELECT
  dt.d_year,
  item.i_brand_id brand_id,
  item.i_brand brand,
  sum(ss_ext_sales_price) ext_price
FROM date_dim dt, store_sales, item
WHERE dt.d_date_sk = store_sales.ss_sold_date_sk
  AND store_sales.ss_item_sk = item.i_item_sk
  AND item.i_manager_id = 1
  AND dt.d_moy = 11
  AND dt.d_year = 2000
GROUP BY dt.d_year, item.i_brand, item.i_brand_id
ORDER BY dt.d_year, ext_price DESC, brand_id
LIMIT 100
================================================
FILE: spark-queries-tpcds/q53.sql
================================================
-- q53: sums ss_sales_price per (i_manufact_id, d_qoy) over twelve consecutive
-- d_month_seq values starting at 1200, for two alternative
-- (category, class, brand) filter sets, and compares each sum with the
-- manufacturer's average across its groups.
SELECT *
FROM
  (SELECT
    i_manufact_id,
    sum(ss_sales_price) sum_sales,
    avg(sum(ss_sales_price))
    OVER (PARTITION BY i_manufact_id) avg_quarterly_sales
  FROM item, store_sales, date_dim, store
  WHERE ss_item_sk = i_item_sk AND
    ss_sold_date_sk = d_date_sk AND
    ss_store_sk = s_store_sk AND
    d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6,
                          1200 + 7, 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11) AND
    ((i_category IN ('Books', 'Children', 'Electronics') AND
      i_class IN
      ('personal', 'portable', 'reference', 'self-help') AND
      i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7',
                  'exportiunivamalg #9', 'scholaramalgamalg #9'))
      OR
      (i_category IN ('Women', 'Music', 'Men') AND
        i_class IN ('accessories', 'classical', 'fragrances', 'pants') AND
        i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1',
                    'importoamalg #1')))
  GROUP BY i_manufact_id, d_qoy) tmp1
-- (q53, continued) keep groups deviating from the average by more than 10%;
-- the CASE returns NULL (row rejected) when the average is not positive.
WHERE CASE WHEN avg_quarterly_sales > 0
  THEN abs(sum_sales - avg_quarterly_sales) / avg_quarterly_sales
      ELSE NULL END > 0.1
ORDER BY avg_quarterly_sales,
  sum_sales,
  i_manufact_id
LIMIT 100
================================================
FILE: spark-queries-tpcds/q54.sql
================================================
-- q54: my_customers = distinct customers who bought a 'Women' / 'maternity' item
-- through catalog or web in d_moy = 12 of d_year = 1998.
-- my_revenue = their store-sales revenue in stores whose county and state match
-- the customer's address, for d_month_seq between (December 1998's d_month_seq
-- + 1) and (+ 3).
-- segments = revenue / 50 cast to INT; the final query counts customers per
-- segment.
WITH my_customers AS (
  SELECT DISTINCT
    c_customer_sk,
    c_current_addr_sk
  FROM
    (SELECT
       cs_sold_date_sk sold_date_sk,
       cs_bill_customer_sk customer_sk,
       cs_item_sk item_sk
     FROM catalog_sales
     UNION ALL
     SELECT
       ws_sold_date_sk sold_date_sk,
       ws_bill_customer_sk customer_sk,
       ws_item_sk item_sk
     FROM web_sales
    ) cs_or_ws_sales,
    item,
    date_dim,
    customer
  WHERE sold_date_sk = d_date_sk
    AND item_sk = i_item_sk
    AND i_category = 'Women'
    AND i_class = 'maternity'
    AND c_customer_sk = cs_or_ws_sales.customer_sk
    AND d_moy = 12
    AND d_year = 1998
)
  , my_revenue AS (
  SELECT
    c_customer_sk,
    sum(ss_ext_sales_price) AS revenue
  FROM my_customers,
    store_sales,
    customer_address,
    store,
    date_dim
  WHERE c_current_addr_sk = ca_address_sk
    AND ca_county = s_county
    AND ca_state = s_state
    AND ss_sold_date_sk = d_date_sk
    AND c_customer_sk = ss_customer_sk
    AND d_month_seq BETWEEN (SELECT DISTINCT d_month_seq + 1
  FROM date_dim
  WHERE d_year = 1998 AND d_moy = 12)
  AND (SELECT DISTINCT d_month_seq + 3
  FROM date_dim
  WHERE d_year = 1998 AND d_moy = 12)
  GROUP BY c_customer_sk
)
  , segments AS
(SELECT cast((revenue / 50) AS INT) AS segment
  FROM my_revenue)
SELECT
  segment,
  count(*) AS num_customers,
  segment * 50 AS segment_base
FROM segments
GROUP BY segment
ORDER BY segment, num_customers
LIMIT 100
================================================
FILE: spark-queries-tpcds/q55.sql
================================================
-- q55: total ss_ext_sales_price per (i_brand, i_brand_id) for items with
-- i_manager_id = 28 sold in d_moy = 11 of d_year = 1999, largest totals first.
SELECT
  i_brand_id brand_id,
  i_brand brand,
  sum(ss_ext_sales_price) ext_price
FROM date_dim, store_sales, item
WHERE d_date_sk = ss_sold_date_sk
  AND ss_item_sk = i_item_sk
  AND i_manager_id = 28
  AND d_moy = 11
  AND d_year = 1999
GROUP BY i_brand, i_brand_id
ORDER BY ext_price DESC, brand_id
LIMIT 100
================================================
FILE: spark-queries-tpcds/q56.sql
================================================
-- q56: for each sales channel (store, catalog, web) sums the extended sales
-- price per i_item_id for items sharing an i_item_id with any item coloured
-- 'slate', 'blanched' or 'burnished', sold in d_year = 2001, d_moy = 2 to
-- addresses with ca_gmt_offset = -5; the three per-channel results are stacked
-- with UNION ALL and re-summed per i_item_id.
-- Store sales join the address through ss_addr_sk, catalog and web sales
-- through their *_bill_addr_sk columns.
WITH ss AS (
  SELECT
    i_item_id,
    sum(ss_ext_sales_price) total_sales
  FROM
    store_sales, date_dim, customer_address, item
  WHERE
    i_item_id IN (SELECT i_item_id
    FROM item
    WHERE i_color IN ('slate', 'blanched', 'burnished'))
      AND ss_item_sk = i_item_sk
      AND ss_sold_date_sk = d_date_sk
      AND d_year = 2001
      AND d_moy = 2
      AND ss_addr_sk = ca_address_sk
      AND ca_gmt_offset = -5
  GROUP BY i_item_id),
    cs AS (
    SELECT
      i_item_id,
      sum(cs_ext_sales_price) total_sales
    FROM
      catalog_sales, date_dim, customer_address, item
    WHERE
      i_item_id IN (SELECT i_item_id
      FROM item
      WHERE i_color IN ('slate', 'blanched', 'burnished'))
        AND cs_item_sk = i_item_sk
        AND cs_sold_date_sk = d_date_sk
        AND d_year = 2001
        AND d_moy = 2
        AND cs_bill_addr_sk = ca_address_sk
        AND ca_gmt_offset = -5
    GROUP BY i_item_id),
    ws AS (
    SELECT
      i_item_id,
      sum(ws_ext_sales_price) total_sales
    FROM
      web_sales, date_dim, customer_address, item
    WHERE
      i_item_id IN (SELECT i_item_id
      FROM item
      WHERE i_color IN ('slate', 'blanched', 'burnished'))
        AND ws_item_sk = i_item_sk
        AND ws_sold_date_sk = d_date_sk
        AND d_year = 2001
        AND d_moy = 2
        AND ws_bill_addr_sk = ca_address_sk
        AND ca_gmt_offset = -5
    GROUP BY i_item_id)
SELECT
  i_item_id,
  sum(total_sales) total_sales
FROM (SELECT *
      FROM ss
      UNION ALL
      SELECT *
      FROM cs
      UNION ALL
      SELECT *
      FROM ws) tmp1
GROUP BY i_item_id
ORDER BY total_sales
LIMIT 100
================================================
FILE: spark-queries-tpcds/q57.sql
================================================
-- q57: v1 aggregates cs_sales_price per (i_category, i_brand, cc_name, d_year,
-- d_moy) for d_year = 1999 plus the adjacent December (1999 - 1) and January
-- (1999 + 1). It adds the average of those sums over the same keys per d_year,
-- and a rank "rn" ordered by (d_year, d_moy). v2 self-joins v1 on rn - 1 /
-- rn + 1 to attach the neighbouring rows' sums as psum / nsum. The final query
-- keeps 1999 rows whose sum differs from the average by more than 10%.
WITH v1 AS (
  SELECT
    i_category,
    i_brand,
    cc_name,
    d_year,
    d_moy,
    sum(cs_sales_price) sum_sales,
    avg(sum(cs_sales_price))
    OVER
    (PARTITION BY i_category, i_brand, cc_name, d_year)
    avg_monthly_sales,
    rank()
    OVER
    (PARTITION BY i_category, i_brand, cc_name
      ORDER BY d_year, d_moy) rn
  FROM item, catalog_sales, date_dim, call_center
  WHERE cs_item_sk = i_item_sk AND
    cs_sold_date_sk = d_date_sk AND
    cc_call_center_sk = cs_call_center_sk AND
    (
      d_year = 1999 OR
        (d_year = 1999 - 1 AND d_moy = 12) OR
        (d_year = 1999 + 1 AND d_moy = 1)
    )
  GROUP BY i_category, i_brand,
    cc_name, d_year, d_moy),
    v2 AS (
    SELECT
      v1.i_category,
      v1.i_brand,
      v1.cc_name,
      v1.d_year,
      v1.d_moy,
      v1.avg_monthly_sales,
      v1.sum_sales,
      v1_lag.sum_sales psum,
      v1_lead.sum_sales nsum
    FROM v1, v1 v1_lag, v1 v1_lead
    WHERE v1.i_category = v1_lag.i_category AND
      v1.i_category = v1_lead.i_category AND
      v1.i_brand = v1_lag.i_brand AND
      v1.i_brand = v1_lead.i_brand AND
      v1.cc_name = v1_lag.cc_name AND
      v1.cc_name = v1_lead.cc_name AND
      v1.rn = v1_lag.rn + 1 AND
      v1.rn = v1_lead.rn - 1)
SELECT *
FROM v2
WHERE d_year = 1999 AND
  avg_monthly_sales > 0 AND
  CASE WHEN avg_monthly_sales > 0
    THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales
  ELSE NULL END > 0.1
ORDER BY sum_sales - avg_monthly_sales, 3
LIMIT 100
================================================
FILE: spark-queries-tpcds/q58.sql
================================================
-- q58: ss_items / cs_items / ws_items sum the extended sales price per
-- i_item_id for store, catalog and web sales on every d_date that shares a
-- d_week_seq with 2000-01-03. The final query keeps items present in all three
-- channels whose revenues are pairwise within 0.9x .. 1.1x of each other, and
-- reports each channel's revenue as a percentage of the three-channel average.
WITH ss_items AS
(SELECT
    i_item_id item_id,
    sum(ss_ext_sales_price) ss_item_rev
  FROM store_sales, item, date_dim
  WHERE ss_item_sk = i_item_sk
    AND d_date IN (SELECT d_date
  FROM date_dim
  WHERE d_week_seq = (SELECT d_week_seq
  FROM date_dim
  WHERE d_date = '2000-01-03'))
    AND ss_sold_date_sk = d_date_sk
  GROUP BY i_item_id),
    cs_items AS
  (SELECT
    i_item_id item_id,
    sum(cs_ext_sales_price) cs_item_rev
  FROM catalog_sales, item, date_dim
  WHERE cs_item_sk = i_item_sk
    AND d_date IN (SELECT d_date
  FROM date_dim
  WHERE d_week_seq = (SELECT d_week_seq
  FROM date_dim
  WHERE d_date = '2000-01-03'))
    AND cs_sold_date_sk = d_date_sk
  GROUP BY i_item_id),
    ws_items AS
  (SELECT
    i_item_id item_id,
    sum(ws_ext_sales_price) ws_item_rev
  FROM web_sales, item, date_dim
  WHERE ws_item_sk = i_item_sk
    AND d_date IN (SELECT d_date
  FROM date_dim
  WHERE d_week_seq = (SELECT d_week_seq
  FROM date_dim
  WHERE d_date = '2000-01-03'))
    AND ws_sold_date_sk = d_date_sk
  GROUP BY i_item_id)
-- The *_dev divisors are explicitly parenthesized as (total / 3). Division is
-- left-associative, so the unparenthesized form "rev / (total) / 3 * 100"
-- evaluates as ((rev / total) / 3) * 100, which is not a percentage of the
-- "average" column computed below.
SELECT
  ss_items.item_id,
  ss_item_rev,
  ss_item_rev / ((ss_item_rev + cs_item_rev + ws_item_rev) / 3) * 100 ss_dev,
  cs_item_rev,
  cs_item_rev / ((ss_item_rev + cs_item_rev + ws_item_rev) / 3) * 100 cs_dev,
  ws_item_rev,
  ws_item_rev / ((ss_item_rev + cs_item_rev + ws_item_rev) / 3) * 100 ws_dev,
  (ss_item_rev + cs_item_rev + ws_item_rev) / 3 average
FROM ss_items, cs_items, ws_items
WHERE ss_items.item_id = cs_items.item_id
  AND ss_items.item_id = ws_items.item_id
  AND ss_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev
  AND ss_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev
  AND cs_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev
  AND cs_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev
  AND ws_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev
  AND ws_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev
ORDER BY item_id, ss_item_rev
LIMIT 100
================================================
FILE: spark-queries-tpcds/q59.sql
================================================
-- q59: wss sums ss_sales_price per (d_week_seq, ss_store_sk), split into one
-- column per d_day_name value.
WITH wss AS
(SELECT
    d_week_seq,
    ss_store_sk,
    sum(CASE WHEN (d_day_name = 'Sunday')
      THEN ss_sales_price
        ELSE NULL END) sun_sales,
    sum(CASE WHEN (d_day_name = 'Monday')
      THEN ss_sales_price
        ELSE NULL END) mon_sales,
    sum(CASE WHEN (d_day_name = 'Tuesday')
      THEN ss_sales_price
        ELSE NULL END) tue_sales,
    sum(CASE WHEN (d_day_name = 'Wednesday')
      THEN ss_sales_price
        ELSE NULL END) wed_sales,
    sum(CASE WHEN (d_day_name = 'Thursday')
      THEN ss_sales_price
        ELSE NULL END) thu_sales,
    sum(CASE WHEN (d_day_name = 'Friday')
      THEN ss_sales_price
        ELSE NULL END) fri_sales,
    sum(CASE WHEN (d_day_name =
                   'Saturday')
      THEN ss_sales_price
        ELSE NULL END) sat_sales
  FROM store_sales, date_dim
  WHERE d_date_sk = ss_sold_date_sk
  GROUP BY d_week_seq, ss_store_sk
)
-- (q59, continued) derived table y takes weeks with d_month_seq in
-- 1212 .. 1212 + 11, x takes weeks in 1212 + 12 .. 1212 + 23; rows are paired
-- per store id where y's week is x's week minus 52, and each day's sales are
-- reported as the ratio y / x.
SELECT
  s_store_name1,
  s_store_id1,
  d_week_seq1,
  sun_sales1 / sun_sales2,
  mon_sales1 / mon_sales2,
  tue_sales1 / tue_sales2,
  wed_sales1 / wed_sales2,
  thu_sales1 / thu_sales2,
  fri_sales1 / fri_sales2,
  sat_sales1 / sat_sales2
FROM
  (SELECT
    s_store_name s_store_name1,
    wss.d_week_seq d_week_seq1,
    s_store_id s_store_id1,
    sun_sales sun_sales1,
    mon_sales mon_sales1,
    tue_sales tue_sales1,
    wed_sales wed_sales1,
    thu_sales thu_sales1,
    fri_sales fri_sales1,
    sat_sales sat_sales1
  FROM wss, store, date_dim d
  WHERE d.d_week_seq = wss.d_week_seq AND
    ss_store_sk = s_store_sk AND
    d_month_seq BETWEEN 1212 AND 1212 + 11) y,
  (SELECT
    s_store_name s_store_name2,
    wss.d_week_seq d_week_seq2,
    s_store_id s_store_id2,
    sun_sales sun_sales2,
    mon_sales mon_sales2,
    tue_sales tue_sales2,
    wed_sales wed_sales2,
    thu_sales thu_sales2,
    fri_sales fri_sales2,
    sat_sales sat_sales2
  FROM wss, store, date_dim d
  WHERE d.d_week_seq = wss.d_week_seq AND
    ss_store_sk = s_store_sk AND
    d_month_seq BETWEEN 1212 + 12 AND 1212 + 23) x
WHERE s_store_id1 = s_store_id2
  AND d_week_seq1 = d_week_seq2 - 52
ORDER BY s_store_name1, s_store_id1, d_week_seq1
LIMIT 100
================================================
FILE: spark-queries-tpcds/q6.sql
================================================
-- q6: counts store-sales rows per customer state (ca_state) for the
-- d_month_seq of d_year = 2000, d_moy = 1, restricted to items priced above
-- 1.2 times the average i_current_price of their own i_category (correlated
-- subquery on item j); only states with at least 10 rows are returned.
SELECT
  a.ca_state state,
  count(*) cnt
FROM
  customer_address a, customer c, store_sales s, date_dim d, item i
WHERE a.ca_address_sk = c.c_current_addr_sk
  AND c.c_customer_sk = s.ss_customer_sk
  AND s.ss_sold_date_sk = d.d_date_sk
  AND s.ss_item_sk = i.i_item_sk
  AND d.d_month_seq =
  (SELECT DISTINCT (d_month_seq)
  FROM date_dim
  WHERE d_year = 2000 AND d_moy = 1)
  AND i.i_current_price > 1.2 *
  (SELECT avg(j.i_current_price)
  FROM item j
  WHERE j.i_category = i.i_category)
GROUP BY a.ca_state
HAVING count(*) >= 10
ORDER BY cnt
LIMIT 100
================================================
FILE:
spark-queries-tpcds/q60.sql
================================================
-- q60: for each sales channel (store, catalog, web) sums the extended sales
-- price per i_item_id for items sharing an i_item_id with any item in
-- i_category 'Music', sold in d_year = 1998, d_moy = 9 to addresses with
-- ca_gmt_offset = -5; the per-channel results are stacked with UNION ALL and
-- re-summed per i_item_id.
WITH ss AS (
  SELECT
    i_item_id,
    sum(ss_ext_sales_price) total_sales
  FROM store_sales, date_dim, customer_address, item
  WHERE
    i_item_id IN (SELECT i_item_id
    FROM item
    WHERE i_category IN ('Music'))
      AND ss_item_sk = i_item_sk
      AND ss_sold_date_sk = d_date_sk
      AND d_year = 1998
      AND d_moy = 9
      AND ss_addr_sk = ca_address_sk
      AND ca_gmt_offset = -5
  GROUP BY i_item_id),
    cs AS (
    SELECT
      i_item_id,
      sum(cs_ext_sales_price) total_sales
    FROM catalog_sales, date_dim, customer_address, item
    WHERE
      i_item_id IN (SELECT i_item_id
      FROM item
      WHERE i_category IN ('Music'))
        AND cs_item_sk = i_item_sk
        AND cs_sold_date_sk = d_date_sk
        AND d_year = 1998
        AND d_moy = 9
        AND cs_bill_addr_sk = ca_address_sk
        AND ca_gmt_offset = -5
    GROUP BY i_item_id),
    ws AS (
    SELECT
      i_item_id,
      sum(ws_ext_sales_price) total_sales
    FROM web_sales, date_dim, customer_address, item
    WHERE
      i_item_id IN (SELECT i_item_id
      FROM item
      WHERE i_category IN ('Music'))
        AND ws_item_sk = i_item_sk
        AND ws_sold_date_sk = d_date_sk
        AND d_year = 1998
        AND d_moy = 9
        AND ws_bill_addr_sk = ca_address_sk
        AND ca_gmt_offset = -5
    GROUP BY i_item_id)
SELECT
  i_item_id,
  sum(total_sales) total_sales
FROM (SELECT *
      FROM ss
      UNION ALL
      SELECT *
      FROM cs
      UNION ALL
      SELECT *
      FROM ws) tmp1
GROUP BY i_item_id
ORDER BY i_item_id, total_sales
LIMIT 100
================================================
FILE: spark-queries-tpcds/q61.sql
================================================
-- q61: computes two single-row totals of ss_ext_sales_price for 'Jewelry'
-- items sold in d_year = 1998, d_moy = 11 where both the customer address and
-- the store have gmt offset -5. "promotions" additionally requires a promotion
-- with at least one of the dmail / email / tv channel flags set to 'Y';
-- "total" has no promotion condition. The third column is
-- promotions / total * 100.
SELECT
  promotions,
  total,
  cast(promotions AS DECIMAL(15, 4)) / cast(total AS DECIMAL(15, 4)) * 100
FROM
  (SELECT sum(ss_ext_sales_price) promotions
  FROM store_sales, store, promotion, date_dim, customer, customer_address, item
  WHERE ss_sold_date_sk = d_date_sk
    AND ss_store_sk = s_store_sk
    AND ss_promo_sk = p_promo_sk
    AND ss_customer_sk = c_customer_sk
    AND ca_address_sk = c_current_addr_sk
    AND ss_item_sk = i_item_sk
    AND ca_gmt_offset = -5
    AND i_category = 'Jewelry'
    AND (p_channel_dmail = 'Y' OR p_channel_email
    = 'Y' OR p_channel_tv = 'Y')
    AND s_gmt_offset = -5
    AND d_year = 1998
    AND d_moy = 11) promotional_sales,
  -- (q61, continued) same filters as above, without the promotion join.
  (SELECT sum(ss_ext_sales_price) total
  FROM store_sales, store, date_dim, customer, customer_address, item
  WHERE ss_sold_date_sk = d_date_sk
    AND ss_store_sk = s_store_sk
    AND ss_customer_sk = c_customer_sk
    AND ca_address_sk = c_current_addr_sk
    AND ss_item_sk = i_item_sk
    AND ca_gmt_offset = -5
    AND i_category = 'Jewelry'
    AND s_gmt_offset = -5
    AND d_year = 1998
    AND d_moy = 11) all_sales
ORDER BY promotions, total
LIMIT 100
================================================
FILE: spark-queries-tpcds/q62.sql
================================================
-- q62: per (first 20 characters of w_warehouse_name, sm_type, web_name), counts
-- web sales bucketed by ws_ship_date_sk - ws_sold_date_sk, for shipments whose
-- ship date has d_month_seq between 1200 and 1200 + 11. The back-quoted aliases
-- include a trailing space inside the quotes.
SELECT
  substr(w_warehouse_name, 1, 20),
  sm_type,
  web_name,
  sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk <= 30)
    THEN 1
      ELSE 0 END) AS `30 days `,
  sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 30) AND
    (ws_ship_date_sk - ws_sold_date_sk <= 60)
    THEN 1
      ELSE 0 END) AS `31 - 60 days `,
  sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 60) AND
    (ws_ship_date_sk - ws_sold_date_sk <= 90)
    THEN 1
      ELSE 0 END) AS `61 - 90 days `,
  sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 90) AND
    (ws_ship_date_sk - ws_sold_date_sk <= 120)
    THEN 1
      ELSE 0 END) AS `91 - 120 days `,
  sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 120)
    THEN 1
      ELSE 0 END) AS `>120 days `
FROM
  web_sales, warehouse, ship_mode, web_site, date_dim
WHERE
  d_month_seq BETWEEN 1200 AND 1200 + 11
    AND ws_ship_date_sk = d_date_sk
    AND ws_warehouse_sk = w_warehouse_sk
    AND ws_ship_mode_sk = sm_ship_mode_sk
    AND ws_web_site_sk = web_site_sk
GROUP BY
  substr(w_warehouse_name, 1, 20), sm_type, web_name
ORDER BY
  substr(w_warehouse_name, 1, 20), sm_type, web_name
LIMIT 100
================================================
FILE: spark-queries-tpcds/q63.sql
================================================
-- q63: sums ss_sales_price per (i_manager_id, d_moy) over twelve consecutive
-- d_month_seq values starting at 1200, for two alternative
-- (category, class, brand) filter sets, and compares each sum with the
-- manager's average across its groups.
SELECT *
FROM (SELECT
  i_manager_id,
  sum(ss_sales_price) sum_sales,
  avg(sum(ss_sales_price))
  OVER (PARTITION BY i_manager_id) avg_monthly_sales
FROM item
  , store_sales
  ,
  date_dim
  , store
WHERE ss_item_sk = i_item_sk
  AND ss_sold_date_sk = d_date_sk
  AND ss_store_sk = s_store_sk
  AND d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, 1200 + 7,
                            1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11)
  -- NOTE(review): 'refernece' below looks like a misspelling of 'reference'
  -- (q53 uses 'reference' in an otherwise identical list), so this literal may
  -- never match an i_class value. It may be intentionally kept verbatim from
  -- the upstream benchmark template; confirm before changing, since a
  -- correction alters the query's result set.
  AND ((i_category IN ('Books', 'Children', 'Electronics')
  AND i_class IN ('personal', 'portable', 'refernece', 'self-help')
  AND i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7',
                  'exportiunivamalg #9', 'scholaramalgamalg #9'))
  OR (i_category IN ('Women', 'Music', 'Men')
  AND i_class IN ('accessories', 'classical', 'fragrances', 'pants')
  AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1',
                  'importoamalg #1')))
GROUP BY i_manager_id, d_moy) tmp1
-- (q63, continued) keep groups deviating from the average by more than 10%.
WHERE CASE WHEN avg_monthly_sales > 0
  THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales
      ELSE NULL END > 0.1
ORDER BY i_manager_id
  , avg_monthly_sales
  , sum_sales
LIMIT 100
================================================
FILE: spark-queries-tpcds/q64.sql
================================================
-- q64: cs_ui = catalog items whose summed cs_ext_list_price exceeds twice the
-- summed refunds (cash + reversed charge + store credit) over matching
-- sale/return rows. cross_sales = returned store sales of those items, grouped
-- by product, store, purchase address (ad1), current customer address (ad2)
-- and the years of the sale / first sale / first ship-to dates, with a row
-- count and three cost sums.
-- The alias b_streen_name is spelled this way both here and where it is
-- referenced in the final SELECT.
WITH cs_ui AS
(SELECT
    cs_item_sk,
    sum(cs_ext_list_price) AS sale,
    sum(cr_refunded_cash + cr_reversed_charge + cr_store_credit) AS refund
  FROM catalog_sales
    , catalog_returns
  WHERE cs_item_sk = cr_item_sk
    AND cs_order_number = cr_order_number
  GROUP BY cs_item_sk
  HAVING sum(cs_ext_list_price) > 2 * sum(cr_refunded_cash + cr_reversed_charge + cr_store_credit)),
    cross_sales AS
  (SELECT
    i_product_name product_name,
    i_item_sk item_sk,
    s_store_name store_name,
    s_zip store_zip,
    ad1.ca_street_number b_street_number,
    ad1.ca_street_name b_streen_name,
    ad1.ca_city b_city,
    ad1.ca_zip b_zip,
    ad2.ca_street_number c_street_number,
    ad2.ca_street_name c_street_name,
    ad2.ca_city c_city,
    ad2.ca_zip c_zip,
    d1.d_year AS syear,
    d2.d_year AS fsyear,
    d3.d_year s2year,
    count(*) cnt,
    sum(ss_wholesale_cost) s1,
    sum(ss_list_price) s2,
    sum(ss_coupon_amt) s3
  FROM store_sales, store_returns, cs_ui, date_dim d1, date_dim d2, date_dim d3,
    store, customer, customer_demographics
    cd1, customer_demographics cd2,
    promotion, household_demographics hd1, household_demographics hd2,
    customer_address ad1, customer_address ad2, income_band ib1, income_band ib2, item
  -- (q64, continued) *1 aliases are joined through the store_sales row, *2
  -- aliases through the customer's current_* keys.
  WHERE ss_store_sk = s_store_sk AND
    ss_sold_date_sk = d1.d_date_sk AND
    ss_customer_sk = c_customer_sk AND
    ss_cdemo_sk = cd1.cd_demo_sk AND
    ss_hdemo_sk = hd1.hd_demo_sk AND
    ss_addr_sk = ad1.ca_address_sk AND
    ss_item_sk = i_item_sk AND
    ss_item_sk = sr_item_sk AND
    ss_ticket_number = sr_ticket_number AND
    ss_item_sk = cs_ui.cs_item_sk AND
    c_current_cdemo_sk = cd2.cd_demo_sk AND
    c_current_hdemo_sk = hd2.hd_demo_sk AND
    c_current_addr_sk = ad2.ca_address_sk AND
    c_first_sales_date_sk = d2.d_date_sk AND
    c_first_shipto_date_sk = d3.d_date_sk AND
    ss_promo_sk = p_promo_sk AND
    hd1.hd_income_band_sk = ib1.ib_income_band_sk AND
    hd2.hd_income_band_sk = ib2.ib_income_band_sk AND
    cd1.cd_marital_status <> cd2.cd_marital_status AND
    i_color IN ('purple', 'burlywood', 'indian', 'spring', 'floral', 'medium') AND
    -- The two BETWEEN ranges overlap on 64 + 1 .. 64 + 10; both must hold.
    i_current_price BETWEEN 64 AND 64 + 10 AND
    i_current_price BETWEEN 64 + 1 AND 64 + 15
  GROUP BY i_product_name, i_item_sk, s_store_name, s_zip, ad1.ca_street_number,
    ad1.ca_street_name, ad1.ca_city, ad1.ca_zip, ad2.ca_street_number,
    ad2.ca_street_name, ad2.ca_city, ad2.ca_zip, d1.d_year, d2.d_year, d3.d_year
  )
-- Pairs each item/store's syear = 1999 row (cs1) with its syear = 1999 + 1 row
-- (cs2) where the later count does not exceed the earlier one. This query has
-- no LIMIT clause.
SELECT
  cs1.product_name,
  cs1.store_name,
  cs1.store_zip,
  cs1.b_street_number,
  cs1.b_streen_name,
  cs1.b_city,
  cs1.b_zip,
  cs1.c_street_number,
  cs1.c_street_name,
  cs1.c_city,
  cs1.c_zip,
  cs1.syear,
  cs1.cnt,
  cs1.s1,
  cs1.s2,
  cs1.s3,
  cs2.s1,
  cs2.s2,
  cs2.s3,
  cs2.syear,
  cs2.cnt
FROM cross_sales cs1, cross_sales cs2
WHERE cs1.item_sk = cs2.item_sk AND
  cs1.syear = 1999 AND
  cs2.syear = 1999 + 1 AND
  cs2.cnt <= cs1.cnt AND
  cs1.store_name = cs2.store_name AND
  cs1.store_zip = cs2.store_zip
ORDER BY cs1.product_name, cs1.store_name, cs2.cnt
================================================
FILE: spark-queries-tpcds/q65.sql
================================================
-- q65: lists (store, item) pairs whose summed ss_sales_price over d_month_seq
-- 1176 .. 1176 + 11 is at most 0.1 times the store's average per-item revenue
-- over the same period.
SELECT
  s_store_name,
  i_item_desc,
  sc.revenue,
  i_current_price,
  i_wholesale_cost,
  i_brand
FROM store, item,
  -- (q65, continued) sb = average of the per-(store, item) revenues, per store.
  (SELECT
    ss_store_sk,
    avg(revenue) AS ave
  FROM
    (SELECT
      ss_store_sk,
      ss_item_sk,
      sum(ss_sales_price) AS revenue
    FROM store_sales, date_dim
    WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176 + 11
    GROUP BY ss_store_sk, ss_item_sk) sa
  GROUP BY ss_store_sk) sb,
  -- sc = the same per-(store, item) revenue aggregation as "sa" above.
  (SELECT
    ss_store_sk,
    ss_item_sk,
    sum(ss_sales_price) AS revenue
  FROM store_sales, date_dim
  WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176 + 11
  GROUP BY ss_store_sk, ss_item_sk) sc
WHERE sb.ss_store_sk = sc.ss_store_sk AND
  sc.revenue <= 0.1 * sb.ave AND
  s_store_sk = sc.ss_store_sk AND
  i_item_sk = sc.ss_item_sk
ORDER BY s_store_name, i_item_desc
LIMIT 100
================================================
FILE: spark-queries-tpcds/q66.sql
================================================
SELECT
  w_warehouse_name,
  w_warehouse_sq_ft,
  w_city,
  w_county,
  w_state,
  w_country,
  ship_carriers,
  year,
  sum(jan_sales) AS jan_sales,
  sum(feb_sales) AS feb_sales,
  sum(mar_sales) AS mar_sales,
  sum(apr_sales) AS apr_sales,
  sum(may_sales) AS may_sales,
  sum(jun_sales) AS jun_sales,
  sum(jul_sales) AS jul_sales,
  sum(aug_sales) AS aug_sales,
  sum(sep_sales) AS sep_sales,
  sum(oct_sales) AS oct_sales,
  sum(nov_sales) AS nov_sales,
  sum(dec_sales) AS dec_sales,
  sum(jan_sales / w_warehouse_sq_ft) AS jan_sales_per_sq_foot,
  sum(feb_sales / w_warehouse_sq_ft) AS feb_sales_per_sq_foot,
  sum(mar_sales / w_warehouse_sq_ft) AS mar_sales_per_sq_foot,
  sum(apr_sales / w_warehouse_sq_ft) AS apr_sales_per_sq_foot,
  sum(may_sales / w_warehouse_sq_ft) AS may_sales_per_sq_foot,
  sum(jun_sales / w_warehouse_sq_ft) AS jun_sales_per_sq_foot,
  sum(jul_sales / w_warehouse_sq_ft) AS jul_sales_per_sq_foot,
  sum(aug_sales / w_warehouse_sq_ft) AS aug_sales_per_sq_foot,
  sum(sep_sales / w_warehouse_sq_ft) AS sep_sales_per_sq_foot,
  sum(oct_sales / w_warehouse_sq_ft) AS oct_sales_per_sq_foot,
  sum(nov_sales / w_warehouse_sq_ft) AS nov_sales_per_sq_foot,
  sum(dec_sales /
w_warehouse_sq_ft) AS dec_sales_per_sq_foot, sum(jan_net) AS jan_net, sum(feb_net) AS feb_net, sum(mar_net) AS mar_net, sum(apr_net) AS apr_net, sum(may_net) AS may_net, sum(jun_net) AS jun_net, sum(jul_net) AS jul_net, sum(aug_net) AS aug_net, sum(sep_net) AS sep_net, sum(oct_net) AS oct_net, sum(nov_net) AS nov_net, sum(dec_net) AS dec_net FROM ( (SELECT w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, concat('DHL', ',', 'BARIAN') AS ship_carriers, d_year AS year, sum(CASE WHEN d_moy = 1 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS jan_sales, sum(CASE WHEN d_moy = 2 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS feb_sales, sum(CASE WHEN d_moy = 3 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS mar_sales, sum(CASE WHEN d_moy = 4 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS apr_sales, sum(CASE WHEN d_moy = 5 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS may_sales, sum(CASE WHEN d_moy = 6 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS jun_sales, sum(CASE WHEN d_moy = 7 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS jul_sales, sum(CASE WHEN d_moy = 8 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS aug_sales, sum(CASE WHEN d_moy = 9 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS sep_sales, sum(CASE WHEN d_moy = 10 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS oct_sales, sum(CASE WHEN d_moy = 11 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS nov_sales, sum(CASE WHEN d_moy = 12 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS dec_sales, sum(CASE WHEN d_moy = 1 THEN ws_net_paid * ws_quantity ELSE 0 END) AS jan_net, sum(CASE WHEN d_moy = 2 THEN ws_net_paid * ws_quantity ELSE 0 END) AS feb_net, sum(CASE WHEN d_moy = 3 THEN ws_net_paid * ws_quantity ELSE 0 END) AS mar_net, sum(CASE WHEN d_moy = 4 THEN ws_net_paid * ws_quantity ELSE 0 END) AS apr_net, sum(CASE WHEN d_moy = 5 THEN ws_net_paid * ws_quantity ELSE 0 END) AS may_net, sum(CASE WHEN d_moy = 6 THEN ws_net_paid * 
ws_quantity ELSE 0 END) AS jun_net, sum(CASE WHEN d_moy = 7 THEN ws_net_paid * ws_quantity ELSE 0 END) AS jul_net, sum(CASE WHEN d_moy = 8 THEN ws_net_paid * ws_quantity ELSE 0 END) AS aug_net, sum(CASE WHEN d_moy = 9 THEN ws_net_paid * ws_quantity ELSE 0 END) AS sep_net, sum(CASE WHEN d_moy = 10 THEN ws_net_paid * ws_quantity ELSE 0 END) AS oct_net, sum(CASE WHEN d_moy = 11 THEN ws_net_paid * ws_quantity ELSE 0 END) AS nov_net, sum(CASE WHEN d_moy = 12 THEN ws_net_paid * ws_quantity ELSE 0 END) AS dec_net FROM web_sales, warehouse, date_dim, time_dim, ship_mode WHERE ws_warehouse_sk = w_warehouse_sk AND ws_sold_date_sk = d_date_sk AND ws_sold_time_sk = t_time_sk AND ws_ship_mode_sk = sm_ship_mode_sk AND d_year = 2001 AND t_time BETWEEN 30838 AND 30838 + 28800 AND sm_carrier IN ('DHL', 'BARIAN') GROUP BY w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, d_year) UNION ALL (SELECT w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, concat('DHL', ',', 'BARIAN') AS ship_carriers, d_year AS year, sum(CASE WHEN d_moy = 1 THEN cs_sales_price * cs_quantity ELSE 0 END) AS jan_sales, sum(CASE WHEN d_moy = 2 THEN cs_sales_price * cs_quantity ELSE 0 END) AS feb_sales, sum(CASE WHEN d_moy = 3 THEN cs_sales_price * cs_quantity ELSE 0 END) AS mar_sales, sum(CASE WHEN d_moy = 4 THEN cs_sales_price * cs_quantity ELSE 0 END) AS apr_sales, sum(CASE WHEN d_moy = 5 THEN cs_sales_price * cs_quantity ELSE 0 END) AS may_sales, sum(CASE WHEN d_moy = 6 THEN cs_sales_price * cs_quantity ELSE 0 END) AS jun_sales, sum(CASE WHEN d_moy = 7 THEN cs_sales_price * cs_quantity ELSE 0 END) AS jul_sales, sum(CASE WHEN d_moy = 8 THEN cs_sales_price * cs_quantity ELSE 0 END) AS aug_sales, sum(CASE WHEN d_moy = 9 THEN cs_sales_price * cs_quantity ELSE 0 END) AS sep_sales, sum(CASE WHEN d_moy = 10 THEN cs_sales_price * cs_quantity ELSE 0 END) AS oct_sales, sum(CASE WHEN d_moy = 11 THEN cs_sales_price * cs_quantity ELSE 0 END) AS nov_sales, sum(CASE WHEN 
d_moy = 12 THEN cs_sales_price * cs_quantity ELSE 0 END) AS dec_sales, sum(CASE WHEN d_moy = 1 THEN cs_net_paid_inc_tax * cs_quantity ELSE 0 END) AS jan_net, sum(CASE WHEN d_moy = 2 THEN cs_net_paid_inc_tax * cs_quantity ELSE 0 END) AS feb_net, sum(CASE WHEN d_moy = 3 THEN cs_net_paid_inc_tax * cs_quantity ELSE 0 END) AS mar_net, sum(CASE WHEN d_moy = 4 THEN cs_net_paid_inc_tax * cs_quantity ELSE 0 END) AS apr_net, sum(CASE WHEN d_moy = 5 THEN cs_net_paid_inc_tax * cs_quantity ELSE 0 END) AS may_net, sum(CASE WHEN d_moy = 6 THEN cs_net_paid_inc_tax * cs_quantity ELSE 0 END) AS jun_net, sum(CASE WHEN d_moy = 7 THEN cs_net_paid_inc_tax * cs_quantity ELSE 0 END) AS jul_net, sum(CASE WHEN d_moy = 8 THEN cs_net_paid_inc_tax * cs_quantity ELSE 0 END) AS aug_net, sum(CASE WHEN d_moy = 9 THEN cs_net_paid_inc_tax * cs_quantity ELSE 0 END) AS sep_net, sum(CASE WHEN d_moy = 10 THEN cs_net_paid_inc_tax * cs_quantity ELSE 0 END) AS oct_net, sum(CASE WHEN d_moy = 11 THEN cs_net_paid_inc_tax * cs_quantity ELSE 0 END) AS nov_net, sum(CASE WHEN d_moy = 12 THEN cs_net_paid_inc_tax * cs_quantity ELSE 0 END) AS dec_net FROM catalog_sales, warehouse, date_dim, time_dim, ship_mode WHERE cs_warehouse_sk = w_warehouse_sk AND cs_sold_date_sk = d_date_sk AND cs_sold_time_sk = t_time_sk AND cs_ship_mode_sk = sm_ship_mode_sk AND d_year = 2001 AND t_time BETWEEN 30838 AND 30838 + 28800 AND sm_carrier IN ('DHL', 'BARIAN') GROUP BY w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, d_year ) ) x GROUP BY w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, ship_carriers, year ORDER BY w_warehouse_name LIMIT 100 ================================================ FILE: spark-queries-tpcds/q67.sql ================================================ /* q67: ROLLUP of store sales totals (sum of ss_sales_price * ss_quantity, with a NULL product treated as 0) over category, class, brand, product name, year, quarter, month and store id for d_month_seq 1200 through 1200 + 11; keeps rows whose rank by sumsales within their i_category is at most 100 and returns the first 100 of them in the listed order. */ SELECT * FROM (SELECT i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy, s_store_id, sumsales, rank() OVER (PARTITION BY i_category ORDER BY sumsales DESC) rk FROM (SELECT i_category, i_class, 
i_brand, i_product_name, d_year, d_qoy, d_moy, s_store_id, sum(coalesce(ss_sales_price * ss_quantity, 0)) sumsales FROM store_sales, date_dim, store, item WHERE ss_sold_date_sk = d_date_sk AND ss_item_sk = i_item_sk AND ss_store_sk = s_store_sk AND d_month_seq BETWEEN 1200 AND 1200 + 11 GROUP BY ROLLUP (i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy, s_store_id)) dw1) dw2 WHERE rk <= 100 ORDER BY i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy, s_store_id, sumsales, rk LIMIT 100 ================================================ FILE: spark-queries-tpcds/q68.sql ================================================ /* q68: per ticket and customer, sums of extended sales price, list price and tax for store sales on day-of-month 1 or 2 in 1999 through 1999 + 2 at stores in 'Midway' or 'Fairview', by households with hd_dep_count = 4 or hd_vehicle_count = 3; only customers whose current address city differs from the city of the sale address are returned (first 100 by last name, ticket number). */ SELECT c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number, extended_price, extended_tax, list_price FROM (SELECT ss_ticket_number, ss_customer_sk, ca_city bought_city, sum(ss_ext_sales_price) extended_price, sum(ss_ext_list_price) list_price, sum(ss_ext_tax) extended_tax FROM store_sales, date_dim, store, household_demographics, customer_address WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk AND store_sales.ss_store_sk = store.s_store_sk AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk AND store_sales.ss_addr_sk = customer_address.ca_address_sk AND date_dim.d_dom BETWEEN 1 AND 2 AND (household_demographics.hd_dep_count = 4 OR household_demographics.hd_vehicle_count = 3) AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) AND store.s_city IN ('Midway', 'Fairview') GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, customer, customer_address current_addr WHERE ss_customer_sk = c_customer_sk AND customer.c_current_addr_sk = current_addr.ca_address_sk AND current_addr.ca_city <> bought_city ORDER BY c_last_name, ss_ticket_number LIMIT 100 ================================================ FILE: spark-queries-tpcds/q69.sql ================================================ /* q69: counts customers grouped by gender, marital status, education status, purchase estimate and credit rating, for customers whose current address is in KY, GA or NM and who have a store_sales row in months 4 through 4 + 2 of 2001 but no web_sales and no catalog_sales row in that period; cnt1, cnt2 and cnt3 are all count(*). */ SELECT cd_gender, cd_marital_status, cd_education_status, count(*) cnt1, cd_purchase_estimate, count(*) 
cnt2, cd_credit_rating, count(*) cnt3 FROM customer c, customer_address ca, customer_demographics WHERE c.c_current_addr_sk = ca.ca_address_sk AND ca_state IN ('KY', 'GA', 'NM') AND cd_demo_sk = c.c_current_cdemo_sk AND exists(SELECT * FROM store_sales, date_dim WHERE c.c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk AND d_year = 2001 AND d_moy BETWEEN 4 AND 4 + 2) AND (NOT exists(SELECT * FROM web_sales, date_dim WHERE c.c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk AND d_year = 2001 AND d_moy BETWEEN 4 AND 4 + 2) AND NOT exists(SELECT * FROM catalog_sales, date_dim WHERE c.c_customer_sk = cs_ship_customer_sk AND cs_sold_date_sk = d_date_sk AND d_year = 2001 AND d_moy BETWEEN 4 AND 4 + 2)) GROUP BY cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating ORDER BY cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating LIMIT 100 ================================================ FILE: spark-queries-tpcds/q7.sql ================================================ /* q7: average quantity, list price, coupon amount and sales price per i_item_id for store sales in 2000 to customers with cd_gender 'M', marital status 'S' and education 'College', under promotions where p_channel_email = 'N' or p_channel_event = 'N'; first 100 rows by item id. */ SELECT i_item_id, avg(ss_quantity) agg1, avg(ss_list_price) agg2, avg(ss_coupon_amt) agg3, avg(ss_sales_price) agg4 FROM store_sales, customer_demographics, date_dim, item, promotion WHERE ss_sold_date_sk = d_date_sk AND ss_item_sk = i_item_sk AND ss_cdemo_sk = cd_demo_sk AND ss_promo_sk = p_promo_sk AND cd_gender = 'M' AND cd_marital_status = 'S' AND cd_education_status = 'College' AND (p_channel_email = 'N' OR p_channel_event = 'N') AND d_year = 2000 GROUP BY i_item_id ORDER BY i_item_id LIMIT 100 ================================================ FILE: spark-queries-tpcds/q70.sql ================================================ /* q70: store net profit rolled up over (s_state, s_county) for d_month_seq 1200 through 1200 + 11, restricted to states returned by the inner subquery (ranking <= 5); lochierarchy is grouping(s_state) + grouping(s_county), and rank_within_parent ranks rows by net profit within each rollup level (and within the state for county-level rows). */ SELECT sum(ss_net_profit) AS total_sum, s_state, s_county, grouping(s_state) + grouping(s_county) AS lochierarchy, rank() OVER ( PARTITION BY grouping(s_state) + grouping(s_county), CASE WHEN grouping(s_county) = 0 THEN s_state END ORDER BY sum(ss_net_profit) DESC) AS 
rank_within_parent FROM store_sales, date_dim d1, store WHERE d1.d_month_seq BETWEEN 1200 AND 1200 + 11 AND d1.d_date_sk = ss_sold_date_sk AND s_store_sk = ss_store_sk AND s_state IN (SELECT s_state FROM (SELECT s_state AS s_state, rank() OVER (PARTITION BY s_state ORDER BY sum(ss_net_profit) DESC) AS ranking FROM store_sales, store, date_dim WHERE d_month_seq BETWEEN 1200 AND 1200 + 11 AND d_date_sk = ss_sold_date_sk AND s_store_sk = ss_store_sk GROUP BY s_state) tmp1 WHERE ranking <= 5) GROUP BY ROLLUP (s_state, s_county) ORDER BY lochierarchy DESC , CASE WHEN lochierarchy = 0 THEN s_state END , rank_within_parent LIMIT 100 ================================================ FILE: spark-queries-tpcds/q71.sql ================================================ /* q71: sums ext sales price by brand and time of day (t_hour, t_minute) over the UNION ALL of web, catalog and store sales in month 11 of 1999, for items with i_manager_id = 1 sold at 'breakfast' or 'dinner' meal times; ordered by ext_price descending, then brand id. */ SELECT i_brand_id brand_id, i_brand brand, t_hour, t_minute, sum(ext_price) ext_price FROM item, (SELECT ws_ext_sales_price AS ext_price, ws_sold_date_sk AS sold_date_sk, ws_item_sk AS sold_item_sk, ws_sold_time_sk AS time_sk FROM web_sales, date_dim WHERE d_date_sk = ws_sold_date_sk AND d_moy = 11 AND d_year = 1999 UNION ALL SELECT cs_ext_sales_price AS ext_price, cs_sold_date_sk AS sold_date_sk, cs_item_sk AS sold_item_sk, cs_sold_time_sk AS time_sk FROM catalog_sales, date_dim WHERE d_date_sk = cs_sold_date_sk AND d_moy = 11 AND d_year = 1999 UNION ALL SELECT ss_ext_sales_price AS ext_price, ss_sold_date_sk AS sold_date_sk, ss_item_sk AS sold_item_sk, ss_sold_time_sk AS time_sk FROM store_sales, date_dim WHERE d_date_sk = ss_sold_date_sk AND d_moy = 11 AND d_year = 1999 ) AS tmp, time_dim WHERE sold_item_sk = i_item_sk AND i_manager_id = 1 AND time_sk = t_time_sk AND (t_meal_time = 'breakfast' OR t_meal_time = 'dinner') GROUP BY i_brand, i_brand_id, t_hour, t_minute ORDER BY ext_price DESC, brand_id ================================================ FILE: spark-queries-tpcds/q72.sql ================================================
/*
 * q72: per item description, warehouse and sold-week, counts 1999 catalog
 * sales lines whose ordered quantity exceeds the inventory on hand in the
 * same week (d1.d_week_seq = d2.d_week_seq) and whose ship date is more than
 * 5 days after the sale date, for households with buy potential '>10000'
 * and customers with marital status 'D'.
 *
 * no_promo / promo use sum() so that each counts only its own rows. The
 * previous count(CASE ... ELSE 0 END) form counted every row, because the
 * ELSE branch yields a non-NULL 0, which made no_promo, promo and total_cnt
 * always identical.
 *
 * The WHERE clause previously repeated the hd_buy_potential and d1.d_year
 * predicates; the duplicates are removed with no change in meaning.
 */
SELECT
  i_item_desc,
  w_warehouse_name,
  d1.d_week_seq,
  sum(CASE WHEN p_promo_sk IS NULL THEN 1 ELSE 0 END) no_promo,
  sum(CASE WHEN p_promo_sk IS NOT NULL THEN 1 ELSE 0 END) promo,
  count(*) total_cnt
FROM catalog_sales
  JOIN inventory ON (cs_item_sk = inv_item_sk)
  JOIN warehouse ON (w_warehouse_sk = inv_warehouse_sk)
  JOIN item ON (i_item_sk = cs_item_sk)
  JOIN customer_demographics ON (cs_bill_cdemo_sk = cd_demo_sk)
  JOIN household_demographics ON (cs_bill_hdemo_sk = hd_demo_sk)
  JOIN date_dim d1 ON (cs_sold_date_sk = d1.d_date_sk)
  JOIN date_dim d2 ON (inv_date_sk = d2.d_date_sk)
  JOIN date_dim d3 ON (cs_ship_date_sk = d3.d_date_sk)
  LEFT OUTER JOIN promotion ON (cs_promo_sk = p_promo_sk)
  LEFT OUTER JOIN catalog_returns ON (cr_item_sk = cs_item_sk AND cr_order_number = cs_order_number)
WHERE d1.d_week_seq = d2.d_week_seq
  AND inv_quantity_on_hand < cs_quantity
  AND d3.d_date > (cast(d1.d_date AS DATE) + interval 5 days)
  AND hd_buy_potential = '>10000'
  AND cd_marital_status = 'D'
  AND d1.d_year = 1999
GROUP BY i_item_desc, w_warehouse_name, d1.d_week_seq
ORDER BY total_cnt DESC, i_item_desc, w_warehouse_name, d_week_seq
LIMIT 100
================================================ FILE: spark-queries-tpcds/q73.sql ================================================ /* q73: customers joined to their store tickets that contain between 1 and 5 store_sales rows, for sales on day-of-month 1 or 2 in the listed years and store counties, by households with buy potential '>10000' or 'unknown', a positive vehicle count, and hd_dep_count / hd_vehicle_count greater than 1; ordered by cnt descending. */ SELECT c_last_name, c_first_name, c_salutation, c_preferred_cust_flag, ss_ticket_number, cnt FROM (SELECT ss_ticket_number, ss_customer_sk, count(*) cnt FROM store_sales, date_dim, store, household_demographics WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk AND store_sales.ss_store_sk = store.s_store_sk AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk AND date_dim.d_dom BETWEEN 1 AND 2 AND (household_demographics.hd_buy_potential = '>10000' OR household_demographics.hd_buy_potential = 'unknown') AND household_demographics.hd_vehicle_count > 0 AND CASE WHEN household_demographics.hd_vehicle_count > 0 THEN household_demographics.hd_dep_count / household_demographics.hd_vehicle_count ELSE NULL END > 1 AND date_dim.d_year IN 
(1999, 1999 + 1, 1999 + 2) AND store.s_county IN ('Williamson County', 'Franklin Parish', 'Bronx County', 'Orange County') GROUP BY ss_ticket_number, ss_customer_sk) dj, customer WHERE ss_customer_sk = c_customer_sk AND cnt BETWEEN 1 AND 5 ORDER BY cnt DESC ================================================ FILE: spark-queries-tpcds/q74.sql ================================================ /* q74: year_total holds per-customer, per-year sums of net paid for store sales ('s') and web sales ('w') in 2001 and 2001 + 1; the main query returns customers whose web year-over-year ratio exceeds their store year-over-year ratio, requiring positive first-year totals in both channels (first 100 rows ordered by output column 1). */ WITH year_total AS ( SELECT c_customer_id customer_id, c_first_name customer_first_name, c_last_name customer_last_name, d_year AS year, sum(ss_net_paid) year_total, 's' sale_type FROM customer, store_sales, date_dim WHERE c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk AND d_year IN (2001, 2001 + 1) GROUP BY c_customer_id, c_first_name, c_last_name, d_year UNION ALL SELECT c_customer_id customer_id, c_first_name customer_first_name, c_last_name customer_last_name, d_year AS year, sum(ws_net_paid) year_total, 'w' sale_type FROM customer, web_sales, date_dim WHERE c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk AND d_year IN (2001, 2001 + 1) GROUP BY c_customer_id, c_first_name, c_last_name, d_year) SELECT t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name FROM year_total t_s_firstyear, year_total t_s_secyear, year_total t_w_firstyear, year_total t_w_secyear WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id AND t_s_firstyear.customer_id = t_w_secyear.customer_id AND t_s_firstyear.customer_id = t_w_firstyear.customer_id AND t_s_firstyear.sale_type = 's' AND t_w_firstyear.sale_type = 'w' AND t_s_secyear.sale_type = 's' AND t_w_secyear.sale_type = 'w' AND t_s_firstyear.year = 2001 AND t_s_secyear.year = 2001 + 1 AND t_w_firstyear.year = 2001 AND t_w_secyear.year = 2001 + 1 AND t_s_firstyear.year_total > 0 AND t_w_firstyear.year_total > 0 AND CASE WHEN t_w_firstyear.year_total > 0 THEN t_w_secyear.year_total / t_w_firstyear.year_total ELSE NULL END > CASE WHEN t_s_firstyear.year_total > 0 THEN 
t_s_secyear.year_total / t_s_firstyear.year_total ELSE NULL END ORDER BY 1, 1, 1 LIMIT 100 ================================================ FILE: spark-queries-tpcds/q75.sql ================================================ /* q75: all_sales aggregates, per year and brand/class/category/manufacturer id, the quantity and amount sold minus returned for 'Books' items across catalog, store and web sales (combined with UNION, so duplicate detail rows collapse); the main query pairs 2002 with 2002 - 1 and keeps rows where the current-year count is below 0.9 of the previous-year count, ordered by the count difference. */ WITH all_sales AS ( SELECT d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id, SUM(sales_cnt) AS sales_cnt, SUM(sales_amt) AS sales_amt FROM ( SELECT d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id, cs_quantity - COALESCE(cr_return_quantity, 0) AS sales_cnt, cs_ext_sales_price - COALESCE(cr_return_amount, 0.0) AS sales_amt FROM catalog_sales JOIN item ON i_item_sk = cs_item_sk JOIN date_dim ON d_date_sk = cs_sold_date_sk LEFT JOIN catalog_returns ON (cs_order_number = cr_order_number AND cs_item_sk = cr_item_sk) WHERE i_category = 'Books' UNION SELECT d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id, ss_quantity - COALESCE(sr_return_quantity, 0) AS sales_cnt, ss_ext_sales_price - COALESCE(sr_return_amt, 0.0) AS sales_amt FROM store_sales JOIN item ON i_item_sk = ss_item_sk JOIN date_dim ON d_date_sk = ss_sold_date_sk LEFT JOIN store_returns ON (ss_ticket_number = sr_ticket_number AND ss_item_sk = sr_item_sk) WHERE i_category = 'Books' UNION SELECT d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id, ws_quantity - COALESCE(wr_return_quantity, 0) AS sales_cnt, ws_ext_sales_price - COALESCE(wr_return_amt, 0.0) AS sales_amt FROM web_sales JOIN item ON i_item_sk = ws_item_sk JOIN date_dim ON d_date_sk = ws_sold_date_sk LEFT JOIN web_returns ON (ws_order_number = wr_order_number AND ws_item_sk = wr_item_sk) WHERE i_category = 'Books') sales_detail GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) SELECT prev_yr.d_year AS prev_year, curr_yr.d_year AS year, curr_yr.i_brand_id, curr_yr.i_class_id, curr_yr.i_category_id, curr_yr.i_manufact_id, prev_yr.sales_cnt AS prev_yr_cnt, curr_yr.sales_cnt AS curr_yr_cnt, curr_yr.sales_cnt - prev_yr.sales_cnt AS sales_cnt_diff, curr_yr.sales_amt - 
prev_yr.sales_amt AS sales_amt_diff FROM all_sales curr_yr, all_sales prev_yr WHERE curr_yr.i_brand_id = prev_yr.i_brand_id AND curr_yr.i_class_id = prev_yr.i_class_id AND curr_yr.i_category_id = prev_yr.i_category_id AND curr_yr.i_manufact_id = prev_yr.i_manufact_id AND curr_yr.d_year = 2002 AND prev_yr.d_year = 2002 - 1 AND CAST(curr_yr.sales_cnt AS DECIMAL(17, 2)) / CAST(prev_yr.sales_cnt AS DECIMAL(17, 2)) < 0.9 ORDER BY sales_cnt_diff LIMIT 100 ================================================ FILE: spark-queries-tpcds/q76.sql ================================================ /* q76: counts rows and sums ext sales price for sales whose selected key column is NULL (ss_store_sk for 'store', ws_ship_customer_sk for 'web', cs_ship_addr_sk for 'catalog'), grouped by channel, that column, year, quarter and item category; first 100 rows in group order. */ SELECT channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( SELECT 'store' AS channel, ss_store_sk col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price FROM store_sales, item, date_dim WHERE ss_store_sk IS NULL AND ss_sold_date_sk = d_date_sk AND ss_item_sk = i_item_sk UNION ALL SELECT 'web' AS channel, ws_ship_customer_sk col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price FROM web_sales, item, date_dim WHERE ws_ship_customer_sk IS NULL AND ws_sold_date_sk = d_date_sk AND ws_item_sk = i_item_sk UNION ALL SELECT 'catalog' AS channel, cs_ship_addr_sk col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price FROM catalog_sales, item, date_dim WHERE cs_ship_addr_sk IS NULL AND cs_sold_date_sk = d_date_sk AND cs_item_sk = i_item_sk) foo GROUP BY channel, col_name, d_year, d_qoy, i_category ORDER BY channel, col_name, d_year, d_qoy, i_category LIMIT 100 ================================================ FILE: spark-queries-tpcds/q77.sql ================================================ /* q77: sales, returns and profit per channel and id (store, catalog call center, web page) for dates between 2000-08-03 and 2000-08-03 + 30 days, with ROLLUP subtotals over (channel, id). The catalog branch joins cs and cr without a join condition; cr has no grouping column, so it is a single-row aggregate. */ WITH ss AS (SELECT s_store_sk, sum(ss_ext_sales_price) AS sales, sum(ss_net_profit) AS profit FROM store_sales, date_dim, store WHERE ss_sold_date_sk = d_date_sk AND d_date BETWEEN cast('2000-08-03' AS DATE) AND (cast('2000-08-03' AS DATE) + INTERVAL 30 days) AND ss_store_sk = s_store_sk GROUP BY s_store_sk), sr AS 
(SELECT s_store_sk, sum(sr_return_amt) AS returns, sum(sr_net_loss) AS profit_loss FROM store_returns, date_dim, store WHERE sr_returned_date_sk = d_date_sk AND d_date BETWEEN cast('2000-08-03' AS DATE) AND (cast('2000-08-03' AS DATE) + INTERVAL 30 days) AND sr_store_sk = s_store_sk GROUP BY s_store_sk), cs AS (SELECT cs_call_center_sk, sum(cs_ext_sales_price) AS sales, sum(cs_net_profit) AS profit FROM catalog_sales, date_dim WHERE cs_sold_date_sk = d_date_sk AND d_date BETWEEN cast('2000-08-03' AS DATE) AND (cast('2000-08-03' AS DATE) + INTERVAL 30 days) GROUP BY cs_call_center_sk), cr AS (SELECT sum(cr_return_amount) AS returns, sum(cr_net_loss) AS profit_loss FROM catalog_returns, date_dim WHERE cr_returned_date_sk = d_date_sk AND d_date BETWEEN cast('2000-08-03' AS DATE) AND (cast('2000-08-03' AS DATE) + INTERVAL 30 days)), ws AS (SELECT wp_web_page_sk, sum(ws_ext_sales_price) AS sales, sum(ws_net_profit) AS profit FROM web_sales, date_dim, web_page WHERE ws_sold_date_sk = d_date_sk AND d_date BETWEEN cast('2000-08-03' AS DATE) AND (cast('2000-08-03' AS DATE) + INTERVAL 30 days) AND ws_web_page_sk = wp_web_page_sk GROUP BY wp_web_page_sk), wr AS (SELECT wp_web_page_sk, sum(wr_return_amt) AS returns, sum(wr_net_loss) AS profit_loss FROM web_returns, date_dim, web_page WHERE wr_returned_date_sk = d_date_sk AND d_date BETWEEN cast('2000-08-03' AS DATE) AND (cast('2000-08-03' AS DATE) + INTERVAL 30 days) AND wr_web_page_sk = wp_web_page_sk GROUP BY wp_web_page_sk) SELECT channel, id, sum(sales) AS sales, sum(returns) AS returns, sum(profit) AS profit FROM (SELECT 'store channel' AS channel, ss.s_store_sk AS id, sales, coalesce(returns, 0) AS returns, (profit - coalesce(profit_loss, 0)) AS profit FROM ss LEFT JOIN sr ON ss.s_store_sk = sr.s_store_sk UNION ALL SELECT 'catalog channel' AS channel, cs_call_center_sk AS id, sales, returns, (profit - profit_loss) AS profit FROM cs, cr UNION ALL SELECT 'web channel' AS channel, ws.wp_web_page_sk AS id, sales, 
coalesce(returns, 0) returns, (profit - coalesce(profit_loss, 0)) AS profit FROM ws LEFT JOIN wr ON ws.wp_web_page_sk = wr.wp_web_page_sk ) x GROUP BY ROLLUP (channel, id) ORDER BY channel, id LIMIT 100 ================================================ FILE: spark-queries-tpcds/q78.sql ================================================ /* q78: the ws, cs and ss CTEs total quantity, wholesale cost and sales price per (year, item, customer) for web, catalog and store sales rows that have no matching return row; the main query, for store year 2000, reports store totals next to the combined web + catalog totals and a rounded quantity ratio, keeping only rows where both web and catalog quantities are greater than 0. */ WITH ws AS (SELECT d_year AS ws_sold_year, ws_item_sk, ws_bill_customer_sk ws_customer_sk, sum(ws_quantity) ws_qty, sum(ws_wholesale_cost) ws_wc, sum(ws_sales_price) ws_sp FROM web_sales LEFT JOIN web_returns ON wr_order_number = ws_order_number AND ws_item_sk = wr_item_sk JOIN date_dim ON ws_sold_date_sk = d_date_sk WHERE wr_order_number IS NULL GROUP BY d_year, ws_item_sk, ws_bill_customer_sk ), cs AS (SELECT d_year AS cs_sold_year, cs_item_sk, cs_bill_customer_sk cs_customer_sk, sum(cs_quantity) cs_qty, sum(cs_wholesale_cost) cs_wc, sum(cs_sales_price) cs_sp FROM catalog_sales LEFT JOIN catalog_returns ON cr_order_number = cs_order_number AND cs_item_sk = cr_item_sk JOIN date_dim ON cs_sold_date_sk = d_date_sk WHERE cr_order_number IS NULL GROUP BY d_year, cs_item_sk, cs_bill_customer_sk ), ss AS (SELECT d_year AS ss_sold_year, ss_item_sk, ss_customer_sk, sum(ss_quantity) ss_qty, sum(ss_wholesale_cost) ss_wc, sum(ss_sales_price) ss_sp FROM store_sales LEFT JOIN store_returns ON sr_ticket_number = ss_ticket_number AND ss_item_sk = sr_item_sk JOIN date_dim ON ss_sold_date_sk = d_date_sk WHERE sr_ticket_number IS NULL GROUP BY d_year, ss_item_sk, ss_customer_sk ) SELECT round(ss_qty / (coalesce(ws_qty + cs_qty, 1)), 2) ratio, ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, coalesce(ws_qty, 0) + coalesce(cs_qty, 0) other_chan_qty, coalesce(ws_wc, 0) + coalesce(cs_wc, 0) other_chan_wholesale_cost, coalesce(ws_sp, 0) + coalesce(cs_sp, 0) other_chan_sales_price FROM ss LEFT JOIN ws ON (ws_sold_year = ss_sold_year AND ws_item_sk = ss_item_sk AND ws_customer_sk = ss_customer_sk) LEFT JOIN cs ON (cs_sold_year = 
ss_sold_year AND cs_item_sk = ss_item_sk AND cs_customer_sk = ss_customer_sk) WHERE coalesce(ws_qty, 0) > 0 AND coalesce(cs_qty, 0) > 0 AND ss_sold_year = 2000 ORDER BY ratio, ss_qty DESC, ss_wc DESC, ss_sp DESC, other_chan_qty, other_chan_wholesale_cost, other_chan_sales_price, round(ss_qty / (coalesce(ws_qty + cs_qty, 1)), 2) LIMIT 100 ================================================ FILE: spark-queries-tpcds/q79.sql ================================================ /* q79: per ticket, customer and store city, sums of coupon amount and net profit for store sales with d_dow = 1 in 1999 through 1999 + 2 at stores with 200 to 295 employees, by households with hd_dep_count = 6 or hd_vehicle_count > 2; joined to customer for names, first 100 rows by last name, first name, city prefix and profit. */ SELECT c_last_name, c_first_name, substr(s_city, 1, 30), ss_ticket_number, amt, profit FROM (SELECT ss_ticket_number, ss_customer_sk, store.s_city, sum(ss_coupon_amt) amt, sum(ss_net_profit) profit FROM store_sales, date_dim, store, household_demographics WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk AND store_sales.ss_store_sk = store.s_store_sk AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk AND (household_demographics.hd_dep_count = 6 OR household_demographics.hd_vehicle_count > 2) AND date_dim.d_dow = 1 AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) AND store.s_number_employees BETWEEN 200 AND 295 GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, store.s_city) ms, customer WHERE ss_customer_sk = c_customer_sk ORDER BY c_last_name, c_first_name, substr(s_city, 1, 30), profit LIMIT 100 ================================================ FILE: spark-queries-tpcds/q8.sql ================================================ /* q8: sums store net profit per store name for quarter 2 of 1998, joining stores to zip codes V1 on the first two characters of the zip; V1 is the INTERSECT of 5-character zip prefixes found in the literal list with zip groups having more than 10 customers whose c_preferred_cust_flag is 'Y'. */ SELECT s_store_name, sum(ss_net_profit) FROM store_sales, date_dim, store, (SELECT ca_zip FROM ( (SELECT substr(ca_zip, 1, 5) ca_zip FROM customer_address WHERE substr(ca_zip, 1, 5) IN ( '24128','76232','65084','87816','83926','77556','20548', '26231','43848','15126','91137','61265','98294','25782', '17920','18426','98235','40081','84093','28577','55565', '17183','54601','67897','22752','86284','18376','38607', '45200','21756','29741','96765','23932','89360','29839', '25989','28898','91068','72550','10390','18845','47770', 
'82636','41367','76638','86198','81312','37126','39192', '88424','72175','81426','53672','10445','42666','66864', '66708','41248','48583','82276','18842','78890','49448', '14089','38122','34425','79077','19849','43285','39861', '66162','77610','13695','99543','83444','83041','12305', '57665','68341','25003','57834','62878','49130','81096', '18840','27700','23470','50412','21195','16021','76107', '71954','68309','18119','98359','64544','10336','86379', '27068','39736','98569','28915','24206','56529','57647', '54917','42961','91110','63981','14922','36420','23006', '67467','32754','30903','20260','31671','51798','72325', '85816','68621','13955','36446','41766','68806','16725', '15146','22744','35850','88086','51649','18270','52867', '39972','96976','63792','11376','94898','13595','10516', '90225','58943','39371','94945','28587','96576','57855', '28488','26105','83933','25858','34322','44438','73171', '30122','34102','22685','71256','78451','54364','13354', '45375','40558','56458','28286','45266','47305','69399', '83921','26233','11101','15371','69913','35942','15882', '25631','24610','44165','99076','33786','70738','26653', '14328','72305','62496','22152','10144','64147','48425', '14663','21076','18799','30450','63089','81019','68893', '24996','51200','51211','45692','92712','70466','79994', '22437','25280','38935','71791','73134','56571','14060', '19505','72425','56575','74351','68786','51650','20004', '18383','76614','11634','18906','15765','41368','73241', '76698','78567','97189','28545','76231','75691','22246', '51061','90578','56691','68014','51103','94167','57047', '14867','73520','15734','63435','25733','35474','24676', '94627','53535','17879','15559','53268','59166','11928', '59402','33282','45721','43933','68101','33515','36634', '71286','19736','58058','55253','67473','41918','19515', '36495','19430','22351','77191','91393','49156','50298', '87501','18652','53179','18767','63193','23968','65164', '68880','21286','72823','58470','67301','13394','31016', 
'70372','67030','40604','24317','45748','39127','26065', '77721','31029','31880','60576','24671','45549','13376', '50016','33123','19769','22927','97789','46081','72151', '15723','46136','51949','68100','96888','64528','14171', '79777','28709','11489','25103','32213','78668','22245', '15798','27156','37930','62971','21337','51622','67853', '10567','38415','15455','58263','42029','60279','37125', '56240','88190','50308','26859','64457','89091','82136', '62377','36233','63837','58078','17043','30010','60099', '28810','98025','29178','87343','73273','30469','64034', '39516','86057','21309','90257','67875','40162','11356', '73650','61810','72013','30431','22461','19512','13375', '55307','30625','83849','68908','26689','96451','38193', '46820','88885','84935','69035','83144','47537','56616', '94983','48033','69952','25486','61547','27385','61860', '58048','56910','16807','17871','35258','31387','35458', '35576')) INTERSECT (SELECT ca_zip FROM (SELECT substr(ca_zip, 1, 5) ca_zip, count(*) cnt FROM customer_address, customer WHERE ca_address_sk = c_current_addr_sk AND c_preferred_cust_flag = 'Y' GROUP BY ca_zip HAVING count(*) > 10) A1) ) A2 ) V1 WHERE ss_store_sk = s_store_sk AND ss_sold_date_sk = d_date_sk AND d_qoy = 2 AND d_year = 1998 AND (substr(s_zip, 1, 2) = substr(V1.ca_zip, 1, 2)) GROUP BY s_store_name ORDER BY s_store_name LIMIT 100 ================================================ FILE: spark-queries-tpcds/q80.sql ================================================ /* q80: the ssr, csr and wsr CTEs total sales, returns and profit (net profit minus net loss) per store id, catalog page id and web site id for sales dated between 2000-08-23 and 2000-08-23 + 30 days, of items with i_current_price > 50 under promotions with p_channel_tv = 'N'; the main query unions the three channels and produces ROLLUP subtotals over (channel, id). */ WITH ssr AS (SELECT s_store_id AS store_id, sum(ss_ext_sales_price) AS sales, sum(coalesce(sr_return_amt, 0)) AS returns, sum(ss_net_profit - coalesce(sr_net_loss, 0)) AS profit FROM store_sales LEFT OUTER JOIN store_returns ON (ss_item_sk = sr_item_sk AND ss_ticket_number = sr_ticket_number) , date_dim, store, item, promotion WHERE ss_sold_date_sk = d_date_sk AND d_date BETWEEN cast('2000-08-23' AS DATE) AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) AND ss_store_sk = s_store_sk AND ss_item_sk = 
i_item_sk AND i_current_price > 50 AND ss_promo_sk = p_promo_sk AND p_channel_tv = 'N' GROUP BY s_store_id), csr AS (SELECT cp_catalog_page_id AS catalog_page_id, sum(cs_ext_sales_price) AS sales, sum(coalesce(cr_return_amount, 0)) AS returns, sum(cs_net_profit - coalesce(cr_net_loss, 0)) AS profit FROM catalog_sales LEFT OUTER JOIN catalog_returns ON (cs_item_sk = cr_item_sk AND cs_order_number = cr_order_number) , date_dim, catalog_page, item, promotion WHERE cs_sold_date_sk = d_date_sk AND d_date BETWEEN cast('2000-08-23' AS DATE) AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) AND cs_catalog_page_sk = cp_catalog_page_sk AND cs_item_sk = i_item_sk AND i_current_price > 50 AND cs_promo_sk = p_promo_sk AND p_channel_tv = 'N' GROUP BY cp_catalog_page_id), wsr AS (SELECT web_site_id, sum(ws_ext_sales_price) AS sales, sum(coalesce(wr_return_amt, 0)) AS returns, sum(ws_net_profit - coalesce(wr_net_loss, 0)) AS profit FROM web_sales LEFT OUTER JOIN web_returns ON (ws_item_sk = wr_item_sk AND ws_order_number = wr_order_number) , date_dim, web_site, item, promotion WHERE ws_sold_date_sk = d_date_sk AND d_date BETWEEN cast('2000-08-23' AS DATE) AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) AND ws_web_site_sk = web_site_sk AND ws_item_sk = i_item_sk AND i_current_price > 50 AND ws_promo_sk = p_promo_sk AND p_channel_tv = 'N' GROUP BY web_site_id) SELECT channel, id, sum(sales) AS sales, sum(returns) AS returns, sum(profit) AS profit FROM (SELECT 'store channel' AS channel, concat('store', store_id) AS id, sales, returns, profit FROM ssr UNION ALL SELECT 'catalog channel' AS channel, concat('catalog_page', catalog_page_id) AS id, sales, returns, profit FROM csr UNION ALL SELECT 'web channel' AS channel, concat('web_site', web_site_id) AS id, sales, returns, profit FROM wsr) x GROUP BY ROLLUP (channel, id) ORDER BY channel, id LIMIT 100 ================================================ FILE: spark-queries-tpcds/q81.sql 
================================================ /* q81: customer_total_return totals cr_return_amt_inc_tax per returning customer and return-address state for catalog returns in 2000; the main query lists customers whose current address is in 'GA' and whose total exceeds 1.2 times the average total for the same ctr_state (first 100 rows in the listed order). */ WITH customer_total_return AS (SELECT cr_returning_customer_sk AS ctr_customer_sk, ca_state AS ctr_state, sum(cr_return_amt_inc_tax) AS ctr_total_return FROM catalog_returns, date_dim, customer_address WHERE cr_returned_date_sk = d_date_sk AND d_year = 2000 AND cr_returning_addr_sk = ca_address_sk GROUP BY cr_returning_customer_sk, ca_state ) SELECT c_customer_id, c_salutation, c_first_name, c_last_name, ca_street_number, ca_street_name, ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset, ca_location_type, ctr_total_return FROM customer_total_return ctr1, customer_address, customer WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2 FROM customer_total_return ctr2 WHERE ctr1.ctr_state = ctr2.ctr_state) AND ca_address_sk = c_current_addr_sk AND ca_state = 'GA' AND ctr1.ctr_customer_sk = c_customer_sk ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, ca_street_number, ca_street_name , ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset , ca_location_type, ctr_total_return LIMIT 100 ================================================ FILE: spark-queries-tpcds/q82.sql ================================================ /* q82: distinct (item id, description, current price) for items priced between 62 and 62 + 30 from manufacturers 129, 270, 821 or 423 that have an inventory row with quantity on hand between 100 and 500 dated between 2000-05-25 and 2000-05-25 + 60 days, and at least one store_sales row; first 100 rows by item id. */ SELECT i_item_id, i_item_desc, i_current_price FROM item, inventory, date_dim, store_sales WHERE i_current_price BETWEEN 62 AND 62 + 30 AND inv_item_sk = i_item_sk AND d_date_sk = inv_date_sk AND d_date BETWEEN cast('2000-05-25' AS DATE) AND (cast('2000-05-25' AS DATE) + INTERVAL 60 days) AND i_manufact_id IN (129, 270, 821, 423) AND inv_quantity_on_hand BETWEEN 100 AND 500 AND ss_item_sk = i_item_sk GROUP BY i_item_id, i_item_desc, i_current_price ORDER BY i_item_id LIMIT 100 ================================================ FILE: spark-queries-tpcds/q83.sql ================================================ /* q83: the sr_items, cr_items and wr_items CTEs total returned quantity per item id for store, catalog and web returns on dates falling in the weeks (d_week_seq) that contain 2000-06-30, 2000-09-27 or 2000-11-17; the main query joins the three on item id and derives the *_dev columns and the three-channel average. */ WITH sr_items AS (SELECT i_item_id item_id, sum(sr_return_quantity) sr_item_qty FROM 
store_returns, item, date_dim WHERE sr_item_sk = i_item_sk AND d_date IN (SELECT d_date FROM date_dim WHERE d_week_seq IN (SELECT d_week_seq FROM date_dim WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) AND sr_returned_date_sk = d_date_sk GROUP BY i_item_id), cr_items AS (SELECT i_item_id item_id, sum(cr_return_quantity) cr_item_qty FROM catalog_returns, item, date_dim WHERE cr_item_sk = i_item_sk AND d_date IN (SELECT d_date FROM date_dim WHERE d_week_seq IN (SELECT d_week_seq FROM date_dim WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) AND cr_returned_date_sk = d_date_sk GROUP BY i_item_id), wr_items AS (SELECT i_item_id item_id, sum(wr_return_quantity) wr_item_qty FROM web_returns, item, date_dim WHERE wr_item_sk = i_item_sk AND d_date IN (SELECT d_date FROM date_dim WHERE d_week_seq IN (SELECT d_week_seq FROM date_dim WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) AND wr_returned_date_sk = d_date_sk GROUP BY i_item_id) SELECT sr_items.item_id, sr_item_qty, sr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 sr_dev, cr_item_qty, cr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 cr_dev, wr_item_qty, wr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 wr_dev, (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 average FROM sr_items, cr_items, wr_items WHERE sr_items.item_id = cr_items.item_id AND sr_items.item_id = wr_items.item_id ORDER BY sr_items.item_id, sr_item_qty LIMIT 100 ================================================ FILE: spark-queries-tpcds/q84.sql ================================================ SELECT c_customer_id AS customer_id, concat(c_last_name, ', ', c_first_name) AS customername FROM customer , customer_address , customer_demographics , household_demographics , income_band , store_returns WHERE ca_city = 'Edgewood' AND c_current_addr_sk = ca_address_sk AND ib_lower_bound >= 38128 AND ib_upper_bound <= 38128 + 50000 AND ib_income_band_sk = hd_income_band_sk 
AND cd_demo_sk = c_current_cdemo_sk AND hd_demo_sk = c_current_hdemo_sk AND sr_cdemo_sk = cd_demo_sk ORDER BY c_customer_id LIMIT 100 ================================================ FILE: spark-queries-tpcds/q85.sql ================================================ SELECT substr(r_reason_desc, 1, 20), avg(ws_quantity), avg(wr_refunded_cash), avg(wr_fee) FROM web_sales, web_returns, web_page, customer_demographics cd1, customer_demographics cd2, customer_address, date_dim, reason WHERE ws_web_page_sk = wp_web_page_sk AND ws_item_sk = wr_item_sk AND ws_order_number = wr_order_number AND ws_sold_date_sk = d_date_sk AND d_year = 2000 AND cd1.cd_demo_sk = wr_refunded_cdemo_sk AND cd2.cd_demo_sk = wr_returning_cdemo_sk AND ca_address_sk = wr_refunded_addr_sk AND r_reason_sk = wr_reason_sk AND ( ( cd1.cd_marital_status = 'M' AND cd1.cd_marital_status = cd2.cd_marital_status AND cd1.cd_education_status = 'Advanced Degree' AND cd1.cd_education_status = cd2.cd_education_status AND ws_sales_price BETWEEN 100.00 AND 150.00 ) OR ( cd1.cd_marital_status = 'S' AND cd1.cd_marital_status = cd2.cd_marital_status AND cd1.cd_education_status = 'College' AND cd1.cd_education_status = cd2.cd_education_status AND ws_sales_price BETWEEN 50.00 AND 100.00 ) OR ( cd1.cd_marital_status = 'W' AND cd1.cd_marital_status = cd2.cd_marital_status AND cd1.cd_education_status = '2 yr Degree' AND cd1.cd_education_status = cd2.cd_education_status AND ws_sales_price BETWEEN 150.00 AND 200.00 ) ) AND ( ( ca_country = 'United States' AND ca_state IN ('IN', 'OH', 'NJ') AND ws_net_profit BETWEEN 100 AND 200 ) OR ( ca_country = 'United States' AND ca_state IN ('WI', 'CT', 'KY') AND ws_net_profit BETWEEN 150 AND 300 ) OR ( ca_country = 'United States' AND ca_state IN ('LA', 'IA', 'AR') AND ws_net_profit BETWEEN 50 AND 250 ) ) GROUP BY r_reason_desc ORDER BY substr(r_reason_desc, 1, 20) , avg(ws_quantity) , avg(wr_refunded_cash) , avg(wr_fee) LIMIT 100 ================================================ FILE: 
spark-queries-tpcds/q86.sql ================================================ SELECT sum(ws_net_paid) AS total_sum, i_category, i_class, grouping(i_category) + grouping(i_class) AS lochierarchy, rank() OVER ( PARTITION BY grouping(i_category) + grouping(i_class), CASE WHEN grouping(i_class) = 0 THEN i_category END ORDER BY sum(ws_net_paid) DESC) AS rank_within_parent FROM web_sales, date_dim d1, item WHERE d1.d_month_seq BETWEEN 1200 AND 1200 + 11 AND d1.d_date_sk = ws_sold_date_sk AND i_item_sk = ws_item_sk GROUP BY ROLLUP (i_category, i_class) ORDER BY lochierarchy DESC, CASE WHEN lochierarchy = 0 THEN i_category END, rank_within_parent LIMIT 100 ================================================ FILE: spark-queries-tpcds/q87.sql ================================================ SELECT count(*) FROM ((SELECT DISTINCT c_last_name, c_first_name, d_date FROM store_sales, date_dim, customer WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk AND store_sales.ss_customer_sk = customer.c_customer_sk AND d_month_seq BETWEEN 1200 AND 1200 + 11) EXCEPT (SELECT DISTINCT c_last_name, c_first_name, d_date FROM catalog_sales, date_dim, customer WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk AND d_month_seq BETWEEN 1200 AND 1200 + 11) EXCEPT (SELECT DISTINCT c_last_name, c_first_name, d_date FROM web_sales, date_dim, customer WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk AND web_sales.ws_bill_customer_sk = customer.c_customer_sk AND d_month_seq BETWEEN 1200 AND 1200 + 11) ) cool_cust ================================================ FILE: spark-queries-tpcds/q88.sql ================================================ SELECT * FROM (SELECT count(*) h8_30_to_9 FROM store_sales, household_demographics, time_dim, store WHERE ss_sold_time_sk = time_dim.t_time_sk AND ss_hdemo_sk = household_demographics.hd_demo_sk AND ss_store_sk = s_store_sk AND time_dim.t_hour = 8 AND time_dim.t_minute >= 30 AND ( 
(household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) OR (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) OR (household_demographics.hd_dep_count = 0 AND household_demographics.hd_vehicle_count <= 0 + 2)) AND store.s_store_name = 'ese') s1, (SELECT count(*) h9_to_9_30 FROM store_sales, household_demographics, time_dim, store WHERE ss_sold_time_sk = time_dim.t_time_sk AND ss_hdemo_sk = household_demographics.hd_demo_sk AND ss_store_sk = s_store_sk AND time_dim.t_hour = 9 AND time_dim.t_minute < 30 AND ( (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) OR (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) OR (household_demographics.hd_dep_count = 0 AND household_demographics.hd_vehicle_count <= 0 + 2)) AND store.s_store_name = 'ese') s2, (SELECT count(*) h9_30_to_10 FROM store_sales, household_demographics, time_dim, store WHERE ss_sold_time_sk = time_dim.t_time_sk AND ss_hdemo_sk = household_demographics.hd_demo_sk AND ss_store_sk = s_store_sk AND time_dim.t_hour = 9 AND time_dim.t_minute >= 30 AND ( (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) OR (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) OR (household_demographics.hd_dep_count = 0 AND household_demographics.hd_vehicle_count <= 0 + 2)) AND store.s_store_name = 'ese') s3, (SELECT count(*) h10_to_10_30 FROM store_sales, household_demographics, time_dim, store WHERE ss_sold_time_sk = time_dim.t_time_sk AND ss_hdemo_sk = household_demographics.hd_demo_sk AND ss_store_sk = s_store_sk AND time_dim.t_hour = 10 AND time_dim.t_minute < 30 AND ( (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) OR (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) OR 
(household_demographics.hd_dep_count = 0 AND household_demographics.hd_vehicle_count <= 0 + 2)) AND store.s_store_name = 'ese') s4, (SELECT count(*) h10_30_to_11 FROM store_sales, household_demographics, time_dim, store WHERE ss_sold_time_sk = time_dim.t_time_sk AND ss_hdemo_sk = household_demographics.hd_demo_sk AND ss_store_sk = s_store_sk AND time_dim.t_hour = 10 AND time_dim.t_minute >= 30 AND ( (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) OR (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) OR (household_demographics.hd_dep_count = 0 AND household_demographics.hd_vehicle_count <= 0 + 2)) AND store.s_store_name = 'ese') s5, (SELECT count(*) h11_to_11_30 FROM store_sales, household_demographics, time_dim, store WHERE ss_sold_time_sk = time_dim.t_time_sk AND ss_hdemo_sk = household_demographics.hd_demo_sk AND ss_store_sk = s_store_sk AND time_dim.t_hour = 11 AND time_dim.t_minute < 30 AND ( (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) OR (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) OR (household_demographics.hd_dep_count = 0 AND household_demographics.hd_vehicle_count <= 0 + 2)) AND store.s_store_name = 'ese') s6, (SELECT count(*) h11_30_to_12 FROM store_sales, household_demographics, time_dim, store WHERE ss_sold_time_sk = time_dim.t_time_sk AND ss_hdemo_sk = household_demographics.hd_demo_sk AND ss_store_sk = s_store_sk AND time_dim.t_hour = 11 AND time_dim.t_minute >= 30 AND ( (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) OR (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) OR (household_demographics.hd_dep_count = 0 AND household_demographics.hd_vehicle_count <= 0 + 2)) AND store.s_store_name = 'ese') s7, (SELECT count(*) h12_to_12_30 FROM store_sales, household_demographics, 
time_dim, store WHERE ss_sold_time_sk = time_dim.t_time_sk AND ss_hdemo_sk = household_demographics.hd_demo_sk AND ss_store_sk = s_store_sk AND time_dim.t_hour = 12 AND time_dim.t_minute < 30 AND ( (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) OR (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) OR (household_demographics.hd_dep_count = 0 AND household_demographics.hd_vehicle_count <= 0 + 2)) AND store.s_store_name = 'ese') s8 ================================================ FILE: spark-queries-tpcds/q89.sql ================================================ SELECT * FROM ( SELECT i_category, i_class, i_brand, s_store_name, s_company_name, d_moy, sum(ss_sales_price) sum_sales, avg(sum(ss_sales_price)) OVER (PARTITION BY i_category, i_brand, s_store_name, s_company_name) avg_monthly_sales FROM item, store_sales, date_dim, store WHERE ss_item_sk = i_item_sk AND ss_sold_date_sk = d_date_sk AND ss_store_sk = s_store_sk AND d_year IN (1999) AND ((i_category IN ('Books', 'Electronics', 'Sports') AND i_class IN ('computers', 'stereo', 'football')) OR (i_category IN ('Men', 'Jewelry', 'Women') AND i_class IN ('shirts', 'birdal', 'dresses'))) GROUP BY i_category, i_class, i_brand, s_store_name, s_company_name, d_moy) tmp1 WHERE CASE WHEN (avg_monthly_sales <> 0) THEN (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) ELSE NULL END > 0.1 ORDER BY sum_sales - avg_monthly_sales, s_store_name LIMIT 100 ================================================ FILE: spark-queries-tpcds/q9.sql ================================================ SELECT CASE WHEN (SELECT count(*) FROM store_sales WHERE ss_quantity BETWEEN 1 AND 20) > 62316685 THEN (SELECT avg(ss_ext_discount_amt) FROM store_sales WHERE ss_quantity BETWEEN 1 AND 20) ELSE (SELECT avg(ss_net_paid) FROM store_sales WHERE ss_quantity BETWEEN 1 AND 20) END bucket1, CASE WHEN (SELECT count(*) FROM store_sales WHERE ss_quantity BETWEEN 21 
AND 40) > 19045798 THEN (SELECT avg(ss_ext_discount_amt) FROM store_sales WHERE ss_quantity BETWEEN 21 AND 40) ELSE (SELECT avg(ss_net_paid) FROM store_sales WHERE ss_quantity BETWEEN 21 AND 40) END bucket2, CASE WHEN (SELECT count(*) FROM store_sales WHERE ss_quantity BETWEEN 41 AND 60) > 365541424 THEN (SELECT avg(ss_ext_discount_amt) FROM store_sales WHERE ss_quantity BETWEEN 41 AND 60) ELSE (SELECT avg(ss_net_paid) FROM store_sales WHERE ss_quantity BETWEEN 41 AND 60) END bucket3, CASE WHEN (SELECT count(*) FROM store_sales WHERE ss_quantity BETWEEN 61 AND 80) > 216357808 THEN (SELECT avg(ss_ext_discount_amt) FROM store_sales WHERE ss_quantity BETWEEN 61 AND 80) ELSE (SELECT avg(ss_net_paid) FROM store_sales WHERE ss_quantity BETWEEN 61 AND 80) END bucket4, CASE WHEN (SELECT count(*) FROM store_sales WHERE ss_quantity BETWEEN 81 AND 100) > 184483884 THEN (SELECT avg(ss_ext_discount_amt) FROM store_sales WHERE ss_quantity BETWEEN 81 AND 100) ELSE (SELECT avg(ss_net_paid) FROM store_sales WHERE ss_quantity BETWEEN 81 AND 100) END bucket5 FROM reason WHERE r_reason_sk = 1 ================================================ FILE: spark-queries-tpcds/q90.sql ================================================ SELECT cast(amc AS DECIMAL(15, 4)) / cast(pmc AS DECIMAL(15, 4)) am_pm_ratio FROM (SELECT count(*) amc FROM web_sales, household_demographics, time_dim, web_page WHERE ws_sold_time_sk = time_dim.t_time_sk AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk AND ws_web_page_sk = web_page.wp_web_page_sk AND time_dim.t_hour BETWEEN 8 AND 8 + 1 AND household_demographics.hd_dep_count = 6 AND web_page.wp_char_count BETWEEN 5000 AND 5200) at, (SELECT count(*) pmc FROM web_sales, household_demographics, time_dim, web_page WHERE ws_sold_time_sk = time_dim.t_time_sk AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk AND ws_web_page_sk = web_page.wp_web_page_sk AND time_dim.t_hour BETWEEN 19 AND 19 + 1 AND household_demographics.hd_dep_count = 6 AND 
web_page.wp_char_count BETWEEN 5000 AND 5200) pt ORDER BY am_pm_ratio LIMIT 100 ================================================ FILE: spark-queries-tpcds/q91.sql ================================================ SELECT cc_call_center_id Call_Center, cc_name Call_Center_Name, cc_manager Manager, sum(cr_net_loss) Returns_Loss FROM call_center, catalog_returns, date_dim, customer, customer_address, customer_demographics, household_demographics WHERE cr_call_center_sk = cc_call_center_sk AND cr_returned_date_sk = d_date_sk AND cr_returning_customer_sk = c_customer_sk AND cd_demo_sk = c_current_cdemo_sk AND hd_demo_sk = c_current_hdemo_sk AND ca_address_sk = c_current_addr_sk AND d_year = 1998 AND d_moy = 11 AND ((cd_marital_status = 'M' AND cd_education_status = 'Unknown') OR (cd_marital_status = 'W' AND cd_education_status = 'Advanced Degree')) AND hd_buy_potential LIKE 'Unknown%' AND ca_gmt_offset = -7 GROUP BY cc_call_center_id, cc_name, cc_manager, cd_marital_status, cd_education_status ORDER BY sum(cr_net_loss) DESC ================================================ FILE: spark-queries-tpcds/q92.sql ================================================ SELECT sum(ws_ext_discount_amt) AS `Excess Discount Amount ` FROM web_sales, item, date_dim WHERE i_manufact_id = 350 AND i_item_sk = ws_item_sk AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days) AND d_date_sk = ws_sold_date_sk AND ws_ext_discount_amt > ( SELECT 1.3 * avg(ws_ext_discount_amt) FROM web_sales, date_dim WHERE ws_item_sk = i_item_sk AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days) AND d_date_sk = ws_sold_date_sk ) ORDER BY sum(ws_ext_discount_amt) LIMIT 100 ================================================ FILE: spark-queries-tpcds/q93.sql ================================================ SELECT ss_customer_sk, sum(act_sales) sumsales FROM (SELECT ss_item_sk, ss_ticket_number, ss_customer_sk, CASE WHEN sr_return_quantity IS NOT NULL THEN 
(ss_quantity - sr_return_quantity) * ss_sales_price ELSE (ss_quantity * ss_sales_price) END act_sales FROM store_sales LEFT OUTER JOIN store_returns ON (sr_item_sk = ss_item_sk AND sr_ticket_number = ss_ticket_number) , reason WHERE sr_reason_sk = r_reason_sk AND r_reason_desc = 'reason 28') t GROUP BY ss_customer_sk ORDER BY sumsales, ss_customer_sk LIMIT 100 ================================================ FILE: spark-queries-tpcds/q94.sql ================================================ SELECT count(DISTINCT ws_order_number) AS `order count `, sum(ws_ext_ship_cost) AS `total shipping cost `, sum(ws_net_profit) AS `total net profit ` FROM web_sales ws1, date_dim, customer_address, web_site WHERE d_date BETWEEN '1999-02-01' AND (CAST('1999-02-01' AS DATE) + INTERVAL 60 days) AND ws1.ws_ship_date_sk = d_date_sk AND ws1.ws_ship_addr_sk = ca_address_sk AND ca_state = 'IL' AND ws1.ws_web_site_sk = web_site_sk AND web_company_name = 'pri' AND EXISTS(SELECT * FROM web_sales ws2 WHERE ws1.ws_order_number = ws2.ws_order_number AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) AND NOT EXISTS(SELECT * FROM web_returns wr1 WHERE ws1.ws_order_number = wr1.wr_order_number) ORDER BY count(DISTINCT ws_order_number) LIMIT 100 ================================================ FILE: spark-queries-tpcds/q95.sql ================================================ WITH ws_wh AS (SELECT ws1.ws_order_number, ws1.ws_warehouse_sk wh1, ws2.ws_warehouse_sk wh2 FROM web_sales ws1, web_sales ws2 WHERE ws1.ws_order_number = ws2.ws_order_number AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) SELECT count(DISTINCT ws_order_number) AS `order count `, sum(ws_ext_ship_cost) AS `total shipping cost `, sum(ws_net_profit) AS `total net profit ` FROM web_sales ws1, date_dim, customer_address, web_site WHERE d_date BETWEEN '1999-02-01' AND (CAST('1999-02-01' AS DATE) + INTERVAL 60 DAY) AND ws1.ws_ship_date_sk = d_date_sk AND ws1.ws_ship_addr_sk = ca_address_sk AND ca_state = 'IL' AND ws1.ws_web_site_sk = 
web_site_sk AND web_company_name = 'pri' AND ws1.ws_order_number IN (SELECT ws_order_number FROM ws_wh) AND ws1.ws_order_number IN (SELECT wr_order_number FROM web_returns, ws_wh WHERE wr_order_number = ws_wh.ws_order_number) ORDER BY count(DISTINCT ws_order_number) LIMIT 100 ================================================ FILE: spark-queries-tpcds/q96.sql ================================================ SELECT count(*) FROM store_sales, household_demographics, time_dim, store WHERE ss_sold_time_sk = time_dim.t_time_sk AND ss_hdemo_sk = household_demographics.hd_demo_sk AND ss_store_sk = s_store_sk AND time_dim.t_hour = 20 AND time_dim.t_minute >= 30 AND household_demographics.hd_dep_count = 7 AND store.s_store_name = 'ese' ORDER BY count(*) LIMIT 100 ================================================ FILE: spark-queries-tpcds/q97.sql ================================================ WITH ssci AS ( SELECT ss_customer_sk customer_sk, ss_item_sk item_sk FROM store_sales, date_dim WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1200 AND 1200 + 11 GROUP BY ss_customer_sk, ss_item_sk), csci AS ( SELECT cs_bill_customer_sk customer_sk, cs_item_sk item_sk FROM catalog_sales, date_dim WHERE cs_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1200 AND 1200 + 11 GROUP BY cs_bill_customer_sk, cs_item_sk) SELECT sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NULL THEN 1 ELSE 0 END) store_only, sum(CASE WHEN ssci.customer_sk IS NULL AND csci.customer_sk IS NOT NULL THEN 1 ELSE 0 END) catalog_only, sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NOT NULL THEN 1 ELSE 0 END) store_and_catalog FROM ssci FULL OUTER JOIN csci ON (ssci.customer_sk = csci.customer_sk AND ssci.item_sk = csci.item_sk) LIMIT 100 ================================================ FILE: spark-queries-tpcds/q98.sql ================================================ SELECT i_item_desc, i_category, i_class, i_current_price, sum(ss_ext_sales_price) AS itemrevenue, 
sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price)) OVER (PARTITION BY i_class) AS revenueratio FROM store_sales, item, date_dim WHERE ss_item_sk = i_item_sk AND i_category IN ('Sports', 'Books', 'Home') AND ss_sold_date_sk = d_date_sk AND d_date BETWEEN cast('1999-02-22' AS DATE) AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) GROUP BY i_item_id, i_item_desc, i_category, i_class, i_current_price ORDER BY i_category, i_class, i_item_id, i_item_desc, revenueratio
================================================
FILE: spark-queries-tpcds/q99.sql
================================================
SELECT substr(w_warehouse_name, 1, 20), sm_type, cc_name, sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk <= 30) THEN 1 ELSE 0 END) AS `30 days `, sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 30) AND (cs_ship_date_sk - cs_sold_date_sk <= 60) THEN 1 ELSE 0 END) AS `31 - 60 days `, sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 60) AND (cs_ship_date_sk - cs_sold_date_sk <= 90) THEN 1 ELSE 0 END) AS `61 - 90 days `, sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 90) AND (cs_ship_date_sk - cs_sold_date_sk <= 120) THEN 1 ELSE 0 END) AS `91 - 120 days `, sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 120) THEN 1 ELSE 0 END) AS `>120 days ` FROM catalog_sales, warehouse, ship_mode, call_center, date_dim WHERE d_month_seq BETWEEN 1200 AND 1200 + 11 AND cs_ship_date_sk = d_date_sk AND cs_warehouse_sk = w_warehouse_sk AND cs_ship_mode_sk = sm_ship_mode_sk AND cs_call_center_sk = cc_call_center_sk GROUP BY substr(w_warehouse_name, 1, 20), sm_type, cc_name ORDER BY substr(w_warehouse_name, 1, 20), sm_type, cc_name LIMIT 100
================================================
FILE: tpcds-build.sh
================================================
#!/bin/sh
#
# tpcds-build.sh - build the TPC-DS data generator found in tpcds-gen/.
#
# Usage: ./tpcds-build.sh        (run from the repository root)
#
# Steps:
#   1. Verify that gcc and javac are on PATH.
#   2. If mvn is not on PATH, download (or reuse a cached copy of) the Apache
#      Maven tarball into the current directory, unpack it and put its bin/
#      directory on PATH for the rest of this script.
#   3. Run make in tpcds-gen/.
#
# The script exits with status 1, and a message on stderr, as soon as any of
# these steps fails.

MAVEN_VERSION=3.0.5
MAVEN_DIR="apache-maven-${MAVEN_VERSION}"
MAVEN_TARBALL="${MAVEN_DIR}-bin.tar.gz"
# Size in bytes of a complete tarball; a cached file of exactly this size is
# reused instead of being downloaded again.
MAVEN_TARBALL_SIZE=5144659
MAVEN_PATH="maven/maven-3/${MAVEN_VERSION}/binaries/${MAVEN_TARBALL}"

# Print the arguments on stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Download the Maven tarball into the current directory.
# --fail makes curl return non-zero on an HTTP error instead of saving the
# error page as the tarball.  The original download host is tried first; the
# Apache archive host is used as a fallback because releases this old may no
# longer be served from the primary download site.
# Returns: 0 if one of the downloads succeeded, non-zero otherwise.
fetch_maven() {
  curl -f -L -O "https://downloads.apache.org/${MAVEN_PATH}" 2> /dev/null \
    || curl -f -L -O "https://archive.apache.org/dist/${MAVEN_PATH}" 2> /dev/null
}

# Check for all the stuff I need to function.
for f in gcc javac; do
  command -v "$f" > /dev/null 2>&1 \
    || die "Required program $f is missing. Please install or fix your path and try again."
done

# Check if Maven is installed and install it if not.
if ! command -v mvn > /dev/null 2>&1; then
  skip=0
  if [ -e "$MAVEN_TARBALL" ]; then
    # wc -c is POSIX (du -b is a GNU extension); strip the padding that some
    # wc implementations put around the number.
    size=$(wc -c < "$MAVEN_TARBALL" | tr -d '[:space:]')
    if [ "${size:-0}" -eq "$MAVEN_TARBALL_SIZE" ]; then
      skip=1
    fi
  fi

  if [ "$skip" -ne 1 ]; then
    echo "Maven not found, automatically installing it."
    fetch_maven \
      || die "Failed to download Maven, check Internet connectivity and try again."
  fi

  tar -zxf "$MAVEN_TARBALL" > /dev/null \
    || die "Failed to unpack $MAVEN_TARBALL."

  MAVEN_HOME="$(pwd)/${MAVEN_DIR}"
  export MAVEN_HOME
  PATH="$PATH:$MAVEN_HOME/bin"
  export PATH
fi

echo "Building TPC-DS Data Generator"
# Only report success when both the cd and the make succeeded.
(cd tpcds-gen && make) || die "Failed to build the TPC-DS Data Generator."
echo "TPC-DS Data Generator built, you can now use tpcds-setup.sh to generate data."

================================================
FILE: tpcds-gen/Makefile
================================================
# Builds the dsdgen tool archive (target/lib/dsdgen.jar) and the MapReduce
# wrapper jar (target/tpcds-gen-1.0-SNAPSHOT.jar).
#
# MYOS selects the OS-specific patch directory patches/$(MYOS)/.  It is not
# set in this file, so it is taken from the environment or from the make
# command line, e.g. `make MYOS=Darwin`.

.PHONY: all clean

all: target/lib/dsdgen.jar target/tpcds-gen-1.0-SNAPSHOT.jar

# The pattern is quoted so the shell cannot expand it before find sees it,
# and the start directory is explicit because BSD find requires one.
target/tpcds-gen-1.0-SNAPSHOT.jar: $(shell find . -name '*.java')
	mvn package

target/tpcds_kit.zip: tpcds_kit.zip
	mkdir -p target/
	cp tpcds_kit.zip target/tpcds_kit.zip

# --fail keeps an HTTP error page from being saved as tpcds_kit.zip, which
# would otherwise satisfy this target and break the later unzip step.
tpcds_kit.zip:
	curl https://public-repo-1.hortonworks.com/hive-testbench/tpcds/README
	curl --fail --location --output tpcds_kit.zip https://public-repo-1.hortonworks.com/hive-testbench/tpcds/TPCDS_Tools.zip

target/lib/dsdgen.jar: target/tools/dsdgen
	cd target/; mkdir -p lib/; ( jar cvf lib/dsdgen.jar tools/ || gjar cvf lib/dsdgen.jar tools/ )

target/tools/dsdgen: target/tpcds_kit.zip
	test -d target/tools/ || (cd target; unzip tpcds_kit.zip)
	test -d target/tools/ || (cd target; mv */tools tools)
	cd target/tools; cat ../../patches/all/*.patch | patch -p0
	cd target/tools; cat ../../patches/${MYOS}/*.patch | patch -p1
	cd target/tools; make clean; make dsdgen

clean:
	mvn clean

================================================
FILE: tpcds-gen/README.md
================================================
Mapreduce
TPC-DS Generator ========================== This simplifies creating TPC-DS data-sets at large scale on a Hadoop cluster. To get set up, you need to run $ make This will download the TPC-DS dsdgen program, compile it, and use Maven to build the MapReduce app wrapped around it. To generate the data-sets, you need to run (say, for scale = 200, parallelism = 100) $ hadoop jar target/tpcds-gen-1.0-SNAPSHOT.jar -d /tmp/store_sales/200/ -p 100 -s 200 This uses the existing parallelism in the driver.c of TPC-DS without modification and uses it to run the command on multiple machines instead of running in local fork mode. The command generates multiple files for each map task, resulting in each table having its own subdirectory. This assumes that all machines in the cluster have an identical OS, architecture, and set of libraries. ================================================ FILE: tpcds-gen/patches/Darwin/macosx.patch ================================================ diff -rupN tools/Makefile.suite toolsnew/Makefile.suite --- tools/Makefile.suite 2012-04-25 11:03:50.000000000 -0700 +++ toolsnew/Makefile.suite 2014-06-25 13:15:00.000000000 -0700 @@ -38,8 +38,8 @@ ################ ## TARGET OS HERE ################ -# OS Values: AIX, LINUX, SOLARIS, NCR, HPUX -OS = LINUX +# OS Values: AIX, LINUX, SOLARIS, NCR, HPUX, OSX +OS = OSX ########### # No changes should be necessary below this point # Each compile variable is adjusted for the target platform using the OS setting above @@ -47,7 +47,8 @@ OS = LINUX # CC AIX_CC = xlC HPUX_CC = gcc -LINUX_CC = gcc +LINUX_CC = gcc +OSX_CC = gcc NCR_CC = cc SOLARIS_CC = gcc SOL86_CC = cc @@ -55,7 +56,8 @@ CC = $($(OS)_CC) # CFLAGS AIX_CFLAGS = -q64 -O3 -D_LARGE_FILES HPUX_CFLAGS = -O3 -Wall -LINUX_CFLAGS = -g -Wall +LINUX_CFLAGS = -g -Wall +OSX_CFLAGS = -g -Wall NCR_CFLAGS = -g SOLARIS_CFLAGS = -O3 -Wall SOL86_CFLAGS = -O3 @@ -65,6 +67,7 @@ CFLAGS = $(BASE_CFLAGS) -D$(OS) $($(OS AIX_EXE = HPUX_EXE = LINUX_EXE = +OSX_EXE = NCR_EXE = SOLARIS_EXE = SOL86_EXE = @@ -73,6
+76,7 @@ EXE = $($(OS)_EXE) AIX_LEX = flex HPUX_LEX = flex LINUX_LEX = lex +OSX_LEX = lex NCR_LEX = lex SOLARIS_LEX = lex SOL86_LEX = lex @@ -81,6 +85,7 @@ LEX = $($(OS)_LEX) AIX_LIBS = -lm HPUX_LIBS = -lm -ll LINUX_LIBS = -lm +OSX_LIBS = -lm NCR_LIBS = -lm -lc89 SOLARIS_LIBS = -ly -ll -lm SOL86_LIBS = -ly -ll -lm @@ -89,6 +94,7 @@ LIBS = $($(OS)_LIBS) AIX_YACC = yacc HPUX_YACC = bison -y LINUX_YACC = yacc +OSX_YACC = yacc NCR_YACC = yacc SOLARIS_YACC = yacc SOL86_YACC = yacc @@ -97,6 +103,7 @@ YACC = $($(OS)_YACC) AIX_YFLAGS = -d -v HPUX_YFLAGS = -y -d -v LINUX_YFLAGS = -d -v +OSX_YFLAGS = -d -v NCR_YFLAGS = -d -v SOLARIS_YFLAGS = -d -v SOL86_YFLAGS = -d -v diff -rupN tools/config.h toolsnew/config.h --- tools/config.h 2012-04-25 11:03:52.000000000 -0700 +++ toolsnew/config.h 2014-06-25 13:15:00.000000000 -0700 @@ -109,6 +109,18 @@ #define FLEX #endif /* LINUX */ +#ifdef OSX +#define SUPPORT_64BITS +#define HUGE_TYPE int64_t +#define HUGE_FORMAT "%lld" +#define HUGE_COUNT 1 +#define USE_STRING_H +#define USE_LIMITS_H +#define MAXINT INT_MAX +#define USE_STDLIB_H +#define FLEX +#endif /* OSX */ + #ifdef SOLARIS #define SUPPORT_64BITS #define HUGE_TYPE long long diff -rupN tools/makefile toolsnew/makefile --- tools/makefile 2012-04-25 11:03:54.000000000 -0700 +++ toolsnew/makefile 2014-06-25 13:15:00.000000000 -0700 @@ -38,8 +38,8 @@ ################ ## TARGET OS HERE ################ -# OS Values: AIX, LINUX, SOLARIS, NCR, HPUX -OS = LINUX +# OS Values: AIX, LINUX, SOLARIS, NCR, HPUX, OSX +OS = OSX ########### # No changes should be necessary below this point # Each compile variable is adjusted for the target platform using the OS setting above @@ -47,7 +47,8 @@ OS = LINUX # CC AIX_CC = xlC HPUX_CC = gcc -LINUX_CC = gcc +LINUX_CC = gcc +OSX_CC = gcc NCR_CC = cc SOLARIS_CC = gcc SOL86_CC = cc @@ -56,6 +57,7 @@ CC = $($(OS)_CC) AIX_CFLAGS = -q64 -O3 -D_LARGE_FILES HPUX_CFLAGS = -O3 -Wall LINUX_CFLAGS = -g -Wall +OSX_CFLAGS = -g -Wall -I/usr/include/malloc NCR_CFLAGS 
= -g SOLARIS_CFLAGS = -O3 -Wall SOL86_CFLAGS = -O3 @@ -65,6 +67,7 @@ CFLAGS = $(BASE_CFLAGS) -D$(OS) $($(OS AIX_EXE = HPUX_EXE = LINUX_EXE = +OSX_EXE = NCR_EXE = SOLARIS_EXE = SOL86_EXE = @@ -73,6 +76,7 @@ EXE = $($(OS)_EXE) AIX_LEX = flex HPUX_LEX = flex LINUX_LEX = lex +OSX_LEX = lex NCR_LEX = lex SOLARIS_LEX = lex SOL86_LEX = lex @@ -81,6 +85,7 @@ LEX = $($(OS)_LEX) AIX_LIBS = -lm HPUX_LIBS = -lm -ll LINUX_LIBS = -lm +OSX_LIBS = -lm NCR_LIBS = -lm -lc89 SOLARIS_LIBS = -ly -ll -lm SOL86_LIBS = -ly -ll -lm @@ -89,6 +94,7 @@ LIBS = $($(OS)_LIBS) AIX_YACC = yacc HPUX_YACC = bison -y LINUX_YACC = yacc +OSX_YACC = yacc NCR_YACC = yacc SOLARIS_YACC = yacc SOL86_YACC = yacc @@ -97,6 +103,7 @@ YACC = $($(OS)_YACC) AIX_YFLAGS = -d -v HPUX_YFLAGS = -y -d -v LINUX_YFLAGS = -d -v +OSX_YFLAGS = -d -v NCR_YFLAGS = -d -v SOLARIS_YFLAGS = -d -v SOL86_YFLAGS = -d -v ================================================ FILE: tpcds-gen/patches/all/tpcds-buffered.patch ================================================ diff --git print.c print.c index 1b64362..5108bd7 100644 --- print.c +++ print.c @@ -68,6 +68,7 @@ print_close(int tbl) fpOutfile = NULL; if (pTdef->outfile) { + fflush(pTdef->outfile); fclose(pTdef->outfile); pTdef->outfile = NULL; } @@ -536,7 +538,7 @@ print_end (int tbl) if (add_term) fwrite(term, 1, add_term, fpOutfile); fprintf (fpOutfile, "\n"); - fflush(fpOutfile); + //fflush(fpOutfile); return (res); } ================================================ FILE: tpcds-gen/patches/all/tpcds-strcpy.patch ================================================ diff --git r_params.c r_params.c index 4db16e5..9b1a8e6 100644 --- r_params.c +++ r_params.c @@ -46,7 +46,7 @@ #include "tdefs.h" #include "release.h" -#define PARAM_MAX_LEN 80 +#define PARAM_MAX_LEN PATH_MAX #ifndef TEST extern option_t options[]; @@ -275,7 +275,7 @@ set_str(char *var, char *val) nParam = fnd_param(var); if (nParam >= 0) { - strcpy(params[options[nParam].index], val); + strncpy(params[options[nParam].index], 
val, PARAM_MAX_LEN); options[nParam].flags |= OPT_SET; } ================================================ FILE: tpcds-gen/patches/all/tpcds_misspelled_header_guard.patch ================================================ --- w_store_sales.h.orig 2014-06-25 10:58:19.000000000 -0700 +++ w_store_sales.h 2014-06-25 10:58:51.000000000 -0700 @@ -34,7 +34,7 @@ * Gradient Systems */ #ifndef W_STORE_SALES_H -#define W_STORE_SLAES_H +#define W_STORE_SALES_H #include "constants.h" #include "pricing.h" ================================================ FILE: tpcds-gen/pom.xml ================================================ 4.0.0 org.notmysock.tpcds tpcds-gen 1.0-SNAPSHOT jar tpcds-gen http://maven.apache.org org.apache.hadoop hadoop-client 2.2.0 compile commons-cli commons-cli 1.1 compile org.mockito mockito-core 1.8.5 test junit junit 4.7 test maven-compiler-plugin 1.6 1.6 org.apache.maven.plugins maven-jar-plugin true lib/ org.notmysock.tpcds.GenTable org.apache.maven.plugins maven-dependency-plugin copy-dependencies package copy-dependencies ${project.build.directory}/lib central Central Repository https://repo.maven.apache.org/maven2 default false never central Central Repository https://repo.maven.apache.org/maven2 default false ================================================ FILE: tpcds-gen/src/main/java/org/notmysock/tpcds/GenTable.java ================================================ package org.notmysock.tpcds; import org.apache.hadoop.conf.*; import org.apache.hadoop.fs.*; import org.apache.hadoop.hdfs.*; import org.apache.hadoop.io.*; import org.apache.hadoop.util.*; import org.apache.hadoop.filecache.*; import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.input.*; import org.apache.hadoop.mapreduce.lib.output.*; import org.apache.hadoop.mapreduce.lib.reduce.*; import org.apache.commons.cli.*; import org.apache.commons.*; import java.io.*; import java.nio.*; import java.util.*; import java.net.*; import java.math.*; import java.security.*; 
public class GenTable extends Configured implements Tool { public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); int res = ToolRunner.run(conf, new GenTable(), args); System.exit(res); } @Override public int run(String[] args) throws Exception { String[] remainingArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs(); CommandLineParser parser = new BasicParser(); getConf().setInt("io.sort.mb", 4); org.apache.commons.cli.Options options = new org.apache.commons.cli.Options(); options.addOption("s","scale", true, "scale"); options.addOption("t","table", true, "table"); options.addOption("d","dir", true, "dir"); options.addOption("p", "parallel", true, "parallel"); CommandLine line = parser.parse(options, remainingArgs); if(!(line.hasOption("scale") && line.hasOption("dir"))) { HelpFormatter f = new HelpFormatter(); f.printHelp("GenTable", options); return 1; } int scale = Integer.parseInt(line.getOptionValue("scale")); String table = "all"; if(line.hasOption("table")) { table = line.getOptionValue("table"); } Path out = new Path(line.getOptionValue("dir")); int parallel = scale; if(line.hasOption("parallel")) { parallel = Integer.parseInt(line.getOptionValue("parallel")); } if(parallel == 1 || scale == 1) { System.err.println("The MR task does not work for scale=1 or parallel=1"); return 1; } Path in = genInput(table, scale, parallel); Path dsdgen = copyJar(new File("target/lib/dsdgen.jar")); URI dsuri = dsdgen.toUri(); URI link = new URI(dsuri.getScheme(), dsuri.getUserInfo(), dsuri.getHost(), dsuri.getPort(),dsuri.getPath(), dsuri.getQuery(),"dsdgen"); Configuration conf = getConf(); conf.setInt("mapred.task.timeout",0); conf.setInt("mapreduce.task.timeout",0); conf.setBoolean("mapreduce.map.output.compress", true); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.GzipCodec"); DistributedCache.addCacheArchive(link, conf); DistributedCache.createSymlink(conf); Job job = new 
Job(conf, "GenTable+"+table+"_"+scale); job.setJarByClass(getClass()); job.setNumReduceTasks(0); job.setMapperClass(DSDGen.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(NLineInputFormat.class); NLineInputFormat.setNumLinesPerSplit(job, 1); FileInputFormat.addInputPath(job, in); FileOutputFormat.setOutputPath(job, out); // use multiple output to only write the named files LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class, LongWritable.class, Text.class); boolean success = job.waitForCompletion(true); // cleanup FileSystem fs = FileSystem.get(getConf()); fs.delete(in, false); fs.delete(dsdgen, false); return 0; } public Path copyJar(File jar) throws Exception { MessageDigest md = MessageDigest.getInstance("MD5"); InputStream is = new FileInputStream(jar); try { is = new DigestInputStream(is, md); // read stream to EOF as normal... } finally { is.close(); } BigInteger md5 = new BigInteger(md.digest()); String md5hex = md5.toString(16); Path dst = new Path(String.format("/tmp/%s.jar",md5hex)); Path src = new Path(jar.toURI()); FileSystem fs = FileSystem.get(getConf()); fs.copyFromLocalFile(false, /*overwrite*/true, src, dst); return dst; } public Path genInput(String table, int scale, int parallel) throws Exception { long epoch = System.currentTimeMillis()/1000; Path in = new Path("/tmp/"+table+"_"+scale+"-"+epoch); FileSystem fs = FileSystem.get(getConf()); FSDataOutputStream out = fs.create(in); for(int i = 1; i <= parallel; i++) { if(table.equals("all")) { out.writeBytes(String.format("./dsdgen -dir $DIR -force Y -scale %d -parallel %d -child %d\n", scale, parallel, i)); } else { out.writeBytes(String.format("./dsdgen -dir $DIR -table %s -force Y -scale %d -parallel %d -child %d\n", table, scale, parallel, i)); } } out.close(); return in; } static String readToString(InputStream in) throws IOException { InputStreamReader is = 
new InputStreamReader(in); StringBuilder sb=new StringBuilder(); BufferedReader br = new BufferedReader(is); String read = br.readLine(); while(read != null) { //System.out.println(read); sb.append(read); read =br.readLine(); } return sb.toString(); } static final class DSDGen extends Mapper { private MultipleOutputs mos; protected void setup(Context context) throws IOException { mos = new MultipleOutputs(context); } protected void cleanup(Context context) throws IOException, InterruptedException { mos.close(); } protected void map(LongWritable offset, Text command, Mapper.Context context) throws IOException, InterruptedException { String parallel="1"; String child="1"; String[] cmd = command.toString().split(" "); for(int i=0; i/dev/null fi } if [ ! -f tpcds-gen/target/tpcds-gen-1.0-SNAPSHOT.jar ]; then echo "Please build the data generator with ./tpcds-build.sh first" exit 1 fi which hive > /dev/null 2>&1 if [ $? -ne 0 ]; then echo "Script must be run where Hive is installed" exit 1 fi # Tables in the TPC-DS schema. DIMS="date_dim time_dim item customer customer_demographics household_demographics customer_address store promotion warehouse ship_mode reason income_band call_center web_page catalog_page web_site" FACTS="store_sales store_returns web_sales web_returns catalog_sales catalog_returns inventory" # Get the parameters. SCALE=$1 DIR=$2 if [ "X$BUCKET_DATA" != "X" ]; then BUCKETS=13 RETURN_BUCKETS=13 else BUCKETS=1 RETURN_BUCKETS=1 fi if [ "X$DEBUG_SCRIPT" != "X" ]; then set -x fi # Sanity checking. if [ X"$SCALE" = "X" ]; then usage fi if [ X"$DIR" = "X" ]; then DIR=/tmp/tpcds-generate fi if [ $SCALE -eq 1 ]; then echo "Scale factor must be greater than 1" exit 1 fi # Do the actual data load. hdfs dfs -mkdir -p ${DIR} hdfs dfs -ls ${DIR}/${SCALE} > /dev/null if [ $? -ne 0 ]; then echo "Generating data at scale factor $SCALE." (cd tpcds-gen; hadoop jar target/*.jar -d ${DIR}/${SCALE}/ -s ${SCALE}) fi hdfs dfs -ls ${DIR}/${SCALE} > /dev/null if [ $? 
-ne 0 ]; then echo "Data generation failed, exiting." exit 1 fi hadoop fs -chmod -R 777 ${DIR}/${SCALE} echo "TPC-DS text data generation complete." HIVE="beeline -n hive -u 'jdbc:hive2://localhost:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2?tez.queue.name=default' " # Create the text/flat tables as external tables. These will be later be converted to ORCFile. echo "Loading text data into external tables." runcommand "$HIVE -i settings/load-flat.sql -f ddl-tpcds/text/alltables.sql --hivevar DB=tpcds_text_${SCALE} --hivevar LOCATION=${DIR}/${SCALE}" # Create the partitioned and bucketed tables. if [ "X$FORMAT" = "X" ]; then FORMAT=orc fi LOAD_FILE="load_${FORMAT}_${SCALE}.mk" SILENCE="2> /dev/null 1> /dev/null" if [ "X$DEBUG_SCRIPT" != "X" ]; then SILENCE="" fi echo -e "all: ${DIMS} ${FACTS}" > $LOAD_FILE i=1 total=24 DATABASE=tpcds_bin_partitioned_${FORMAT}_${SCALE} MAX_REDUCERS=2500 # maximum number of useful reducers for any scale REDUCERS=$((test ${SCALE} -gt ${MAX_REDUCERS} && echo ${MAX_REDUCERS}) || echo ${SCALE}) # Populate the smaller tables. 
# Emit one make rule per dimension table into $LOAD_FILE.
for t in ${DIMS}
do
  COMMAND="$HIVE -i settings/load-partitioned.sql -f ddl-tpcds/bin_partitioned/${t}.sql \
    --hivevar DB=${DATABASE} --hivevar SOURCE=tpcds_text_${SCALE} \
    --hivevar SCALE=${SCALE} \
    --hivevar REDUCERS=${REDUCERS} \
    --hivevar FILE=${FORMAT}"
  echo -e "${t}:\n\t@$COMMAND $SILENCE && echo 'Optimizing table $t ($i/$total).'" >> "$LOAD_FILE"
  i=$((i + 1))
done

# Same for the fact tables, which additionally take the bucket counts.
for t in ${FACTS}
do
  COMMAND="$HIVE -i settings/load-partitioned.sql -f ddl-tpcds/bin_partitioned/${t}.sql \
    --hivevar DB=${DATABASE} \
    --hivevar SCALE=${SCALE} \
    --hivevar SOURCE=tpcds_text_${SCALE} --hivevar BUCKETS=${BUCKETS} \
    --hivevar RETURN_BUCKETS=${RETURN_BUCKETS} --hivevar REDUCERS=${REDUCERS} --hivevar FILE=${FORMAT}"
  echo -e "${t}:\n\t@$COMMAND $SILENCE && echo 'Optimizing table $t ($i/$total).'" >> "$LOAD_FILE"
  i=$((i + 1))
done

# Stop here if any table failed to load, rather than adding constraints to a
# partially populated database and reporting success.
if ! make -j 1 -f "$LOAD_FILE"; then
  echo "Command failed, try 'export DEBUG_SCRIPT=ON' and re-running"
  exit 1
fi

echo "Loading constraints"
runcommand "$HIVE -f ddl-tpcds/bin_partitioned/add_constraints.sql --hivevar DB=${DATABASE}"

echo "Data loaded into database ${DATABASE}."

================================================
FILE: tpch-build.sh
================================================
#!/bin/sh

# Check for all the stuff I need to function.
for f in gcc javac; do
  if ! command -v "$f" > /dev/null 2>&1; then
    echo "Required program $f is missing. Please install or fix your path and try again."
    exit 1
  fi
done

# Check if Maven is installed and install it if not.
if ! command -v mvn > /dev/null 2>&1; then
  SKIP=0
  if [ -e "apache-maven-3.0.5-bin.tar.gz" ]; then
    # Skip the download when a tarball of the expected size is already here.
    # wc -c is POSIX, du -b is a GNU extension.
    SIZE=$(wc -c < apache-maven-3.0.5-bin.tar.gz | tr -d ' ')
    if [ "$SIZE" -eq 5144659 ]; then
      SKIP=1
    fi
  fi
  if [ $SKIP -ne 1 ]; then
    echo "Maven not found, automatically installing it."
    # -f makes curl fail on an HTTP error instead of saving the error page
    # under the tarball's name.
    if ! curl -f -O https://downloads.apache.org/maven/maven-3/3.0.5/binaries/apache-maven-3.0.5-bin.tar.gz 2> /dev/null; then
      echo "Failed to download Maven, check Internet connectivity and try again."
      exit 1
    fi
  fi
  if ! tar -zxf apache-maven-3.0.5-bin.tar.gz > /dev/null; then
    echo "Failed to unpack apache-maven-3.0.5-bin.tar.gz, remove it and try again."
    exit 1
  fi
  CWD=$(pwd)
  export MAVEN_HOME="$CWD/apache-maven-3.0.5"
  export PATH=$PATH:$MAVEN_HOME/bin
fi

echo "Building TPC-H Data Generator"
# Only report success when the build actually succeeded.
if ! (cd tpch-gen && make); then
  echo "Failed to build the TPC-H Data Generator."
  exit 1
fi
echo "TPC-H Data Generator built, you can now use tpch-setup.sh to generate data."

================================================
FILE: tpch-gen/Makefile
================================================
# Builds dbgen from the downloaded TPC-H kit, packs it into
# target/lib/dbgen.jar, and builds the MapReduce wrapper with Maven.

MYOS=$(shell uname -s)

.PHONY: all clean

all: target/lib/dbgen.jar target/tpch-gen-1.0-SNAPSHOT.jar

# "find ." with a quoted pattern: some find implementations require a start
# path, and an unquoted *.java could be expanded by the shell first.
target/tpch-gen-1.0-SNAPSHOT.jar: $(shell find . -name '*.java')
	mvn package

target/tpch_kit.zip: tpch_kit.zip
	mkdir -p target/
	cp tpch_kit.zip target/tpch_kit.zip

tpch_kit.zip:
	curl http://dev.hortonworks.com.s3.amazonaws.com/hive-testbench/tpch/README
	curl --output tpch_kit.zip http://dev.hortonworks.com.s3.amazonaws.com/hive-testbench/tpch/tpch_kit.zip

target/lib/dbgen.jar: target/tools/dbgen
	cd target/; mkdir -p lib/; ( jar cvf lib/dbgen.jar tools/ || gjar cvf lib/dbgen.jar tools/ )

target/tools/dbgen: target/tpch_kit.zip
	test -d target/tools/ || (cd target; unzip tpch_kit.zip -x __MACOSX/; ln -sf $$PWD/*/dbgen/ tools)
	cd target/tools; cat ../../../patches/${MYOS}/*.patch | patch -p0
	cd target/tools; make -f makefile.suite clean; make -f makefile.suite CC=gcc DATABASE=ORACLE MACHINE=LINUX WORKLOAD=TPCH

clean:
	mvn clean

================================================
FILE: tpch-gen/README.md
================================================
MapReduce TPC-H Generator
=========================

This simplifies creating TPC-H data sets at large scale on a Hadoop cluster.

To get set up, you need to run

$ make

This will download the TPC-H dbgen program, compile it, and use Maven to build the MR app wrapped around it.
To generate the data sets, you need to run (say, for scale = 200, parallelism = 100)

$ hadoop jar target/tpch-gen-1.0-SNAPSHOT.jar -d /user/hive/external/200/ -p 100 -s 200

This uses the existing parallelism in the dbgen program without modification and uses it to run the command on multiple machines.

The command generates multiple files for each map task, resulting in each table having its own subdirectory.

The assumption is that all machines in the cluster are identical in OS, architecture and libraries.

================================================
FILE: tpch-gen/ddl/orc.sql
================================================
-- Creates the eight TPC-H tables as Snappy-compressed ORC (if they do not
-- exist yet) and fills each one from the table of the same name in the
-- ${SOURCE} database.

set hive.stats.autogather=true;
set hive.stats.dbclass=fs;

create table if not exists lineitem
(L_ORDERKEY BIGINT,
 L_PARTKEY BIGINT,
 L_SUPPKEY BIGINT,
 L_LINENUMBER INT,
 L_QUANTITY DOUBLE,
 L_EXTENDEDPRICE DOUBLE,
 L_DISCOUNT DOUBLE,
 L_TAX DOUBLE,
 L_RETURNFLAG STRING,
 L_LINESTATUS STRING,
 L_SHIPDATE STRING,
 L_COMMITDATE STRING,
 L_RECEIPTDATE STRING,
 L_SHIPINSTRUCT STRING,
 L_SHIPMODE STRING,
 L_COMMENT STRING)
STORED AS ORC
TBLPROPERTIES ("orc.compress"="SNAPPY")
;

-- P_PARTKEY is BIGINT to match the text staging table this is loaded from and
-- the columns that reference it (PS_PARTKEY, L_PARTKEY).
create table if not exists part
(P_PARTKEY BIGINT,
 P_NAME STRING,
 P_MFGR STRING,
 P_BRAND STRING,
 P_TYPE STRING,
 P_SIZE INT,
 P_CONTAINER STRING,
 P_RETAILPRICE DOUBLE,
 P_COMMENT STRING)
STORED AS ORC
TBLPROPERTIES ("orc.compress"="SNAPPY")
;

create table if not exists supplier
(S_SUPPKEY BIGINT,
 S_NAME STRING,
 S_ADDRESS STRING,
 S_NATIONKEY INT,
 S_PHONE STRING,
 S_ACCTBAL DOUBLE,
 S_COMMENT STRING)
STORED AS ORC
TBLPROPERTIES ("orc.compress"="SNAPPY")
;

create table if not exists partsupp
(PS_PARTKEY BIGINT,
 PS_SUPPKEY BIGINT,
 PS_AVAILQTY INT,
 PS_SUPPLYCOST DOUBLE,
 PS_COMMENT STRING)
STORED AS ORC
TBLPROPERTIES ("orc.compress"="SNAPPY")
;

create table if not exists nation
(N_NATIONKEY INT,
 N_NAME STRING,
 N_REGIONKEY INT,
 N_COMMENT STRING)
STORED AS ORC
TBLPROPERTIES ("orc.compress"="SNAPPY")
;

create table if not exists region
(R_REGIONKEY INT,
 R_NAME STRING,
 R_COMMENT STRING)
STORED AS ORC
TBLPROPERTIES ("orc.compress"="SNAPPY")
;

create table if not exists customer
(C_CUSTKEY BIGINT,
 C_NAME STRING,
 C_ADDRESS STRING,
 C_NATIONKEY INT,
 C_PHONE STRING,
 C_ACCTBAL DOUBLE,
 C_MKTSEGMENT STRING,
 C_COMMENT STRING)
STORED AS ORC
TBLPROPERTIES ("orc.compress"="SNAPPY")
;

create table if not exists orders
(O_ORDERKEY BIGINT,
 O_CUSTKEY BIGINT,
 O_ORDERSTATUS STRING,
 O_TOTALPRICE DOUBLE,
 O_ORDERDATE STRING,
 O_ORDERPRIORITY STRING,
 O_CLERK STRING,
 O_SHIPPRIORITY INT,
 O_COMMENT STRING)
STORED AS ORC
TBLPROPERTIES ("orc.compress"="SNAPPY")
;

insert overwrite table nation select * from ${SOURCE}.nation;
insert overwrite table region select * from ${SOURCE}.region;
insert overwrite table part select * from ${SOURCE}.part;
insert overwrite table supplier select * from ${SOURCE}.supplier;
insert overwrite table partsupp select * from ${SOURCE}.partsupp;
insert overwrite table customer select * from ${SOURCE}.customer;
insert overwrite table lineitem select * from ${SOURCE}.lineitem;
insert overwrite table orders select * from ${SOURCE}.orders;

================================================
FILE: tpch-gen/ddl/text.sql
================================================
-- Declares the eight TPC-H tables as external, '|'-delimited text tables, each
-- located in its own subdirectory of ${LOCATION}.

create external table lineitem
(L_ORDERKEY BIGINT,
 L_PARTKEY BIGINT,
 L_SUPPKEY BIGINT,
 L_LINENUMBER INT,
 L_QUANTITY DOUBLE,
 L_EXTENDEDPRICE DOUBLE,
 L_DISCOUNT DOUBLE,
 L_TAX DOUBLE,
 L_RETURNFLAG STRING,
 L_LINESTATUS STRING,
 L_SHIPDATE STRING,
 L_COMMITDATE STRING,
 L_RECEIPTDATE STRING,
 L_SHIPINSTRUCT STRING,
 L_SHIPMODE STRING,
 L_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE
LOCATION '${LOCATION}/lineitem';

create external table part
(P_PARTKEY BIGINT,
 P_NAME STRING,
 P_MFGR STRING,
 P_BRAND STRING,
 P_TYPE STRING,
 P_SIZE INT,
 P_CONTAINER STRING,
 P_RETAILPRICE DOUBLE,
 P_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE
LOCATION '${LOCATION}/part/';

create external table supplier
(S_SUPPKEY BIGINT,
 S_NAME STRING,
 S_ADDRESS STRING,
 S_NATIONKEY INT,
 S_PHONE STRING,
 S_ACCTBAL DOUBLE,
 S_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE
LOCATION '${LOCATION}/supplier/';

create external table partsupp
(PS_PARTKEY BIGINT,
 PS_SUPPKEY BIGINT,
 PS_AVAILQTY INT,
 PS_SUPPLYCOST DOUBLE,
 PS_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE
LOCATION '${LOCATION}/partsupp';

create external table nation
(N_NATIONKEY INT,
 N_NAME STRING,
 N_REGIONKEY INT,
 N_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE
LOCATION '${LOCATION}/nation';

create external table region
(R_REGIONKEY INT,
 R_NAME STRING,
 R_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE
LOCATION '${LOCATION}/region';

create external table customer
(C_CUSTKEY BIGINT,
 C_NAME STRING,
 C_ADDRESS STRING,
 C_NATIONKEY INT,
 C_PHONE STRING,
 C_ACCTBAL DOUBLE,
 C_MKTSEGMENT STRING,
 C_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE
LOCATION '${LOCATION}/customer';

create external table orders
(O_ORDERKEY BIGINT,
 O_CUSTKEY BIGINT,
 O_ORDERSTATUS STRING,
 O_TOTALPRICE DOUBLE,
 O_ORDERDATE STRING,
 O_ORDERPRIORITY STRING,
 O_CLERK STRING,
 O_SHIPPRIORITY INT,
 O_COMMENT STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE
LOCATION '${LOCATION}/orders';

================================================
FILE: tpch-gen/patches/Darwin/macosx.patch
================================================
--- makefile.suite.orig 2014-06-25 15:40:27.000000000 -0700
+++ makefile.suite 2014-06-25 15:42:03.000000000 -0700
@@ -110,7 +110,7 @@
 MACHINE =
 WORKLOAD =
 #
-CFLAGS = -g -DDBNAME=\"dss\" -D$(MACHINE) -D$(DATABASE) -D$(WORKLOAD) -DRNG_TEST -D_FILE_OFFSET_BITS=64
+CFLAGS = -g -DDBNAME=\"dss\" -D$(MACHINE) -D$(DATABASE) -D$(WORKLOAD) -DRNG_TEST -D_FILE_OFFSET_BITS=64 -I/usr/include/malloc
 LDFLAGS = -O
 # The OBJ,EXE and LIB macros will need to be changed for compilation under
 # Windows NT

================================================
FILE: 
tpch-gen/pom.xml ================================================ 4.0.0 org.notmysock.tpch tpch-gen 1.0-SNAPSHOT jar tpch-gen http://maven.apache.org org.apache.hadoop hadoop-client 3.1.1 compile commons-cli commons-cli 1.1 compile org.mockito mockito-core 1.8.5 test junit junit 4.7 test maven-compiler-plugin 1.6 1.6 org.apache.maven.plugins maven-jar-plugin true lib/ org.notmysock.tpch.GenTable org.apache.maven.plugins maven-dependency-plugin copy-dependencies package copy-dependencies ${project.build.directory}/lib central Central Repository https://repo.maven.apache.org/maven2 default false never central Central Repository https://repo.maven.apache.org/maven2 default false ================================================ FILE: tpch-gen/src/main/java/org/notmysock/tpch/GenTable.java ================================================ package org.notmysock.tpch; import org.apache.hadoop.conf.*; import org.apache.hadoop.fs.*; import org.apache.hadoop.hdfs.*; import org.apache.hadoop.io.*; import org.apache.hadoop.io.compress.DefaultCodec; import org.apache.hadoop.io.compress.SnappyCodec; import org.apache.hadoop.util.*; import org.apache.hadoop.filecache.*; import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.input.*; import org.apache.hadoop.mapreduce.lib.output.*; import org.apache.hadoop.mapreduce.lib.reduce.*; import org.apache.commons.cli.*; import org.apache.commons.*; import java.io.*; import java.nio.*; import java.util.*; import java.net.*; import java.math.*; import java.security.*; public class GenTable extends Configured implements Tool { private static enum TableMappings { ALL("all"), CUSTOMERS("c"), SUPPLIERS("s"), NATION("l"), ORDERS("o"), PARTS("p"); /* -T c -- generate cutomers ONLY -T l -- generate nation/region ONLY -T o -- generate orders/lineitem ONLY -T p -- generate parts/partsupp ONLY -T s -- generate suppliers ONLY */ final String option; TableMappings(String option) { this.option = option; } } public static void 
main(String[] args) throws Exception { Configuration conf = new Configuration(); int res = ToolRunner.run(conf, new GenTable(), args); System.exit(res); } @Override public int run(String[] args) throws Exception { String[] remainingArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs(); CommandLineParser parser = new BasicParser(); getConf().setInt("io.sort.mb", 4); org.apache.commons.cli.Options options = new org.apache.commons.cli.Options(); options.addOption("s","scale", true, "scale"); options.addOption("t","table", true, "table"); options.addOption("d","dir", true, "dir"); options.addOption("p", "parallel", true, "parallel"); options.addOption("text", "text", false, "text"); options.addOption("snappy", "snappy", false, "snappy"); CommandLine line = parser.parse(options, remainingArgs); if(!(line.hasOption("scale") && line.hasOption("dir"))) { HelpFormatter f = new HelpFormatter(); f.printHelp("GenTable", options); return 1; } int scale = Integer.parseInt(line.getOptionValue("scale")); String table = "all"; if(line.hasOption("table")) { table = line.getOptionValue("table"); table = TableMappings.valueOf(table.toUpperCase()).option; } Path out = new Path(line.getOptionValue("dir")); int parallel = scale; if(line.hasOption("parallel")) { parallel = Integer.parseInt(line.getOptionValue("parallel")); } if(parallel == 1 || scale == 1) { System.err.println("The MR task does not work for scale=1 or parallel=1"); return 1; } Path in = genInput(table, scale, parallel); Path dbgen = copyJar(new File("target/lib/dbgen.jar")); URI dsuri = dbgen.toUri(); URI link = new URI(dsuri.getScheme(), dsuri.getUserInfo(), dsuri.getHost(), dsuri.getPort(),dsuri.getPath(), dsuri.getQuery(),"dbgen"); Configuration conf = getConf(); conf.setInt("mapred.task.timeout",0); conf.setInt("mapreduce.task.timeout",0); DistributedCache.addCacheArchive(link, conf); Job job = new Job(conf, "GenTable+"+table+"_"+scale); job.setJarByClass(getClass()); job.setNumReduceTasks(0); 
job.setMapperClass(dbgen.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(NLineInputFormat.class); NLineInputFormat.setNumLinesPerSplit(job, 1); FileInputFormat.addInputPath(job, in); FileOutputFormat.setOutputPath(job, out); // use multiple output to only write the named files LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class, LongWritable.class, Text.class); if (line.hasOption("snappy") || (line.hasOption("text") == false)) { TextOutputFormat.setCompressOutput(job, true); if (line.hasOption("snappy")) { TextOutputFormat.setOutputCompressorClass(job, SnappyCodec.class); } else { TextOutputFormat.setOutputCompressorClass(job, DefaultCodec.class); } } boolean success = job.waitForCompletion(true); // cleanup FileSystem fs = FileSystem.get(getConf()); fs.delete(in, false); fs.delete(dbgen, false); return 0; } public Path copyJar(File jar) throws Exception { MessageDigest md = MessageDigest.getInstance("MD5"); InputStream is = new FileInputStream(jar); try { is = new DigestInputStream(is, md); // read stream to EOF as normal... 
} finally { is.close(); } BigInteger md5 = new BigInteger(md.digest()); String md5hex = md5.toString(16); Path dst = new Path(String.format("/tmp/%s.jar",md5hex)); Path src = new Path(jar.toURI()); FileSystem fs = FileSystem.get(getConf()); fs.copyFromLocalFile(false, /*overwrite*/true, src, dst); return dst; } public Path genInput(String table, int scale, int parallel) throws Exception { long epoch = System.currentTimeMillis()/1000; Path in = new Path("/tmp/"+table+"_"+scale+"-"+epoch); FileSystem fs = FileSystem.get(getConf()); FSDataOutputStream out = fs.create(in); for(int i = 1; i <= parallel; i++) { if(table.equals("all")) { out.writeBytes(String.format("$DIR/dbgen/tools/dbgen -b $DIR/dbgen/tools/dists.dss -f -s %d -C %d -S %d\n", scale, parallel, i)); } else { out.writeBytes(String.format("$DIR/dbgen/tools/dbgen -b $DIR/dbgen/tools/dists.dss -f -s %d -C %d -S %d -T %s\n", scale, parallel, i, table)); } } out.close(); return in; } static String readToString(InputStream in) throws IOException { InputStreamReader is = new InputStreamReader(in); StringBuilder sb=new StringBuilder(); BufferedReader br = new BufferedReader(is); String read = br.readLine(); while(read != null) { //System.out.println(read); sb.append(read); read =br.readLine(); } return sb.toString(); } static final class dbgen extends Mapper { private MultipleOutputs mos; protected void setup(Context context) throws IOException { mos = new MultipleOutputs(context); } protected void cleanup(Context context) throws IOException, InterruptedException { mos.close(); } protected void map(LongWritable offset, Text command, Mapper.Context context) throws IOException, InterruptedException { String parallel="1"; String child="1"; String[] cmd = command.toString().split(" "); for(int i=0; i/dev/null fi } if [ ! -f tpch-gen/target/tpch-gen-1.0-SNAPSHOT.jar ]; then echo "Please build the data generator with ./tpch-build.sh first" exit 1 fi which hive > /dev/null 2>&1 if [ $? 
-ne 0 ]; then echo "Script must be run where Hive is installed" exit 1 fi # Tables in the TPC-H schema. TABLES="part partsupp supplier customer orders lineitem nation region" # Get the parameters. SCALE=$1 DIR=$2 BUCKETS=13 if [ "X$DEBUG_SCRIPT" != "X" ]; then set -x fi # Sanity checking. if [ X"$SCALE" = "X" ]; then usage fi if [ X"$DIR" = "X" ]; then DIR=/tmp/tpch-generate fi if [ $SCALE -eq 1 ]; then echo "Scale factor must be greater than 1" exit 1 fi # Do the actual data load. hdfs dfs -mkdir -p ${DIR} hdfs dfs -ls ${DIR}/${SCALE}/lineitem > /dev/null if [ $? -ne 0 ]; then echo "Generating data at scale factor $SCALE." (cd tpch-gen; hadoop jar target/*.jar -d ${DIR}/${SCALE}/ -s ${SCALE}) fi hdfs dfs -ls ${DIR}/${SCALE}/lineitem > /dev/null if [ $? -ne 0 ]; then echo "Data generation failed, exiting." exit 1 fi echo "TPC-H text data generation complete." # Create the text/flat tables as external tables. These will be later be converted to ORCFile. echo "Loading text data into external tables." runcommand "hive -i settings/load-flat.sql -f ddl-tpch/bin_flat/alltables.sql -d DB=tpch_text_${SCALE} -d LOCATION=${DIR}/${SCALE}" # Create the optimized tables. i=1 total=8 if test $SCALE -le 1000; then SCHEMA_TYPE=flat else SCHEMA_TYPE=partitioned fi DATABASE=tpch_${SCHEMA_TYPE}_orc_${SCALE} MAX_REDUCERS=2600 # ~7 years of data REDUCERS=$((test ${SCALE} -gt ${MAX_REDUCERS} && echo ${MAX_REDUCERS}) || echo ${SCALE}) for t in ${TABLES} do echo "Optimizing table $t ($i/$total)." COMMAND="hive -i settings/load-${SCHEMA_TYPE}.sql -f ddl-tpch/bin_${SCHEMA_TYPE}/${t}.sql \ -d DB=${DATABASE} \ -d SOURCE=tpch_text_${SCALE} -d BUCKETS=${BUCKETS} \ -d SCALE=${SCALE} -d REDUCERS=${REDUCERS} \ -d FILE=orc" runcommand "$COMMAND" if [ $? 
-ne 0 ]; then echo "Command failed, try 'export DEBUG_SCRIPT=ON' and re-running" exit 1 fi i=`expr $i + 1` done hive -i settings/load-${SCHEMA_TYPE}.sql -f ddl-tpch/bin_${SCHEMA_TYPE}/analyze.sql --database ${DATABASE}; echo "Data loaded into database ${DATABASE}."