From 0aa62de0850ee484fedbcc1c69e4959cdb32473b Mon Sep 17 00:00:00 2001 From: Ivan Ievlev <51958262+ivanievlev@users.noreply.github.com> Date: Tue, 16 Nov 2021 12:55:19 +0300 Subject: [PATCH 1/6] Set 10 for EVERY parameter in skewed branch --- 03_ddl/rollout.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/03_ddl/rollout.sh b/03_ddl/rollout.sh index b994f803..63e2100b 100755 --- a/03_ddl/rollout.sh +++ b/03_ddl/rollout.sh @@ -13,7 +13,8 @@ SINGLE_USER_ITERATIONS=$5 #luka multiplying qiantity of partitions with EVERY=1 parameter in DDL -#EVERY for web_returns was "180" +#EVERY is used for web_returns, it was "180" in a classic TPC-DS RunningJon +# Change it less than 180 to create more partitions EVERY="10" From 8f6efd57549f17b4c89a00a3ca8e4dcfafc93a6c Mon Sep 17 00:00:00 2001 From: Ivan Ievlev Date: Wed, 17 Nov 2021 16:12:16 +0300 Subject: [PATCH 2/6] Added skewed values for warehouse and inventory --- 04_load/009.gpdb.warehouse.sql | 16 ++++++++++++++++ 04_load/018.gpdb.inventory.sql | 1 + 2 files changed, 17 insertions(+) diff --git a/04_load/009.gpdb.warehouse.sql b/04_load/009.gpdb.warehouse.sql index 660903be..5ac29c38 100644 --- a/04_load/009.gpdb.warehouse.sql +++ b/04_load/009.gpdb.warehouse.sql @@ -1,2 +1,18 @@ TRUNCATE table tpcds.warehouse; INSERT INTO tpcds.warehouse SELECT * FROM ext_tpcds.warehouse; +INSERT INTO tpcds.warehouse (w_warehouse_sk, w_warehouse_id, w_warehouse_name, w_warehouse_sq_ft, w_street_number, w_street_name, w_street_type, w_suite_number, w_city, w_county, w_state, w_zip, w_country, w_gmt_offset) SELECT +777 AS w_warehouse_sk +,'LUKAAAAAAAAAAAAA' AS w_warehouse_id +,'Luka WH' AS w_warehouse_name +,w_warehouse_sq_ft +,w_street_number +,w_street_name +,w_street_type +,w_suite_number +,w_city +,w_county +,w_state +,w_zip +,w_country +,w_gmt_offset +FROM tpcds.warehouse WHERE w_warehouse_sk = 1; \ No newline at end of file diff --git a/04_load/018.gpdb.inventory.sql b/04_load/018.gpdb.inventory.sql index 5a67c92e..dda88393 100644 --- a/04_load/018.gpdb.inventory.sql +++ b/04_load/018.gpdb.inventory.sql @@ -1,2 +1,3 @@ TRUNCATE table tpcds.inventory; INSERT INTO tpcds.inventory SELECT * FROM 
ext_tpcds.inventory; +UPDATE tpcds.inventory SET inv_warehouse_sk = 777 WHERE inv_warehouse_sk < 10; \ No newline at end of file From becf9ef05d7ad7cd06430a9a1a5ac0b26cdee78f Mon Sep 17 00:00:00 2001 From: Ivan Ievlev Date: Wed, 17 Nov 2021 19:34:31 +0300 Subject: [PATCH 3/6] Partitioned test with EVERY 1 --- 03_ddl/012.gpdb.inventory.sql | 2 +- 03_ddl/018.gpdb.store_returns.sql | 2 +- 03_ddl/rollout.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/03_ddl/012.gpdb.inventory.sql b/03_ddl/012.gpdb.inventory.sql index 8eed7210..cae227d7 100644 --- a/03_ddl/012.gpdb.inventory.sql +++ b/03_ddl/012.gpdb.inventory.sql @@ -7,5 +7,5 @@ CREATE TABLE tpcds.inventory ( WITH (:LARGE_STORAGE) :DISTRIBUTED_BY partition by range(inv_date_sk) -(start(2450815) INCLUSIVE end(2453005) INCLUSIVE every (100), +(start(2450815) INCLUSIVE end(2453005) INCLUSIVE every (1), default partition others); diff --git a/03_ddl/018.gpdb.store_returns.sql b/03_ddl/018.gpdb.store_returns.sql index cdbac222..2c196a49 100644 --- a/03_ddl/018.gpdb.store_returns.sql +++ b/03_ddl/018.gpdb.store_returns.sql @@ -23,5 +23,5 @@ CREATE TABLE tpcds.store_returns ( WITH (:MEDIUM_STORAGE) :DISTRIBUTED_BY partition by range(sr_returned_date_sk) -(start(2450815) INCLUSIVE end(2453005) INCLUSIVE every (100), +(start(2450815) INCLUSIVE end(2453005) INCLUSIVE every (1), default partition others); diff --git a/03_ddl/rollout.sh b/03_ddl/rollout.sh index 63e2100b..31add74a 100755 --- a/03_ddl/rollout.sh +++ b/03_ddl/rollout.sh @@ -15,7 +15,7 @@ SINGLE_USER_ITERATIONS=$5 #luka multiplying qiantity of partitions with EVERY=1 parameter in DDL #EVERY is used for web_returns, it was "180" in a classic TPC-DS RunningJon # Change it less than 180 to create more partitions -EVERY="10" +EVERY="1" From 25aca0655d6b35ef48de5f3a5871571c440d6a44 Mon Sep 17 00:00:00 2001 From: Ivan Ievlev Date: Wed, 17 Nov 2021 17:30:14 +0300 Subject: [PATCH 4/6] Added variable REPO_BRANCH --- tpcds.sh | 7 ++++++- 1 file 
changed, 6 insertions(+), 1 deletion(-) diff --git a/tpcds.sh b/tpcds.sh index 2e1d5f77..2aef41ae 100755 --- a/tpcds.sh +++ b/tpcds.sh @@ -27,6 +27,11 @@ check_variables() echo "REPO_URL=\"https://github.com/ivanievlev/TPC-DS\"" >> $MYVAR new_variable=$(($new_variable + 1)) fi + local count=$(grep "REPO_BRANCH=" $MYVAR | wc -l) + if [ "$count" -eq "0" ]; then + echo "REPO_BRANCH=\"master\"" >> $MYVAR + new_variable=$(($new_variable + 1)) + fi local count=$(grep "ADMIN_USER=" $MYVAR | wc -l) if [ "$count" -eq "0" ]; then echo "ADMIN_USER=\"gpadmin\"" >> $MYVAR @@ -233,7 +238,7 @@ repo_init() if [ "$internet_down" -eq "0" ]; then git config --global user.email "$ADMIN_USER@$HOSTNAME" git config --global user.name "$ADMIN_USER" - su -c "cd $INSTALL_DIR/$REPO; GIT_SSL_NO_VERIFY=true; git fetch --all; git reset --hard origin/master" $ADMIN_USER + su -c "cd $INSTALL_DIR/$REPO; GIT_SSL_NO_VERIFY=true; git fetch --all; git checkout -f ${REPO_BRANCH:-master}; git reset --hard origin/${REPO_BRANCH:-master}" $ADMIN_USER fi fi } From e4a916178d68aab4ac4868894454fea1d851af8f Mon Sep 17 00:00:00 2001 From: Ivan Ievlev Date: Wed, 17 Nov 2021 22:55:15 +0300 Subject: [PATCH 5/6] Remove depth from git clone --- tpcds.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpcds.sh b/tpcds.sh index 2aef41ae..a10963b8 100755 --- a/tpcds.sh +++ b/tpcds.sh @@ -232,7 +232,7 @@ repo_init() echo "-------------------------------------------------------------------------" mkdir $INSTALL_DIR/$REPO chown $ADMIN_USER $INSTALL_DIR/$REPO - su -c "cd $INSTALL_DIR; GIT_SSL_NO_VERIFY=true; git clone --depth=1 $REPO_URL" $ADMIN_USER + su -c "cd $INSTALL_DIR; GIT_SSL_NO_VERIFY=true; git clone $REPO_URL" $ADMIN_USER fi else if [ "$internet_down" -eq "0" ]; then From ce9c7261b5c075ea8e17c2ce4eaf667fd3b6c84f Mon Sep 17 00:00:00 2001 From: Ivan Ievlev Date: Wed, 17 Nov 2021 23:03:43 +0300 Subject: [PATCH 6/6] Added checkout Repo_Branch --- tpcds.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpcds.sh b/tpcds.sh 
index a10963b8..4df517d9 100755 --- a/tpcds.sh +++ b/tpcds.sh @@ -232,7 +232,7 @@ repo_init() echo "-------------------------------------------------------------------------" mkdir $INSTALL_DIR/$REPO chown $ADMIN_USER $INSTALL_DIR/$REPO - su -c "cd $INSTALL_DIR; GIT_SSL_NO_VERIFY=true; git clone $REPO_URL" $ADMIN_USER + su -c "cd $INSTALL_DIR; GIT_SSL_NO_VERIFY=true; git clone --branch ${REPO_BRANCH:-master} $REPO_URL" $ADMIN_USER fi else if [ "$internet_down" -eq "0" ]; then