Week of June 11 Detailed Log

SELECT * FROM bigquery-public-data.samples.shakespeare Limit 10

Enable GCP Shell

gcloud auth login

$ bq ls

$ bq ls publicdata:samples

$ bq mk spikeysales_dataset

$ bq ls

$ bq rm spikeysales_dataset

$ bq show publicdata:samples

$ bq show publicdata:samples.shakespeare

$  bq query “SELECT * FROM  publicdata:samples.shakespeare LIMIT 10”

Creating and Editing Access to a Dataset

bq show –format=prettyjson spikeysales_dataset > dataset_spikeysales.json

bq update –source dataset_spikeysales.json spikeysales_dataset

bq show –format=prettyjson spikeysales_dataset

Creating and Querying Ingestion Time Partitioned Tables

bq show –schema –format=prettyjson spikeysales_dataset.e_commerce_data

[

  {

    “mode”: “NULLABLE”,

    “name”: “RowID”,

    “type”: “INTEGER”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “OrderID”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “OrderDate”,

    “type”: “DATE”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “ShipDate”,

    “type”: “DATE”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “ShipMode”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “CustomerID”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “CustomerName”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Segment”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Address_Country”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Address_City”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Address_State”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Address_Postal_Code”,

    “type”: “INTEGER”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Address_Region”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Product_ID”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Category”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Sub_Category”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Product_Name”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Sales”,

    “type”: “FLOAT”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Quantity”,

    “type”: “INTEGER”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Discount”,

    “type”: “FLOAT”

  },

 {

    “mode”: “NULLABLE”,

    “name”: “Profit”,

    “type”: “FLOAT”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “row_number”,

    “type”: “INTEGER”

  }

]

Creating Column Based Partitioned Tables Using the Command Line

bq mk –table –schema OrderID:STRING,OrderDate:DATE,Quantity:INTEGER –time_partitioning_field OrderDate spikey-bq-279714:spikeysales_dataset.order_data

bq –location=US query –nouse_legacy_sql –destination_table=spikey-bq-279714:spikeysales_dataset.order_data ‘SELECT OrderID,OrderDate,Quantity FROM spikeysales_dataset.e_commerce_data WHERE OrderDate is NOT NULL LIMIT 1000’

bq show spikeysales_dataset.order_data

bq –location=US query –nouse_legacy_sql ‘SELECT * FROM `spikey-bq-279714.spikeysales_dataset.order_data` WHERE Orderdate=DATE(“2014-11-01”)’

Populating Data into a Table with Nested Fields Using STRUCT

bq show –schema –format=prettyjson spikeysales_dataset.e_commerce_partitioned

[

  {

    “mode”: “NULLABLE”,

    “name”: “RowID”,

    “type”: “INTEGER”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “OrderID”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “OrderDate”,

    “type”: “DATE”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “ShipDate”,

    “type”: “DATE”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “ShipMode”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “CustomerID”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “CustomerName”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Segment”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Address_Country”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Address_City”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Address_State”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Address_Postal_Code”,

    “type”: “INTEGER”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Address_Region”,

    “type”: “STRING”

  },

{

    “mode”: “NULLABLE”,

    “name”: “Product_ID”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Category”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Sub_Category”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Product_Name”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Sales”,

    “type”: “FLOAT”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Quantity”,

    “type”: “INTEGER”

  },

 {

    “mode”: “NULLABLE”,

    “name”: “Discount”,

    “type”: “FLOAT”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Profit”,

    “type”: “FLOAT”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “row_number”,

    “type”: “INTEGER”

  }

]

Design Schema with STRUCT

[

  {

    “mode”: “NULLABLE”,

    “name”: “RowID”,

    “type”: “INTEGER”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “OrderInfo”,

    “type”: “RECORD”,

    “fields”: [ 

      {

        “mode”: “NULLABLE”,

        “name”: “OrderID”,

        “type”: “STRING”

      },

      {

        “mode”: “NULLABLE”,

        “name”: “OrderDate”,

        “type”: “DATE”

      },

      {

        “mode”: “NULLABLE”,

        “name”: “Sales”,

        “type”: “FLOAT”

      },

      {

        “mode”: “NULLABLE”,

        “name”: “Quantity”,

        “type”: “INTEGER”

      }

              ]

  },

  {

    “mode”: “NULLABLE”,

    “name”: “ShipDate”,

    “type”: “DATE”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “ShipMode”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “CustomerID”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “CustomerName”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Segment”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Address”,

    “type”: “RECORD”,

    “fields”: [ 

       {

          “mode”: “NULLABLE”,

          “name”: “Country”,

          “type”: “STRING”

       },

       {

           “mode”: “NULLABLE”,

           “name”: “City”,

           “type”: “STRING”

       },

       {

           “mode”: “NULLABLE”,

           “name”: “State”,

           “type”: “STRING”

       },

       {

           “mode”: “NULLABLE”,

           “name”: “Postal_Code”,

           “type”: “INTEGER”

       },

       {

           “mode”: “NULLABLE”,

           “name”: “Region”,

           “type”: “STRING”

       }

                 ]

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Product_ID”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Category”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Sub_Category”,

    “type”: “STRING”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Product_Name”,

    “type”: “STRING”

  },

 {

    “mode”: “NULLABLE”,

    “name”: “Discount”,

    “type”: “FLOAT”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “Profit”,

    “type”: “FLOAT”

  },

  {

    “mode”: “NULLABLE”,

    “name”: “row_number”,

    “type”: “INTEGER”

  }

]

Populate Data from existing Table

INSERT INTO

   `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested` 

   (ROWID,

    OrderInfo, 

    ShipDate, 

    ShipMode, 

    CustomerID, 

    CustomerName, 

    Segment, 

    Address, 

    Product_ID, 

    Category, 

    Sub_Category, 

    Product_Name, 

    Discount, 

    Profit )

SELECT 

RowID, 

STRUCT (OrderID, OrderDate, Sales, Quantity),

ShipDate, 

ShipMode, 

CustomerID, 

CustomerName, 

Segment, 

STRUCT (Address_Country, 

       Address_City, 

       Address_State, 

       Address_Postal_Code, 

       Address_Region), 

Product_ID, 

Category, 

Sub_Category, 

Product_Name, 

Discount, 

Profit

FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_partitioned` 

Working with Repeated Fields

INSERT INTO 

`spikey-bq-279714.spikeysales_dataset.customer_order_details`

(customer_name,order_id, product_names)

VALUES

  (“John”, 434, [‘Trousers’, ‘Notebook’, ‘Football’,’Avocado’])

Populating Tables with Repeated Fields Using ARRAY_AGG

INSERT INTO

`spikey-bq-279714.spikeysales_dataset.customer_product_price` (

CustomerID,

Products_Bought,

Price_Paid )

SELECT

CustomerID,

ARRAY_AGG(Product_Name) AS Products_Bought,

ARRAY_AGG(Sales) AS Price_Paid

FROM

`spikey-bq-279714.spikeysales_dataset.e_commerce_data`

GROUP BY

CustomerID 

Using Nested and Repeated Fields Together

SELECT 

Address_City,

ARRAY_AGG (STRUCT (CustomerID,

CustomerName)) AS CustomerInfo

FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data`

GROUP BY

  Address_City

ORDER BY

  Address_City

SELECT 

Address_State,

ARRAY_AGG (STRUCT (OrderDate, 

OrderID,

Sales,

Quantity)) AS OrderInfoInfo

FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data`

GROUP BY

  Address_State

ORDER BY

  Address_State

Using UNNEST to Query Repeated Fields

SELECT CustomerID, Products

FROM `spikey-bq-279714.spikeysales_dataset.customer_product_price`,UNNEST(Products_Bought) as Products

LIMIT 1000

SELECT CustomerID, Products, Price

FROM `spikey-bq-279714.spikeysales_dataset.customer_product_price`,UNNEST(Products_Bought) as Products, UNNEST(Price_paid) as

Price

LIMIT 1000

SELECT 

  Address_State,

  Orders

FROM `spikey-bq-279714.spikeysales_dataset.orders_by_state` ,

UNNEST( OrderInfoInfo ) as Orders

LIMIT 1000 

Aggregations

SELECT 

ANY_VALUE(OrderInfo) AS Any_Order

FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`

WHERE 

Address.State = “California”

LIMIT 1000

SELECT 

CustomerID, CustomerName, AVG( OrderInfo.Sales) AS Average_Price

FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`

GROUP BY

CustomerID, CustomerName

ORDER BY 

Average_Price DESC

SELECT 

CustomerID, CustomerName, COUNTIF( OrderInfo.Sales>100) AS Num_Over_100

FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`

GROUP BY

CustomerID, CustomerName

ORDER BY 

Num_Over_100 DESC

SELECT 

STRING_AGG(CustomerName)

FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`

SELECT 

FORMAT(“%T”, ARRAY_AGG(Address.City)) AS Cities

FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`

SELECT 

FORMAT(“%T”, ARRAY_AGG(DISTINCT Address.City)) AS Cities

FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`

SELECT 

FORMAT(“%T”, ARRAY_AGG(Address.City LIMIT 5)) AS Cities

FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`

Subqueries

SELECT 

r.OrderInfo.OrderID, 

r.Product_ID, 

r.CustomerName

FROM (

  SELECT * FROM

 `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`) As r

 ORDER BY

  OrderInfo.OrderDate ASC

SELECT 

r.segment,

SUM(r.sales)

FROM (

  SELECT * FROM

 `spikey-bq-279714.spikeysales_dataset.e_commerce_data` 

  WHERE Address_State =”California”) As r

 GROUP BY

  Segment

SELECT 

Address_State,

SUM(sales) AS Total_Sales

FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data` 

  WHERE Address_State IN (

  SELECT 

   Address_State

   FROM 

   `spikey-bq-279714.spikeysales_dataset.e_commerce_data` 

   WHERE

   Shipmode = ‘Same Day’) 

  GROUP BY Address_State 

Performing Window Operations Using “Partition By” and “Order By”

SELECT 

Segment, SUM(OrderInfo.Sales) OVER (PARTITION BY Segment)

FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested` 

SELECT DISTINCT

Segment, SUM(OrderInfo.Sales) OVER (PARTITION BY Segment)

FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested` 

SELECT CustomerID, CustomerName, OrderInfo.Sales,

Address.State,

RANK() OVER (PARTITION BY Address.State ORDER BY OrderInfo.Sales DESC)

FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested` 

SELECT 

state,

city,

Total_sales,

PERCENT_RANK() OVER (PARTITION BY State ORDER BY Total_Sales ASC) Percentile_Rank

FROM (

SELECT

Address.State AS State, 

Address.City AS City, 

SUM(OrderInfo.Sales) AS Total_Sales

FROM

`spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested` 

GROUP BY

Address.State,

Address.City )

Windowing Operations Using a Window Range

SELECT 

state,

city,

Total_sales,

SUM(Total_Sales) OVER (PARTITION BY State ORDER BY Total_Sales ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) 

FROM (

SELECT

Address.State AS State, 

Address.City AS City, 

SUM(OrderInfo.Sales) AS Total_Sales

FROM

`spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested` 

GROUP BY

Address.State,

Address.City )

datalab create bq-datalab-vm

One comment

  1. Pingback: Week of June 12th | SQL Squirrels

Leave a Reply