SELECT * FROM bigquery-public-data.samples.shakespeare Limit 10
Enable GCP Shell
gcloud auth login
$ bq ls
$ bq ls publicdata:samples
$ bq mk spikeysales_dataset
$ bq ls
$ bq rm spikeysales_dataset
$ bq show publicdata:samples
$ bq show publicdata:samples.shakespeare
$ bq query “SELECT * FROM publicdata:samples.shakespeare LIMIT 10”
Creating and Editing Access to a Dataset
bq show –format=prettyjson spikeysales_dataset > dataset_spikeysales.json
bq update –source dataset_spikeysales.json spikeysales_dataset
bq show –format=prettyjson spikeysales_dataset
Creating and Querying Ingestion Time Partitioned Tables
bq show –schema –format=prettyjson spikeysales_dataset.e_commerce_data
[
{
“mode”: “NULLABLE”,
“name”: “RowID”,
“type”: “INTEGER”
},
{
“mode”: “NULLABLE”,
“name”: “OrderID”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “OrderDate”,
“type”: “DATE”
},
{
“mode”: “NULLABLE”,
“name”: “ShipDate”,
“type”: “DATE”
},
{
“mode”: “NULLABLE”,
“name”: “ShipMode”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “CustomerID”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “CustomerName”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Segment”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Address_Country”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Address_City”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Address_State”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Address_Postal_Code”,
“type”: “INTEGER”
},
{
“mode”: “NULLABLE”,
“name”: “Address_Region”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Product_ID”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Category”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Sub_Category”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Product_Name”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Sales”,
“type”: “FLOAT”
},
{
“mode”: “NULLABLE”,
“name”: “Quantity”,
“type”: “INTEGER”
},
{
“mode”: “NULLABLE”,
“name”: “Discount”,
“type”: “FLOAT”
},
{
“mode”: “NULLABLE”,
“name”: “Profit”,
“type”: “FLOAT”
},
{
“mode”: “NULLABLE”,
“name”: “row_number”,
“type”: “INTEGER”
}
]
Creating Column Based Partitioned Tables Using the Command Line
bq mk –table –schema OrderID:STRING,OrderDate:DATE,Quantity:INTEGER –time_partitioning_field OrderDate spikey-bq-279714:spikeysales_dataset.order_data
bq –location=US query –nouse_legacy_sql –destination_table=spikey-bq-279714:spikeysales_dataset.order_data ‘SELECT OrderID,OrderDate,Quantity FROM spikeysales_dataset.e_commerce_data WHERE OrderDate is NOT NULL LIMIT 1000’
bq show spikeysales_dataset.order_data
bq –location=US query –nouse_legacy_sql ‘SELECT * FROM `spikey-bq-279714.spikeysales_dataset.order_data` WHERE Orderdate=DATE(“2014-11-01”)’
Populating Data into a Table with Nested Fields Using STRUCT
bq show –schema –format=prettyjson spikeysales_dataset.e_commerce_partitioned
[
{
“mode”: “NULLABLE”,
“name”: “RowID”,
“type”: “INTEGER”
},
{
“mode”: “NULLABLE”,
“name”: “OrderID”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “OrderDate”,
“type”: “DATE”
},
{
“mode”: “NULLABLE”,
“name”: “ShipDate”,
“type”: “DATE”
},
{
“mode”: “NULLABLE”,
“name”: “ShipMode”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “CustomerID”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “CustomerName”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Segment”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Address_Country”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Address_City”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Address_State”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Address_Postal_Code”,
“type”: “INTEGER”
},
{
“mode”: “NULLABLE”,
“name”: “Address_Region”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Product_ID”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Category”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Sub_Category”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Product_Name”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Sales”,
“type”: “FLOAT”
},
{
“mode”: “NULLABLE”,
“name”: “Quantity”,
“type”: “INTEGER”
},
{
“mode”: “NULLABLE”,
“name”: “Discount”,
“type”: “FLOAT”
},
{
“mode”: “NULLABLE”,
“name”: “Profit”,
“type”: “FLOAT”
},
{
“mode”: “NULLABLE”,
“name”: “row_number”,
“type”: “INTEGER”
}
]
Design Schema with STRUCT
[
{
“mode”: “NULLABLE”,
“name”: “RowID”,
“type”: “INTEGER”
},
{
“mode”: “NULLABLE”,
“name”: “OrderInfo”,
“type”: “RECORD”,
“fields”: [
{
“mode”: “NULLABLE”,
“name”: “OrderID”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “OrderDate”,
“type”: “DATE”
},
{
“mode”: “NULLABLE”,
“name”: “Sales”,
“type”: “FLOAT”
},
{
“mode”: “NULLABLE”,
“name”: “Quantity”,
“type”: “INTEGER”
}
]
},
{
“mode”: “NULLABLE”,
“name”: “ShipDate”,
“type”: “DATE”
},
{
“mode”: “NULLABLE”,
“name”: “ShipMode”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “CustomerID”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “CustomerName”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Segment”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Address”,
“type”: “RECORD”,
“fields”: [
{
“mode”: “NULLABLE”,
“name”: “Country”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “City”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “State”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Postal_Code”,
“type”: “INTEGER”
},
{
“mode”: “NULLABLE”,
“name”: “Region”,
“type”: “STRING”
}
]
},
{
“mode”: “NULLABLE”,
“name”: “Product_ID”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Category”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Sub_Category”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Product_Name”,
“type”: “STRING”
},
{
“mode”: “NULLABLE”,
“name”: “Discount”,
“type”: “FLOAT”
},
{
“mode”: “NULLABLE”,
“name”: “Profit”,
“type”: “FLOAT”
},
{
“mode”: “NULLABLE”,
“name”: “row_number”,
“type”: “INTEGER”
}
]
Populate Data from existing Table
INSERT INTO
`spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`
(ROWID,
OrderInfo,
ShipDate,
ShipMode,
CustomerID,
CustomerName,
Segment,
Address,
Product_ID,
Category,
Sub_Category,
Product_Name,
Discount,
Profit )
SELECT
RowID,
STRUCT (OrderID, OrderDate, Sales, Quantity),
ShipDate,
ShipMode,
CustomerID,
CustomerName,
Segment,
STRUCT (Address_Country,
Address_City,
Address_State,
Address_Postal_Code,
Address_Region),
Product_ID,
Category,
Sub_Category,
Product_Name,
Discount,
Profit
FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_partitioned`
Working with Repeated Fields
INSERT INTO
`spikey-bq-279714.spikeysales_dataset.customer_order_details`
(customer_name,order_id, product_names)
VALUES
(“John”, 434, [‘Trousers’, ‘Notebook’, ‘Football’,’Avocado’])
Populating Tables with Repeated Fields Using ARRAY_AGG
INSERT INTO
`spikey-bq-279714.spikeysales_dataset.customer_product_price` (
CustomerID,
Products_Bought,
Price_Paid )
SELECT
CustomerID,
ARRAY_AGG(Product_Name) AS Products_Bought,
ARRAY_AGG(Sales) AS Price_Paid
FROM
`spikey-bq-279714.spikeysales_dataset.e_commerce_data`
GROUP BY
CustomerID
Using Nested and Repeated Fields Together
SELECT
Address_City,
ARRAY_AGG (STRUCT (CustomerID,
CustomerName)) AS CustomerInfo
FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data`
GROUP BY
Address_City
ORDER BY
Address_City
SELECT
Address_State,
ARRAY_AGG (STRUCT (OrderDate,
OrderID,
Sales,
Quantity)) AS OrderInfoInfo
FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data`
GROUP BY
Address_State
ORDER BY
Address_State
Using UNNEST to Query Repeated Fields
SELECT CustomerID, Products
FROM `spikey-bq-279714.spikeysales_dataset.customer_product_price`,UNNEST(Products_Bought) as Products
LIMIT 1000
SELECT CustomerID, Products, Price
FROM `spikey-bq-279714.spikeysales_dataset.customer_product_price`,UNNEST(Products_Bought) as Products, UNNEST(Price_paid) as
Price
LIMIT 1000
SELECT
Address_State,
Orders
FROM `spikey-bq-279714.spikeysales_dataset.orders_by_state` ,
UNNEST( OrderInfoInfo ) as Orders
LIMIT 1000
Aggregations
SELECT
ANY_VALUE(OrderInfo) AS Any_Order
FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`
WHERE
Address.State = “California”
LIMIT 1000
SELECT
CustomerID, CustomerName, AVG( OrderInfo.Sales) AS Average_Price
FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`
GROUP BY
CustomerID, CustomerName
ORDER BY
Average_Price DESC
SELECT
CustomerID, CustomerName, COUNTIF( OrderInfo.Sales>100) AS Num_Over_100
FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`
GROUP BY
CustomerID, CustomerName
ORDER BY
Num_Over_100 DESC
SELECT
STRING_AGG(CustomerName)
FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`
SELECT
FORMAT(“%T”, ARRAY_AGG(Address.City)) AS Cities
FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`
SELECT
FORMAT(“%T”, ARRAY_AGG(DISTINCT Address.City)) AS Cities
FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`
SELECT
FORMAT(“%T”, ARRAY_AGG(Address.City LIMIT 5)) AS Cities
FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`
Subqueries
SELECT
r.OrderInfo.OrderID,
r.Product_ID,
r.CustomerName
FROM (
SELECT * FROM
`spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`) As r
ORDER BY
OrderInfo.OrderDate ASC
SELECT
r.segment,
SUM(r.sales)
FROM (
SELECT * FROM
`spikey-bq-279714.spikeysales_dataset.e_commerce_data`
WHERE Address_State =”California”) As r
GROUP BY
Segment
SELECT
Address_State,
SUM(sales) AS Total_Sales
FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data`
WHERE Address_State IN (
SELECT
Address_State
FROM
`spikey-bq-279714.spikeysales_dataset.e_commerce_data`
WHERE
Shipmode = ‘Same Day’)
GROUP BY Address_State
Performing Window Operations Using “Partition By” and “Order By”
SELECT
Segment, SUM(OrderInfo.Sales) OVER (PARTITION BY Segment)
FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`
SELECT DISTINCT
Segment, SUM(OrderInfo.Sales) OVER (PARTITION BY Segment)
FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`
SELECT CustomerID, CustomerName, OrderInfo.Sales,
Address.State,
RANK() OVER (PARTITION BY Address.State ORDER BY OrderInfo.Sales DESC)
FROM `spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`
SELECT
state,
city,
Total_sales,
PERCENT_RANK() OVER (PARTITION BY State ORDER BY Total_Sales ASC) Percentile_Rank
FROM (
SELECT
Address.State AS State,
Address.City AS City,
SUM(OrderInfo.Sales) AS Total_Sales
FROM
`spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`
GROUP BY
Address.State,
Address.City )
Windowing Operations Using a Window Range
SELECT
state,
city,
Total_sales,
SUM(Total_Sales) OVER (PARTITION BY State ORDER BY Total_Sales ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
FROM (
SELECT
Address.State AS State,
Address.City AS City,
SUM(OrderInfo.Sales) AS Total_Sales
FROM
`spikey-bq-279714.spikeysales_dataset.e_commerce_data_nested`
GROUP BY
Address.State,
Address.City )
datalab create bq-datalab-vm
One comment