The reason people use SQL is because you can run short queries like this:
Example Query:
$ SELECT country customer order 'sum{{price*quantity}}' --FROM cust_order_d.py --WHERE 'price*quantity>0' --GROUP_BY country,customer,order --HAVING 'sum{{price*quantity}}>0' --ORDER_BY 'sum{{price*quantity}}'
And get the answer without writing too much code...
Example INPUT cust_order_d.py with python or simple JASON
[
{'country': 'NZ', 'customer': 'Andrew', 'item': 'Apples', 'order': 1, 'price': 5.0, 'quantity': 2.0},
{'country': 'NZ', 'customer': 'Andrew', 'item': 'Bananas', 'order': 1, 'price': 1.0, 'quantity': 5.0},
{'country': 'NZ', 'customer': 'Andrew', 'item': 'Carrots', 'order': 2, 'price': 2.5, 'quantity': 2.0},
{'country': 'NZ', 'customer': 'Brenda', 'item': 'Apples', 'order': 3, 'price': 5.0, 'quantity': 1.0},
{'country': 'NZ', 'customer': 'Brenda', 'item': 'Banana', 'order': 3, 'price': 1.0, 'quantity': 1.0},
{'country': 'NZ', 'customer': 'Brenda', 'item': 'Apples', 'order': 4, 'price': 5.0, 'quantity': 1.0},
{'country': 'NZ', 'customer': 'Brenda', 'item': 'Carrots', 'order': 4, 'price': 2.5, 'quantity': 1.0},
{'country': 'AU', 'customer': 'Carol', 'item': 'Apples', 'order': 5, 'price': 5.0, 'quantity': 3.0},
{'country': 'AU', 'customer': 'Carol', 'item': 'Bananas', 'order': 5, 'price': 1.0, 'quantity': 6.0},
{'country': 'AU', 'customer': 'Carol', 'item': 'Carrots', 'order': 6, 'price': 2.5, 'quantity': 4.0}
]
Example Output:
[[('country', 'NZ'), ('customer', 'Andrew'), ('order', 2), ("sum(agg['price*quantity'])", 5.0)],
[('country', 'NZ'), ('customer', 'Brenda'), ('order', 3), ("sum(agg['price*quantity'])", 6.0)],
[('country', 'NZ'), ('customer', 'Brenda'), ('order', 4), ("sum(agg['price*quantity'])", 7.5)],
[('country', 'AU'), ('customer', 'Carol'), ('order', 6), ("sum(agg['price*quantity'])", 10.0)],
[('country', 'NZ'), ('customer', 'Andrew'), ('order', 1), ("sum(agg['price*quantity'])", 15.0)],
[('country', 'AU'), ('customer', 'Carol'), ('order', 5), ("sum(agg['price*quantity'])", 21.0)]]
Note that the order sub-total as correctly calculated and sorted on the right hand side...
And that didn't even hurt... :-)
This is only Alpha code, but I am happy to release the code under the following license...
Here is the Code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
end=fi=od=done=esac=yrt=htiw=fed=ssalc=lambda *skip: skip # ¢ replace Bourne's "done" with "od" & add 2¢
import re,time,os,sys,time,string,pprint,pdb
pp=pprint.pprint; width=132
debug=False
from itertools import *
from collections import *
def get_group_by_func(*field_l):
def out(record_d):
return [ record_d[field] if isinstance(field,str) else field(record_d) for field in field_l ]
return out
def get_order_by_func(*field_l):
key_func=get_group_by_func(*field_l)
return lambda a,b: cmp(key_func(a), key_func(b))
def is_solo_arg(arg):
return isinstance(arg,(str,unicode))
class PathDict(object):
def __init__(self,local_d_l=[],global_d={}):
self.local_d_l=local_d_l # .copy()???
if global_d: self.local_d_l+=global_d,
def __getitem__(self,key):
for d in self.local_d_l:
try: return d[key]
except KeyError as error: pass
else: raise
#get=__getitem__
re_var_name=re.compile("^[a-z_][a-z0-9_]*$",re.IGNORECASE)
def threaded_eval(code_l,global_d,local_d_l):
#for code in code_l: yield eval(code,global_d, local_d_l)
try:
return [
(PathDict(local_d_l=local_d_l,global_d=global_d)[code]
if re_var_name.match(code)
else eval(code,global_d,PathDict(local_d_l=local_d_l)))
for code in code_l ]
return [ eval(code,global_d,PathDict(local_d_l)) for code in code_l ]
except Exception as error:
pp((error, code_l, local_d_l[:-2]), width=width)
print('TRACE mode: enter "c" to continue');pdb.set_trace()
def get_agg_dict(key_l, value_l_g):
if False and len(key_l)==1: # False? <= sometime the same agg is used in 2 of SHO :-(
return {key_l[0]:(value_l[0] for value_l in value_l_g)}
else: # ideally this next line would use pythreading
return dict(zip(key_l,zip(*list(value_l_g))))
def trace_group_by(gen_d, group_func):
out_l=[]
for enum, d in enumerate(gen_d):
k=group_func(d)
if not enum or k==prev_k:
out_l+=d,
else:
yield prev_k, out_l
out_l=[d]
prev_k=k
if out_l: yield prev_k, out_l
group_by=groupby
re_parse_middle_expr=re.compile(r"{{((?:[^{}]|}[^}]|{[^{])*)}}") # {{...}} to look up a agg
def parse_agg_expr(expr):
tok_l=re_parse_middle_expr.split(expr)
inner_l=tok_l[1::2]
tok_l[1::2]=["(agg[%r])"%inner for inner in inner_l]
inner_d=OrderedDict(zip(inner_l,tok_l[1::2]))
outer="".join(tok_l)
return outer,inner_d
def open_from(from_name):
if not isinstance(from_name,(unicode,str)):
return from_name
else:
if from_name.endswith(".py"):
return eval("".join(open(from_name,"r")),globals())
elif from_name=="-":
return sys.stdin
def gen_d_from_csv(col_key_l, csv_str, sep=",", conv_d={}):
for line in csv_str.splitlines():
if line:
out=OrderedDict(zip(col_key_l,line.split(sep)))
for key_l,conv_t in conv_d.items():
for key in key_l.split():
out[key]=conv_t(out[key])
yield out
def QUERY(SELECT=None,FROM=None,WHERE=None,GROUP_BY=None,HAVING=None,ORDER_BY=None):
if is_solo_arg(SELECT): SELECT=SELECT.split(",")
if is_solo_arg(GROUP_BY): GROUP_BY=GROUP_BY.split(",")
if not HAVING: HAVING=[]
if is_solo_arg(HAVING): HAVING=HAVING.split(",")
if not ORDER_BY: ORDER_BY=[]
if is_solo_arg(ORDER_BY): ORDER_BY=ORDER_BY.split(",")
if not ORDER_BY:
ORDER_BY=[]
else:
shared_eval_d_l=[]
SELECT=list(SELECT)
FROM=list(open_from(FROM))
if debug: pp((dict(
SELECT=SELECT,
FROM=FROM,
WHERE=WHERE,
GROUP_BY=GROUP_BY,
HAVING=HAVING,
ORDER_BY=ORDER_BY,
)),width=width)
if WHERE: record_d_where_g=( record_d for record_d in FROM if eval(WHERE,globals(), record_d) )
else: record_d_where_g=FROM
SHO=dict(SELECT=SELECT, HAVING=HAVING, ORDER_BY=ORDER_BY,)
outer_SHO_code_d=OrderedDict()
inner_SHO_agg_code_d=OrderedDict()
for keyword,expr_l in SHO.items():
for enum,expr in enumerate(expr_l):
outer,inner_d=parse_agg_expr(expr)
SHO[keyword][enum]=outer
outer_SHO_code_d[outer]=expr
inner_SHO_agg_code_d.update(inner_d)
"""
Wikipedia: The clauses of a query have a particular order of
execution[7], which is denoted by the number on the right hand
side. It is as follows:
* SELECT <columns>: 5
* FROM <table>: 1
* WHERE <predicate on rows>: 2
* GROUP BY <columns>: 3
* HAVING <predicate on groups>: 4
* ORDER BY <columns>: 6
"""
if not GROUP_BY:
out_g=record_d_where_g
else:
group_by_func=get_group_by_func(*GROUP_BY)
for group_value_l, group_record_d_g in group_by(record_d_where_g, group_by_func):
group_d=dict(zip(GROUP_BY,group_value_l))
# Finally!!! aggregate...
inner_agg_val_l_g=(threaded_eval(code_l=inner_SHO_agg_code_d.keys(),global_d=globals(),local_d_l=[group_record_d])
for group_record_d in group_record_d_g )
inner_agg_val_l_of_agg_code=get_agg_dict(inner_SHO_agg_code_d.keys(), inner_agg_val_l_g)
shared_eval_d=dict(zip(outer_SHO_code_d.keys(),
threaded_eval(code_l=outer_SHO_code_d.keys(),
global_d=globals(), local_d_l=[group_d, dict(agg=inner_agg_val_l_of_agg_code)]))
)
if HAVING and not shared_eval_d[HAVING[0]]: continue
if ORDER_BY: shared_eval_d_l+=shared_eval_d, # note the ","
else: yield[(select,shared_eval_d[select]) for select in SELECT]
if ORDER_BY:
shared_eval_d_l.sort(get_order_by_func(*ORDER_BY))
for shared_eval_d in shared_eval_d_l:
yield[(select,shared_eval_d[select]) for select in SELECT]
#yield[shared_eval_d[select] for select in SELECT]
####################################################################
# Unit Test Section
####################################################################
col_key_l="customer country order item price quantity".split()
order_str="""
Andrew,NZ,1,Apples,5.00,2
Andrew,NZ,1,Bananas,1.00,5
Andrew,NZ,2,Carrots,2.50,2
Brenda,NZ,3,Apples,5.00,1
Brenda,NZ,3,Banana,1.00,1
Brenda,NZ,4,Apples,5.00,1
Brenda,NZ,4,Carrots,2.50,1
Carol,AU,5,Apples,5.00,3
Carol,AU,5,Bananas,1.00,6
Carol,AU,6,Carrots,2.50,4
"""
cust_order_product_detail_d_g=gen_d_from_csv(col_key_l, order_str, conv_d={"price quantity":float, "order":int})
OrderedDict=dict
def unittest_1a(): # an example of call from another module
ans=QUERY(
SELECT="country,customer,order,sum{{price*quantity}}",
FROM=cust_order_product_detail_d_g,
WHERE="price*quantity>0.1",
GROUP_BY="country,customer,order",
HAVING='sum{{price*quantity}}>0',
ORDER_BY='sum{{price*quantity}}',
)
pp(list(ans),width=width)
if __name__=="__main__":
# unittest_1a(); sys.exit()
# https://stackoverflow.com/questions/3217673/why-use-argparse-rather-than-optparse
import argparse
parser = argparse.ArgumentParser(description='Pull target_ls from an html file or URL.', epilog="Good luck.")
parser.add_argument('SELECT', nargs='+', help="columns to select")
parser.add_argument("--debug","-X", action="store_true", help="drop into debug if there is a problem")
parser.add_argument("--FROM","-F", default="-", help="name of table to query")
parser.add_argument("--WHERE","-W", default=None, help="predcate on rows")
parser.add_argument("--GROUP_BY","-G", default=None, help="columns to group")
parser.add_argument("--HAVING","-H", default=None, help="predcate on groups")
parser.add_argument("--ORDER_BY","-O", default=None, help="columns to order")
arg_d = parser.parse_args()
debug=arg_d.debug
ans=QUERY(
SELECT=arg_d.SELECT,
# FROM=cust_order_product_detail_d_g,
FROM=arg_d.FROM,
WHERE=arg_d.WHERE,
GROUP_BY=arg_d.GROUP_BY,
HAVING=arg_d.HAVING,
ORDER_BY=arg_d.ORDER_BY,
)
pp(list(ans),width=width)
File: sum10.js
Now I do like anchovies and olives on my Neapolitan pizza … most kids don`t… so (to me) the above Javascript code is REALLY yummy, especially “for(i=1; i<=100; i++)…;” and wonderfully loaded with possibilities and flavour… I can almost smell the code a “C” compiler would generate…
On the other hand the Algol68 code is a bit “OD”…
File: sum10.a68
But ignoring the “OD” we still get “FOR i FROM 1 BY 1 TO 100 DO … OD” … Which I find totally vanilla, and deceptively readable… Maybe even boring.
With JS and a junior, I found it really surprising the number of ways a “newbie” can get the “for(i==1, i<100, i+=i)” code wrong… If you dont believe me, find a code fearing 11th-grader and ask them generate the following primary school multiplication triangle:
As a result, I also needed to tutor (and demonstrate) the difference between “curly brackets”, “round brackets” and “square brackets”…. as I said at the top, I like anchovies and olives, part of the fun of discovering a new Neapolitan pizza, but we should not inflict our acquired tastes on juniors.
I finish with a quote by Daniel Klein: “The only thing that separates us from the animals is superstition and mindless rituals.” (also attributed to – Latka Gravas (Andy Kauffman), Taxi)
ps. the “DO ~ OD” blocks I call “Guarded Blocks” inspired by Dijkstra around 1974. cf. https://www.cs.utexas.edu/users/EWD/transcriptions/EWD04xx/EWD472.html