编程语言
首页 > 编程语言> > C1PythonCookBook

C1PythonCookBook

作者:互联网

# set jupyter notebook theme
# jt -t monokai -f roboto -fs 12 -ofs 10
def SEE(var):
    """Placeholder for a "print a variable's name and value" helper.

    The author's attempt at writing such a formatter was abandoned (the
    commented-out drafts that follow show the idea), so this function
    deliberately ignores ``var`` and does nothing.
    """
    return None
# Convention: use ## (two pound signs) for knowledge descriptions;
# use # (one pound sign) for ordinary comments, such as explanations of the program
    #  or error information.
#     callers_local_vars = inspect.currentframe().f_back.f_locals.items()
#     a = [var_name for var_name, var_val in callers_local_vars if var_val is var]
#     print(a,var)
    # print(str(x)+":",x)

# import inspect
# def retrieve_name(var):
#     callers_local_vars = inspect.currentframe().f_back.f_locals.items()
#     return [var_name for var_name, var_val in callers_local_vars if var_val is var]

# A, B, C = [1, 2, 3], [4, 5, 6], [7, 8, 9]
# ls = [A, B, C]
# for i in ls:
#     i1 = retrieve_name(i)[0]
#     print('%s ='%i1, i)
# words = [
#  'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
#  'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
#  'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
#  'my', 'eyes', "you're", 'under'
# ]
# from collections import Counter
# word_counts = Counter(words)
# SEE(word_counts['eyes'])
# # word_counts['eyes']

1.1. Unpacking a Sequence into Separate Variables

data = ["monday",1,0.5,(1,2,3)]
x, y, z, t = data
print(x,y,z,t)
t1,t2,t3=t
_,middle,_ = t # a throwaway variable name
print(t1,t2,t3)
print(middle)
#x1,x2 = t
#ValueError: too many values to unpack (expected 2)
## unpacking works with any iterable value, such as a string, not just tuples or lists
str1 = "Hello"
a,b,c,d,e = str1
print(c)
monday 1 0.5 (1, 2, 3)
1 2 3
2
l

1.2. Unpacking Elements from Iterables of Arbitrary Length

## Star expression
def drop_first_and_last(grades):
    """Return the mean of ``grades`` after discarding the first and last item.

    Args:
        grades: An iterable of numbers containing at least three items.

    Returns:
        The arithmetic mean of the items strictly between the first and last.

    Raises:
        ValueError: If fewer than three grades are supplied, either from the
            unpacking itself (fewer than two items) or because nothing is
            left to average (exactly two items).
    """
    # Star expression: ``middle`` always receives a list, possibly empty.
    _first, *middle, _last = grades
    if not middle:
        raise ValueError("need at least three grades to drop the first and last")
    # The original called an undefined ``avg`` helper; compute the mean inline.
    return sum(middle) / len(middle)
## Star expression
record = ["Crazy Dave",21,'wabbywabbo@PVZ.com','111-222-333','444-555-666',1]
name, age, mailaddress, *telephone = record
print(name,age,mailaddress,telephone)
# *telephone becomes a list holding every remaining element: the two phone numbers and the number 1

## iterating over a sequence of tuples of varying length ,a sequence of tagged tuples
# A sequence of tagged tuples: the first element names the handler, the
# remaining elements are that handler's arguments.
record = [('foo', 1, 2), ('kee', 10, 11)]


def do_foo(x, y):
    """Print the 'foo' tag followed by both arguments."""
    print('foo', x, y)


def do_kee(x, y):
    """Print the 'kee' tag followed by both arguments."""
    print('kee', x, y)


for tag, *args in record:
    # Records carrying any other tag are silently skipped.
    if tag not in ('foo', 'kee'):
        continue
    # Don't forget the asterisk: it unpacks the collected list into x and y.
    (do_foo if tag == 'foo' else do_kee)(*args)

## Star unpacking can also be useful when combined with certain kinds of string processing
## operations, such as splitting.
line = 'nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false'
uname, *fields, homedir, sh = line.split(':')
print(uname, homedir, sh)
print(f"fields:{fields}")

## Star unpacking can also bind a throwaway variable name
record = ('ACME',50, 123.45,(12,18,2012))
name, *_, (*_, year) = record
print(name, year)

## split a list into head and tail components
items = [0,1,2,3,4]
head, *tail = items
print(head)
print(f'tail:{tail}')
# print(f'*tail:{*tail}') SyntaxError: f-string: can't use starred expression here
print('*tail:',*tail)
Crazy Dave 21 wabbywabbo@PVZ.com ['111-222-333', '444-555-666', 1]
foo 1 2
kee 10 11
nobody /var/empty /usr/bin/false
fields:['*', '-2', '-2', 'Unprivileged User']
ACME 2012
0
tail:[1, 2, 3, 4]
*tail: 1 2 3 4

?1.3. Keeping the Last N Items

## what is "yields"? Page 24
## deque()
from collections import deque #NameError: name 'deque' is not defined
q = deque(maxlen=2) # deque() with maxlen setted
q.append(1)
q.append(2)
print(q)
q.append(3)
print(q)
q.appendleft(-1)
print(q)
q.popleft()
print(q)
q.pop()
print(q)
# if you don't set 'maxlen': there is something different
p=deque()
p.append(1)
p.append(2)
print(p)
p.append(3)
print(p)
p.appendleft(-1)
print(p)
p.pop()
print(p) # pop() will delete the newest elements in deque()
p.popleft()
print(p) # popleft() will delete the oldest elements in deque()
deque([1, 2], maxlen=2)
deque([2, 3], maxlen=2)
deque([-1, 2], maxlen=2)
deque([2], maxlen=2)
deque([], maxlen=2)
deque([1, 2])
deque([1, 2, 3])
deque([-1, 1, 2, 3])
deque([-1, 1, 2])
deque([1, 2])

1.4. Finding the Largest or Smallest N Items

## heapq module
import heapq
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3,nums)) #print [42,37,23]
print(heapq.nsmallest(3,nums))#print [-4,1,2]

# using a key parameter in complicated data structure
portfolio = [
 {'name': 'IBM', 'shares': 100, 'price': 91.1},
 {'name': 'AAPL', 'shares': 50, 'price': 543.22},
 {'name': 'FB', 'shares': 200, 'price': 21.09},
 {'name': 'HPQ', 'shares': 35, 'price': 31.75},
 {'name': 'YHOO', 'shares': 45, 'price': 16.35},
 {'name': 'ACME', 'shares': 75, 'price': 115.65}
]
cheap = heapq.nsmallest(3,portfolio,key= lambda s:s['price'])
expensive = heapq.nlargest(3,portfolio,key = lambda s:s['price'])
print(cheap)
print(expensive)
# they work by first converting the data into a list where items are ordered as a heap
# The most important feature of a heap is that heap[0] is always the smallest item.
# heapq.heappop() method, which pops off the first item and replaces it with the next smallest item
# If you are simply trying to find the single smallest or largest item (N=1), it is faster to use min() and max().
# Similarly, if N is about the same size as the collection itself, it is usually faster to sort it first and take a slice (i.e., use sorted(items)[:N] or sorted(items)[-N:]).
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(nums)
heap = list(nums)
print("before heapify:",heap)
heapq.heapify(heap)
print("after heapify: ",heap)
print("pop:",heapq.heappop(heap))
print(heap)
print("pop:",heapq.heappop(heap))
print(heap)
print("pop:",heapq.heappop(heap))
print(heap)
[42, 37, 23]
[-4, 1, 2]
[{'name': 'YHOO', 'shares': 45, 'price': 16.35}, {'name': 'FB', 'shares': 200, 'price': 21.09}, {'name': 'HPQ', 'shares': 35, 'price': 31.75}]
[{'name': 'AAPL', 'shares': 50, 'price': 543.22}, {'name': 'ACME', 'shares': 75, 'price': 115.65}, {'name': 'IBM', 'shares': 100, 'price': 91.1}]
[1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
before heapify: [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
after heapify:  [-4, 2, 1, 23, 7, 2, 18, 23, 42, 37, 8]
pop: -4
[1, 2, 2, 23, 7, 8, 18, 23, 42, 37]
pop: 1
[2, 2, 8, 23, 7, 37, 18, 23, 42]
pop: 2
[2, 7, 8, 23, 42, 37, 18, 23]

?1.5. Implementing a Priority Queue

## some problem in self: push(self, item, priority) ,q.push(Item('foo'),1)

1.6. Mapping Keys to Multiple Values in a Dictionary

## defaultdict
from collections import defaultdict
d = defaultdict(list) # list use .append() method to add elements
d['a'].append(1)
d['a'].append(2)
d['b'].append(4)
print(d)
d = defaultdict(set) # set use .add() method to add elements
d['a'].add(1)
d['a'].add(2)
d['a'].add(1)
d['b'].add(4)
print(d)
d = {} # a regular dictionary use setdefault() method to add elements
# without setdefault() you would have to initialize each key yourself (d[key] = []) before appending to it
d.setdefault('a',[]).append(1)
d.setdefault('a',[]).append(2)
d.setdefault('b',[]).append(3)
print(d)
defaultdict(<class 'list'>, {'a': [1, 2], 'b': [4]})
defaultdict(<class 'set'>, {'a': {1, 2}, 'b': {4}})
{'a': [1, 2], 'b': [3]}

1.7. Keeping Dictionaries in Order

## OrderedDict
from collections import OrderedDict
d = OrderedDict() 
# It exactly preserves the original insertion order of data when iterating
d['foo'] = 1
d['Crazy'] = 2
d['Dave'] = 3
for key in d:
    print(key, d[key])
import json # json.dumps()
json.dumps(d)
foo 1
Crazy 2
Dave 3





'{"foo": 1, "Crazy": 2, "Dave": 3}'

1.8. Calculating with Dictionaries

## sorted(zip())
prices = {
 'ACME': 45.23,
 'AAPL': 612.78,
 'IBM': 205.55,
 'HPQ': 37.20,
 'FB': 10.75
}
max_price = max(zip(prices.values(),prices.keys()))
min_price = min(zip(prices.values(),prices.keys()))
print("max price:",max_price)
print("min price:",min_price)
sorted_prices = sorted(zip(prices.values(),prices.keys()))
print(sorted_prices)
#  zip() creates an iterator that can only be consumed once.
price_and_names = zip(prices.values(),prices.keys())
print("price_and_names:",price_and_names)
print(min(price_and_names))
#print(max(price_and_names)) # ValueError: max() arg is an empty sequence
print(max(prices)) # max of key
print(max(prices.values())) # use values() to find max or min
print(min(prices, key=lambda k:prices[k])) # use lambda to find key
print(prices[min(prices, key=lambda k:prices[k])]) # use lambda and key to find value
values_equal_key_differ = {'AAA':12,'ZZZ':12}
print("when values are equal, compare to keys, max:",\
      max(zip(values_equal_key_differ.values(),values_equal_key_differ.keys())))
max price: (612.78, 'AAPL')
min price: (10.75, 'FB')
[(10.75, 'FB'), (37.2, 'HPQ'), (45.23, 'ACME'), (205.55, 'IBM'), (612.78, 'AAPL')]
price_and_names: <zip object at 0x0000025DC6C204C0>
(10.75, 'FB')
IBM
612.78
FB
10.75
when values are equal, compare to keys, max: (12, 'ZZZ')

1.9. Finding Commonalities in Two Dictionaries

## & - operations on dictionaries
a = {
    'x':1,
    'y':2,
    'z':3
}
b = {
    'w':1,
    'y':2,
    'z':-3
}
print("a:",a)
print("b:",b)
print("a.keys() & b.keys()",a.keys() & b.keys())
print("a.keys() - b.keys()",a.keys() - b.keys())
print("a.items() & b.items()",a.items() & b.items())
# make a new dictionary with certain keys removed
c = {key:a[key] for key in a.keys() - {'w','z'}}
print("c:",c)
a: {'x': 1, 'y': 2, 'z': 3}
b: {'w': 1, 'y': 2, 'z': -3}
a.keys() & b.keys() {'z', 'y'}
a.keys() - b.keys() {'x'}
a.items() & b.items() {('y', 2)}
c: {'x': 1, 'y': 2}

?1.10. Removing Duplicates from a Sequence while Maintaining Order

## I don't know what is a generator and what's its function here

1.11. Naming a Slice

## Naming a slice
######     0123456789012345678901234567890123456789012345678901234567890'
record = '....................100          .......513.25     ..........'
# readability and maintenance mess
cost = int(record[20:32]) * float(record[40:50])
print(cost)
# naming a slice
SHARE = slice(20,32)
PRICE = slice(40,50)
print("SHARE=record[20:32]  :",SHARE)
SHARE_RECORD = slice(20,32,record)
print("SHARE_RECORD = slice(20,32,record)  :",SHARE_RECORD)
#print(int(record[SHARE_RECORD])) //TypeError: slice indices must be integers or None or have an __index__ method
print(int(record[SHARE]))
print(float(record[PRICE]))
result = int(record[SHARE]) * float(record[PRICE])
print(result)
# more about slice()
items = [0,1,2,3,4,5,6]
a = slice(2,4) # elements on index 4 is not included in a
print(items[2:4],items[a])
items[a] = [-2,-3]
print(items)
del items[a]
print(items)
# s.start, s.stop and s.step are attributes (not methods); s.indices(size) is a method
b = slice(1, 5, 2)
print("b:",b)
print("b.start:",b.start)
print("b.stop:",b.stop)
print("b.step:",b.step)
print("b.indices:",b.indices)
items = [0,1,2,3,4,5,6]
print("items:",items)
print("items[b]:",items[b])
# slice.indices(size) maps a slice onto a sequence of the given length and
# returns a (start, stop, step) tuple clipped to fit within its bounds, so the
# result can be fed to range() without risking an IndexError when indexing.
#
# NOTE: the sample string is bound to `text`, not `str`. Rebinding `str` hides
# the builtin for the rest of the session and makes every later str(x) call
# fail with "TypeError: 'str' object is not callable".
# The printed labels keep their original wording so the output is unchanged.
text = 'HelloWorld'
for c in (slice(10, 50, 2), slice(20, 50, 2), slice(1, 20, 2)):
    print("c:", c)
    print("c.indices(len(str)):", c.indices(len(text)))
# `c` is still the last slice, slice(1, 20, 2), which the loop below uses.
print("HelloWorld\n" +
      "0123456789")
for i in range(*c.indices(len(text))):
    print(text[i])
51325.0
SHARE=record[20:32]  : slice(20, 32, None)
SHARE_RECORD = slice(20,32,record)  : slice(20, 32, '....................100          .......513.25     ..........')
100
513.25
51325.0
[2, 3] [2, 3]
[0, 1, -2, -3, 4, 5, 6]
[0, 1, 4, 5, 6]
b: slice(1, 5, 2)
b.start: 1
b.stop: 5
b.step: 2
b.indices: <built-in method indices of slice object at 0x00000249C116C9C0>
items: [0, 1, 2, 3, 4, 5, 6]
items[b]: [1, 3]
c: slice(10, 50, 2)
c.indices(len(str)): (10, 10, 2)
c: slice(20, 50, 2)
c.indices(len(str)): (10, 10, 2)
c: slice(1, 20, 2)
c.indices(len(str)): (1, 10, 2)
HelloWorld
0123456789
e
l
W
r
d

1.12. Determining the Most Frequently Occurring Items in a Sequence

## collections.Counter.most_common()
words = [
 'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
 'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
 'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
 'my', 'eyes', "you're", 'under'
]
from collections import Counter

# A Counter is a dictionary that maps each item to its number of occurrences.
word_counts = Counter(words)
# Call most_common() on the instance; going through the class
# (Counter.most_common(word_counts, 3)) gives the same result but is not idiomatic.
top_three = word_counts.most_common(3)
print(top_three)
print("word_counts['eyes']:", word_counts['eyes'])
# Increment the counts manually with plain addition; a missing key counts as 0.
print("before add morewords:", word_counts)
morewords = ['why', 'are', 'you', 'not', 'looking', 'in', 'my', 'eyes']
for word in morewords:
    word_counts[word] += 1
print("after add morewords:", word_counts)
# word_counts.update(morewords) is the one-call alternative to the loop above.
# Counter instances also support arithmetic: + adds the counts, - subtracts
# them and drops every entry whose result is not positive.
a = Counter(words)
b = Counter(morewords)
print("\n", a + b)
print(a - b)
[('eyes', 8), ('the', 5), ('look', 4)]
word_counts['eyes']: 8
before add morewords: Counter({'eyes': 8, 'the': 5, 'look': 4, 'into': 3, 'my': 3, 'around': 2, 'not': 1, "don't": 1, "you're": 1, 'under': 1})
after add morewords: Counter({'eyes': 9, 'the': 5, 'look': 4, 'my': 4, 'into': 3, 'not': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1, 'why': 1, 'are': 1, 'you': 1, 'looking': 1, 'in': 1})

 Counter({'eyes': 9, 'the': 5, 'look': 4, 'my': 4, 'into': 3, 'not': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1, 'why': 1, 'are': 1, 'you': 1, 'looking': 1, 'in': 1})
Counter({'eyes': 7, 'the': 5, 'look': 4, 'into': 3, 'my': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1})

1.13. Sorting a List of Dictionaries by a Common Key Problem

## operator.itemgetter()
rows = [
 {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
 {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
 {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
 {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]
from operator import itemgetter
# sortBy_fname = sorted(rows,key='fname') //TypeError: 'str' object is not callable
rows_by_fname = sorted(rows,key=itemgetter('fname'))
rows_by_uid_reverse = sorted(rows,key=itemgetter('uid'),reverse=True)
# multiple keys
rows_by_lfnames = sorted(rows,key=itemgetter('lname','fname'))
print("rows_by_fname:\n",rows_by_fname)
print("rows_by_uid_reverse:\n",rows_by_uid_reverse)
print("rows_by_lfnames:\n",rows_by_lfnames)
# replaced by lambda expressions
rows_by_fname_lambda = sorted(rows,key= lambda k:k['fname'])
# rows_by_lfname_lambda = sorted(rows,key= lambda k:k['lname','fname']) //WRONG
# rows_by_lfname_lambda = sorted(rows,key= lambda k:k['lname'],['fname']) //WRONG
# //SyntaxError: positional argument follows keyword argument
# rows_by_lfname_lambda = sorted(rows,key= lambda k:k['lname'],k['fname']) //WRONG
# //SyntaxError: positional argument follows keyword argument
# rows_by_lfname_lambda = sorted(rows,key= lambda k:k(['lname'],['fname'])) //WRONG
# //TypeError: 'dict' object is not callable
rows_by_lfname_lambda = sorted(rows,key= lambda k:(k['lname'],k['fname']))
print("rows_by_fname_lambda:\n",rows_by_fname_lambda)
print("rows_by_lfname_lambda:\n",rows_by_lfname_lambda)
# max() min() method
print("\n",min(rows, key=itemgetter('uid')))
rows_by_fname:
 [{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
rows_by_uid_reverse:
 [{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
rows_by_lfnames:
 [{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]
rows_by_fname_lambda:
 [{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
rows_by_lfname_lambda:
 [{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]

 {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}

1.14. Sorting Objects Without Native Comparison Support

## use lambda or operator.attrgetter() to sort objects
class User:
    """Minimal record type that defines no ordering of its own.

    Instances are compared in the surrounding examples only through a
    ``key`` function that reads ``user_id``.
    """

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        # The attribute must be reached through self; a bare `user_id`
        # here raises "NameError: name 'user_id' is not defined".
        return f'User({self.user_id})'
users = [User(3), User(250), User(-1)]
print("before sorting, users:",users)
# sorted(users, key=lambda u: u.user_id) returns a new list and leaves `users` unchanged, so calling it on its own line here would have no effect on the next line
print("after sorting,  users:",sorted(users, key= lambda u:u.user_id))

#  an alternative approach is to use operator.attrgetter()
from operator import attrgetter
print("use attrgetter():",sorted(users,key=attrgetter('user_id')))
# attrgetter() is analogous to itemgetter(), they are both a tad bit faster than lambda 
# expression, and they both support multiple fields when sorting, for example, you can run
# by_name = sorted(users, key=attrgetter('last_name', 'first_name'))
# Also, max() min() methods are also OK
print("min user_id:",min(users, key=attrgetter('user_id')))
before sorting, users: [User(3), User(250), User(-1)]
after sorting,  users: [User(-1), User(3), User(250)]
use attrgetter(): [User(-1), User(3), User(250)]
min user_id: User(-1)

1.15. Grouping Records Together Based on a Field

## Group a list of dictionaries by 'date'
rows = [
 {'address': '5412 N CLARK', 'date': '07/01/2012'},
 {'address': '5148 N CLARK', 'date': '07/04/2012'},
 {'address': '5800 E 58TH', 'date': '07/02/2012'},
 {'address': '2122 N CLARK', 'date': '07/03/2012'},
 {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
 {'address': '1060 W ADDISON', 'date': '07/02/2012'},
 {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
 {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]
# first sort by 'date'
#  Since groupby() only examines consecutive items, failing to sort first won’t group
#  the records as you want.
from operator import itemgetter
rows.sort(key=itemgetter('date')) # use itemgetter() 
print(rows)
# then group by 'date', use itertools.groupby()
from itertools import groupby
print("groupby(rows,itemgetter('date')): \n",groupby(rows,itemgetter('date')))
for date,items in groupby(rows,itemgetter('date')):
    print(date)
    print(items)
    for item in items:
        print("\t",item)
print("\n")
#  group the data together by dates into a large data structure
#  by this way, you don't need to sort the records first
from collections import defaultdict
rows_by_date = defaultdict(list)
rows = [
 {'address': '5412 N CLARK', 'date': '07/01/2012'},
 {'address': '5148 N CLARK', 'date': '07/04/2012'},
 {'address': '5800 E 58TH', 'date': '07/02/2012'},
 {'address': '2122 N CLARK', 'date': '07/03/2012'},
 {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
 {'address': '1060 W ADDISON', 'date': '07/02/2012'},
 {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
 {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]
# group items by 'date' in a multidict 
for row in rows:
    rows_by_date[row['date']].append(row)
# print rows_by_date in a beautiful way
for date in rows_by_date:
    print(rows_by_date[date])
[{'address': '5412 N CLARK', 'date': '07/01/2012'}, {'address': '4801 N BROADWAY', 'date': '07/01/2012'}, {'address': '5800 E 58TH', 'date': '07/02/2012'}, {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}, {'address': '1060 W ADDISON', 'date': '07/02/2012'}, {'address': '2122 N CLARK', 'date': '07/03/2012'}, {'address': '5148 N CLARK', 'date': '07/04/2012'}, {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}]
groupby(rows,itemgetter('date')): 
 <itertools.groupby object at 0x00000249A87B23B0>
07/01/2012
<itertools._grouper object at 0x00000249C162A670>
	 {'address': '5412 N CLARK', 'date': '07/01/2012'}
	 {'address': '4801 N BROADWAY', 'date': '07/01/2012'}
07/02/2012
<itertools._grouper object at 0x00000249C1429370>
	 {'address': '5800 E 58TH', 'date': '07/02/2012'}
	 {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}
	 {'address': '1060 W ADDISON', 'date': '07/02/2012'}
07/03/2012
<itertools._grouper object at 0x00000249C0FD54C0>
	 {'address': '2122 N CLARK', 'date': '07/03/2012'}
07/04/2012
<itertools._grouper object at 0x00000249C0FE3940>
	 {'address': '5148 N CLARK', 'date': '07/04/2012'}
	 {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}


[{'address': '5412 N CLARK', 'date': '07/01/2012'}, {'address': '4801 N BROADWAY', 'date': '07/01/2012'}]
[{'address': '5148 N CLARK', 'date': '07/04/2012'}, {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}]
[{'address': '5800 E 58TH', 'date': '07/02/2012'}, {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}, {'address': '1060 W ADDISON', 'date': '07/02/2012'}]
[{'address': '2122 N CLARK', 'date': '07/03/2012'}]

1.16. Filtering Sequence Elements

# filter sequence data use a list comprehension
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
print("mylist:",mylist)
print("filter sequence data use a list comprehension:")
print([n for n in mylist if n > 0 ])
print([n for n in mylist if n < 0 ])
# use generator expressions
pos = (n for n in mylist if n > 0)
print("generator expressions:",pos)
for x in pos:
    print(x)
# use filter when criteria is complicated or involves exception handling
values = ['1', '2', '-3', '-', '4', 'N/A', '5']
def is_int(val):
    """Return True if ``int(val)`` succeeds, False if it raises ValueError.

    Only ValueError is treated as "not an integer"; any other exception
    raised by ``int()`` propagates to the caller.
    """
    try:  # pay attention to how try/except clauses are written in Python
        int(val)  # only success or failure matters; the converted value is discarded
    except ValueError:
        return False
    return True
int_eles = list(filter(is_int,values))
print("values:",values)
print("use filter function to get int elements:",int_eles)
# transform the data when using a list comprehension
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
import math
print("filter all positive elements and do sqrt() on them at the same time:")
print([math.sqrt(n) for n in mylist if n > 0])
# use if-else clause to replace values that don't meet the criteria
print("use if-else clause to replace values that don't meet the criteria:")
#  print([n for n in mylist if n > 0 else 0]) WRONG
print([n if n > 0 else 0 for n in mylist])# note that if-else clause should be placed ahead
# use itertools.compress: keep the items of one sequence whose matching entry
# in a parallel sequence of booleans is true
addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    # The comma after this entry was missing, so Python concatenated it with the
    # next string; the list then had 7 items and no longer lined up with `counts`.
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]
counts = [0, 3, 10, 4, 1, 7, 6, 1]
from itertools import compress

# One boolean per address: True where the matching count is greater than 5.
more5 = [n > 5 for n in counts]
print("more5:", more5)
# With all eight addresses aligned to `counts`, the selection is
# ['5800 E 58TH', '1060 W ADDISON', '4801 N BROADWAY'].
selected_addresses = list(compress(addresses, more5))
print("use compress:\n", selected_addresses)
mylist: [1, 4, -5, 10, -7, 2, 3, -1]
filter sequence data use a list comprehension:
[1, 4, 10, 2, 3]
[-5, -7, -1]
generator expressions: <generator object <genexpr> at 0x00000249C2D83C80>
1
4
10
2
3
values: ['1', '2', '-3', '-', '4', 'N/A', '5']
use filter function to get int elements: ['1', '2', '-3', '4', '5']
filter all positive elements and do sqrt() on them at the same time:
[1.0, 2.0, 3.1622776601683795, 1.4142135623730951, 1.7320508075688772]
use if-else clause to replace values that don't meet the criteria:
[1, 4, 0, 10, 0, 2, 3, 0]
more5: [False, False, True, False, False, True, True, False]
use compress:
 ['5800 E 58TH', '4801 N BROADWAY', '1039 W GRANVILLE']

1.17. Extracting a Subset of a Dictionary

# use dictionary comprehension
prices = {
 'ACME': 45.23,
 'AAPL': 612.78,
 'IBM': 205.55,
 'HPQ': 37.20,
 'FB': 10.75
}
print("all stocks:",prices)
# pMore200 = {key:value for key:value in prices if value > 200}WRONG
#                              ^ SyntaxError: invalid syntax 
# pMore200 = {key:value for value in prices.values() if value > 200}
#             ^  NameError: name 'key' is not defined
# pMore200 = {key:value for key,value in prices.values() if value > 200}
#              TypeError: cannot unpack non-iterable float object
pMore200 = {key:value for key,value in prices.items() if value > 200}
print("price more than 200:",pMore200)
# Make a dictionary of tech stocks
tech_names = { 'AAPL', 'IBM', 'HPQ', 'MSFT' }
tech_stocks = {key:value for key,value in prices.items() if key in tech_names}
print("tech_stocks:",tech_stocks)
# some other slower way 
pMore200_dict = dict((key,value) for key,value in prices.items() if value > 200)
print(pMore200_dict)
tech_stocks_only_key = {key:prices[key] for key in prices.keys() & tech_names}
print(tech_stocks_only_key)
all stocks: {'ACME': 45.23, 'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2, 'FB': 10.75}
price more than 200: {'AAPL': 612.78, 'IBM': 205.55}
tech_stocks: {'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}
{'AAPL': 612.78, 'IBM': 205.55}
{'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}

1.18. Mapping Names to Sequence Elements

# collections.namedtuple()
from collections import namedtuple
Subscriber = namedtuple('Subscriber',['addr','joined'])
sub = Subscriber('jonesy@sample.com','2012-10-18')
print("sub:",sub)
print("sub.addr:",sub.addr)
print("sub.joined:",sub.joined)
# namedtuple supports operations on tuple
print("len(sub):",len(sub))
addr, joined = sub
print(addr,joined)
# use name to access a namedtuple
Stock = namedtuple('Stock', 'name shares price')


def compute_cost_by_position(records):
    """Return the sum of shares * price, reading each record by position.

    Index 1 is taken as the share count and index 2 as the price, which is
    exactly the kind of magic-number access that named fields avoid.
    """
    cost = 0.0
    for row in records:
        shares, price = row[1], row[2]
        cost = cost + shares * price
    return cost


def compute_cost_by_name(records):
    """Return the same sum, wrapping each record in a ``Stock`` first.

    Each record is unpacked into ``Stock`` so that the fields can be read by
    name (``shares``, ``price``) instead of by index.
    """
    cost = 0.0
    for row in records:
        stock = Stock(*row)
        cost = cost + stock.shares * stock.price
    return cost
#  namedtuple as a replacement of a dictionary, namedtuple is immutable and saves memory
s = Stock('ALIBABA',100,123.45)
print("s:",s)
# s.shares = 75 //AttributeError: can't set attribute
#  use ._replace() to create a new instance with values replaced
s = s._replace(shares = 75)
print("s:",s)
NewStock = namedtuple('NewStock', 'name shares price date time')
# Prototype instance that holds the default value of every field.
newstock_prototype = NewStock(name='', shares=0, price=0.0, date=None, time=None)


def dict_to_newstock(s):
    """Convert dictionary ``s`` to a ``NewStock``.

    Keys of ``s`` are passed to ``_replace()`` as field names; any field not
    present in ``s`` keeps the value it has in ``newstock_prototype``.
    """
    return newstock_prototype._replace(**s)
a = {'name': 'ACME', 'shares': 100, 'price': 123.45}
print("a:",a)
print("dict_to_newstock(a):",dict_to_newstock(a))
b = {'name': 'ACME', 'shares': 100, 'price': 123.45, 'date': '12/17/2012'}
print("b:",b)
print("dict_to_newstock(b):",dict_to_newstock(b))
sub: Subscriber(addr='jonesy@sample.com', joined='2012-10-18')
sub.addr: jonesy@sample.com
sub.joined: 2012-10-18
len(sub): 2
jonesy@sample.com 2012-10-18
s: Stock(name='ALIBABA', shares=100, price=123.45)
s: Stock(name='ALIBABA', shares=75, price=123.45)
a: {'name': 'ACME', 'shares': 100, 'price': 123.45}
dict_to_newstock(a): NewStock(name='ACME', shares=100, price=123.45, date=None, time=None)
b: {'name': 'ACME', 'shares': 100, 'price': 123.45, 'date': '12/17/2012'}
dict_to_newstock(b): NewStock(name='ACME', shares=100, price=123.45, date='12/17/2012', time=None)

1.19. Transforming and Reducing Data at the Same Time

# some examples of generator-expression arguments
# Calculate the sum of squares
nums = [1,2,3,4]
s = sum(x*x for x in nums)
print(s)
# Determine if any .py files exist in a directory
import os

# Raw string: in a normal literal "\C" and "\L" are invalid escape sequences,
# which newer Python versions warn about. The path value itself is unchanged.
files = os.listdir(r'D:\CODE\LearnPythonHardWay')
# any() stops at the first file name that ends with '.py'.
if any(name.endswith('.py') for name in files):
    print('There be python!')
else:
    print("Sorry, no python!")
# # Output a tuple as CSV
s = ('ACME', 50, 123.45)
print(s)
# print(','.join(x if type(x)==type("a") else str(x) for x in s))
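# print(','.join(str(x) for x in s)) // the book's statement; it is correct Python
# It only fails with "TypeError: 'str' object is not callable" when the name `str`
# has been rebound to a string earlier in the session; run `del str` to restore the builtin.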

# Data reduction across fields of a data structure
portfolio = [
 {'name':'GOOG', 'shares': 50},
 {'name':'YHOO', 'shares': 75},
 {'name':'AOL', 'shares': 20},
 {'name':'SCOX', 'shares': 65}
]
min_shares = min(s['shares'] for s in portfolio)
print(min_shares)
## when a generator expression is the single argument to a function,
## you don't need the repeated parentheses
# round brackets
s = sum((x * x for x in nums)) # Pass generator-expr as argument
s = sum(x * x for x in nums) # More elegant syntax
## create an extra temporary list, waste extra memory
#square brackets
s = sum([x * x for x in nums])
## max()\min() accept a key argument you can use a generator
# Original: Returns 20
min_shares = min(s['shares'] for s in portfolio)
print("not use key argument in a min generator:",min_shares)
# Alternative: Returns {'name': 'AOL', 'shares': 20}
min_shares = min(portfolio, key=lambda s: s['shares'])
print("use key argument in a min generator:",min_shares)
30
There be python!
('ACME', 50, 123.45)
20
not use key argument in a min generator: 20
use key argument in a min generator: {'name': 'AOL', 'shares': 20}

1.20. Combining Multiple Mappings into a Single Mapping

# collections.ChainMap
from collections import ChainMap
a = {'x':1,'y':2}
b = {'y':3,'z':4}
c = ChainMap(a,b)
print(c['x'])
print(c['y'])
print(c['z'])
## a ChainMap simply keeps a list of the underlying mappings and redefines common 
## dictionary operations to scan the list.
print("c:",c)
print("len(c):",len(c))
print("list(c.keys()):",list(c.keys()))
print("list(c.values()):",list(c.values()))
## Operations that mutate the mapping always affect the first mapping listed.
print("before change c, a:",a)
print("c:",c)
c['y'] = -2
c['z'] = -4
c['k'] = 100
print("after change c, a:",a)
print("c:",c)
# .new_child() and .parents
d = ChainMap()
d['x'] = 1
print(d)
print("d['x']:",d['x'])
d = d.new_child()
d['x'] = 2
print(d)
print("d['x']:",d['x'])
d = d.new_child()
d['y'] = 100
print(d)
print("d['x']:",d['x'])
# d = d.parents() // TypeError: 'ChainMap' object is not callable
d = d.parents
print(d)
print("d['x']:",d['x'])
# merge a dict
a = {'x':1,'y':2}
b = {'y':3,'z':4}
merge_dict = dict(b)
merge_dict.update(a)# the same key, keep value in a
print("merge_dict:",merge_dict)
# later changes to the original dicts are not reflected in the merged dictionary
a['x'] = 999
print(a['x'],merge_dict['x'])
a = {'x':1,'y':2}
b = {'y':3,'z':4}
# Notice change to merged dicts
c = ChainMap(a,b)
print("c:",c)
a['x'] = 999
print(a['x'],c['x'])
c['x'] = -999
print(a['x'],c['x'])
1
2
4
c: ChainMap({'x': 1, 'y': 2}, {'y': 3, 'z': 4})
len(c): 3
list(c.keys()): ['y', 'z', 'x']
list(c.values()): [2, 4, 1]
before change c, a: {'x': 1, 'y': 2}
c: ChainMap({'x': 1, 'y': 2}, {'y': 3, 'z': 4})
after change c, a: {'x': 1, 'y': -2, 'z': -4, 'k': 100}
c: ChainMap({'x': 1, 'y': -2, 'z': -4, 'k': 100}, {'y': 3, 'z': 4})
ChainMap({'x': 1})
d['x']: 1
ChainMap({'x': 2}, {'x': 1})
d['x']: 2
ChainMap({'y': 100}, {'x': 2}, {'x': 1})
d['x']: 2
ChainMap({'x': 2}, {'x': 1})
d['x']: 2
merge_dict: {'y': 2, 'z': 4, 'x': 1}
999 1
c: ChainMap({'x': 1, 'y': 2}, {'y': 3, 'z': 4})
999 999
-999 -999

标签:rows,date,name,C1PythonCookBook,key,print,2012
来源: https://www.cnblogs.com/Matrix-250/p/16441073.html