C1PythonCookBook
作者:互联网
# set jupyter notebook theme
# jt -t monokai -f roboto -fs 12 -ofs 10
def SEE(var):
    """Print the caller-scope name(s) bound to ``var`` followed by its value.

    Names are found by identity (``is``) among the caller's local variables,
    so an expression or literal argument may match no name (shown as
    ``<expr>``), and an object bound to several names lists all of them.

    Args:
        var: The object to display.

    Returns:
        None. The line ``<names>: <repr of var>`` is written to stdout.
    """
    import inspect  # local import keeps this helper self-contained

    frame = inspect.currentframe()
    caller = frame.f_back if frame is not None else None
    try:
        if caller is None:
            names = []
        else:
            names = [
                name for name, value in caller.f_locals.items() if value is var
            ]
    finally:
        # Drop the frame references promptly to avoid reference cycles.
        del frame, caller
    label = ", ".join(names) if names else "<expr>"
    print(f"{label}: {var!r}")
# Use ## (two pound signs) for knowledge descriptions.
# Use # (one pound sign) for common comments, such as explanations of the program
# or error information.
# callers_local_vars = inspect.currentframe().f_back.f_locals.items()
# a = [var_name for var_name, var_val in callers_local_vars if var_val is var]
# print(a,var)
# print(str(x)+":",x)
# import inspect
# def retrieve_name(var):
# callers_local_vars = inspect.currentframe().f_back.f_locals.items()
# return [var_name for var_name, var_val in callers_local_vars if var_val is var]
# A, B, C = [1, 2, 3], [4, 5, 6], [7, 8, 9]
# ls = [A, B, C]
# for i in ls:
# i1 = retrieve_name(i)[0]
# print('%s ='%i1, i)
# words = [
# 'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
# 'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
# 'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
# 'my', 'eyes', "you're", 'under'
# ]
# from collections import Counter
# word_counts = Counter(words)
# SEE(word_counts['eyes'])
# # word_counts['eyes']
1.1. Unpacking a Sequence into Separate Variables
## Any sequence of known length can be unpacked with a single assignment.
data = ["monday", 1, 0.5, (1, 2, 3)]
x, y, z, t = data
print(x, y, z, t)

# The nested tuple unpacks the same way; `_` is a conventional throwaway name.
t1, t2, t3 = t
_, middle, _ = t
print(t1, t2, t3)
print(middle)

# The number of targets must match the number of items, e.g. `x1, x2 = t` gives
# ValueError: too many values to unpack (expected 2)

## Unpacking works on any iterable (a string here), not just tuples or lists.
str1 = "Hello"
a, b, c, d, e = str1
print(c)
monday 1 0.5 (1, 2, 3)
1 2 3
2
l
1.2. Unpacking Elements from Iterables of Arbitrary Length
## Star expression
def drop_first_and_last(grades):
    """Return the mean of ``grades`` after discarding the first and last entries.

    Args:
        grades: An iterable of at least three numbers.

    Returns:
        The arithmetic mean of the values between the first and the last.

    Raises:
        ValueError: If fewer than three grades are supplied, so nothing is left
            to average once the first and last are dropped.
    """
    _first, *middle, _last = grades  # unpacking itself raises ValueError for < 2 items
    if not middle:
        raise ValueError("need at least three grades to drop the first and last")
    # The original called an undefined helper `avg`; compute the mean directly.
    return sum(middle) / len(middle)
## Star expression: collect a variable-length tail of a record
record = ["Crazy Dave", 21, 'wabbywabbo@PVZ.com', '111-222-333', '444-555-666', 1]
name, age, mailaddress, *telephone = record
print(name, age, mailaddress, telephone)
# A starred target is always a list: here both phone numbers plus the trailing 1.

## Iterating over a sequence of tagged tuples of varying length
record = [
    ('foo', 1, 2),
    ('kee', 10, 11),
]


def do_foo(x, y):
    """Print the tag 'foo' followed by its two arguments."""
    print('foo', x, y)


def do_kee(x, y):
    """Print the tag 'kee' followed by its two arguments."""
    print('kee', x, y)


for tag, *args in record:
    if tag == 'foo':
        do_foo(*args)  # the asterisk spreads the collected list back into arguments
    elif tag == 'kee':
        do_kee(*args)

## Star unpacking combines well with string processing such as splitting.
line = 'nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false'
uname, *fields, homedir, sh = line.split(':')
print(uname, homedir, sh)
print(f"fields:{fields}")

## Star unpacking can bind a throwaway name to the parts you want to discard.
record = ('ACME', 50, 123.45, (12, 18, 2012))
name, *_, (*_, year) = record
print(name, year)

## Split a list into head and tail components
items = [0, 1, 2, 3, 4]
head, *tail = items
print(head)
print(f'tail:{tail}')
# print(f'*tail:{*tail}')  -> SyntaxError: f-string: can't use starred expression here
print('*tail:', *tail)
Crazy Dave 21 wabbywabbo@PVZ.com ['111-222-333', '444-555-666', 1]
foo 1 2
kee 10 11
nobody /var/empty /usr/bin/false
fields:['*', '-2', '-2', 'Unprivileged User']
ACME 2012
0
tail:[1, 2, 3, 4]
*tail: 1 2 3 4
?1.3. Keeping the Last N Items
## what is "yields"? Page 24
## deque()
from collections import deque  # without this import: NameError: name 'deque' is not defined

# A deque created with maxlen is bounded: once full, adding at one end
# discards an item from the opposite end.
q = deque(maxlen=2)
q.append(1)
q.append(2)
print(q)
q.append(3)
print(q)
q.appendleft(-1)
print(q)
q.popleft()
print(q)
q.pop()
print(q)

# Without maxlen the deque is unbounded, so nothing is discarded on insert.
p = deque()
p.extend([1, 2])
print(p)
p.append(3)
print(p)
p.appendleft(-1)
print(p)
p.pop()  # removes the item at the right end (the most recently appended)
print(p)
p.popleft()  # removes the item at the left end
print(p)
deque([1, 2], maxlen=2)
deque([2, 3], maxlen=2)
deque([-1, 2], maxlen=2)
deque([2], maxlen=2)
deque([], maxlen=2)
deque([1, 2])
deque([1, 2, 3])
deque([-1, 1, 2, 3])
deque([-1, 1, 2])
deque([1, 2])
1.4. Finding the Largest or Smallest N Items
## heapq module
import heapq

nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums))   # prints [42, 37, 23]
print(heapq.nsmallest(3, nums))  # prints [-4, 1, 2]

# Both functions take a key parameter for use with more complicated records.
portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65},
]
cheap = heapq.nsmallest(3, portfolio, key=lambda stock: stock['price'])
expensive = heapq.nlargest(3, portfolio, key=lambda stock: stock['price'])
print(cheap)
print(expensive)

# Under the hood the data is arranged as a heap, whose defining property is
# that heap[0] is always the smallest item. heapq.heappop() removes that item
# and moves the next smallest into its place.
# For a single smallest/largest item (N=1) min()/max() are faster; when N is
# close to the size of the collection, sorting and slicing is usually faster
# (sorted(items)[:N] or sorted(items)[-N:]).
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(nums)
heap = nums[:]
print("before heapify:", heap)
heapq.heapify(heap)
print("after heapify: ", heap)
for _ in range(3):
    print("pop:", heapq.heappop(heap))
    print(heap)
[42, 37, 23]
[-4, 1, 2]
[{'name': 'YHOO', 'shares': 45, 'price': 16.35}, {'name': 'FB', 'shares': 200, 'price': 21.09}, {'name': 'HPQ', 'shares': 35, 'price': 31.75}]
[{'name': 'AAPL', 'shares': 50, 'price': 543.22}, {'name': 'ACME', 'shares': 75, 'price': 115.65}, {'name': 'IBM', 'shares': 100, 'price': 91.1}]
[1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
before heapify: [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
after heapify: [-4, 2, 1, 23, 7, 2, 18, 23, 42, 37, 8]
pop: -4
[1, 2, 2, 23, 7, 8, 18, 23, 42, 37]
pop: 1
[2, 2, 8, 23, 7, 37, 18, 23, 42]
pop: 2
[2, 7, 8, 23, 42, 37, 18, 23]
?1.5. Implementing a Priority Queue
## some problem in self: push(self, item, priority) ,q.push(Item('foo'),1)
1.6. Mapping Keys to Multiple Values in a Dictionary
## defaultdict
from collections import defaultdict

# With list as the factory, a missing key starts out as an empty list.
d = defaultdict(list)
d['a'].extend([1, 2])
d['b'].append(4)
print(d)

# With set as the factory, duplicates collapse (1 is added twice below).
d = defaultdict(set)
d['a'].update([1, 2, 1])
d['b'].add(4)
print(d)

# A regular dict needs setdefault() to create the empty list on first use;
# otherwise each key would have to be initialised by hand (d[key] = []).
d = {}
d.setdefault('a', []).append(1)
d.setdefault('a', []).append(2)
d.setdefault('b', []).append(3)
print(d)
defaultdict(<class 'list'>, {'a': [1, 2], 'b': [4]})
defaultdict(<class 'set'>, {'a': {1, 2}, 'b': {4}})
{'a': [1, 2], 'b': [3]}
1.7. Keeping Dictionaries in Order
## OrderedDict
import json
from collections import OrderedDict

d = OrderedDict()
# Iteration follows the order in which the keys were inserted.
d['foo'] = 1
d['Crazy'] = 2
d['Dave'] = 3
for key in d:
    print(key, d[key])

# Serialising keeps the same key order. A bare `json.dumps(d)` is only echoed
# by a notebook cell, so print its repr explicitly to show the JSON string.
print(repr(json.dumps(d)))
foo 1
Crazy 2
Dave 3
'{"foo": 1, "Crazy": 2, "Dave": 3}'
1.8. Calculating with Dictionaries
## sorted(zip())
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75,
}

# Pair each value with its key so that tuple comparison ranks by price first.
max_price = max(zip(prices.values(), prices.keys()))
min_price = min(zip(prices.values(), prices.keys()))
print("max price:", max_price)
print("min price:", min_price)
sorted_prices = sorted(zip(prices.values(), prices.keys()))
print(sorted_prices)

# zip() returns an iterator, and an iterator can only be consumed once.
price_and_names = zip(prices.values(), prices.keys())
print("price_and_names:", price_and_names)
print(min(price_and_names))
# print(max(price_and_names))  -> ValueError: max() arg is an empty sequence

print(max(prices))                            # max over the keys
print(max(prices.values()))                   # max over the values only
print(min(prices, key=prices.get))            # key that has the smallest value
print(prices[min(prices, key=prices.get)])    # ... and the value stored under it

values_equal_key_differ = {'AAA': 12, 'ZZZ': 12}
print(
    "when values are equal, compare to keys, max:",
    max(zip(values_equal_key_differ.values(), values_equal_key_differ.keys())),
)
max price: (612.78, 'AAPL')
min price: (10.75, 'FB')
[(10.75, 'FB'), (37.2, 'HPQ'), (45.23, 'ACME'), (205.55, 'IBM'), (612.78, 'AAPL')]
price_and_names: <zip object at 0x0000025DC6C204C0>
(10.75, 'FB')
IBM
612.78
FB
10.75
when values are equal, compare to keys, max: (12, 'ZZZ')
1.9. Finding Commonalities in Two Dictionaries
## & - operations on dictionaries
a = dict(x=1, y=2, z=3)
b = dict(w=1, y=2, z=-3)
print("a:", a)
print("b:", b)

# keys() and items() views support set operations directly.
print("a.keys() & b.keys()", a.keys() & b.keys())
print("a.keys() - b.keys()", a.keys() - b.keys())
print("a.items() & b.items()", a.items() & b.items())

# Build a new dictionary with certain keys removed.
c = {k: a[k] for k in a.keys() - {'w', 'z'}}
print("c:", c)
a: {'x': 1, 'y': 2, 'z': 3}
b: {'w': 1, 'y': 2, 'z': -3}
a.keys() & b.keys() {'z', 'y'}
a.keys() - b.keys() {'x'}
a.items() & b.items() {('y', 2)}
c: {'x': 1, 'y': 2}
?1.10. Removing Duplicates from a Sequence while Maintaining Order
## I don't yet understand what a generator is or what role it plays here
1.11. Naming a Slice
## Naming a slice
# Column ruler for the fixed-width record below:
#         0123456789012345678901234567890123456789012345678901234567890
# The record is assembled from padded fields so that its column layout
# (shares in columns 20-32, price in columns 40-50) cannot be lost when
# whitespace is collapsed; with the padding gone, int(record[20:32]) fails.
record = '.' * 20 + '100'.ljust(13) + '.' * 7 + '513.25'.ljust(11) + '.' * 10

# Hard-coded indices: a readability and maintenance mess
cost = int(record[20:32]) * float(record[40:50])
print(cost)

# Naming the slices documents what each column range means
SHARE = slice(20, 32)
PRICE = slice(40, 50)
print("SHARE=record[20:32] :", SHARE)
# The third argument of slice() is the step, not the sequence being sliced,
# so this object is accepted by slice() but cannot be used for indexing.
SHARE_RECORD = slice(20, 32, record)
print("SHARE_RECORD = slice(20,32,record) :", SHARE_RECORD)
# print(int(record[SHARE_RECORD]))
# TypeError: slice indices must be integers or None or have an __index__ method
print(int(record[SHARE]))
print(float(record[PRICE]))
result = int(record[SHARE]) * float(record[PRICE])
print(result)

# More about slice()
items = [0, 1, 2, 3, 4, 5, 6]
a = slice(2, 4)  # the element at index 4 is not included
print(items[2:4], items[a])
items[a] = [-2, -3]
print(items)
del items[a]
print(items)

# start, stop and step are plain attributes; indices is a method
b = slice(1, 5, 2)
print("b:", b)
print("b.start:", b.start)
print("b.stop:", b.stop)
print("b.step:", b.step)
print("b.indices:", b.indices)  # not called here, so the bound method itself is printed
items = [0, 1, 2, 3, 4, 5, 6]
print("items:", items)
print("items[b]:", items[b])

# indices(size) maps a slice onto a sequence of the given size, clipping
# start/stop to fit within bounds so that indexing cannot raise IndexError.
c = slice(10, 50, 2)
# Named `text`, not `str`: rebinding `str` hides the builtin, and every later
# str(...) call would fail with "TypeError: 'str' object is not callable".
text = 'HelloWorld'
print("c:", c)
print("c.indices(len(str)):", c.indices(len(text)))
c = slice(20, 50, 2)
print("c:", c)
print("c.indices(len(str)):", c.indices(len(text)))
c = slice(1, 20, 2)
print("c:", c)
print("c.indices(len(str)):", c.indices(len(text)))
print("HelloWorld\n" +
      "0123456789")
for i in range(*c.indices(len(text))):
    print(text[i])
51325.0
SHARE=record[20:32] : slice(20, 32, None)
SHARE_RECORD = slice(20,32,record) : slice(20, 32, '....................100 .......513.25 ..........')
100
513.25
51325.0
[2, 3] [2, 3]
[0, 1, -2, -3, 4, 5, 6]
[0, 1, 4, 5, 6]
b: slice(1, 5, 2)
b.start: 1
b.stop: 5
b.step: 2
b.indices: <built-in method indices of slice object at 0x00000249C116C9C0>
items: [0, 1, 2, 3, 4, 5, 6]
items[b]: [1, 3]
c: slice(10, 50, 2)
c.indices(len(str)): (10, 10, 2)
c: slice(20, 50, 2)
c.indices(len(str)): (10, 10, 2)
c: slice(1, 20, 2)
c.indices(len(str)): (1, 10, 2)
HelloWorld
0123456789
e
l
W
r
d
1.12. Determining the Most Frequently Occurring Items in a Sequence
## collections.Counter.most_common()
from collections import Counter

words = [
    'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
    'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
    'my', 'eyes', "you're", 'under'
]
word_counts = Counter(words)
# Call the method on the instance rather than through the class.
top_three = word_counts.most_common(3)
print(top_three)

# A Counter is a dictionary that maps each item to its number of occurrences.
print("word_counts['eyes']:", word_counts['eyes'])

# To increment the counts manually, simply use addition:
print("before add morewords:", word_counts)
morewords = ['why', 'are', 'you', 'not', 'looking', 'in', 'my', 'eyes']
for word in morewords:
    word_counts[word] += 1  # missing keys count as 0, so no KeyError
print("after add morewords:", word_counts)
# word_counts.update(morewords) is an alternative to the loop above.

# Counter instances support mathematical operations
a = Counter(words)
b = Counter(morewords)
print("\n", a + b)
print(a - b)
[('eyes', 8), ('the', 5), ('look', 4)]
word_counts['eyes']: 8
before add morewords: Counter({'eyes': 8, 'the': 5, 'look': 4, 'into': 3, 'my': 3, 'around': 2, 'not': 1, "don't": 1, "you're": 1, 'under': 1})
after add morewords: Counter({'eyes': 9, 'the': 5, 'look': 4, 'my': 4, 'into': 3, 'not': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1, 'why': 1, 'are': 1, 'you': 1, 'looking': 1, 'in': 1})
Counter({'eyes': 9, 'the': 5, 'look': 4, 'my': 4, 'into': 3, 'not': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1, 'why': 1, 'are': 1, 'you': 1, 'looking': 1, 'in': 1})
Counter({'eyes': 7, 'the': 5, 'look': 4, 'into': 3, 'my': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1})
1.13. Sorting a List of Dictionaries by a Common Key Problem
## operator.itemgetter()
from operator import itemgetter

rows = [
    {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    {'fname': 'Big', 'lname': 'Jones', 'uid': 1004},
]

# key must be a callable: sorted(rows, key='fname') gives
# TypeError: 'str' object is not callable
rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_uid_reverse = sorted(rows, key=itemgetter('uid'), reverse=True)
# itemgetter accepts several keys and then returns a tuple to compare
rows_by_lfnames = sorted(rows, key=itemgetter('lname', 'fname'))
print("rows_by_fname:\n", rows_by_fname)
print("rows_by_uid_reverse:\n", rows_by_uid_reverse)
print("rows_by_lfnames:\n", rows_by_lfnames)

# The same sorts written with lambda expressions
rows_by_fname_lambda = sorted(rows, key=lambda row: row['fname'])
# Attempts at the two-key version that do NOT work:
#   key=lambda k: k['lname', 'fname']          -> WRONG
#   key=lambda k: k['lname'], ['fname']         -> SyntaxError: positional argument follows keyword argument
#   key=lambda k: k['lname'], k['fname']        -> SyntaxError: positional argument follows keyword argument
#   key=lambda k: k(['lname'], ['fname'])       -> TypeError: 'dict' object is not callable
# The lambda has to build the tuple itself:
rows_by_lfname_lambda = sorted(rows, key=lambda row: (row['lname'], row['fname']))
print("rows_by_fname_lambda:\n", rows_by_fname_lambda)
print("rows_by_lfname_lambda:\n", rows_by_lfname_lambda)

# min() and max() accept the same kind of key function
print("\n", min(rows, key=itemgetter('uid')))
rows_by_fname:
[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
rows_by_uid_reverse:
[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
rows_by_lfnames:
[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]
rows_by_fname_lambda:
[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
rows_by_lfname_lambda:
[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]
{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}
1.14. Sorting Objects Without Native Comparison Support
## use lambda or operator.attrgetter() to sort objects
class User:
    """Minimal record holding a ``user_id``; it defines no comparison methods."""

    def __init__(self, user_id):
        """Store ``user_id`` on the instance."""
        self.user_id = user_id

    def __repr__(self):
        """Return ``User(<user_id>)``."""
        # The attribute must be read through self: a bare `user_id` here gives
        # NameError: name 'user_id' is not defined
        return 'User({})'.format(self.user_id)
from operator import attrgetter

users = [User(3), User(250), User(-1)]
print("before sorting, users:", users)
# sorted() returns a new list and leaves `users` untouched, so a bare
# `sorted(users, key=lambda u: u.user_id)` statement has no lasting effect.
print("after sorting, users:", sorted(users, key=lambda user: user.user_id))

# operator.attrgetter() is an alternative to the lambda. Like itemgetter() it
# is a tad faster than a lambda and supports several fields at once, e.g.
# by_name = sorted(users, key=attrgetter('last_name', 'first_name'))
print("use attrgetter():", sorted(users, key=attrgetter('user_id')))

# min() and max() accept the same key functions
print("min user_id:", min(users, key=attrgetter('user_id')))
before sorting, users: [User(3), User(250), User(-1)]
after sorting, users: [User(-1), User(3), User(250)]
use attrgetter(): [User(-1), User(3), User(250)]
min user_id: User(-1)
1.15. Grouping Records Together Based on a Field
## Group a list of dictionaries by 'date'
from collections import defaultdict
from itertools import groupby
from operator import itemgetter

rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]

# First sort by 'date': groupby() only examines consecutive items, so without
# sorting the records would not be grouped as intended. Sorting into a copy
# keeps `rows` in its original order for the second approach below.
rows_sorted = sorted(rows, key=itemgetter('date'))
print(rows_sorted)

# Then group by 'date' with itertools.groupby()
print("groupby(rows,itemgetter('date')): \n", groupby(rows_sorted, itemgetter('date')))
for date, items in groupby(rows_sorted, itemgetter('date')):
    print(date)
    print(items)  # a lazy _grouper iterator, not a list
    for item in items:
        print("\t", item)
    # NOTE(review): the original indentation was lost; this separator is
    # assumed to follow each group rather than the whole loop - confirm.
    print("\n")

# Alternative: collect the records into a multidict keyed by date.
# This way the records do not need to be sorted first.
rows_by_date = defaultdict(list)
for row in rows:
    rows_by_date[row['date']].append(row)

# Print one list of records per date, in first-seen order
for date in rows_by_date:
    print(rows_by_date[date])
[{'address': '5412 N CLARK', 'date': '07/01/2012'}, {'address': '4801 N BROADWAY', 'date': '07/01/2012'}, {'address': '5800 E 58TH', 'date': '07/02/2012'}, {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}, {'address': '1060 W ADDISON', 'date': '07/02/2012'}, {'address': '2122 N CLARK', 'date': '07/03/2012'}, {'address': '5148 N CLARK', 'date': '07/04/2012'}, {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}]
groupby(rows,itemgetter('date')):
<itertools.groupby object at 0x00000249A87B23B0>
07/01/2012
<itertools._grouper object at 0x00000249C162A670>
{'address': '5412 N CLARK', 'date': '07/01/2012'}
{'address': '4801 N BROADWAY', 'date': '07/01/2012'}
07/02/2012
<itertools._grouper object at 0x00000249C1429370>
{'address': '5800 E 58TH', 'date': '07/02/2012'}
{'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}
{'address': '1060 W ADDISON', 'date': '07/02/2012'}
07/03/2012
<itertools._grouper object at 0x00000249C0FD54C0>
{'address': '2122 N CLARK', 'date': '07/03/2012'}
07/04/2012
<itertools._grouper object at 0x00000249C0FE3940>
{'address': '5148 N CLARK', 'date': '07/04/2012'}
{'address': '1039 W GRANVILLE', 'date': '07/04/2012'}
[{'address': '5412 N CLARK', 'date': '07/01/2012'}, {'address': '4801 N BROADWAY', 'date': '07/01/2012'}]
[{'address': '5148 N CLARK', 'date': '07/04/2012'}, {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}]
[{'address': '5800 E 58TH', 'date': '07/02/2012'}, {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}, {'address': '1060 W ADDISON', 'date': '07/02/2012'}]
[{'address': '2122 N CLARK', 'date': '07/03/2012'}]
1.16. Filtering Sequence Elements
# Filter sequence data with a list comprehension
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
print("mylist:", mylist)
print("filter sequence data use a list comprehension:")
print([n for n in mylist if n > 0])
print([n for n in mylist if n < 0])

# A generator expression produces the same values lazily, one at a time,
# so printing it shows the generator object rather than the values.
pos = (n for n in mylist if n > 0)
print("generator expressions:", pos)
for x in pos:
    print(x)
# Use filter() when the criterion is complicated or involves exception handling
values = ['1', '2', '-3', '-', '4', 'N/A', '5']


def is_int(val):
    """Return True if ``int(val)`` succeeds, False if it raises ValueError.

    Any other exception raised by ``int`` (for example TypeError for None)
    is not caught and propagates to the caller.
    """
    try:
        int(val)  # only the success or failure of the conversion matters
    except ValueError:
        return False
    return True


int_eles = list(filter(is_int, values))
print("values:", values)
print("use filter function to get int elements:", int_eles)
import math

# A list comprehension can transform the values it keeps.
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
print("filter all positive elements and do sqrt() on them at the same time:")
print([math.sqrt(value) for value in mylist if value > 0])

# To replace rejected values instead of dropping them, use a conditional
# expression. It goes in front of `for`; a trailing
# `[n for n in mylist if n > 0 else 0]` is a syntax error.
print("use if-else clause to replace values that don't meet the criteria:")
print([value if value > 0 else 0 for value in mylist])
# Use itertools.compress to select items with a parallel sequence of booleans
from itertools import compress

# Every entry needs its own trailing comma. Without one after '2122 N CLARK',
# adjacent string literals are concatenated, which leaves 7 addresses for
# 8 counts and shifts every later address against its count.
addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]
counts = [0, 3, 10, 4, 1, 7, 6, 1]

more5 = [n > 5 for n in counts]
print("more5:", more5)
selected_addresses = list(compress(addresses, more5))
# With the list aligned this prints
# ['5800 E 58TH', '1060 W ADDISON', '4801 N BROADWAY']
print("use compress:\n", selected_addresses)
mylist: [1, 4, -5, 10, -7, 2, 3, -1]
filter sequence data use a list comprehension:
[1, 4, 10, 2, 3]
[-5, -7, -1]
generator expressions: <generator object <genexpr> at 0x00000249C2D83C80>
1
4
10
2
3
values: ['1', '2', '-3', '-', '4', 'N/A', '5']
use filter function to get int elements: ['1', '2', '-3', '4', '5']
filter all positive elements and do sqrt() on them at the same time:
[1.0, 2.0, 3.1622776601683795, 1.4142135623730951, 1.7320508075688772]
use if-else clause to replace values that don't meet the criteria:
[1, 4, 0, 10, 0, 2, 3, 0]
more5: [False, False, True, False, False, True, True, False]
use compress:
['5800 E 58TH', '4801 N BROADWAY', '1039 W GRANVILLE']
1.17. Extracting a Subset of a Dictionary
# use dictionary comprehension
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75,
}
print("all stocks:", prices)

# Attempts that do NOT work:
#   {key:value for key:value in prices if value > 200}
#       -> SyntaxError: invalid syntax
#   {key:value for value in prices.values() if value > 200}
#       -> NameError: name 'key' is not defined
#   {key:value for key,value in prices.values() if value > 200}
#       -> TypeError: cannot unpack non-iterable float object
# Iterating over items() yields the (key, value) pairs the comprehension needs.
pMore200 = {name: price for name, price in prices.items() if price > 200}
print("price more than 200:", pMore200)

# Make a dictionary of tech stocks
tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT'}
tech_stocks = {name: price for name, price in prices.items() if name in tech_names}
print("tech_stocks:", tech_stocks)

# Some other, slower ways to get the same results
pMore200_dict = dict((name, price) for name, price in prices.items() if price > 200)
print(pMore200_dict)
tech_stocks_only_key = {name: prices[name] for name in prices.keys() & tech_names}
print(tech_stocks_only_key)
all stocks: {'ACME': 45.23, 'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2, 'FB': 10.75}
price more than 200: {'AAPL': 612.78, 'IBM': 205.55}
tech_stocks: {'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}
{'AAPL': 612.78, 'IBM': 205.55}
{'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}
1.18. Mapping Names to Sequence Elements
# collection,namedtuple()
from collections import namedtuple

# namedtuple() builds a tuple subclass whose fields are reachable by name.
Subscriber = namedtuple('Subscriber', 'addr joined')
sub = Subscriber('jonesy@sample.com', '2012-10-18')
print("sub:", sub)
print("sub.addr:", sub.addr)
print("sub.joined:", sub.joined)

# A namedtuple still supports the ordinary tuple operations
print("len(sub):", len(sub))
addr, joined = sub
print(addr, joined)
# Accessing fields by name instead of by position
Stock = namedtuple('Stock', ['name', 'shares', 'price'])


def compute_cost_by_position(records):
    """Return the total of shares * price over ``records``, using positions.

    Args:
        records: Iterable of sequences with shares at index 1 and price at
            index 2.

    Returns:
        The summed cost as a float (0.0 for no records).
    """
    total = 0.0
    for rec in records:
        total += rec[1] * rec[2]
    return total


def compute_cost_by_name(records):
    """Return the total of shares * price over ``records``, using field names.

    Args:
        records: Iterable of 3-item sequences in (name, shares, price) order;
            each one is converted to a ``Stock``.

    Returns:
        The summed cost as a float (0.0 for no records).
    """
    total = 0.0
    for rec in records:
        s = Stock(*rec)
        total += s.shares * s.price
    return total


# A namedtuple can stand in for a dictionary; it is immutable and saves memory
s = Stock('ALIBABA', 100, 123.45)
print("s:", s)
# s.shares = 75  -> AttributeError: can't set attribute
# _replace() creates a new instance with the given values replaced
s = s._replace(shares=75)
print("s:", s)
NewStock = namedtuple('NewStock', ['name', 'shares', 'price', 'date', 'time'])
# Prototype instance that supplies the value of every field a dictionary omits
newstock_prototype = NewStock('', 0, 0.0, None, None)


def dict_to_newstock(s):
    """Convert the dictionary ``s`` to a ``NewStock``.

    Keys of ``s`` are passed to ``_replace`` as field names on the prototype;
    fields that ``s`` does not mention keep the prototype's values.
    """
    return newstock_prototype._replace(**s)


a = {'name': 'ACME', 'shares': 100, 'price': 123.45}
print("a:", a)
print("dict_to_newstock(a):", dict_to_newstock(a))
b = {'name': 'ACME', 'shares': 100, 'price': 123.45, 'date': '12/17/2012'}
print("b:", b)
print("dict_to_newstock(b):", dict_to_newstock(b))
sub: Subscriber(addr='jonesy@sample.com', joined='2012-10-18')
sub.addr: jonesy@sample.com
sub.joined: 2012-10-18
len(sub): 2
jonesy@sample.com 2012-10-18
s: Stock(name='ALIBABA', shares=100, price=123.45)
s: Stock(name='ALIBABA', shares=75, price=123.45)
a: {'name': 'ACME', 'shares': 100, 'price': 123.45}
dict_to_newstock(a): NewStock(name='ACME', shares=100, price=123.45, date=None, time=None)
b: {'name': 'ACME', 'shares': 100, 'price': 123.45, 'date': '12/17/2012'}
dict_to_newstock(b): NewStock(name='ACME', shares=100, price=123.45, date='12/17/2012', time=None)
1.19. Transforming and Reducing Data at the Same Time
# Examples of passing a generator expression as a function argument
import os

# Calculate the sum of squares
nums = [1, 2, 3, 4]
s = sum(x * x for x in nums)
print(s)

# Determine if any .py files exist in a directory.
# The path is a raw string so the backslashes are not read as escape
# sequences, and the current directory is used when it does not exist so the
# example still runs on other machines.
directory = r'D:\CODE\LearnPythonHardWay'
if not os.path.isdir(directory):
    directory = os.curdir
files = os.listdir(directory)
if any(name.endswith('.py') for name in files):
    print('There be python!')
else:
    print("Sorry, no python!")

# Output a tuple as CSV
s = ('ACME', 50, 123.45)
print(s)
# ','.join(str(x) for x in s) fails with "TypeError: 'str' object is not
# callable" whenever the name `str` has been rebound to a string earlier in
# the session. Formatting with an f-string does not depend on that name.
print(','.join(f'{x}' for x in s))  # ACME,50,123.45

# Data reduction across fields of a data structure
portfolio = [
    {'name': 'GOOG', 'shares': 50},
    {'name': 'YHOO', 'shares': 75},
    {'name': 'AOL', 'shares': 20},
    {'name': 'SCOX', 'shares': 65},
]
min_shares = min(s['shares'] for s in portfolio)
print(min_shares)

## When a generator expression is the single argument to a function,
## the repeated parentheses are not needed.
s = sum((x * x for x in nums))  # generator expression passed explicitly
s = sum(x * x for x in nums)    # more elegant syntax, same meaning
## Square brackets build an extra temporary list first, wasting memory.
s = sum([x * x for x in nums])

## min()/max() accept a key argument as an alternative to a generator
# Original: returns 20
min_shares = min(s['shares'] for s in portfolio)
print("not use key argument in a min generator:", min_shares)
# Alternative: returns {'name': 'AOL', 'shares': 20}
min_shares = min(portfolio, key=lambda s: s['shares'])
print("use key argument in a min generator:", min_shares)
30
There be python!
('ACME', 50, 123.45)
20
not use key argument in a min generator: 20
use key argument in a min generator: {'name': 'AOL', 'shares': 20}
1.20. Combining Multiple Mappings into a Single Mapping
# collections.chainmap
from collections import ChainMap

a = dict(x=1, y=2)
b = dict(y=3, z=4)
c = ChainMap(a, b)
print(c['x'])
print(c['y'])  # found in a first, so b's value for 'y' is hidden
print(c['z'])

## A ChainMap only keeps a list of the underlying mappings and redefines the
## common dictionary operations to scan that list.
print("c:", c)
print("len(c):", len(c))
print("list(c.keys()):", list(c.keys()))
print("list(c.values()):", list(c.values()))

## Operations that mutate the mapping always affect the first mapping listed.
print("before change c, a:", a)
print("c:", c)
c['y'] = -2
c['z'] = -4
c['k'] = 100
print("after change c, a:", a)
print("c:", c)

# new_child() pushes a fresh mapping in front; parents drops the front one
d = ChainMap()
d['x'] = 1
print(d)
print("d['x']:", d['x'])
d = d.new_child()
d['x'] = 2
print(d)
print("d['x']:", d['x'])
d = d.new_child()
d['y'] = 100
print(d)
print("d['x']:", d['x'])
# parents is a property, not a method: d.parents() gives
# TypeError: 'ChainMap' object is not callable
d = d.parents
print(d)
print("d['x']:", d['x'])

# Merging with update() instead: on a shared key the value from a is kept
a = dict(x=1, y=2)
b = dict(y=3, z=4)
merge_dict = dict(b)
merge_dict.update(a)
print("merge_dict:", merge_dict)
# The merged dictionary is a copy, so later changes to a are not reflected
a['x'] = 999
print(a['x'], merge_dict['x'])

a = dict(x=1, y=2)
b = dict(y=3, z=4)
# A ChainMap refers to the original dictionaries, so changes show through
c = ChainMap(a, b)
print("c:", c)
a['x'] = 999
print(a['x'], c['x'])
c['x'] = -999
print(a['x'], c['x'])
1
2
4
c: ChainMap({'x': 1, 'y': 2}, {'y': 3, 'z': 4})
len(c): 3
list(c.keys()): ['y', 'z', 'x']
list(c.values()): [2, 4, 1]
before change c, a: {'x': 1, 'y': 2}
c: ChainMap({'x': 1, 'y': 2}, {'y': 3, 'z': 4})
after change c, a: {'x': 1, 'y': -2, 'z': -4, 'k': 100}
c: ChainMap({'x': 1, 'y': -2, 'z': -4, 'k': 100}, {'y': 3, 'z': 4})
ChainMap({'x': 1})
d['x']: 1
ChainMap({'x': 2}, {'x': 1})
d['x']: 2
ChainMap({'y': 100}, {'x': 2}, {'x': 1})
d['x']: 2
ChainMap({'x': 2}, {'x': 1})
d['x']: 2
merge_dict: {'y': 2, 'z': 4, 'x': 1}
999 1
c: ChainMap({'x': 1, 'y': 2}, {'y': 3, 'z': 4})
999 999
-999 -999
标签:rows,date,name,C1PythonCookBook,key,print,2012 来源: https://www.cnblogs.com/Matrix-250/p/16441073.html