其他分享
首页 > 其他分享 > pandas tricks

pandas tricks

作者:互联网

# Check for equality
# The script uses `np` and `pd` throughout; import them here so it runs as-is.
import numpy as np
import pandas as pd

# Build a DataFrame whose two columns hold the same values, including a NaN
df = pd.DataFrame({'a': [10, 40, np.nan], 'b': [10, 40, np.nan]})
print('data:\n', df)
print()
# Compare column a with column b element by element
print('df.a == df.b:')
print(df.a == df.b)
print()
# Comparing two NaN values with == yields False
print('np.nan == np.nan:')
print(np.nan == np.nan)
print()
# equals() can be used instead: it treats NaNs in the same position as equal
print('df.a.equals(df.b):')
print(df.a.equals(df.b))
print()
# assert_series_equal can also be used. It returns None when the Series match
# and raises AssertionError otherwise, so report success explicitly instead of
# printing its (always None) return value.
print('pd.testing.assert_series_equal(df.a, df.b, check_names=False, check_dtype=False):')
pd.testing.assert_series_equal(df.a, df.b, check_names=False, check_dtype=False)
print('OK (no AssertionError raised)')
print()
# assert_frame_equal does the same for whole frames: silent when they match,
# raises AssertionError when they differ
df_new = df.copy()
pd.testing.assert_frame_equal(df, df_new)
# Use NumPy together with pandas
# NOTE: this trick originally reached NumPy through the `pd.np` alias. That
# alias was deprecated in pandas 1.0 and removed in 2.0, so NumPy is called
# through its own `np` name instead.
np.random.seed(0)  # fixed seed so the random frame is reproducible
d1 = pd.DataFrame(np.random.rand(2, 4))
print('d1:\n', d1)
# Overwrite the top-left cell with a missing value
d1.loc[0, 0] = np.nan
print('d1:\n', d1)
# Calculate memory usage
# info() prints its summary itself; memory_usage='deep' asks pandas to
# introspect the stored objects instead of estimating their size.
df.info(memory_usage='deep')
# Memory used by each column, in bytes. memory_usage() only returns a Series,
# so print it explicitly - as a bare expression it would be silently discarded
# when this file runs as a script.
print(df.memory_usage(deep=True))
# Convert one set of values to another
# factorize() returns a (codes, uniques) pair; only the integer codes are
# stored, as a new column 'c' alongside the source column 'a'.
codes, _uniques = df['a'].factorize()
df['c'] = codes
print(df)
# Build a datetime column from separate month / day / year columns
df = pd.DataFrame({
    'month': [12, 11],
    'day': [25, 28],
    'year': [2019, 2019],
    'holiday': ['christmas', 'thanksgiving'],
})
print(df)
# to_datetime() assembles one timestamp per row from the three component columns
date_parts = df[['month', 'day', 'year']]
df['date'] = pd.to_datetime(date_parts)
print(df)
# Create example DataFrames
# NOTE: this section originally called the `pd.util.testing.make*` factories.
# That module was deprecated in pandas 1.0 and removed in 2.0, so equivalent
# frames are built explicitly here.
rng = np.random.default_rng(0)  # seeded so the examples are reproducible
abcd_columns = list('ABCD')

# Plain numeric frame (stand-in for makeDataFrame)
example_df = pd.DataFrame(rng.standard_normal((30, 4)), columns=abcd_columns)
print(example_df.head())

# The same frame with roughly 10% of its cells blanked out
# (stand-in for makeMissingDataframe)
missing_df = example_df.mask(rng.random(example_df.shape) > 0.9)
print(missing_df.head())

# Business-day time-indexed frame (stand-in for makeTimeDataFrame)
time_df = pd.DataFrame(
    rng.standard_normal((30, 4)),
    columns=abcd_columns,
    index=pd.bdate_range('2000-01-01', periods=30),
)

# Resample on the DatetimeIndex: month-end mean of column A. An offset object
# is passed instead of the 'M' alias, which pandas 2.2 deprecated in favour of
# 'ME'; the object form works on both older and newer releases.
df = time_df.head()
monthly_mean = df.resample(pd.offsets.MonthEnd())['A'].mean()
print(monthly_mean)

# Resample on an ordinary column instead: reset_index() moves the unnamed
# dates into a column called 'index', and `on=` points resample() at it.
df = time_df.head().reset_index()
daily_mean = df.resample('D', on='index')['A'].mean()
print(daily_mean)
# Save the frame as compressed CSV
# One file is written per archive format; only the file name differs between
# the calls, and no explicit compression argument is passed.
for csv_target in (
    'dataframe.csv.zip',
    'dataframe.csv.gz',
    'dataframe.csv.bz2',
    'dataframe.csv.xz',
):
    df.to_csv(csv_target)
# Fill missing values using interpolation
df = pd.DataFrame({'a': [100, 120, 130, np.nan, 140], 'b': [9, 9, np.nan, 7.5, 6.5]})
df.index = pd.to_datetime(['2019-01', '2019-02', '2019-03', '2019-04', '2019-05'])
# Show the frame with its gaps; a bare `df` expression displays nothing when
# this file runs as a script, so print it like the earlier sections do.
print(df)
# interpolate() returns a new frame and leaves `df` untouched, so keep the
# result and print it rather than discarding it.
df_interpolated = df.interpolate()
print(df_interpolated)
# Check for duplicate merge keys
left = pd.DataFrame({'color': ['green', 'yellow', 'red'], 'num': [1, 2, 3]})
print(left)
right = pd.DataFrame({'color': ['green', 'yellow', 'pink', 'green'], 'size': ['S', 'M', 'L', 'XL']})
print(right)
# validate='one_to_many' makes merge() raise if the join key is duplicated in
# the left frame; here 'color' is unique on the left and repeated ('green')
# only on the right, so the merge passes. The shared key is named explicitly
# and the result is kept and printed instead of being discarded.
merged = pd.merge(left, right, how='inner', on='color', validate='one_to_many')
print(merged)
# Other ways to create sample data sets
# NOTE: this originally inspected `pd.util.testing`, which was deprecated in
# pandas 1.0 and removed in 2.0. The helpers moved to the private
# `pandas._testing` module, and many `make*` factories were pruned from it in
# later releases, so the list may be short or empty on a recent pandas.
import pandas._testing as tm

make_helpers = [name for name in dir(tm) if name.startswith('make')]
print(make_helpers)

标签:df,tricks,testing,nan,np,pd,print,pandas
来源: https://www.cnblogs.com/liyiyu/p/16268864.html