python基礎知識之索引與切片詳解
基本索引
In [4]: sentence = 'You are a nice girl'
In [5]: L = sentence.split()
In [6]: L
Out[6]: ['You', 'are', 'a', 'nice', 'girl']
# 從0開始索引
In [7]: L[2]
Out[7]: 'a'
# 負數索引,從列表右側開始計數
In [8]: L[-2]
Out[8]: 'nice'
# -1表示列表最後一項
In [9]: L[-1]
Out[9]: 'girl'
# 當正整數索引超過範圍時
In [10]: L[100]
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-10-78da2f882365> in <module>()
----> 1 L[100]
IndexError: list index out of range
# 當負整數索引超過範圍時
In [11]: L[-100]
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-11-46b47b0ecb55> in <module>()
----> 1 L[-100]
IndexError: list index out of range
# slice 索引
In [193]: sl = slice(0,-1,1)
In [194]: L[sl]
Out[194]: ['You', 'are', 'a', 'nice']
In [199]: sl = slice(0,100)
In [200]: L[sl]
Out[200]: ['You', 'are', 'a', 'nice', 'girl']
嵌套索引
In [14]: L = [[1,2,3],{'I':'You are a nice girl','She':'Thank you!'},(11,22),'My name is Kyles']
In [15]: L
Out[15]: [[1, 2, 3], {'I': 'You are a nice girl', 'She': 'Thank you!'}, (11, 22), 'My name is Kyles']
# 索引第1項,索引為0
In [16]: L[0]
Out[16]: [1, 2, 3]
# 索引第1項的第2子項
In [17]: L[0][1]
Out[17]: 2
# 索引第2項詞典
In [18]: L[1]
Out[18]: {'I': 'You are a nice girl', 'She': 'Thank you!'}
# 索引第2項詞典的 “She”
In [19]: L[1]['She']
Out[19]: 'Thank you!'
# 索引第3項
In [20]: L[2]
Out[20]: (11, 22)
# 索引第3項(元組)的第一個元素
In [22]: L[2][0]
Out[22]: 11
# 索引第4項
In [23]: L[3]
Out[23]: 'My name is Kyles'
# 索引第4項,前3個字符
In [24]: L[3][:3]
Out[24]: 'My '
切片
# 切片選擇,從1到列表末尾In [13]: L[1:]Out[13]: ['are', 'a', 'nice', 'girl']# 負數索引,選取列表後兩項In [28]: L[-2:]Out[28]: ['nice', 'girl']# 異常測試,這裡沒有報錯!In [29]: L[-100:]Out[29]: ['You', 'are', 'a', 'nice', 'girl']# 返回空In [30]: L[-100:-200]Out[30]: []# 正向索引In [32]: L[-100:3]Out[32]: ['You', 'are', 'a']# 返回空In [33]: L[-1:3]Out[33]: []# 返回空In [41]: L[0:0]Out[41]: []
看似簡單的索引,有的人不以為然。我們這裡採用精準的數字索引,很容易排查錯誤;若索引是經過計算得出的一個變量,就千萬要小心了,否則失之毫釐,差之千里。
numpy.array 索引 一維
In [34]: import numpy as np
In [35]: arr = np.arange(10)
In [36]: arr
Out[36]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [40]: arr.shape
Out[40]: (10,)
# [0,1)
In [37]: arr[0:1]
Out[37]: array([0])
# [0,0)
In [38]: arr[0:0]
Out[38]: array([], dtype=int32)
# 右側超出範圍之後
In [42]: arr[:1000]
Out[42]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# 左側超出之後
In [43]: arr[-100:1000]
Out[43]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# 兩側都超出
In [44]: arr[100:101]
Out[44]: array([], dtype=int32)
# 左側超出範圍、右側為負數索引,結果等同於 arr[:-2]
In [45]: arr[-100:-2]
Out[45]: array([0, 1, 2, 3, 4, 5, 6, 7])
# 兩側都超出左邊界,返回空 []
In [46]: arr[-100:-50]
Out[46]: array([], dtype=int32)
numpy.array 索引 二維
In [49]: arr = np.arange(15).reshape(3,5) In [50]: arr Out[50]: array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14]]) In [51]: arr.shape Out[51]: (3, 5) # axis = 0 增長的方向 In [52]: arr[0] Out[52]: array([0, 1, 2, 3, 4]) # 選取第2行 In [53]: arr[1] Out[53]: array([5, 6, 7, 8, 9]) # axis = 1 增長的方向,選取每一行的第1列 In [54]: arr[:,0] Out[54]: array([ 0, 5, 10]) # axis = 1 增長的方向,選取每一行的第2列 In [55]: arr[:,1] Out[55]: array([ 1, 6, 11]) # 選取每一行的第1,2列 In [56]: arr[:,0:2] Out[56]: array([[ 0, 1], [ 5, 6], [10, 11]]) # 右側超出范圍之後 In [57]: arr[:,0:100] Out[57]: array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14]]) # 左側超出范圍之後 In [62]: arr[:,-10:2] Out[62]: array([[ 0, 1], [ 5, 6], [10, 11]]) # [] In [58]: arr[:,0:0] Out[58]: array([], shape=(3, 0), dtype=int32) # [] In [59]: arr[0:0,0:1] Out[59]: array([], shape=(0, 1), dtype=int32) # 異常 In [63]: arr[:,-10]---------------------------------------------------------------------------IndexError Traceback (most recent call last) <ipython-input-63-2ffa6627dc7f> in <module>()----> 1 arr[:,-10]IndexError: index -10 is out of bounds for axis 1 with size 5
numpy.array 索引 三維…N維
In [67]: import numpy as np In [68]: arr = np.arange(30).reshape(2,3,5) In [69]: arr Out[69]: array([[[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14]], [[15, 16, 17, 18, 19], [20, 21, 22, 23, 24], [25, 26, 27, 28, 29]]]) # 根據 axis = 0 選取 In [70]: arr[0] Out[70]: array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14]]) In [71]: arr[1] Out[71]: array([[15, 16, 17, 18, 19], [20, 21, 22, 23, 24], [25, 26, 27, 28, 29]]) # 根據 axis = 1 選取 In [72]: arr[:,0] Out[72]: array([[ 0, 1, 2, 3, 4], [15, 16, 17, 18, 19]]) In [73]: arr[:,1] Out[73]: array([[ 5, 6, 7, 8, 9], [20, 21, 22, 23, 24]]) # 異常指出 axis = 1 超出范圍 In [74]: arr[:,4]---------------------------------------------------------------------------IndexError Traceback (most recent call last) <ipython-input-74-9d489478e7c7> in <module>()----> 1 arr[:,4]IndexError: index 4 is out of bounds for axis 1 with size 3 # 根據 axis = 2 選取 In [75]: arr[:,:,0] Out[75]: array([[ 0, 5, 10], [15, 20, 25]]) # 降維 In [76]: arr[:,:,0].shape Out[76]: (2, 3) In [78]: arr[:,:,0:2] Out[78]: array([[[ 0, 1], [ 5, 6], [10, 11]], [[15, 16], [20, 21], [25, 26]]]) In [79]: arr[:,:,0:2].shape Out[79]: (2, 3, 2) # 左/右側超出范圍 In [81]: arr[:,:,0:100] Out[81]: array([[[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14]], [[15, 16, 17, 18, 19], [20, 21, 22, 23, 24], [25, 26, 27, 28, 29]]]) # 異常 axis = 0In [82]: arr[100,:,0:100]---------------------------------------------------------------------------IndexError Traceback (most recent call last) <ipython-input-82-21efcc74439d> in <module>()----> 1 arr[100,:,0:100]IndexError: index 100 is out of bounds for axis 0 with size 2
pandas Series 索引
In [84]: s = pd.Series(['You','are','a','nice','girl'])In [85]: sOut[85]:0 You1 are2 a3 nice4 girl dtype: object# 按照索引選擇In [86]: s[0]Out[86]: 'You'# []In [87]: s[0:0]Out[87]: Series([], dtype: object)In [88]: s[0:-1]Out[88]:0 You1 are2 a3 nice dtype: object# 易錯點,ix包含區間為 []In [91]: s.ix[0:0]Out[91]:0 You dtype: objectIn [92]: s.ix[0:1]Out[92]:0 You1 are dtype: object# ix索引不存在indexIn [95]: s.ix[400] KeyError: 400# 按照從0開始的索引In [95]: s.iloc[0]Out[95]: 'You'In [96]: s.iloc[1]Out[96]: 'are'In [97]: s.iloc[100] IndexError: single positional indexer is out-of-boundsIn [98]: s = pd.Series(['You','are','a','nice','girl'], index=list('abcde'))In [99]: sOut[99]: a You b are c a d nice e girl dtype: objectIn [100]: s.iloc[0]Out[100]: 'You'In [101]: s.iloc[1]Out[101]: 'are'# 按照 label 索引In [103]: s.loc['a']Out[103]: 'You'In [104]: s.loc['b']Out[104]: 'are'In [105]: s.loc[['b','a']]Out[105]: b are a You dtype: object# loc切片索引In [106]: s.loc['a':'c']Out[106]: a You b are c a dtype: objectIn [108]: s.indexOut[108]: Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
pandas DataFrame 索引
In [114]: import pandas as pdIn [115]: df = pd.DataFrame({'open':[1,2,3],'high':[4,5,6],'low':[6,3,1]}, index=pd.period_range('30/12/2017',perio ...: ds=3,freq='H'))In [116]: dfOut[116]: high low open2017-12-30 00:00 4 6 12017-12-30 01:00 5 3 22017-12-30 02:00 6 1 3# 按列索引In [117]: df['high']Out[117]:2017-12-30 00:00 42017-12-30 01:00 52017-12-30 02:00 6Freq: H, Name: high, dtype: int64In [118]: df.highOut[118]:2017-12-30 00:00 42017-12-30 01:00 52017-12-30 02:00 6Freq: H, Name: high, dtype: int64In [120]: df[['high','open']]Out[120]: high open2017-12-30 00:00 4 12017-12-30 01:00 5 22017-12-30 02:00 6 3In [122]: df.ix[:] D:\CodeTool\Python\Python36\Scripts\ipython:1: DeprecationWarning: .ix is deprecated. Please use .loc for label based indexing or.iloc for positional indexingIn [123]: df.iloc[0:0]Out[123]:Empty DataFrame Columns: [high, low, open]Index: []In [124]: df.ix[0:0]Out[124]:Empty DataFrame Columns: [high, low, open]Index: [] # 按照 label 索引In [127]: df.indexOut[127]: PeriodIndex(['2017-12-30 00:00', '2017-12-30 01:00', '2017-12-30 02:00'], dtype='period[H]', freq='H')In [128]: df.loc['2017-12-30 00:00']Out[128]: high 4low 6open 1Name: 2017-12-30 00:00, dtype: int64 # 檢查參數In [155]: df.loc['2017-12-30 00:00:11']Out[155]: high 4low 6open 1Name: 2017-12-30 00:00, dtype: int64In [156]: df.loc['2017-12-30 00:00:66'] KeyError: 'the label [2017-12-30 00:00:66] is not in the [index]'
填坑
In [158]: df = pd.DataFrame({'a':[1,2,3],'b':[4,5,6]}, index=[2,3,4])In [159]: dfOut[159]: a b2 1 43 2 54 3 6# iloc 取第一行正確用法In [160]: df.iloc[0]Out[160]: a 1b 4Name: 2, dtype: int64 # loc 正確用法In [165]: df.loc[[2,3]]Out[165]: a b2 1 43 2 5# 註意此處 index 是什麼類型In [167]: df.loc['2'] KeyError: 'the label [2] is not in the [index]'# 索引 Int64IndexOut[172]: Int64Index([2, 3, 4], dtype='int64') # 索引為字符串In [168]: df = pd.DataFrame({'a':[1,2,3],'b':[4,5,6]}, index=list('234'))In [169]: dfOut[169]: a b2 1 43 2 54 3 6In [170]: df.indexOut[170]: Index(['2', '3', '4'], dtype='object') # 此處沒有報錯,千萬註意 index 類型In [176]: df.loc['2']Out[176]: a 1b 4Name: 2, dtype: int64 # ix 是一個功能強大的函數,但是爭議卻很大,往往是錯誤之源 # 咦,怎麼輸出與預想不一致!In [177]: df.ix[2] D:\CodeTool\Python\Python36\Scripts\ipython:1: DeprecationWarning: .ix is deprecated. Please use .loc for label based indexing or.iloc for positional indexing See the documentation here: http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecatedOut[177]: a 3b 6Name: 4, dtype: int64 # 註意開閉區間In [180]: df.loc['2':'3']Out[180]: a b2 1 43 2 5
總結
pandas 中 ix 是錯誤之源,大型項目大量使用它時,往往造成不可預料的後果。0.20.x 版本已將其標記為棄用(deprecated),並在 1.0 版本中正式移除:它既可能按 label 索引、又可能按位置索引,存在二義性,而且切片採用 [] 閉區間,違背 “Explicit is better than implicit.” 原則。建議使用意義明確的 iloc(按位置)和 loc(按 label)。
當使用字符串(label)進行切片時(例如 loc['a':'c']),是 [] 閉區間,兩端都包含;其他按位置的切片一般是 [) 左閉右開區間。
當 numpy.ndarray、list、tuple、pandas.Series、pandas.DataFrame 混合使用時,若採用變量進行索引或者切割來取值或賦值,別太自信了,千萬小心錯誤,需要大量的測試。
我在工程中將 matlab 的矩陣與 python 的以上對象混合使用時,出現最多的問題就是 shape 不對應,以及 index、columns 錯誤。
最好不要混用不同的數據結構:不僅容易出錯,還會增加轉換的性能開銷。
到此這篇關於python基礎知識之索引與切片的文章就介紹到這了,更多相關python索引與切片內容請搜索WalkonNet以前的文章或繼續瀏覽下面的相關文章。希望大家以後多多支持WalkonNet!
推薦閱讀:
- Python Pandas學習之Pandas數據結構詳解
- 利用Pandas索引和選取數據方法詳解
- Python數據分析 Pandas Series對象操作
- python數學建模之三大模型與十大常用算法詳情
- IPython 8.0 Python 命令行交互工具