python基礎知識之索引與切片詳解

Posted on 2022-05-14 by WalkonNet

基本索引

In [4]: sentence = 'You are a nice girl'

In [5]: L = sentence.split()

In [6]: L
Out[6]: ['You', 'are', 'a', 'nice', 'girl']

# 從0開始索引
In [7]: L[2]
Out[7]: 'a'

# 負數索引，從列表右側開始計數
In [8]: L[-2]
Out[8]: 'nice'

# -1表示列表最後一項
In [9]: L[-1]
Out[9]: 'girl'

# 當正整數索引超過範圍時
In [10]: L[100]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-10-78da2f882365> in <module>()
----> 1 L[100]

IndexError: list index out of range

# 當負整數索引超過範圍時
In [11]: L[-100]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-11-46b47b0ecb55> in <module>()
----> 1 L[-100]

IndexError: list index out of range

# slice 索引
In [193]: sl = slice(0,-1,1)

In [194]: L[sl]
Out[194]: ['You', 'are', 'a', 'nice']

In [199]: sl = slice(0,100)

In [200]: L[sl]
Out[200]: ['You', 'are', 'a', 'nice', 'girl']

嵌套索引

In [14]: L = [[1,2,3],{'I':'You are a nice girl','She':'Thank you!'},(11,22),'My name is Kyles']

In [15]: L
Out[15]:
[[1, 2, 3],
 {'I': 'You are a nice girl', 'She': 'Thank you!'},
 (11, 22),
 'My name is Kyles']

# 索引第1項，索引為0
In [16]: L[0]
Out[16]: [1, 2, 3]

# 索引第1項的第2子項
In [17]: L[0][1]
Out[17]: 2

# 索引第2項詞典
In [18]: L[1]
Out[18]: {'I': 'You are a nice girl', 'She': 'Thank you!'}

# 索引第2項詞典的 “She”
In [19]: L[1]['She']
Out[19]: 'Thank you!'

# 索引第3項
In [20]: L[2]
Out[20]: (11, 22)

# 索引第3項（元組）的第1個元素
In [22]: L[2][0]
Out[22]: 11

# 索引第4項
In [23]: L[3]
Out[23]: 'My name is Kyles'

# 索引第4項，前3個字符
In [24]: L[3][:3]
Out[24]: 'My '

切片

# 註：以下 L 為「基本索引」一節中的 ['You', 'are', 'a', 'nice', 'girl']

# 切片選擇,從1到列表末尾
In [13]: L[1:]
Out[13]: ['are', 'a', 'nice', 'girl']

# 負數索引，選取列表後兩項
In [28]: L[-2:]
Out[28]: ['nice', 'girl']

# 異常測試,這裡沒有報錯！
In [29]: L[-100:]
Out[29]: ['You', 'are', 'a', 'nice', 'girl']

# 返回空
In [30]: L[-100:-200]
Out[30]: []

# 正向索引
In [32]: L[-100:3]
Out[32]: ['You', 'are', 'a']

# 返回空
In [33]: L[-1:3]
Out[33]: []

# 返回空
In [41]: L[0:0]
Out[41]: []

看似簡單的索引，有的人不以為然。我們這裡採用精準的數字索引，很容易排查錯誤；若索引是經過計算得出的一個變量，就千萬要小心了，否則失之毫釐，差之千里。

numpy.array 索引一維

In [34]: import numpy as np

In [35]: arr = np.arange(10)

In [36]: arr
Out[36]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [40]: arr.shape
Out[40]: (10,)

# [0,1)
In [37]: arr[0:1]
Out[37]: array([0])

# [0,0)
In [38]: arr[0:0]
Out[38]: array([], dtype=int32)

# 右側超出範圍之後
In [42]: arr[:1000]
Out[42]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# 左、右兩側都超出範圍之後
In [43]: arr[-100:1000]
Out[43]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# 起點、終點都超出右側範圍，返回空 []
In [44]: arr[100:101]
Out[44]: array([], dtype=int32)

# 左側超出範圍，右側為負數索引，相當於 arr[0:8]
In [45]: arr[-100:-2]
Out[45]: array([0, 1, 2, 3, 4, 5, 6, 7])

# 起點、終點都超出左側範圍，返回空 []
In [46]: arr[-100:-50]
Out[46]: array([], dtype=int32)

numpy.array 索引二維

In [49]: arr = np.arange(15).reshape(3,5)

In [50]: arr
Out[50]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [51]: arr.shape
Out[51]: (3, 5)

# axis = 0 增長的方向
In [52]: arr[0]
Out[52]: array([0, 1, 2, 3, 4])

# 選取第2行
In [53]: arr[1]
Out[53]: array([5, 6, 7, 8, 9])

# axis = 1 增長的方向，選取每一行的第1列
In [54]: arr[:,0]
Out[54]: array([ 0,  5, 10])

# axis = 1 增長的方向，選取每一行的第2列
In [55]: arr[:,1]
Out[55]: array([ 1,  6, 11])


# 選取每一行的第1,2列
In [56]: arr[:,0:2]
Out[56]:
array([[ 0,  1],
       [ 5,  6],
       [10, 11]])

# 右側超出範圍之後
In [57]: arr[:,0:100]
Out[57]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

# 左側超出範圍之後
In [62]: arr[:,-10:2]
Out[62]:
array([[ 0,  1],
       [ 5,  6],
       [10, 11]])

# []
In [58]: arr[:,0:0]
Out[58]: array([], shape=(3, 0), dtype=int32)

# []
In [59]: arr[0:0,0:1]
Out[59]: array([], shape=(0, 1), dtype=int32)

# 異常
In [63]: arr[:,-10]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-63-2ffa6627dc7f> in <module>()
----> 1 arr[:,-10]

IndexError: index -10 is out of bounds for axis 1 with size 5

numpy.array 索引三維…N維

In [67]: import numpy as np

In [68]: arr = np.arange(30).reshape(2,3,5)

In [69]: arr
Out[69]:
array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14]],

       [[15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29]]])

# 根據 axis = 0 選取
In [70]: arr[0]
Out[70]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [71]: arr[1]
Out[71]:
array([[15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29]])

# 根據 axis = 1 選取
In [72]: arr[:,0]
Out[72]:
array([[ 0,  1,  2,  3,  4],
       [15, 16, 17, 18, 19]])

In [73]: arr[:,1]
Out[73]:
array([[ 5,  6,  7,  8,  9],
       [20, 21, 22, 23, 24]])

# 異常指出 axis = 1 超出範圍
In [74]: arr[:,4]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-74-9d489478e7c7> in <module>()
----> 1 arr[:,4]

IndexError: index 4 is out of bounds for axis 1 with size 3

# 根據 axis = 2 選取
In [75]: arr[:,:,0]
Out[75]:
array([[ 0,  5, 10],
       [15, 20, 25]])

# 降維
In [76]: arr[:,:,0].shape
Out[76]: (2, 3)

In [78]: arr[:,:,0:2]
Out[78]:
array([[[ 0,  1],
        [ 5,  6],
        [10, 11]],

       [[15, 16],
        [20, 21],
        [25, 26]]])

In [79]: arr[:,:,0:2].shape
Out[79]: (2, 3, 2)

# 左/右側超出範圍
In [81]: arr[:,:,0:100]
Out[81]:
array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14]],

       [[15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29]]])

# 異常 axis = 0
In [82]: arr[100,:,0:100]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-82-21efcc74439d> in <module>()
----> 1 arr[100,:,0:100]

IndexError: index 100 is out of bounds for axis 0 with size 2

pandas Series 索引

In [84]: s = pd.Series(['You','are','a','nice','girl'])

In [85]: s
Out[85]:
0     You
1     are
2       a
3    nice
4    girl
dtype: object

# 按照索引選擇
In [86]: s[0]
Out[86]: 'You'

# []
In [87]: s[0:0]
Out[87]: Series([], dtype: object)

In [88]: s[0:-1]
Out[88]:
0     You
1     are
2       a
3    nice
dtype: object

# 易錯點,ix包含區間為 []
In [91]: s.ix[0:0]
Out[91]:
0    You
dtype: object

In [92]: s.ix[0:1]
Out[92]:
0    You
1    are
dtype: object

# ix索引不存在index
In [95]: s.ix[400]
KeyError: 400

# 按照從0開始的索引
In [95]: s.iloc[0]
Out[95]: 'You'

In [96]: s.iloc[1]
Out[96]: 'are'

In [97]: s.iloc[100]
IndexError: single positional indexer is out-of-bounds

In [98]: s = pd.Series(['You','are','a','nice','girl'], index=list('abcde'))

In [99]: s
Out[99]:
a     You
b     are
c       a
d    nice
e    girl
dtype: object

In [100]: s.iloc[0]
Out[100]: 'You'

In [101]: s.iloc[1]
Out[101]: 'are'

# 按照 label 索引
In [103]: s.loc['a']
Out[103]: 'You'

In [104]: s.loc['b']
Out[104]: 'are'

In [105]: s.loc[['b','a']]
Out[105]:
b    are
a    You
dtype: object

# loc切片索引
In [106]: s.loc['a':'c']
Out[106]:
a    You
b    are
c      a
dtype: object

In [108]: s.index
Out[108]: Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

pandas DataFrame 索引

In [114]: import pandas as pd

In [115]: df = pd.DataFrame({'open':[1,2,3],'high':[4,5,6],'low':[6,3,1]}, index=pd.period_range('30/12/2017',periods=3,freq='H'))

In [116]: df
Out[116]:
                  high  low  open
2017-12-30 00:00     4    6     1
2017-12-30 01:00     5    3     2
2017-12-30 02:00     6    1     3

# 按列索引
In [117]: df['high']
Out[117]:
2017-12-30 00:00    4
2017-12-30 01:00    5
2017-12-30 02:00    6
Freq: H, Name: high, dtype: int64

In [118]: df.high
Out[118]:
2017-12-30 00:00    4
2017-12-30 01:00    5
2017-12-30 02:00    6
Freq: H, Name: high, dtype: int64

In [120]: df[['high','open']]
Out[120]:
                  high  open
2017-12-30 00:00     4     1
2017-12-30 01:00     5     2
2017-12-30 02:00     6     3

In [122]: df.ix[:]
D:\CodeTool\Python\Python36\Scripts\ipython:1: DeprecationWarning:
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

In [123]: df.iloc[0:0]
Out[123]:
Empty DataFrame
Columns: [high, low, open]
Index: []

In [124]: df.ix[0:0]
Out[124]:
Empty DataFrame
Columns: [high, low, open]
Index: []

# 按照 label 索引
In [127]: df.index
Out[127]: PeriodIndex(['2017-12-30 00:00', '2017-12-30 01:00', '2017-12-30 02:00'], dtype='period[H]', freq='H')

In [128]: df.loc['2017-12-30 00:00']
Out[128]:
high    4
low     6
open    1
Name: 2017-12-30 00:00, dtype: int64

# 檢查參數
In [155]: df.loc['2017-12-30 00:00:11']
Out[155]:
high    4
low     6
open    1
Name: 2017-12-30 00:00, dtype: int64

In [156]: df.loc['2017-12-30 00:00:66']
KeyError: 'the label [2017-12-30 00:00:66] is not in the [index]'

填坑

In [158]: df = pd.DataFrame({'a':[1,2,3],'b':[4,5,6]}, index=[2,3,4])

In [159]: df
Out[159]:
   a  b
2  1  4
3  2  5
4  3  6

# iloc 取第一行正確用法
In [160]: df.iloc[0]
Out[160]:
a    1
b    4
Name: 2, dtype: int64

# loc 正確用法
In [165]: df.loc[[2,3]]
Out[165]:
   a  b
2  1  4
3  2  5

# 注意此處 index 是什麼類型
In [167]: df.loc['2']
KeyError: 'the label [2] is not in the [index]'

# 索引 Int64Index
In [172]: df.index
Out[172]: Int64Index([2, 3, 4], dtype='int64')

# 索引為字符串
In [168]: df = pd.DataFrame({'a':[1,2,3],'b':[4,5,6]}, index=list('234'))

In [169]: df
Out[169]:
   a  b
2  1  4
3  2  5
4  3  6

In [170]: df.index
Out[170]: Index(['2', '3', '4'], dtype='object')

# 此處沒有報錯，千萬注意 index 類型
In [176]: df.loc['2']
Out[176]:
a    1
b    4
Name: 2, dtype: int64

# ix 是一個功能強大的函數，但是爭議卻很大，往往是錯誤之源
# 咦，怎麼輸出與預想不一致！
# （index 是字符串，整數 2 不是 label，ix 退回按位置索引，返回的是第 3 行，即 label 為 '4' 的那一行）
In [177]: df.ix[2]
D:\CodeTool\Python\Python36\Scripts\ipython:1: DeprecationWarning:
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated

Out[177]:
a    3
b    6
Name: 4, dtype: int64

# 注意開閉區間
In [180]: df.loc['2':'3']
Out[180]:
   a  b
2  1  4
3  2  5

總結

pandas 中 ix 是錯誤之源，大型項目大量使用它時，往往造成不可預料的後果。0.20.x 版本已將其標記為棄用（deprecated）：它存在二義性（既可能按 label 也可能按位置索引），且切片為 [] 閉區間，違背 “Explicit is better than implicit.” 原則。建議使用意義明確的 iloc 和 loc。

當使用字符串（label）切片時是 [] 閉區間（兩端都包含），一般按位置切片則是 [) 左閉右開區間。

當 numpy.ndarray、list、tuple、pandas.Series、pandas.DataFrame 混合使用時，採用變量進行索引或者切割，取值或賦值時，別太自信了，千萬小心錯誤，需要大量的測試。

我在工程中將 matlab 的矩陣和 python 的以上對象混合使用時，出現最多的問題就是 shape 不對應，以及 index、columns 錯誤。

最好不要混用不同數據結構，容易出錯，更增加轉化的性能開銷

到此這篇關於 python 基礎知識之索引與切片的文章就介紹到這了，更多相關 python 索引與切片內容請搜索 WalkonNet 以前的文章或繼續瀏覽下面的相關文章，希望大家以後多多支持 WalkonNet！

python基礎知識之索引與切片詳解

目錄

基本索引

嵌套索引

切片

numpy.array 索引一維

numpy.array 索引二維

pandas Series 索引

pandas DataFrame 索引

填坑

總結

推薦閱讀：

發佈留言取消回覆

近期文章

目錄

基本索引

嵌套索引

切片

numpy.array 索引 一維

numpy.array 索引 二維

pandas Series 索引

pandas DataFrame 索引

填坑

總結

推薦閱讀：

發佈留言 取消回覆

近期文章

標籤

numpy.array 索引一維

numpy.array 索引二維

發佈留言取消回覆