import pandas as pd
# Sample records: index 1 and index 4 hold the same values in every column.
data = dict(
    name=["John", "Mary", "John", "Sally", "Mary"],
    age=[40, 30, 40, 50, 30],
    city=["Bergen", "Oslo", "Stavanger", "Oslo", "Oslo"],
)
df = pd.DataFrame(data=data)

# subset=None compares whole rows; keep=False flags every member of a
# duplicate group, including its first occurrence.
s = df.duplicated(subset=None, keep=False)
print(s)
# With the keep parameter set to False, the first occurrence of a duplicate is also marked True.
# Row 1 returns True because it is identical to row 4.
# Row 4 returns True because it is identical to row 1.
# Output:
# 0    False
# 1     True
# 2    False
# 3    False
# 4     True
# dtype: bool