SQL | Pandas |
---|---|
select * from table_name |
df |
select * from table_name limit 3 |
df.head(3) |
select col_name_1 from table_name where col_name_2 = 'value' |
df[df.col_name_2 == 'value'].col_name_1 |
select distinct col_name_1 from table_name |
df.col_name_1.unique() |
select * from table_name where col_name_1 = 'val_1' and col_name_2 = 'val_2' |
df[(df.col_name_1 == 'val_1') & (df.col_name_2 == 'val_2')] |
select col_name_1, col_name_3, col_name_3 from table_name where col_name_4 = 'val_1' and col_name_5 = 'val_2' |
df[(df.col_name_4 == 'val_1') & (df.col_name_5 == 'val_2')][['col_name_1', 'col_name_2', 'col_name_3']] |
select * from table_name where col_name_1 = 'value' order by col_name_2 |
df[df.col_name_1 == 'value'].sort_values('col_name_2') |
select * from table_name where col_name_1 = 'value' order by col_name_2 desc |
df[df.col_name_1 == 'value'].sort_values('col_name_2', ascending=False) |
select * from table_name where col_name in ('val_1', 'val_2') |
df[df.col_name.isin(['val_1', 'val_2'])] |
select * from table_name where col_name not in ('val_1', 'val_2') |
df[~df.col_name.isin(['val_1', 'val_2'])] |
select col_name_1, col_name_2, count(*) from table_name group by col_name_1, col_name_2 order by col_name_1, col_name_2 |
df.groupby(['col_name_1', 'col_name_2']).size() |
select col_name_1, col_name_2, count(*) from table_name group by col_name_1, col_name_2 order by col_name_1, count(*) desc |
df.groupby(['col_name_1', 'col_name_2']).size().to_frame('size').reset_index().sort_values(['col_name_1', 'size'], ascending=[True, False]) |
select col_name_1, count(*) from table_name where col_name_2 = 'val_1' group by col_name_1 having count(*) > 1000 order by count(*) desc |
df[df.col_name_2 == 'val_1'].groupby('col_name_1').filter(lambda g: len(g) > 1000).groupby('col_name_1').size().sort_values(ascending=False) |
select col_name from table_name order by size desc limit 10 |
df.nlargest(10, columns='col_name') |
select col_name from table_name order by size desc limit 10 offset 10 |
df.nlargest(20, columns='col_name').tail(10) |
select max(col_name), min(col_name), mean(col_name), median(col_name) from table_name |
df.agg({'col_name': ['min', 'max', 'mean', 'median']}) |
select col_name_1, col_name_2, col_name_3, col_name_4 from table_name_1 join table_name_2 on table_name_1.col_name_id_1 = table_name_2.col_name_id_2 where table_name_2.col_name = 'val' |
df1.merge(df2[df2.col_name == 'val'][['col_name_id_2']], left_on='col_name_id_1', right_on='col_name_id_2', how='inner')[['col_name_1', 'col_name_2', 'col_name_3', 'col_name_4']] |
create table table_name (col_name_1 integer, col_name_2 text); insert into table_name values (1, 'val_1'); insert into table_name values (2, 'val_2'); insert into table_name values (3, 'val_3'); |
df1 = pd.DataFrame({'id': [1, 2], 'name': ['val_1', 'val_2']}) df2 = pd.DataFrame({'id': [3], 'name': ['val_3']}) pd.concat([df1, df2]).reset_index(drop=True) |
update table_name set col_name_1 = 'val_1' where col_name_2 == 'val_2' |
df.loc[df['col_name_2'] == 'val_2', 'col_name_1'] = 'val_1' |
delete from table_name where col_name = 'val' |
df = df[df.col_name != 'val' df.drop(df[df.col_name == 'val'].index) |