SQL Pandas
select
*
from table_name
df
select
*
from table_name
limit 3
df.head(3)
select
col_name_1
from table_name
where col_name_2 = 'value'
df[df.col_name_2 == 'value'].col_name_1
select
distinct col_name_1
from table_name
df.col_name_1.unique()
select
*
from table_name
where col_name_1 = 'val_1' and col_name_2 = 'val_2'
df[(df.col_name_1 == 'val_1') & (df.col_name_2 == 'val_2')]
select
col_name_1, col_name_3, col_name_3
from table_name
where col_name_4 = 'val_1' and col_name_5 = 'val_2'
df[(df.col_name_4 == 'val_1') & (df.col_name_5 == 'val_2')][['col_name_1', 'col_name_2', 'col_name_3']]
select
*
from table_name
where col_name_1 = 'value'
order by col_name_2
df[df.col_name_1 == 'value'].sort_values('col_name_2')
select
*
from table_name
where col_name_1 = 'value'
order by col_name_2 desc
df[df.col_name_1 == 'value'].sort_values('col_name_2', ascending=False)
select
*
from table_name
where col_name in ('val_1', 'val_2')
df[df.col_name.isin(['val_1', 'val_2'])]
select
*
from table_name
where col_name not in ('val_1', 'val_2')
df[~df.col_name.isin(['val_1', 'val_2'])]
select
col_name_1, col_name_2, count(*)
from table_name
group by col_name_1, col_name_2
order by col_name_1, col_name_2
df.groupby(['col_name_1', 'col_name_2']).size()
select
col_name_1, col_name_2, count(*)
from table_name
group by col_name_1, col_name_2
order by col_name_1, count(*) desc
df.groupby(['col_name_1', 'col_name_2']).size().to_frame('size').reset_index().sort_values(['col_name_1', 'size'], ascending=[True, False])
select
col_name_1, count(*)
from table_name
where col_name_2 = 'val_1'
group by col_name_1
having count(*) > 1000
order by count(*) desc
df[df.col_name_2 == 'val_1'].groupby('col_name_1').filter(lambda g: len(g) > 1000).groupby('col_name_1').size().sort_values(ascending=False)
select
col_name
from table_name
order by size
desc limit 10
df.nlargest(10, columns='col_name')
select
col_name
from table_name
order by size
desc limit 10 offset 10
df.nlargest(20, columns='col_name').tail(10)
select
max(col_name), min(col_name), mean(col_name), median(col_name)
from table_name
df.agg({'col_name': ['min', 'max', 'mean', 'median']})
select
col_name_1, col_name_2, col_name_3, col_name_4
from table_name_1
join table_name_2
on table_name_1.col_name_id_1 = table_name_2.col_name_id_2
where table_name_2.col_name = 'val'
df1.merge(df2[df2.col_name == 'val'][['col_name_id_2']], left_on='col_name_id_1', right_on='col_name_id_2', how='inner')[['col_name_1', 'col_name_2', 'col_name_3', 'col_name_4']]
create table table_name (col_name_1 integer, col_name_2 text);
insert into table_name values (1, 'val_1');
insert into table_name values (2, 'val_2');
insert into table_name values (3, 'val_3');
df1 = pd.DataFrame({'id': [1, 2], 'name': ['val_1', 'val_2']})
df2 = pd.DataFrame({'id': [3], 'name': ['val_3']})
pd.concat([df1, df2]).reset_index(drop=True)
update
table_name
set col_name_1 = 'val_1'
where col_name_2 == 'val_2'
df.loc[df['col_name_2'] == 'val_2', 'col_name_1'] = 'val_1'
delete
from table_name
where col_name = 'val'
df = df[df.col_name != 'val'
df.drop(df[df.col_name == 'val'].index)