from pandas import Series, DataFrame
import pandas as pd

emp = pd.read_csv("C:\\data\\employees.csv")
emp.info()

SELECT * 
FROM emp
WHERE employee = 100
OR employee_id = 101;
SELECT * 
FROM emp
WHERE employee_id IN (100,101);
(emp['EMPLOYEE_ID'] == 100) or (emp['EMPLOYEE_ID'] == 101)        # 오류 : series
(emp['EMPLOYEE_ID'] == 100) | (emp['EMPLOYEE_ID'] == 101)        # True or False
emp[(emp['EMPLOYEE_ID'] == 100) | (emp['EMPLOYEE_ID'] == 101)]        # 인덱스
emp[emp['EMPLOYEE_ID'].isin([100,101])]


isin()
 : SQL IN 연산자와 동일한 의미(== | ==)



SELECT * 
FROM emp
WHERE employee != 100
AND employee_id != 101;
SELECT * 
FROM emp
WHERE employee_id NOT IN (100,101);
emp[(emp['EMPLOYEE_ID'] != 100) & (emp['EMPLOYEE_ID'] != 101)]
~emp['EMPLOYEE_ID'].isin([100,101])
emp[~emp['EMPLOYEE_ID'].isin([100,101])]


~ isin() : SQL NOT IN 연산자와 동일한 의미(!= & !=)
~ : Not 의미


emp[(emp['JOB_ID'] == 'AD_VP') | (emp['JOB_ID'] == 'AD_PRES')] [['LAST_NAME','SALARY','JOB_ID']]
emp.loc[(emp['JOB_ID'] == 'AD_VP') | (emp['JOB_ID'] == 'AD_PRES'), ['LAST_NAME','SALARY','JOB_ID']]
emp.loc[emp['JOB_ID'].isin(['AD_VP','AD_PRES']), ['LAST_NAME','SALARY','JOB_ID']]

emp[(emp['JOB_ID'] != 'AD_VP') & (emp['JOB_ID'] != 'AD_PRES')] [['LAST_NAME','SALARY','JOB_ID']]
emp.loc[(emp['JOB_ID'] != 'AD_VP') & (emp['JOB_ID'] != 'AD_PRES'), ['LAST_NAME','SALARY','JOB_ID']]
emp.loc[~emp['JOB_ID'].isin(['AD_VP','AD_PRES']), ['LAST_NAME','SALARY','JOB_ID']]

emp.loc[(emp['COMMISSION_PCT'].notnull())&(emp['DEPARTMENT_ID'].isnull()),['EMPLOYEE_ID','COMMISSION_PCT','DEPARTMENT_ID']]

[문제] DEPARTMENT_ID가 50,60 부서 사원들 중 SALARY가 5000 이상인 사원들의
    LAST_NAME, DEPARTMENT_ID, SALARY 출력.
emp[((emp['DEPARTMENT_ID']==50) | (emp['DEPARTMENT_ID']==60)) & (emp['SALARY']>=5000)][['LAST_NAME', 'DEPARTMENT_ID', 'SALARY']]

result = emp.loc[(emp['DEPARTMENT_ID'].isin([50,60]) & (emp['SALARY']>=5000)), ['LAST_NAME', 'DEPARTMENT_ID', 'SALARY']]

result = result.sort_values(by=['DEPARTMENT_ID','SALARY'], ascending=[True,False]).reset_index(drop=True)
        # result.sort_values(부서ID 오름차순, 급여 내림차순).reset_index(열삭제)
result






from pandas import Series, DataFrame
import pandas as pd
import numpy as np
from numpy import nan as NA

■ null, 결측치(값)

s1 = Series([1,2,3,None,5])
s2 = Series([1,2,3,np.NaN,5])
s3 = Series([1,2,3,np.nan,5])
s4 = Series([1,2,3,NA,5])


# isnull() : NaN 체크
s1.isnull()
s2.isnull()
s3.isnull()
s4.isnull()
pd.isnull(s1)
s1[s1.isnull()]

# notnull() : NaN 아닌 체크
s1.notnull()
s2.notnull()
s3.notnull()
s4.notnull()
pd.notnull(s1)
s1[s1.notnull()]

# fillna() : NaN 채우는 방법
s1.fillna(0)    # 결측치를 0으로 채운다

s2.fillna(method='ffill')       # 앞의 있는값으로 채운다
s2.fillna(method='pad')         # 앞에 있는 값으로 채운다

s2.fillna(method='bfill')       # 뒤에 있는 값으로 채운다.
s2.fillna(method='backfill')    # 뒤에 있는 값으로 채운다.


df = DataFrame(data = [[1,2,3],[1,None,np.NaN],[np.nan,NA,NA],[NA,2,3]], columns=['a','b','c'])
df

df.fillna(0)    # NaN 값을 다른 값으로 채우기(미리보기)
df['a'].fillna(0)
df['b'].fillna(0)
df['c'].fillna(0)

df.fillna({'a':10, 'b':20, 'c':30})


# dropna()  : NaN 제거
df.dropna(how='any', axis=0)    # NaN이 하나라도 있으면 그 행은 삭제 (기본값)
df.dropna(how='all', axis=0)    # NaN이 전부 있는 행만 삭제

df.dropna(how='any', axis=1)    # NaN이 하나라도 있으면 그 열은 삭제
df.dropna(how='all', axis=1)    # NaN이 전부 있는 열만 삭제

[문제] COMMISSION_PCT 값이 NaN 인 사원중에 SALARY 값이 5000~10000인 사원의 정보 출력.
emp.loc[ (emp['COMMISSION_PCT'].isnull()) & ((emp['SALARY']>=5000) & (emp['SALARY']<=10000)) ]



def square(arg):
    return arg**2 

lst = [1,2,3]
square(lst[0])  # 1
square(lst[1])  # 4
square(lst[2])  # 9

[ square(i) for i in lst]

list(map(square, lst))
# 메모리에서 lst값을 square 함수에 적용, 반복문보다 빠르다.

list(map(lambda arg:arg**2, lst))

s = Series([1,2,3])
square(s) 0    1
1    4
2    9
dtype: int64
list(map(square,s)) [1, 4, 9]
Series(map(square,s)) 0    1
1    4
2    9
dtype: int64
Series(map(lambda arg:arg**2,s)) 0    1
1    4
2    9
dtype: int64
s.apply(square) 0    1
1    4
2    9
dtype: int64
s.apply(lambda arg:arg**2) 0    1
1    4
2    9
dtype: int64

 

df = DataFrame([[1,2,3],[4,5,6]])
df.apply(square)     0   1   2
0   1   4   9
1  16  25  36
square(df)
df.apply(lambda arg:arg**2)



apply() : 함수를 적용한다


if comm = null :
    salary * 12
else :
    salary * 12 + comm

emp['SALARY'] * 12 + emp['COMMISSION_PCT'].fillna(0)


Series(1).isnull() 0    False
dtype: bool
# bool이지만 Series(1차원배열)
Series(1).isnull().bool() False

# bool. 단일값 스칼라 타입
pd.isnull(1) False

# bool

 

def nvl(arg):
    if Series(arg).isnull().bool():
        return 0
    else:
        return arg
def nvl(arg):
    if pd.isnull(arg):
        return 0
    else:
        return arg


emp['SALARY'] * 12 + emp['COMMISSION_PCT'].apply(nvl)





'Smith'.startswith('S')             # 대문자 S로 시작하는지 체크. 단일값

emp['LAST_NAME'].startswith('S')    # 오류 : 시리즈타입으로 다수 값이 들어옴

emp['LAST_NAME'][0].startswith('S')     # 인덱싱을 통해 단일값나오게 사용

emp['LAST_NAME'].apply(lambda arg:arg.startswith('S'))

emp[emp['LAST_NAME'].apply(lambda arg:arg.startswith('S'))]

emp[emp['LAST_NAME'].apply(lambda arg:arg[0] == 'S')]

emp[emp['LAST_NAME'].str.startswith('S')]


str : 판다스에서 사용가능한 문자함수

s = Series([' big','data ',' big data '])
s
0           big
1         data 
2     big data 
dtype: object


len('data')     # 문자길이

s.apply(lambda arg:len(arg))

s.str.len()     # 문자길이

s.str.strip()   # 문자의 앞뒤 공백제거
s.str.strip().str.len()

s.str.lstrip()  # 앞 공백 제거
s.str.rstrip()  # 뒤 공백 제거

s.str.lower()   # 전부 소문자로 변환
s.str.upper()   # 전부 대문자로 변환
s.str.strip().str.capitalize()  # 공백제거 후  -> 문장의 첫글자는 대문자, 나머지 소문자

s.str.title()   # 단어별 첫글자만 대문자, 나머지 소문자

s.str.swapcase()    # 소문자-대문자 반전

s.str.replace('big','BIG')  # 문자 치환

s.str.index('a')  # 문자의 위치반환, 찾는 문자가 없으면 오류

s.str.find('a')  # 문자의 첫 위치반환, 없으면 -1
s.str.find('a',0)   # 기본값
s.str.find('a', 2)    # 두번째 자리부터 위치 검색
s.str.find('a', 2).values # 값만 확인 , array([-1,  3,  6], dtype=int64)

s.str.findall('a')  # 문자를 찾아서 해당 문자열을 반환

s.str.startswith('b')   # 문자열로 시작되는지 체크

s.str.endswith('a')     # 문자열로 끝나는지 체크

s.str.contains('a')  # 문자열이 포함되어 있는지 체크(위치무관)

s1 = s.str.replace('a','A')
s1.str.contains('a')
s1.str.contains('A')
s1.str.contains('a',case=True)     # 대소문자 구분, 기본값
s1.str.contains('A',case=False)    # 대소문자 구분하지않고 찾는다.

[문제] a 글자가 두번 이상 나온 LAST_NAME을 출력해주세요.
emp[ emp['LAST_NAME'] .str.findall('a') .str.len() >= 2 ] ['LAST_NAME']



emp['LAST_NAME'].str.get(0)     # 지정된 위치(인덱스)의 값을 반환하는 함수
emp['LAST_NAME'].str.get(1) == 'i'  # 두번째 위치에 소문자 i가 있는가
emp[emp['LAST_NAME'].str.get(1) == 'i']['LAST_NAME']

emp[emp['LAST_NAME'].str.get(2).isin(['a','e'])]['LAST_NAME']
# 세번째 위치에 a 또는 e 가 있는가

emp['LAST_NAME'].str.slice(start=0,stop=2)
# 지정된 인덱스 사이값을 반환하는 함수

emp['LAST_NAME'].str.slice(start=2,stop=3)  # 3번째 인덱스 값
emp[emp['LAST_NAME'].str.slice(start=2,stop=3).isin(['a','e'])]['LAST_NAME']
# 0,1,2 인덱스2번부터 3번째 값


obj = Series(['big','big data'])
obj.str.title()
0         Big
1    Big Data
dtype: object


'B' + obj.str.slice(start=1) # 인덱스1에서부터 나머지

obj.str.slice_replace(start=0,stop=1,repl='B')
# 인덱스 사이값을 다른값으로 수정하는 함수

obj.str.pad(width=20, side='left', fillchar='_')  # 왼쪽으로 정렬 후 오른쪽 나머지를 _ 채우기
obj.str.pad(width=20,side='right',fillchar='_')  # 오른쪽으로 정렬 후 왼쪽 나머지를 _ 채우기
obj.str.center(width=20,fillchar='_')  # 가운데 정렬 후 양쪽 나머지를 _ 채우기
# 지정된 길이 패딩

obj.str.ljust(width=20,fillchar='_') # 왼쪽 정렬 후 나머지 채우기
obj.str.rjust(width=20,fillchar='_') # 오른쪽 정렬 후 나머지 채우기

obj.str.rjust(width=20,fillchar='0')
obj.str.zfill(width=20)    # 문자앞에 0으로 채우기

emp['SALARY'].str.zfill(width=15)  # 문자함수라 숫자는 사용불가
emp['LAST_NAME'].str.zfill(width=15)


obj = Series(['itwill@itwill.com','itwill'])
obj
0    itwill@itwill.com
1               itwill
dtype: object
obj.str.split('@')
obj.str.split('@',expand=False) # 기본값
# 문자를 기준으로 분리하는 함수
0    [itwill, itwill.com]
1                [itwill]
dtype: object
obj.str.split('@',expand=True)         0           1
0  itwill  itwill.com
1  itwill        None
obj.str.split('@',expand=True)[0]
# 0번 열 확인
0    itwill
1    itwill
Name: 0, dtype: object
obj.str.partition('@')
# 기준으로 분리 후 왼쪽 정렬
        0  1           2
0  itwill  @  itwill.com
1  itwill               
obj.str.rpartition('@')
# 기준으로 분리 후 오른쪽 정렬
        0  1           2
0  itwill  @  itwill.com
1                 itwill

 

obj = Series(['1/7','7','seven','SEVEN','Seven','seven7','칠','칠 7',' ','7#'])
obj
0       1/7
1         7
2     seven
3     SEVEN
4     Seven
5    seven7
6         칠
7       칠 7
8          
9        7#
dtype: object
obj[obj.str.isalnum()]
# 한글, 알파벳, 숫자로만 구성여부 체크
1         7
2     seven
3     SEVEN
4     Seven
5    seven7
6         칠
dtype: object
obj[obj.str.isalpha()]
# 한글, 알파벳만 구성여부 체크
2    seven
3    SEVEN
4    Seven
6        칠
dtype: object
obj[obj.str.isdigit()]
# 숫자로만 구성여부 체크
1    7
dtype: object
obj[obj.str.isnumeric()]
# 숫자로만 구성여부 체크
1    7
dtype: object
obj[obj.str.islower()]
# 소문자 체크
2     seven
5    seven7
dtype: object
obj[obj.str.isupper()]
# 대문자 체크
3    SEVEN
dtype: object
obj[obj.str.istitle()]
# 첫글자 대문자, 나머지 소문자 체크
4    Seven
dtype: object
obj[obj.str.isspace()]
# 공백여부 체크
8     
dtype: object

 




■ 그룹함수

obj = Series([2,3,4,5,6,None,100])
obj

obj.sum()
obj.sum(skipna=True)    # NaN 먼저 제거 후 계산, 기본값
obj.sum(skipna=False)

obj.mean()      # 평균
obj.median()    # 중앙값
obj.var()       # 분산. 편차제곱이라 단위가 커짐
obj.std()       # 표준편차
obj.max()       # 최대값
obj.min()       # 최소값
obj.idxmax()    # 최대값의 인덱스위치
obj.idxmin()    # 최소값의 인덱스위치
obj.argmax()    # 최대값의 인덱스
obj.argmin()    # 최소값의 인덱스


obj = Series([2,3,100,4,5,6,None,100])
obj

obj.max()       
obj.idxmax()
obj.argmax()    

obj[obj == obj.max()].index         # 최대값의 모든 위치


obj.cumsum()    # 누적합
obj.cumprod()   # 누적곱


obj = Series([80,10,2,3,100,4,5,1,None,100])
obj

obj.cummin()    # 누적 최소값
obj.cummax()    # 누적 최대값

obj.count()     # 9, NaN 제외한 건수
len(obj)        # 10, NaN 포함한 건수


df = DataFrame([[60,80,70],[100,80,90],[60,90,50]],
               index=['홍길동','박찬호','손흥민'],
               columns=['영어','수학','국어'])
df

df.sum()
df.sum(axis=0,skipna=True) # 열의 합 기본값
axis=0    # 행기준, 각 칼럼의 합
axis='rows'

df.sum(axis=1)  # 행의 합
axis=1    # 열기준, 각 인덱스별 합
axis='columns'

skipna=True    # NaN 무시
skipna=False    # NaN 포함

# 추가
df.loc['제임스','영어'] = 100
df.loc['제임스','수학'] = None
df.loc['제임스','국어'] = 90
df

df['영어'].sum()


SELECT sum(salary) FROM emp;
emp['SALARY'].sum()

SELECT department_idsum(salary) FROM emp GROUP BY department_id;
emp['SALARY'].groupby(emp['DEPARTMENT_ID']).sum()
emp.groupby('DEPARTMENT_ID')['SALARY'].sum(0)

emp.groupby('DEPARTMENT_ID')['SALARY'].mean()
emp.groupby('DEPARTMENT_ID')['SALARY'].median()
emp.groupby('DEPARTMENT_ID')['SALARY'].var(0)
emp.groupby('DEPARTMENT_ID')['SALARY'].std()
emp.groupby('DEPARTMENT_ID')['SALARY'].max()
emp.groupby('DEPARTMENT_ID')['SALARY'].min()



SELECT department_id, sum(salary), avg(salary), max(salary), min(salary)
FROM emp
GROUP BY department_id;

emp['SALARY'].groupby(emp['DEPARTMENT_ID']).aggregate(['sum','mean','max','min'])

emp.groupby('DEPARTMENT_ID')['SALARY'].aggregate(['sum','mean','max','min'])



SELECT department_id, sum(salary), max(hire_date), min(hire_date)
FROM emp
GROUP BY department_id;

emp.groupby('DEPARTMENT_ID').aggregate({'SALARY':'sum','HIRE_DATE':['max','min']})

# DEPARTMENT_ID 결측값을 다른 값을 수정한 수 수행
emp.DEPARTMENT_ID = emp.DEPARTMENT_ID.fillna(999)
emp['SALARY'].groupby(emp['DEPARTMENT_ID']).sum()





emp = pd.read_csv("C:\\data\\employees.csv")
emp.info()


SELECT department_id, job_id, sum(salary)
FROM emp
GROUP BY department_id, job_id;

emp['SALARY'].groupby([emp['DEPARTMENT_ID'],emp['JOB_ID']]).sum()

x = emp['SALARY'].groupby([emp['DEPARTMENT_ID'].fillna(999),emp['JOB_ID']]).sum()
# fillna(999) : 부서아이디 1명 null값 -> 999 부여
type(x)     # Series, 다중인덱스

x.index

y = x.reset_index()
y
type(y)     # DataFrame

y.set_index('DEPARTMENT_ID')
y.set_index(['DEPARTMENT_ID','JOB_ID'])


x.unstack()     # 가로 방향전환
x.unstack().fillna(0)


그룹별 출력
for name, group in emp.groupby('DEPARTMENT_ID'):
    print(name)
    print(group)

for name, group in emp.groupby('DEPARTMENT_ID')['LAST_NAME']:
    print(name)
    print(group)

for name, group in emp.groupby('DEPARTMENT_ID')[['LAST_NAME','SALARY']]:
    print(name)
    print(group)

for name, group in emp[['LAST_NAME','SALARY']].groupby('DEPARTMENT_ID'):
    print(name)
    print(group)
>> 'DEPARTMENT_ID'그룹기준이 앞에 없으면 오류

for name, group in emp[['LAST_NAME','SALARY','DEPARTMENT_ID']].groupby('DEPARTMENT_ID'):
    print(name)
    print(group)





dept = pd.read_csv("C:\\data\\dept.csv")
dept.info()

emp.info()



■ merge
- SQL에서 JOIN과 같다.
- 서로 다른 데이터프레임의 특정한 열을 기준으로 연결하는 방법

# EQUI JOIN(등가조인) , INNER JOIN, SIMPLE JOIN
SELECT * FROM emp, dept WHERE emp.department_id = dept.department_id;


# NATURAL JOIN : 양쪽 데이터프레임의 동일한 이름의 모든 컬럼을 조인조건 술어를 수행한다.

SELECT * 
FROM emp NATURAL JOIN dept;
pd.merge(emp,dept)



# JOIN USING : 기준칼럼을 지정해서 조인조건 술어를 수행한다.
pd.merge(emp,dept,on='DEPARTMENT_ID')

SELECT * 
FROM emp JOIN dept USING(department_id);


# JOIN ON : 조인조건술어를 직접 명시한다.
pd.merge(emp,dept, left_on='DEPARTMENT_ID', right_on='DEPARTMENT_ID')

x = pd.merge(emp,dept, left_on='DEPARTMENT_ID', right_on='DEPARTMENT_ID', how='inner')
# how='inner' : 기본값

SELECT * FROM emp JOIN dept ON emp.department_id = dept.department_id;



# OUTER JOIN : 키 값이 불일치되는 데이터를 출력하는 조인

# LEFT OUTER JOIN

SELECT *
FROM emp, dept
WHERE emp.department_id = dept.department_id(+);
SELECT *
FROM emp LEFT OUTER JOIN dept
ON emp.department_id = dept.department_id;
pd.merge(emp, dept, left_on='DEPARTMENT_ID', right_on='DEPARTMENT_ID', how='left')



# RIGHT OUTER JOIN

SELECT *
FROM emp, dept
WHERE emp.department_id(+) = dept.department_id;
SELECT *
FROM emp RIGHT OUTER JOIN dept
ON emp.department_id = dept.department_id;
pd.merge(emp, dept, left_on='DEPARTMENT_ID', right_on='DEPARTMENT_ID', how='right')



# FULL OUTER JOIN

SELECT *
FROM emp, dept
WHERE emp.department_id(+) = dept.department_id
UNION
SELECT *
FROM emp, dept
WHERE emp.department_id = dept.department_id(+);
SELECT *
FROM emp FULL OUTER JOIN dept 
ON emp.department_id = dept.department_id;
pd.merge(emp, dept, left_on='DEPARTMENT_ID', right_on='DEPARTMENT_ID', how='outer')






dept.info()

dept.set_index('DEPARTMENT_ID')
dept

dept.set_index('DEPARTMENT_ID', inplace=True)   # inplace=True 데이터프레임 바로 적용
dept

x = pd.merge(emp,dept, left_on='DEPARTMENT_ID', right_index=True, how='inner')
x[['LAST_NAME','DEPARTMENT_NAME']]
>> 오른쪽 데이터는 오른쪽


emp.set_index('DEPARTMENT_ID', inplace=True)
emp.info()
emp.index

x = pd.merge(emp,dept, left_index=True, right_index=True, how='inner')
x[['LAST_NAME','DEPARTMENT_NAME']]


x = pd.merge(emp,dept, left_index=True, right_index=True, how='left')
x[['LAST_NAME','DEPARTMENT_NAME']]

x = pd.merge(emp,dept, left_index=True, right_index=True, how='right')
x[['LAST_NAME','DEPARTMENT_NAME']]

x = pd.merge(emp,dept, left_index=True, right_index=True, how='outer')
x[['LAST_NAME','DEPARTMENT_NAME']]



emp.info()
emp.index
emp = emp.reset_index()
emp.reset_index(inplace=True)
emp

dept.reset_index(inplace=True)
dept


[문제] 부서이름별 급여의 총액을 출력해주세요.
emp['SALARY'].groupby(emp['DEPARTMENT_ID']).sum()
dept_sal = emp['SALARY'].groupby(emp['DEPARTMENT_ID']).sum()
type(dept_sal)
dept_sal.index
result = pd.merge(dept_sal,dept,left_index=True,right_on='DEPARTMENT_ID')
result[['DEPARTMENT_NAME','SALARY']]

 

[문제] 부서이름별 급여의 총액을 출력해주세요. 소속부서가 없는 사원들의 총액도 출력.
emp['SALARY'].groupby(emp['DEPARTMENT_ID']).sum()
dept_sal = emp['SALARY'].groupby(emp['DEPARTMENT_ID'].fillna(999)).sum()
type(dept_sal)
dept_sal.index
result = pd.merge(dept_sal,dept,left_index=True,right_on='DEPARTMENT_ID',how='left')
result[['DEPARTMENT_NAME','SALARY']]

emp['DEPARTMENT_ID'].fillna(999) : 부서ID 결측값에 999 대입

 

조인은 2개씩!

loc = pd.read_csv("C:\\data\\loc.csv")
job = pd.read_csv("C:\\data\\job.csv")

emp.info()
loc.info()
job.info()

x = pd.merge(dept,loc,on='LOCATION_ID')
x.info()

pd.merge(x,emp,on='DEPARTMENT_ID',how='right')[['EMPLOYEE_ID','DEPARTMENT_NAME','CITY']]
how='right' : 부서없는사람포함


# self join
SELECT w.employee_id, w.last_name, m.last_name
FROM emp w, emp m
WHERE w.manager_id = m.employee_id;

pd.merge(emp,emp,left_on='MANAGER_ID',right_on='EMPLOYEE_ID')[['LAST_NAME_x','LAST_NAME_y']]
현재 사원들의 관리자

pd.merge(emp,emp,left_on='MANAGER_ID',right_on='EMPLOYEE_ID',how='left')[['LAST_NAME_x','LAST_NAME_y']]
관리자 없는사람도 포함



# non equi join
SELECT e.employee_id, e.salary, j.grade_level
FROM emp e, job j
WHERE e.salary between j.LOWEST_SAL AND j.HIGHEST_SAL;

job.info()
job

job[(10000>=job.LOWEST_SAL)&(10000<=job.HIGHEST_SAL)]['GRADE_LEVEL']    # series
job[(10000>=job.LOWEST_SAL)&(10000<=job.HIGHEST_SAL)]['GRADE_LEVEL'].values[0]
...반복

grade = emp['SALARY'].apply(lambda arg:job[(arg>=job.LOWEST_SAL)&(arg<=job.HIGHEST_SAL)]['GRADE_LEVEL'].values[0])

DataFrame({'이름':emp['LAST_NAME'],'급여':emp['SALARY'],'급여등급':grade})