1. 파일 읽기 및 저장하기

1-1. fileinput

텍스트 파일을 읽고, 쓰고 저장하는 기능을 편리하게 사용할 수 있도록 해주는 라이브러리
여러 개의 파일을 한 번에 읽어서 수정할 수 있음

import fileinput
import os
import glob

# Show the current working directory.
os.getcwd() # e.g. 'C:\\hhj\\python\\jupyter'

# List the entries of the current working directory.
os.listdir(os.getcwd()) # e.g. ['.ipynb_checkpoints', '25. 파일 입출력 라이브러리.ipynb']

# Directory (relative to the working directory) that is searched for .txt files.
path = 'sample/'

# glob.glob() returns the paths that match the pattern as a list;
# here: every .txt file directly inside `path`.

glob.glob(os.path.join(path, '*.txt'))

'''
['sample\\새파일1.txt',
 'sample\\새파일2.txt',
 'sample\\새파일3.txt',
 'sample\\새파일4.txt',
 'sample\\새파일5.txt']
'''

# Print the contents of every .txt file under `path`, one file after another.
sample_files = glob.glob(os.path.join(path, '*.txt'))

# fileinput treats an empty file list as "read from sys.stdin", so only
# iterate when the pattern actually matched something.
if sample_files:
    with fileinput.input(sample_files) as f:
        for line in f:
            # Each line still carries its own newline; strip it so print()
            # does not double-space the output. A last line without a
            # trailing newline is still terminated by print().
            print(line.rstrip('\n'))

# Collect the .txt files under `path` once; the in-place edits that follow
# all operate on this list.
txt_files = glob.glob(os.path.join(path, '*.txt'))

print(txt_files)

# Fail fast when nothing matched: fileinput treats an empty file list as
# "read from sys.stdin", so the in-place loops would wait for console input
# instead of editing any file.
if not txt_files:
    raise FileNotFoundError('no .txt files found under %r' % path)

'''
['sample\\새파일1.txt',
 'sample\\새파일2.txt',
 'sample\\새파일3.txt',
 'sample\\새파일4.txt',
 'sample\\새파일5.txt']
'''

# Replace the first line of every file in txt_files.
# With inplace=True, fileinput redirects print() into the file being read,
# so whatever is printed for a line becomes that line's new content.
with fileinput.input(txt_files, inplace=True) as reader:
    for current in reader:
        if not reader.isfirstline():
            # Any other line is written back untouched; it keeps its own
            # line ending, hence end=''.
            print(current, end='')
            continue
        # isfirstline() is true for the line just read when it is the first
        # line of its file.
        print('첫번째 라인입니다')

# Replace every line whose full text is exactly '첫번째 라인입니다'.
with fileinput.input(txt_files, inplace=True) as reader:
    for current in reader:
        # The comparison is against the whole line, trailing newline included.
        if current != '첫번째 라인입니다\n':
            print(current, end='')
        else:
            print('1번째 라인입니다')

# Replace every line that contains the keyword '1번째' with a fixed text.
with fileinput.input(txt_files, inplace=True) as reader:
    for current in reader:
        if '1번째' not in current:
            # No keyword: keep the line exactly as it was.
            print(current, end='')
            continue
        # Substring match: the whole line is replaced, not just the keyword.
        print('첫번째 줄입니다')

# Substitute text inside lines: every '12번째' becomes '열두번째'.
with fileinput.input(txt_files, inplace=True) as reader:
    for current in reader:
        # str.replace() returns the line unchanged when '12번째' is absent,
        # so matching and non-matching lines share one code path.
        print(current.replace('12번째', '열두번째'), end='')

1-2. pickle

파이썬에서 사용하는 딕셔너리, 리스트, 클래스 등의 자료형을 별도의 변환 없이 그대로 파일로 저장하고 불러올 때 사용하는 모듈

import pickle

# Round-trip a plain list through a pickle file.
data = ['apple', 'banana', 'orange']

# Save: pickle writes bytes, so the file is opened in binary write mode.
with open('list.pkl', 'wb') as pkl_out:
    pickle.dump(data, pkl_out)

# Load: read the same file back in binary mode and rebuild the object.
with open('list.pkl', 'rb') as pkl_in:
    data = pickle.load(pkl_in)

# The object comes back with its original type and contents.
print(type(data)) # <class 'list'>
print(data) # ['apple', 'banana', 'orange']

# Round-trip a nested dictionary (record id -> record fields) through a pickle file.
data = {
    1: {'id': 1, 'userid': 'apple', 'name': '김사과', 'gender': '여자', 'age': 20},
}

# Save the dictionary as-is; no manual conversion to text is needed.
with open('dict.pkl', 'wb') as pkl_out:
    pickle.dump(data, pkl_out)

# Load it back; keys and nested values are restored with their types.
with open('dict.pkl', 'rb') as pkl_in:
    data = pickle.load(pkl_in)

print(data)
# {1: {'id': 1, 'userid': 'apple', 'name': '김사과', 'gender': '여자', 'age': 20}}

2. 파일 찾기, 복사, 이동하기

2-1. 파일 확장자로 찾기

# Working directory that the relative patterns are resolved against.
os.getcwd() # e.g. 'C:\\hhj\\python\\jupyter'

# .txt files located directly in the working directory.
txt_here = glob.glob('*.txt')
for txt_name in txt_here:
    print(txt_name)

# e.g. 주피터노트북.txt

# .txt files one directory level down: without recursive=True, '**' behaves
# like a plain '*', so it stands for exactly one directory name.
txt_one_level_down = glob.glob('**/*.txt')
for txt_name in txt_one_level_down:
    print(txt_name)
    
'''
sample\새파일1.txt
sample\새파일2.txt
sample\새파일3.txt
sample\새파일4.txt
sample\새파일5.txt
'''

# .txt files in the working directory AND in its subdirectories:
# with recursive=True, '**' matches zero or more directory levels.
recursive_hits = glob.glob('**/*.txt', recursive=True)
if recursive_hits:
    # One path per line, same as printing each match in turn.
    print('\n'.join(recursive_hits))
    
'''
주피터노트북.txt
sample\새파일1.txt
sample\새파일2.txt
sample\새파일3.txt
sample\새파일4.txt
sample\새파일5.txt
'''

# Find names by length: each '?' matches exactly one character, so this
# matches four characters followed by a dot and anything after it.
# recursive=True is not passed: it only changes how '**' is interpreted,
# and this pattern contains no '**'.
four_char_hits = glob.glob('????.*')
for hit in four_char_hits:
    print(hit)
    
'''
dict.pkl
list.pkl
'''

# Six characters before the dot (one '?' per character).
# recursive=True is not passed: the pattern has no '**' for it to act on.
six_char_hits = glob.glob('??????.*')
for hit in six_char_hits:
    print(hit)

# e.g. 주피터노트북.txt

# Character classes: four letters from the range a-z before the dot.
# NOTE(review): the sample output shown after this loop lists aBCd.txt, so
# matching was case-insensitive on the author's system — presumably Windows;
# on a case-sensitive platform only lowercase names should match.
alpha_hits = glob.glob('[a-z][a-z][a-z][a-z].*')
for hit in alpha_hits:
    print(hit)
    
    
'''
aBCd.txt
dict.pkl
list.pkl
'''

# Find every file whose name starts with '새파일', at any depth.
# recursive=True is required for '**' to span zero or more directories;
# without it '**' acts like '*' and only first-level subdirectories are
# searched, which misses matches in the working directory and deeper levels.
for filename in glob.glob('**/새파일*.*', recursive=True):
    print(filename)
    
'''
sample\새파일1.txt
sample\새파일2.txt
sample\새파일3.txt
sample\새파일4.txt
sample\새파일5.txt
'''

# Find every file whose name contains '프로젝트', at any depth.
# recursive=True makes '**' match zero or more directories; without it the
# search would be limited to first-level subdirectories only.
for filename in glob.glob('**/*프로젝트*.*', recursive=True):
    print(filename)
    
'''
project\25.프로젝트 실습.ipynb
project\프로젝트 개요.txt
'''

2-2. fnmatch()

glob과 동일하게 특정한 패턴을 따르는 파일명을 찾아주는 모듈
파일명 매칭 여부를 True, False 형태로 반환하기 때문에 os.listdir() 함수와 함께 사용

import fnmatch

# Select names in ./sample that start with '새' and end in '.txt', where the
# part before the extension is four characters long and ends with a digit:
#   '새' + two arbitrary characters ('??') + one digit ('[0-9]') + '.txt'
# fnmatch only tests a name against a pattern, so os.listdir() supplies the
# candidate names.
sample_names = os.listdir('./sample')
wanted = [name for name in sample_names if fnmatch.fnmatch(name, '새??[0-9].txt')]
for name in wanted:
    print(name)
        
        
'''
새파일1.txt
새파일2.txt
새파일3.txt
새파일4.txt
새파일5.txt
'''

2-3. shutil

파일을 복사하거나 이동할 때 사용하는 내장 모듈

import shutil

# Copy a file; the trailing comment shows the value the call returned.
shutil.copy('./sample/새파일1.txt', './sample/새파일1_복사본.txt') # './sample/새파일1_복사본.txt'

# Move the copy out of ./sample into the current directory.
shutil.move('./sample/새파일1_복사본.txt', './새파일1_복사본.txt') # './새파일1_복사본.txt'

# Change the extension: moving to a different name in the same directory renames the file.
shutil.move('./새파일1_복사본.txt', './새파일1_복사본.py') # './새파일1_복사본.py'

# Change the extension back to .txt.
shutil.move('./새파일1_복사본.py', './새파일1_복사본.txt') # './새파일1_복사본.txt'

3. 파일 압축

3-1. 데이터 압축

대용량 데이터 및 대량의 파일을 전송 시, 전송 속도가 느리며 전송 문제가 발생할 가능성이 매우 높음
데이터 압축의 종류
- 손실 압축 : 사람이 눈치채지 못할 수준의 정보만 버리고 압축하는 방법
- 무손실 압축 : 데이터 손실이 전혀 없는 압축
압축률 : 압축된 자료량(압축된 데이터 크기) / 원시 자료량(원래 데이터 크기)
다양한 압축 알고리즘에 따라 압축 성능 및 시간이 좌우됨
압축 : 인코딩(Encoding)
압축 해제 : 디코딩(Decoding)

3-2. zlib

데이터를 압축하거나 해제할 때 사용하는 모듈
compress()와 decompress() 함수로 바이트(bytes) 데이터를 압축하거나 해제 (문자열은 encode()로 바이트로 변환한 뒤 압축하고, 해제 후 decode()로 복원)
데이터 크기를 줄여서 전송이 필요한 경우 사용

import zlib

# Highly repetitive sample text.
data = 'Hello Python!' * 10000

# len() of a str counts characters; the text is pure ASCII, so this is also
# its size in bytes once UTF-8 encoded.
print(len(data)) # 130000

# zlib works on bytes, so the text is encoded before it is compressed.
raw_bytes = data.encode('utf-8')
compress_data = zlib.compress(raw_bytes)
print(len(compress_data)) # 293 in the original run

compress_data
# b'x\x9c\xed\xc7...' (compressed bytes; output abbreviated)

# Decompress back to bytes, then decode to recover the original text.
restored_bytes = zlib.decompress(compress_data)
org_data = restored_bytes.decode('utf-8')
print(len(org_data)) # 130000

3-3. gzip

파일을 압축하거나 해제할 때 사용하는 모듈
내부적으로 zlib 모듈(DEFLATE 알고리즘)을 사용

import gzip

# Write the uncompressed text to a plain file
# (presumably kept so its size can be compared with the .gz file — confirm).
with open('org_data.txt', 'w') as plain_out:
    plain_out.write(data)

# Compress with gzip: the file is opened in binary mode, so the text is
# encoded to bytes first.
payload = data.encode('utf-8')
with gzip.open('compressed.txt.gz', 'wb') as gz_out:
    gz_out.write(payload)

# Decompress: read the bytes back, then decode them into text.
with gzip.open('compressed.txt.gz', 'rb') as gz_in:
    restored = gz_in.read()
org_data = restored.decode('utf-8')

print(len(org_data)) # 130000

3-4. zipfile

여러개 파일을 zip 확장자로 합쳐서 압축할 때 사용하는 모듈

import zipfile

# Bundle the sample files into one zip archive and compress them.
sample_members = (
    './sample/새파일1.txt',
    './sample/새파일2.txt',
    './sample/새파일3.txt',
    './sample/새파일4.txt',
    './sample/새파일5.txt',
)

# ZipFile defaults to ZIP_STORED, which only bundles the files without
# compressing them; ZIP_DEFLATED must be requested explicitly to compress.
with zipfile.ZipFile('./sample/새파일.zip', 'w', compression=zipfile.ZIP_DEFLATED) as myzip:
    for member in sample_members:
        myzip.write(member)

# Extract every member; with no argument, extractall() unpacks relative to
# the current working directory.
with zipfile.ZipFile('./sample/새파일.zip') as myzip:
    myzip.extractall()

3-5. tarfile

여러 개의 파일을 tar 확장자로 하나로 묶을 때 사용하는 모듈 (tar 자체는 파일을 묶기만 하며, 'w:gz'와 같은 모드를 지정해야 압축까지 수행됨)

import tarfile

# Bundle the sample files into one tar archive.
# Mode 'w' writes a plain, uncompressed tar; a mode such as 'w:gz' would be
# needed to compress as well.
tar_members = (
    './sample/새파일1.txt',
    './sample/새파일2.txt',
    './sample/새파일3.txt',
    './sample/새파일4.txt',
    './sample/새파일5.txt',
)
with tarfile.open('./sample/새파일.tar', 'w') as archive:
    for member in tar_members:
        archive.add(member)

# Unpack every member; with no argument, extractall() unpacks relative to
# the current working directory.
# NOTE(review): newer Python versions warn when extractall() is called
# without filter=; the archive here was created just above, so it is trusted.
with tarfile.open('./sample/새파일.tar') as archive:
    archive.extractall()

'Python' 카테고리의 다른 글

27. 파이썬과 MySQL 연동 (0)	2023.03.22
26. DAO, DTO, VO (0)	2023.03.22
주피터 노트북 설치 (0)	2023.03.14
24. 변수 타입 어노테이션 (0)	2023.03.14
23. 클로저와 데코레이터 (0)	2023.03.14

hyeongjin76

25. 파일 입출력 라이브러리

1. 파일 읽기 및 저장하기

1-1. fileinput

1-2. pickle

2. 파일 찾기, 복사, 이동하기

2-1. 파일 확장자로 찾기

2-2. fnmatch()

2-3. shutil

3. 파일 압축

3-1. 데이터 압축

3-2. zlib

3-3. gzip

3-4. zipfile

3-5. tarfile

'Python' 카테고리의 다른 글

티스토리툴바

25. 파일 입출력 라이브러리

1. 파일 읽기 및 저장하기

1-1. fileinput

1-2. pickle

2. 파일 찾기, 복사, 이동하기

2-1. 파일 확장자로 찾기

2-2. fnmatch()

2-3. shutil

3. 파일 압축

3-1. 데이터 압축

3-2. zlib

3-3. gzip

3-4. zipfile

3-5. tarfile

'Python' 카테고리의 다른 글

'Python' Related Articles

티스토리툴바