Tutorial from the fastai documentation (Oxford-IIIT Pets image classification).
# Notebook shell commands: install fastai from source in editable (dev) mode.
!git clone --recurse-submodules https://github.com/fastai/fastai
!pip install -e "fastai[dev]"
import fastai
# Confirm which fastai version was actually installed.
print(fastai.__version__)
from fastai.vision.all import *
# Download and extract the Oxford-IIIT Pets dataset; returns the local Path.
path = untar_data(URLs.PETS)
print(path.ls())
# get_image_files
# - def get_image_files(path, recurse=True, folders=None):
# - Get image files in `path` recursively, only in `folders`, if specified.
files = get_image_files(path/"images")
print(len(files))
# Peek at example filenames for Cat and dog:
# - Cat: filename starts with a capital letter
# - dog: filename is all lowercase
files[0],files[100]
# Labelling rule for the Pets dataset, derived from the filename convention:
# - cat images have filenames starting with an uppercase letter => labeled True
# - dog images have all-lowercase filenames                     => labeled False
def label_func(filename):
    """Return True (Cat) when `filename` begins with an uppercase letter."""
    first_char = filename[0]
    return first_char.isupper()
Building the DataLoaders: ImageDataLoaders with from_name_func
From a forum thread: "Why does ImageDataBunch.from_name_re() require a path argument?"
After reading the source code, my current understanding is that `path` is a required property of the `DataBunch` parent class, since methods like `DataBunch.save()` write directly to `path`. Additionally, the `Learner` class usually copies `Learner.path` from its `data.path`; this is used by methods such as `Learner.save()`, `Learner.load()`, and `Learner.export()`, which write to `self.path/self.model_dir` or just `self.path`.
# ImageDataLoaders (note the plural "s")
# - why plural? => it wraps multiple PyTorch DataLoaders in one object
# - training / validation (/ test) DataLoader
# from_name_func arguments:
# - path       => root path of the dataset
# - fnames     => list of image filenames
# - label_func => function mapping each filename to its label
# - item_tfms  => per-item transforms (applied to each image individually)
dls = ImageDataLoaders.from_name_func(path=path,
fnames=files,
label_func=label_func,
item_tfms=Resize(224))
# Display up to 12 labelled sample images from one batch.
dls.show_batch(max_n=12)
# Build a CNN learner from a resnet34 backbone (pretrained by default),
# tracking error_rate on the validation set.
learn = cnn_learner(dls, resnet34, metrics=error_rate)
# fine_tune is not a simple method
# - the details are hidden behind sensible defaults, but every one of them
#   can be adjusted via the keyword arguments listed below
"""
- Learner.fine_tune(epochs,
base_lr=0.002,
freeze_epochs=1,
lr_mult=100,
pct_start=0.3,
div=5.0,
lr_max=None,
div_final=100000.0,
wd=None,
moms=None,
cbs=None,
reset_opt=False)
"""
# One epoch of fine-tuning (fastai first trains the head with the body frozen,
# then unfreezes — see freeze_epochs above).
learn.fine_tune(epochs=1)
# learn.predict returns a 3-tuple:
# - first item  => decoded, human-readable predicted label
# - second item => index of the predicted class (tensor)
# - third item  => per-class probability tensor
#   (NOTE(review): per fastai docs, not the "actual output" — confirm)
learn.predict(files[0])
# show_results displays randomly chosen samples from the validation set
# - the target dataset can be selected with the ds_idx argument
# - ds_idx=1 (validation) is the default; ds_idx=0 selects the training set
learn.show_results(max_n=12)
# same, but on the training dataset
learn.show_results(ds_idx=0, max_n=12)
interp = Interpretation.from_learner(learn)
# the k=20 validation samples with the highest loss
interp.top_losses(k=20)
interp.plot_top_losses(k=20)
classification_interp = ClassificationInterpretation.from_learner(learn)
# per-class precision / recall / F1 report
classification_interp.print_classification_report()
# most frequently confused class pairs
classification_interp.most_confused()
classification_interp.plot_confusion_matrix()
# Install wandb (Weights & Biases experiment tracking).
!pip install wandb
# Log in (prompts for an API key on first use).
import wandb
wandb.login()
# Initialize the wandb project
# - creates the project together with its web dashboard endpoint
wandb.init(project='my_project')
# Import the wandb callback integration shipped with fastai.
from fastai.callback.wandb import *
# Attaching WandbCallback to the Learner logs everything the learner does.
learn = cnn_learner(dls, resnet34, metrics=error_rate, cbs=WandbCallback())
learn.fine_tune(epochs=4)
# To log the training process only, pass the callback to fine_tune instead:
# - learn.fine_tune(epochs=4, cbs=WandbCallback())
from fastai.vision.all import *
path = untar_data(URLs.PETS)
files = get_image_files(path/"images")
# Regex labeller: capture everything before the trailing "_<digits>.jpg".
# FIX: the dot before "jpg" must be escaped (\.) — an unescaped "." matches
# any character, so e.g. "name_1Xjpg" would also (wrongly) have matched.
pat = r'^(.*)_\d+\.jpg'
# from_name_re labels each image by applying `pat` to its filename and
# taking the captured group as the class name.
dls = ImageDataLoaders.from_name_re(path=path,
fnames=files,
pat=pat,
item_tfms=Resize(224))
dls.show_batch()
# Rebuild the DataLoaders with presizing:
# - item_tfms resizes every image to 460 (per item, on CPU)
# - batch_tfms applies data augmentation with a final size of 224 (per batch)
dls = ImageDataLoaders.from_name_re(path=path,
fnames=files,
pat=pat,
item_tfms=Resize(460),
batch_tfms=aug_transforms(size=224))
dls.show_batch()
learn = cnn_learner(dls, resnet34, metrics=error_rate)
# Plot loss vs. learning rate to help pick a good base learning rate.
learn.lr_find()
# 4 epochs of fine-tuning with base_lr=5e-3.
learn.fine_tune(4, 5e-3)
learn.show_results()
interp = Interpretation.from_learner(learn)
# The 9 validation images with the highest loss.
interp.plot_top_losses(9, figsize=(15,10))
classification_interp = ClassificationInterpretation.from_learner(learn)
classification_interp.print_classification_report()
# Only class pairs confused at least min_val=3 times.
classification_interp.most_confused(min_val=3)
classification_interp.plot_confusion_matrix(figsize=(20,15))