#
# yt workshop 2012 data download script
# 
# This script will assist downloading the datasets that the yt
# workshop organizers have made available.
#
# We recommend installing the multi-stream, multi-protocol, and
# open-source downloader 'aria2'. The script will still function
# without it, but it should go faster with it installed.
# Please see http://aria2.sourceforge.net/.
# Aria2 is available on most (all?) linux distributions through
# the package manager (e.g. "sudo apt-get install aria2")
# and on Mac OS X in Fink or MacPorts ("sudo (fink/port) install aria2").
# Note that the package is named 'aria2' but the executable is 'aria2c'.
#
# This file should not need to be edited unless you're adding a dataset for
# download.
#

import subprocess, os, sys

class Downloader(object):
    """Download a tar archive with an external tool and unpack it.

    When the object is created, the first available tool out of aria2c,
    wget and curl (in that order of preference) is selected.  Every
    download is extracted into the output directory and the downloaded
    archive is deleted afterwards.  Any failure prints a message and ends
    the process with exit status 1.
    """

    # (executable name, name of the method that drives it), most preferred
    # first.
    _TOOLS = (("aria2c", "_aria2"), ("wget", "_wget"), ("curl", "_curl"))

    def __init__(self):
        self._choose_method()

    def _aria2(self, path, outdir):
        """Download the URL `path` into `outdir` with aria2c, then untar it."""
        print("Downloading %s with aria" % path)
        res = subprocess.call(["aria2c", "-x4", "-c", "--file-allocation=none",
                               "-d", "%s" % outdir, path])
        self._finish("aria2", res, path, outdir)

    def _wget(self, path, outdir):
        """Download the URL `path` into `outdir` with wget, then untar it."""
        print("Downloading %s with wget" % path)
        res = subprocess.call(["wget", "-nv", "-P", "%s" % outdir, path])
        self._finish("wget", res, path, outdir)

    def _curl(self, path, outdir):
        """Download the URL `path` into `outdir` with curl, then untar it."""
        print("Downloading %s with curl" % path)
        # curl is kind of annoying with default output to stdout, so the
        # target file has to be named explicitly.
        name = path.split("/")[-1]
        # -f makes curl report HTTP errors through its exit status instead
        # of saving the server's error page as if it were the tar file.
        res = subprocess.call(["curl", "-f", "-L", "-o",
                               os.path.join("%s" % outdir, name), path])
        self._finish("curl", res, path, outdir)

    def _finish(self, tool, status, path, outdir):
        """Exit if `tool` reported failure; otherwise unpack the download.

        `status` is the exit status returned by the download command and
        `path` is the URL that was requested; the archive is expected in
        `outdir` under the last component of that URL.
        """
        if status != 0:
            print("%s failed to download %s!" % (tool, path))
            # Non-zero status so that calling shells/scripts see the failure.
            sys.exit(1)
        self._untar(path.split("/")[-1], outdir)

    def _untar(self, filename, outdir):
        """Extract `outdir`/`filename` into `outdir` and delete the archive."""
        archive = os.path.join("%s" % outdir, filename)
        print("untarring %s to directory %s" % (filename, outdir))
        res = subprocess.call(["tar", "-xf", archive, "-C", "%s" % outdir])
        if res != 0:
            print("tar failed to extract %s!" % filename)
            sys.exit(1)
        os.remove(archive)

    def _choose_method(self):
        """Pick the download method for the best tool found on PATH.

        Sets `self.method` to the bound method for the first of aria2c,
        wget or curl that exists as an executable file in one of the PATH
        directories, or to None when none of them is available.
        """
        # Empty PATH entries are skipped; a missing PATH variable is
        # treated as an empty search path rather than an error.
        search_dirs = [entry
                       for entry in os.environ.get("PATH", "").split(os.pathsep)
                       if entry]
        self.method = None
        for tool, handler in self._TOOLS:
            for directory in search_dirs:
                candidate = os.path.join(directory, tool)
                if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                    self.method = getattr(self, handler)
                    return

    def download(self, path, outdir):
        """Download the URL `path` into directory `outdir` and unpack it.

        Exits with status 1 (after printing a message) when no download
        tool was found, when the download fails, or when tar fails.
        """
        method = getattr(self, "method", None)
        if method is None:
            print("No download tool found: please install aria2c, wget or curl.")
            sys.exit(1)
        method(path, outdir)


class Dataset(Downloader):
    """A named group of tar files that can be downloaded and unpacked.

    Parameters
    ----------
    name : short name of the dataset; also the directory that is created
        (relative to the current working directory) to hold the files.
    description : long, human-readable description of the dataset.
    host : URL of the directory that contains the tar files.
    tars : list of tar file names found under `host`.
    size : total size of all files, in GB.
    """

    def __init__(self, name, description, host, tars, size):
        self.name = name # Short name of the dataset, also the name of dir created.
        self.description = description # Long description of the dataset
        self.host = host # Path to the directory-level of the tar files.
        self.tars = tars # Tar file names.
        self.size = size # in GB of all files.
        # Picks the download tool once; the download_* methods reuse it.
        self._choose_method()

    def _ensure_dir(self):
        """Create the dataset directory unless it already exists."""
        try:
            os.mkdir(self.name)
        except OSError:
            # An existing directory is fine.  Anything else (no permission,
            # a regular file in the way, ...) would make every download
            # that follows fail, so let the error propagate.
            if not os.path.isdir(self.name):
                raise

    def _url(self, tar):
        """Return the full URL of `tar`, avoiding a doubled slash."""
        return self.host.rstrip("/") + "/" + tar

    def download_all(self):
        """Download and unpack every tar file of this dataset."""
        self._ensure_dir()
        for tar in self.tars:
            self.download(self._url(tar), self.name)

    def download_one(self, ID):
        """Download and unpack the single tar file at index `ID` of `tars`.

        Raises IndexError when `ID` is outside the list.
        """
        self._ensure_dir()
        self.download(self._url(self.tars[ID]), self.name)

######
# Here are all the various datasets. Edit here if you're adding a new dataset.
######

# Each entry is Dataset(name, description, host, tars, size); size is the
# total in GB.  Hosts are written without a trailing slash because the tar
# name is appended after a "/" when the download URL is built.

enzo_tiny_cosmology = Dataset(
    "enzo_tiny_cosmology",
    "Enzo simulation, cosmology, 32^3 root grid, 5 levels AMR, star formation,\n"
    "32 Mpc/h on a side, 57 datasets, 680 MB total.",
    "http://yt-project.org/data/enzo_tiny_cosmology",
    ["sim.tar"] + ["DD%04d.tar" % i for i in range(47)] + ["RD%04d.tar" % i for i in range(10)],
    0.68,
    )

Enzo_64 = Dataset(
    "Enzo_64",
    "Enzo dataset, cosmology, 64^3 root grid, 5 levels AMR, star formation,\n"
    "128 Mpc/h on a side, 4GB total.",
    "http://yt-project.org/data/Enzo_64",
    ["64.tar"] + ["DD%04d.tar" % i for i in range(44)] + ["RD%04d.tar" % i for i in range(7)],
    4.0,
    )

IsolatedGalaxy = Dataset(
    "IsolatedGalaxy",
    "An isolated galaxy dataset.",
    "http://yt-project.org/data/IsolatedGalaxy",
    ["galaxy0030.tar"],
    0.5,
    )

PopIII = Dataset(
    "PopIII",
    "A Population III star dataset.  29 levels of refinement, 64 zones per" +
    " Jeans Length.",
    "http://yt-project.org/data/PopIII",
    ["DD0087.tar"],
    36.0,
    )

Sedov_3d = Dataset(
    "Sedov3D",
    "Sedov3D test",
    "http://yt-project.org/data/Sedov_3d",
    ["sedov_hdf5_chk_%04i.tar" % i for i in range(4)],
    0.2,
    )

GasSloshing = Dataset(
    "GasSloshing",
    "Two timesteps of gas sloshing around",
    "http://yt-project.org/data/GasSloshing",
    ["sloshing_nomag2_hdf5_plt_cnt_0100.tar",
     "sloshing_nomag2_hdf5_plt_cnt_0150.tar"],
    0.65,
    )

GasSloshingLowRes = Dataset(
    "GasSloshingLowRes",
    "Many timesteps of (low-res) gas sloshing around",
    "http://yt-project.org/data/GasSloshingLowRes",
    ["sloshing_nomag2_hdf5_plt_cnt_0100.tar",
     "sloshing_nomag2_hdf5_plt_cnt_0150.tar"],
    0.65,
    )

WindTunnel = Dataset(
    "WindTunnel",
    "Wind tunnel test problem",
    "http://yt-project.org/data/WindTunnel",
    ["windtunnel_4lev_hdf5_plt_cnt_%04i.tar" % i for i in range(41)],
    0.1,
    )

GalaxyClusterMerger = Dataset(
    "GalaxyClusterMerger",
    "A merger of two galaxy clusters",
    "http://yt-project.org/data/GalaxyClusterMerger",
    ["fiducial_1to3_b0.273d_hdf5_plt_cnt_0175.tar"],
    6.0,
    )

# Don't forget to add your dataset here!
# NOTE(review): GasSloshingLowRes is defined above but is not in this list,
# and its tar list and size are identical to GasSloshing's despite the
# "many timesteps" description -- confirm its contents before enabling it.
Datasets = [enzo_tiny_cosmology, Enzo_64, IsolatedGalaxy,
            Sedov_3d, GasSloshing, WindTunnel, GalaxyClusterMerger]
# Datasets.append(PopIII)

#####
# Here's the user-facing end of the operation.
#####

# raw_input only exists on Python 2; fall back to input on Python 3.
try:
    _read_line = raw_input
except NameError:
    _read_line = input


def _confirm(prompt):
    """Show `prompt` and return True when the answer is 'y' or 'yes'.

    The comparison ignores case and surrounding whitespace; any other
    answer counts as "no".
    """
    return _read_line(prompt).strip().lower() in ("y", "yes")


def _ask_index(prompt, count):
    """Show `prompt` and return an integer index in range(count).

    The question is repeated until the answer is a valid index, so that a
    typo does not abort the script with a traceback and a negative number
    does not silently select an entry from the end of the list.
    """
    if count < 1:
        print("There is nothing to choose from!")
        sys.exit(1)
    answer = _read_line(prompt)
    while True:
        try:
            index = int(answer)
        except ValueError:
            index = -1
        if 0 <= index < count:
            return index
        answer = _read_line(
            "Please enter an integer from 0 to %d: " % (count - 1))


# Total size in GB of every dataset offered for download.
total_size = sum(dataset.size for dataset in Datasets)

download_everything = _confirm("""~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
yt workshop 2012 data download script
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This script will help you download and untar the datasets the organizers
have prepared for use during the workshop. Before running the script,
please make sure you have one of the following installed on your system:

* aria2, a fast multi-stream, multi-protocol, and open-source file downloader.
This is recommended because it has the capability of downloading files much
faster than the other two options. It also handles restarts very gracefully.
http://aria2.sourceforge.net/
install on ubuntu: sudo apt-get install aria2
install on mac: sudo (port/fink) install aria2
to use on command line: aria2c (note the extra 'c', it's not just aria2!)

* wget, a standard on most modern unixes.

* curl, a standard on most modern unixes.

If you aren't sure, hit "CTRL-C" now to exit, and come back.

Ok, let's move on. Just answer a (few) question(s) and then the downloading
will begin!

* Download all of the data.
The simplest option is to download all of the data. All of the data
available adds up to %.1f GB. Selecting n(o) will give you finer-grained
download options.

Download all the data? (y/n): """ % total_size)

if download_everything:
    for dataset in Datasets:
        dataset.download_all()
    print("All done!")
    sys.exit()

# Numbered menu of the available datasets.
ds_desc = "".join(
    "%d: %s - %.1f GB\n\n" % (i, dataset.description, dataset.size)
    for i, dataset in enumerate(Datasets))

dsID = _ask_index("""
The following datasets are available:

%s

* Download a single dataset.
Which dataset would you like to download all or part of?
Please specify an integer value from the list above: """ % ds_desc,
    len(Datasets))

choice = Datasets[dsID]

if _confirm("""
Would you like to download all the snapshots from that dataset? (y/n): """):
    choice.download_all()
    print("All done!")
    sys.exit()

# Numbered menu of the tar files in the chosen dataset.
snaps = "".join("%d: %s\n" % (i, tar) for i, tar in enumerate(choice.tars))

singleID = _ask_index("""
Your chosen dataset is:

%s

This dataset has the following data snapshots:

%s

# Download a single snapshot.
Which snapshot would you like to download?
Please specify an integer value from the list above: """ % (
    choice.description, snaps), len(choice.tars))

choice.download_one(singleID)
print("All done!")

