Fold All / Expand All

2008年10月27日 星期一

yam mymedia batch uploader

簡陋command line版本!

這次採用了兩個package,分別是ClientForm與pytagger。
ClientForm可以自動把html裡的input們全部抓出來,再透過api介面來填想要的value。
pytagger則是用來讀取mp3 tag,有支援到ID3V2.4喔。

目前的command line版本是透過login_info.txt來傳登入資料、uploads.m3u8記錄要上傳的mp3檔位置。上傳的標題採自動抓mp3 tag,目前是寫死在code裡,之後目標是改成可以修改。

google urlfetch那段可以忽略,那只是複製之前的code沒改掉@@
# coding: utf-8
import logging
import re
import os
import urllib2
from urllib import urlencode
import tagger
import ClientForm

# Root logger: show INFO and above, each record prefixed with time and level.
logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.INFO,
)

class URL:
    """Small HTTP client that remembers the most recent response.

    When ``google.appengine.api.urlfetch`` can be imported it is used for
    every request; otherwise requests go through a ``urllib2`` opener that
    carries a cookie jar, so cookies set by one response are sent with the
    following requests.
    """

    def __init__(self):
        # Response of the latest do_request() call; None until one is made.
        self.handle = None
        try:
            # if under Google App Engine (we cannot urllib)
            from google.appengine.api import urlfetch
        except ImportError:
            import cookielib
            import urllib2

            self.google = False
            jar = cookielib.CookieJar()
            self.opener = urllib2.build_opener(
                urllib2.HTTPCookieProcessor(jar))
        else:
            self.google = True
            self._fetch = urlfetch.fetch

    def do_request(self, url, data=None, headers=None):
        """Send a request and keep the response in ``self.handle``.

        url     -- address to request; must not be None.
        data    -- request body, already url-encoded.  A non-empty body
                   makes the request a form-encoded POST.
        headers -- optional dict of request headers.  When omitted, a GET
                   sends the default User-Agent and a POST sends only the
                   form Content-Type.  A supplied dict is kept as is (it is
                   copied, not modified) and a POST adds the form
                   Content-Type unless the caller already set one.

        Raises ValueError when ``url`` is None.
        """
        if url is None:
            raise ValueError('url must not be None')

        form_type = 'application/x-www-form-urlencoded'
        method = 'get'
        if data:
            method = 'post'

        if headers is None:
            if data:
                headers = {'Content-Type': form_type}
            else:
                headers = {'User-Agent': 'Google-urlfetch'}
        else:
            headers = dict(headers)
            if data:
                headers.setdefault('Content-Type', form_type)

        if self.google:
            # Errors from urlfetch propagate instead of being retried on a
            # urllib2 opener that was never created in this environment.
            self.handle = self._fetch(url, data, method, headers)
        else:
            # not in Google App Engine environment, use traditional urllib2
            import urllib2

            req = urllib2.Request(url, data, headers)
            self.handle = self.opener.open(req)

    def get_content(self):
        """Return the body of the most recent response."""
        if self.google:
            return self.handle.content
        return self.handle.read()

    def get_info(self):
        """Return the headers of the most recent response."""
        if self.google:
            return self.handle.headers
        return self.handle.info()


class MymediaUploader:
    """Batch uploader that posts mp3 files through the yam upload form.

    Call order used by the script: login(), get_para(),
    get_mp3_tag(files), upload(files, tags).
    """

    # File the upload form page is saved to by get_para() and parsed by
    # upload().
    post_media = 'post_media.txt'
    # File holding the credentials: an ``id:...`` line followed by a
    # ``pwd:...`` line.
    login_info = 'login_info.txt'

    def __init__(self):
        self.opener = URL()

    def login(self):
        """Post the credentials stored in ``login_info`` to the login URL.

        Raises IOError when the file does not exist or does not contain
        the ``id:`` / ``pwd:`` lines.
        """
        if not os.path.exists(self.login_info):
            logging.error('Login Info ``%s\'\' not exists' % (self.login_info))
            raise IOError('login info file not found: %s' % (self.login_info,))

        f = open(self.login_info, 'rb')
        try:
            data = f.read()
        finally:
            f.close()

        found = re.search('id:(.+)[\r\n]+pwd:(.+)', data)
        if not found:
            logging.error('No Login Info')
            raise IOError('no id/pwd entries in %s' % (self.login_info,))

        # '.' also matches '\r', so a CRLF file would otherwise leave a
        # trailing '\r' on both captured values.
        user_id = found.group(1).rstrip('\r')
        password = found.group(2).rstrip('\r')

        url = 'http://member.yam.com/merge/auth1.php'
        body = urlencode({
            'id': user_id,
            'pwd': password,
        })
        self.opener.do_request(url, data=body)

    def get_para(self):
        """Download the upload form page and save it to ``post_media``."""
        url = 'http://admin.blog.yam.com/media_post.php'

        self.opener.do_request(url)
        data = self.opener.get_content()

        f = open(self.post_media, 'wb')
        try:
            f.write(data)
        finally:
            f.close()

    def get_mp3_tag(self, files):
        """Read the ID3v2 tag of every file in ``files``.

        Returns a dict mapping each file name to a dict with the keys
        'artist', 'title', 'album' and 'track_no'; a value is None when the
        corresponding frame (TPE1, TIT2, TALB, TRCK) is absent.  'track_no'
        is the first number of the TRCK frame, zero-padded to two digits.
        """
        tags = {}

        for name in files:
            frames = {}
            for frame in tagger.ID3v2(name).frames:
                frames[frame.fid] = frame.strings
                logging.debug('[%s] = %s' % (frame.fid, frame.strings))

            track_no = None
            track_text = self._first_string(frames, 'TRCK')
            if track_text is not None:
                track_no = self._format_track_no(track_text)

            tags[name] = {
                'artist': self._first_string(frames, 'TPE1'),
                'title': self._first_string(frames, 'TIT2'),
                'album': self._first_string(frames, 'TALB'),
                'track_no': track_no,
            }

        return tags

    def _first_string(self, frames, fid):
        """Return the stripped first string of frame ``fid``, or None."""
        strings = frames.get(fid)
        if not strings:
            return None
        return strings[0].strip()

    def _format_track_no(self, text):
        """Return the first number in ``text`` as two digits, or None."""
        # Handles both "3" and "3/12": only the leading number is used.
        found = re.search('([0-9]+)', text)
        if found is None:
            return None
        return '%02d' % int(found.group(1))

    def _build_title(self, tag):
        """Join the available tag fields as ``[artist][album][no][title]``."""
        ordered = [tag[key] for key in ('artist', 'album', 'track_no', 'title')]
        return ''.join(['[%s]' % (part,) for part in ordered if part])

    def _build_pod_tag(self, artist):
        """Return the artist with spaces as underscores; '' if no artist."""
        if not artist:
            return ''
        return artist.replace(' ', '_')

    def upload(self, files, tags):
        """Upload every file in ``files`` through the saved upload form.

        files -- file names to upload.
        tags  -- dict as returned by get_mp3_tag(); must contain every name
                 in ``files``.

        The form saved by get_para() is parsed once and its first form is
        reused for each file.
        """
        f = open(self.post_media, 'rb')
        try:
            forms = ClientForm.ParseFile(
                f,
                'http://upload.media.blog.yam.com/media/my_post.php',
                backwards_compat=False)
        finally:
            f.close()

        form = forms[0]

        for name in files:
            tag = tags[name]

            mp3 = open(name, 'rb')
            try:
                control = form.find_control('uploadfile')
                control.clear()
                control.add_file(mp3, 'audio/mpeg', name.encode('utf8'))

                form['podtitle'] = self._build_title(tag).encode('utf8')
                # Files without an artist frame get an empty tag instead of
                # failing on None.
                pod_tag = self._build_pod_tag(tag['artist'])
                form['podTag'] = pod_tag.encode('utf8')
                form['podcate[]'] = ('23',)  # 23: pop music, 0: other

                request = form.click()
                logging.debug('%s' % ((form)))

                self.send_request(request, name)
            finally:
                # Closed only after the request was sent.
                mp3.close()

    def send_request(self, request, name):
        """Send one prepared upload request, logging start and finish.

        request -- request object produced by the form's click().
        name    -- path of the file being uploaded; only its last path
                   component is logged.
        """
        # NOTE(review): this uses plain urllib2.urlopen rather than the
        # opener created in __init__ -- confirm the upload does not depend
        # on state held by that opener.
        name = re.split(r'[\\/]', name)[-1]
        logging.info('file [%s] started uploading' % (name,))
        response = urllib2.urlopen(request)
        try:
            logging.info('file [%s] finished uploading' % (name,))
        finally:
            response.close()

if __name__ == '__main__':
    # Playlist naming the mp3 files to upload, one path per line.
    playlist = 'uploads.m3u8'

    f = open(playlist, 'rb')
    try:
        # utf_8_sig skips the byte order mark if the playlist starts with one.
        lines = f.read().decode('utf_8_sig').split('\n')
    finally:
        f.close()

    files = []
    for line in lines:
        line = line.strip()
        if not line or line[0] == '#':
            # skip empty lines and comments
            continue
        if not os.path.exists(line):
            # Still skipped, but no longer silently.
            logging.warning('file [%s] not found, skipped' % (line,))
            continue

        files.append(os.path.abspath(line))

    mymedia = MymediaUploader()
    mymedia.login()
    mymedia.get_para()

    tags = mymedia.get_mp3_tag(files)

    mymedia.upload(files, tags)

2008年10月25日 星期六

本月未聽專輯列表(2008 Oct)

包括之前漏掉的@@

RAG FAIR「カラーズ」
RYTHEM「23」
EXILE「EXILE ENTERTAINMENT BEST」
Keane「Perfect Symmetry」
Code Geass R2 OST 1
Code Geass R2 OST 2
Macross Frontier OST 2
栗林みな実「dream link」
Suara「太陽と月」

堆到兩張OST在同一個列表…

jmp3 mp3 link retriever

input為某CD那頁網址,output為該CD所有歌曲的mp3連結。目前這個版本似乎不會有之前誤判同時多連線的問題,所以拿參數只需要一次,不像之前每個連結都要重拿一次才穩。

1. 取得 http://www.jmp3.net/it.php?q=json 給的三個參數。
2. base64解開檔案位置。
3. 把參數塞進檔案位置,下載位置就出來啦。
# -*- coding: utf8 -*-
# last modified by franklai, 2008 Oct 23
import re
import base64
from urllib import quote

class URL:
    """One HTTP request, performed when the object is created.

    Uses ``google.appengine.api.urlfetch`` when it can be imported and
    ``urllib2`` otherwise.  The response is kept in ``self.handle``.
    """

    def __init__(self, url, data=None, headers=None):
        """Request ``url`` and keep the response.

        url     -- address to request; must not be None.
        data    -- request body, already url-encoded.  A non-empty body
                   makes the request a form-encoded POST.
        headers -- optional dict of request headers.  When omitted, a GET
                   sends the default User-Agent and a POST sends only the
                   form Content-Type.  A supplied dict is kept as is (it is
                   copied, not modified) and a POST adds the form
                   Content-Type unless the caller already set one.

        Raises ValueError when ``url`` is None.
        """
        if url is None:
            raise ValueError('url must not be None')

        form_type = 'application/x-www-form-urlencoded'
        method = 'get'
        if data:
            method = 'post'

        if headers is None:
            if data:
                headers = {'Content-Type': form_type}
            else:
                headers = {'User-Agent': 'Google-urlfetch'}
        else:
            headers = dict(headers)
            if data:
                headers.setdefault('Content-Type', form_type)

        try:
            # if under Google App Engine (we cannot urllib)
            from google.appengine.api import urlfetch
        except ImportError:
            # not in Google App Engine environment, use traditional urllib2
            import urllib2

            self.google = False
            self.handle = urllib2.urlopen(urllib2.Request(url, data, headers))
        else:
            # Only the import is guarded, so fetch errors reach the caller.
            self.google = True
            self.handle = urlfetch.fetch(url, data, method, headers)

    def get_content(self):
        """Return the body of the response."""
        if self.google:
            return self.handle.content
        return self.handle.read()

    def get_info(self):
        """Return the headers of the response."""
        if self.google:
            return self.handle.headers
        return self.handle.info()


class Jmp3:
    """Builds the mp3 download links for a jmp3 CD page."""

    # Host the mp3 files are served from.
    file_host = 'member.jmp3.net'
    # Address answering with the three "is"/"it"/"ik" values as JSON.
    dpin_url = 'http://www.jmp3.net/it.php?q=json'

    def __init__(self):
        pass

    def get_mp3_links(self, url):
        """Return the list of mp3 links for the CD page at ``url``.

        Raises ValueError when the is/it/ik values cannot be obtained.
        """
        page = URL(url).get_content().decode('utf-8')
        tracks = self._get_raw_list(page)
        dpin = self._get_dpin()
        return [link for link, title in self._get_full_url(tracks, dpin)]

    def _get_raw_list(self, data):
        """Extract ``(title, prefix, suffix)`` for each track in the page.

        The ``name`` attribute of each matched anchor is base64-decoded to
        a path; ``prefix`` is that path up to and including its last '/',
        ``suffix`` is the remainder.
        """
        pattern = 'class="s-3"><a name="([^"]+)".*title="([^"]+)"'

        tracks = []
        for b64, title in re.findall(pattern, data):
            path = base64.b64decode(b64)
            cut = path.rfind('/') + 1
            tracks.append((title, path[:cut], path[cut:]))
        return tracks

    def _get_dpin(self):
        """Fetch the is/it/ik values from ``dpin_url``.

        Returns a dict with the keys 'is', 'it' and 'ik', or None when the
        response does not have the expected form.
        """
        data = URL(self.dpin_url).get_content()

        # {"is":"hh0mgdvc7rg12td2001","it":"1224745931","ik":"fd68199a9a809f06bd6a64b66c34d5c2"}
        pattern = '{"is":"([^"]+)","it":"([^"]+)","ik":"([^"]+)"}'
        found = re.search(pattern, data)
        if found is None:
            return None
        return {
            'is': found.group(1),
            'it': found.group(2),
            'ik': found.group(3),
        }

    def _get_full_url(self, raw_list, dpin):
        """Combine track paths with the is/it/ik values into full links.

        raw_list -- ``(title, prefix, suffix)`` tuples from _get_raw_list().
        dpin     -- dict from _get_dpin().

        Returns a list of ``(url, title)`` tuples; the url is percent-quoted
        with ':' and '/' left as is.

        Raises ValueError when ``dpin`` is None.
        """
        if dpin is None:
            raise ValueError('no is/it/ik values available from %s'
                             % (self.dpin_url,))

        dpin_str = 'dpin/%s/%s/%s/' % (dpin['is'], dpin['it'], dpin['ik'])

        links = []
        for title, prefix, suffix in raw_list:
            full_url = 'http://%s%s%s%s' % (
                self.file_host, prefix, dpin_str, suffix)
            links.append((quote(full_url, ':/'), title))
        return links

if __name__ == '__main__':
    # Example of a CD page address:
    # url = 'http://www.jmp3.net/Cd_html/E81B28/E8FCDC6.html'

    # One CD page address per line in; one mp3 link per line out.
    source = 'cd_urls.txt'
    target = 'mp3_links.txt'

    jmp3 = Jmp3()

    f = open(source, 'rb')
    try:
        out = open(target, 'wb')
        try:
            for url in f:
                # Drop the line ending and ignore blank lines.
                url = url.strip()
                if not url:
                    continue

                mp3_links = jmp3.get_mp3_links(url)

                out.write('\n'.join(mp3_links))
                out.write('\n')
        finally:
            out.close()
    finally:
        f.close()

# import urllib
# for link in mp3_links:
# urllib.urlretrieve(link)