summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/snow.txt259
1 files changed, 259 insertions, 0 deletions
diff --git a/doc/snow.txt b/doc/snow.txt
new file mode 100644
index 0000000000..b9ffe9f8c7
--- /dev/null
+++ b/doc/snow.txt
@@ -0,0 +1,259 @@
+=============================================
+SNOW Video Codec Specification Draft 20070103
+=============================================
+
+
+Definitions:
+============
+
+MUST the specific part must be done to conform to this standard
+SHOULD it is recommended to be done that way, but not strictly required
+
+ilog2(x) is the rounded down logarithm of x with basis 2
+ilog2(0) = 0
+
+Type definitions:
+=================
+
+b 1-bit range coded
+u unsigned scalar value range coded
+s signed scalar value range coded
+
+
+Bitstream syntax:
+=================
+
+frame:
+ header
+ prediction
+ residual
+
+header:
+ keyframe b MID_STATE
+ if(keyframe || always_reset)
+ reset_contexts
+ if(keyframe){
+ version u header_state
+ always_reset b header_state
+ temporal_decomposition_type u header_state
+ temporal_decomposition_count u header_state
+ spatial_decomposition_count u header_state
+ colorspace_type u header_state
+ chroma_h_shift u header_state
+ chroma_v_shift u header_state
+ spatial_scalability b header_state
+ max_ref_frames-1 u header_state
+ qlogs
+ }
+
+ spatial_decomposition_type s header_state
+ qlog s header_state
+ mv_scale s header_state
+ qbias s header_state
+ block_max_depth s header_state
+
+qlogs:
+ for(plane=0; plane<2; plane++){
+ quant_table[plane][0][0] s header_state
+ for(level=0; level < spatial_decomposition_count; level++){
+ quant_table[plane][level][1]s header_state
+ quant_table[plane][level][3]s header_state
+ }
+ }
+
+reset_contexts
+ *_state[*]= MID_STATE
+
+prediction:
+ for(y=0; y<block_count_vertical; y++)
+ for(x=0; x<block_count_horizontal; x++)
+ block(0)
+
+block(level):
+ if(keyframe){
+ intra=1
+ y_diff=cb_diff=cr_diff=0
+ }else{
+ if(level!=max_block_depth){
+ s_context= 2*left->level + 2*top->level + topleft->level + topright->level
+ leaf b block_state[4 + s_context]
+ }
+ if(level==max_block_depth || leaf){
+ intra b block_state[1 + left->intra + top->intra]
+ if(intra){
+ y_diff s block_state[32]
+ cb_diff s block_state[64]
+ cr_diff s block_state[96]
+ }else{
+ ref_context= ilog2(2*left->ref) + ilog2(2*top->ref)
+ if(ref_frames > 1)
+ ref u block_state[128 + 1024 + 32*ref_context]
+ mx_context= ilog2(2*abs(left->mx - top->mx))
+ my_context= ilog2(2*abs(left->my - top->my))
+ mvx_diff s block_state[128 + 32*(mx_context + 16*!!ref)]
+ mvy_diff s block_state[128 + 32*(my_context + 16*!!ref)]
+ }
+ }else{
+ block(level+1)
+ block(level+1)
+ block(level+1)
+ block(level+1)
+ }
+ }
+
+
+residual:
+ FIXME
+
+
+
+Tag description:
+----------------
+
+version
+ 0
+ this MUST NOT change within a bitstream
+
+always_reset
+ if 1 then the range coder contexts will be reset after each frame
+
+temporal_decomposition_type
+ 0
+
+temporal_decomposition_count
+ 0
+
+spatial_decomposition_count
+ FIXME
+
+colorspace_type
+ 0
+ this MUST NOT change within a bitstream
+
+chroma_h_shift
+ log2(luma.width / chroma.width)
+ this MUST NOT change within a bitstream
+
+chroma_v_shift
+ log2(luma.height / chroma.height)
+ this MUST NOT change within a bitstream
+
+spatial_scalability
+ 0
+
+max_ref_frames
+ maximum number of reference frames
+ this MUST NOT change within a bitstream
+
+ref_frames
+ minimum of the number of available reference frames and max_ref_frames
+ for example the first frame after a key frame always has ref_frames=1
+
+spatial_decomposition_type
+ wavelet type
+ 0 is a 9/7 symmetric compact integer wavelet
+ 1 is a 5/3 symmetric compact integer wavelet
+ others are reserved
+ stored as delta from last, last is reset to 0 if always_reset || keyframe
+
+qlog
+ quality (logarthmic quantizer scale)
+ stored as delta from last, last is reset to 0 if always_reset || keyframe
+
+mv_scale
+ stored as delta from last, last is reset to 0 if always_reset || keyframe
+ FIXME check that everything works fine if this chanes between frames
+
+qbias
+ dequantization bias
+ stored as delta from last, last is reset to 0 if always_reset || keyframe
+
+block_max_depth
+ maximum depth of the block tree
+ stored as delta from last, last is reset to 0 if always_reset || keyframe
+
+quant_table
+ quantiztation table
+
+Range Coder:
+============
+FIXME
+
+Neighboring Blocks:
+===================
+left and top are set to the respective blocks unless they are outside of
+the image in which case they are set to the Null block
+
+top-left is set to the top left block unless its outside of the image in
+which case it is set to the left block
+
+if this block has no larger parent block or its at the left side of its
+parent block and the top right block is not outside of the image then the
+top right block is used for top-right else the top-left block is used
+
+Null block
+y,cb,cr are 128
+level, ref, mx and my are 0
+
+
+Motion Vector Prediction:
+=========================
+1. the motion vectors of all the neighboring blocks are scaled to
+compensate for the difference of reference frames
+
+scaled_mv= (mv * (256 * (current_reference+1) / (mv.reference+1)) + 128)>>8
+
+2. the median of the scaled left, top and top-right vectors is used as
+motion vector prediction
+
+3. the used motion vector is the sum of the predictor and
+ (mvx_diff, mvy_diff)*mv_scale
+
+
+Intra DC Predicton:
+======================
+the luma and chroma values of the left block are used as predictors
+
+the used luma and chroma is the sum of the predictor and y_diff, cb_diff, cr_diff
+
+
+Motion Compensation:
+====================
+FIXME
+
+LL band prediction:
+===================
+FIXME
+
+Dequantizaton:
+==============
+FIXME
+
+Wavelet Transform:
+==================
+FIXME
+
+TODO:
+=====
+Important:
+finetune initial contexts
+spatial_decomposition_count per frame?
+flip wavelet?
+try to use the wavelet transformed predicted image (motion compensated image) as context for coding the residual coefficients
+try the MV length as context for coding the residual coefficients
+use extradata for stuff which is in the keyframes now?
+the MV median predictor is patented IIRC
+
+Not Important:
+spatial_scalability b vs u (!= 0 breaks syntax anyway so we can add a u later)
+
+
+Credits:
+========
+Michael Niedermayer
+Loren Merritt
+
+
+Copyright:
+==========
+GPL + GFDL + whatever is needed to make this a RFC