減色プログラム
Revision | ec393489c021af1d5498d68597e98300079ebdcb (tree) |
---|---|
Time | 2011-05-19 03:43:15 |
Author | berupon <berupon@gmai...> |
Committer | berupon |
optimized
@@ -51,19 +51,28 @@ struct Array2D | ||
51 | 51 | } |
52 | 52 | } |
53 | 53 | |
54 | + __forceinline | |
54 | 55 | T* operator[] (int row) { |
55 | 56 | return &pBuff_[row * width_]; |
56 | 57 | } |
58 | + | |
59 | + __forceinline | |
57 | 60 | const T* operator[] (int row) const { |
58 | 61 | return &pBuff_[row * width_]; |
59 | 62 | } |
60 | 63 | |
61 | 64 | Array2D<T>& operator *= (const T& scalar) { |
65 | +#if 1 | |
66 | + for (size_t i=0; i<width_*height_; ++i) { | |
67 | + pBuff_[i] *= scalar; | |
68 | + } | |
69 | +#else | |
62 | 70 | for (int i=0; i<width_; i++) { |
63 | 71 | for (int j=0; j<height_; j++) { |
64 | 72 | (*this)[j][i] *= scalar; |
65 | 73 | } |
66 | 74 | } |
75 | +#endif | |
67 | 76 | return *this; |
68 | 77 | } |
69 | 78 |
@@ -174,12 +183,30 @@ struct Array3D | ||
174 | 183 | delete pBuff_; |
175 | 184 | } |
176 | 185 | |
186 | +/* | |
177 | 187 | Array2D<T> operator[] (int depth) { |
178 | 188 | return Array2D<T>(width_, height_, &pBuff_[depth * width_ * height_]); |
179 | 189 | } |
180 | 190 | Array2D<T> operator[] (int depth) const { |
181 | 191 | return Array2D<T>(width_, height_, &pBuff_[depth * width_ * height_]); |
182 | 192 | } |
193 | +*/ | |
194 | + __forceinline | |
195 | + T& operator() (size_t z, size_t y, size_t x) { | |
196 | + return pBuff_[ | |
197 | + z * width_ * height_ | |
198 | + + y * width_ | |
199 | + + x | |
200 | + ]; | |
201 | + } | |
202 | + __forceinline | |
203 | + const T& operator() (size_t z, size_t y, size_t x) const { | |
204 | + return pBuff_[ | |
205 | + z * width_ * height_ | |
206 | + + y * width_ | |
207 | + + x | |
208 | + ]; | |
209 | + } | |
183 | 210 | }; |
184 | 211 | |
185 | 212 |
@@ -119,7 +119,10 @@ | ||
119 | 119 | AdditionalIncludeDirectories="./" |
120 | 120 | PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS" |
121 | 121 | RuntimeLibrary="2" |
122 | + BufferSecurityCheck="false" | |
122 | 123 | EnableFunctionLevelLinking="true" |
124 | + EnableEnhancedInstructionSet="2" | |
125 | + FloatingPointModel="2" | |
123 | 126 | UsePrecompiledHeader="2" |
124 | 127 | WarningLevel="3" |
125 | 128 | DebugInformationFormat="3" |
@@ -175,6 +178,10 @@ | ||
175 | 178 | UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}" |
176 | 179 | > |
177 | 180 | <File |
181 | + RelativePath=".\lib.cpp" | |
182 | + > | |
183 | + </File> | |
184 | + <File | |
178 | 185 | RelativePath=".\main.cpp" |
179 | 186 | > |
180 | 187 | </File> |
@@ -0,0 +1,3 @@ | ||
1 | +#include "stdafx.h" | |
2 | + | |
3 | +#pragma comment(lib, "winmm.lib") |
@@ -48,6 +48,8 @@ typedef Array2D<Color4f> Image4f; | ||
48 | 48 | |
49 | 49 | // http://proglab.aki.gs/mediaproc/colors.html |
50 | 50 | |
51 | +size_t call_count = 0; | |
52 | + | |
51 | 53 | int _tmain(int argc, _TCHAR* argv[]) |
52 | 54 | { |
53 | 55 | if (argc < 1+3 || argc > 1+5) { |
@@ -183,8 +185,14 @@ int _tmain(int argc, _TCHAR* argv[]) | ||
183 | 185 | NULL, |
184 | 186 | &filter5_weights |
185 | 187 | }; |
188 | + | |
189 | + DWORD started = ::timeGetTime(); | |
190 | + | |
186 | 191 | spatial_color_quant(image, *filters[filter_size], quantized_image, palette, num_colors, coarse_variables, 1.0, 0.001, 3, 1); |
187 | 192 | |
193 | + DWORD timeTaken = ::timeGetTime() - started; | |
194 | + printf("time taken : %d ms, call count : %d\n", timeTaken, call_count); | |
195 | + | |
188 | 196 | { |
189 | 197 | FILE* out = _tfopen(argv[2], _T("wb")); |
190 | 198 | if (out == NULL) { |
@@ -34,7 +34,7 @@ void fill_random(Array3D<float>& a) | ||
34 | 34 | for (int i=0; i<a.width_; i++) { |
35 | 35 | for (int j=0; j<a.height_; j++) { |
36 | 36 | for (int k=0; k<a.depth_; k++) { |
37 | - a[k][j][i] = ((float)rand())/RAND_MAX; | |
37 | + a(k,j,i) = ((float)rand())/RAND_MAX; | |
38 | 38 | } |
39 | 39 | } |
40 | 40 | } |
@@ -118,6 +118,7 @@ void compute_b_array( | ||
118 | 118 | } |
119 | 119 | } |
120 | 120 | |
121 | +__forceinline | |
121 | 122 | Color4f b_value(const Image4f& b, int i_x, int i_y, int j_x, int j_y) |
122 | 123 | { |
123 | 124 | int radius_width = (b.width_ - 1)/2, |
@@ -210,11 +211,11 @@ int best_match_color( | ||
210 | 211 | ) |
211 | 212 | { |
212 | 213 | int max_v = 0; |
213 | - float max_weight = vars[0][i_y][i_x]; | |
214 | + float max_weight = vars(0,i_y,i_x); | |
214 | 215 | for (size_t v=1; v<num_colors; ++v) { |
215 | - if (vars[v][i_y][i_x] > max_weight) { | |
216 | + if (vars(v,i_y,i_x) > max_weight) { | |
216 | 217 | max_v = v; |
217 | - max_weight = vars[v][i_y][i_x]; | |
218 | + max_weight = vars(v,i_y,i_x); | |
218 | 219 | } |
219 | 220 | } |
220 | 221 | return max_v; |
@@ -228,34 +229,43 @@ void zoom(const Array3D<float>& small, Array3D<float>& big) | ||
228 | 229 | // is 1.2 fine pixels wide and high. |
229 | 230 | for (int y=0; y<big.height_/2*2; y++) { |
230 | 231 | for (int x=0; x<big.width_/2*2; x++) { |
231 | - float left = max(0.0, (x-0.1)/2.0), right = min(small.width_-0.001, (x+1.1)/2.0); | |
232 | - float top = max(0.0, (y-0.1)/2.0), bottom = min(small.height_-0.001, (y+1.1)/2.0); | |
232 | + float left = max(0.0, (x-0.1)/2.0); | |
233 | + float right = min(small.width_-0.001, (x+1.1)/2.0); | |
234 | + float top = max(0.0, (y-0.1)/2.0); | |
235 | + float bottom = min(small.height_-0.001, (y+1.1)/2.0); | |
233 | 236 | int x_left = (int)floor(left), x_right = (int)floor(right); |
234 | 237 | int y_top = (int)floor(top), y_bottom = (int)floor(bottom); |
235 | 238 | float area = (right-left)*(bottom-top); |
236 | - float top_left_weight = (ceil(left) - left)*(ceil(top) - top)/area; | |
237 | - float top_right_weight = (right - floor(right))*(ceil(top) - top)/area; | |
238 | - float bottom_left_weight = (ceil(left) - left)*(bottom - floor(bottom))/area; | |
239 | - float bottom_right_weight = (right - floor(right))*(bottom - floor(bottom))/area; | |
240 | - float top_weight = (right-left)*(ceil(top) - top)/area; | |
241 | - float bottom_weight = (right-left)*(bottom - floor(bottom))/area; | |
242 | - float left_weight = (bottom-top)*(ceil(left) - left)/area; | |
243 | - float right_weight = (bottom-top)*(right - floor(right))/area; | |
239 | + float inv_area = 1.0 / area; | |
240 | + float left2 = (ceil(left) - left); | |
241 | + float top2 = (ceil(top) - top); | |
242 | + float bottom2 = (bottom - floor(bottom)); | |
243 | + float right2 = (right - floor(right)); | |
244 | + float top_left_weight = left2 * top2 * inv_area; | |
245 | + float top_right_weight = right2 * top2 * inv_area; | |
246 | + float bottom_left_weight = left2 * bottom2 * inv_area; | |
247 | + float bottom_right_weight = right2 * bottom2 * inv_area; | |
248 | + float top_weight = (right-left) * top2 * inv_area; | |
249 | + float bottom_weight = (right-left) * bottom2 * inv_area; | |
250 | + float left_weight = (bottom-top) * left2 * inv_area; | |
251 | + float right_weight = (bottom-top) * right2 * inv_area; | |
244 | 252 | for (int z=0; z<big.depth_; z++) { |
253 | + float val; | |
245 | 254 | if (x_left == x_right && y_top == y_bottom) { |
246 | - big[z][y][x] = small[z][y_top][x_left]; | |
255 | + val = small(z,y_top,x_left); | |
247 | 256 | } else if (x_left == x_right) { |
248 | - big[z][y][x] = top_weight*small[z][y_top][x_left] + | |
249 | - bottom_weight*small[z][y_bottom][x_left]; | |
257 | + val = top_weight * small(z,y_top,x_left) + | |
258 | + bottom_weight * small(z,y_bottom,x_left); | |
250 | 259 | } else if (y_top == y_bottom) { |
251 | - big[z][y][x] = left_weight*small[z][y_top][x_left] + | |
252 | - right_weight*small[z][y_top][x_right]; | |
260 | + val = left_weight * small(z,y_top,x_left) + | |
261 | + right_weight * small(z,y_top,x_right); | |
253 | 262 | } else { |
254 | - big[z][y][x] = top_left_weight*small[z][y_top][x_left] + | |
255 | - top_right_weight*small[z][y_top][x_right] + | |
256 | - bottom_left_weight*small[z][y_bottom][x_left] + | |
257 | - bottom_right_weight*small[z][y_bottom][x_right]; | |
263 | + val = top_left_weight * small(z,y_top,x_left) + | |
264 | + top_right_weight * small(z,y_top,x_right) + | |
265 | + bottom_left_weight * small(z,y_bottom,x_left) + | |
266 | + bottom_right_weight * small(z,y_bottom,x_right); | |
258 | 267 | } |
268 | + big(z,y,x) = val; | |
259 | 269 | } |
260 | 270 | } |
261 | 271 | } |
@@ -289,16 +299,16 @@ void compute_initial_s( | ||
289 | 299 | if (i_x == j_x && i_y == j_y) continue; |
290 | 300 | Color4f b_ij = b_value(b,i_x,i_y,j_x,j_y); |
291 | 301 | for (int v=0; v<palette_size; v++) { |
292 | - float vcv = coarse_variables[v][i_y][i_x]; | |
302 | + float vcv = coarse_variables(v,i_y,i_x); | |
293 | 303 | for (int alpha=v; alpha<palette_size; alpha++) { |
294 | - float mult = vcv * coarse_variables[alpha][j_y][j_x]; | |
304 | + float mult = vcv * coarse_variables(alpha,j_y,j_x); | |
295 | 305 | s[alpha][v] += mult * b_ij; |
296 | 306 | } |
297 | 307 | } |
298 | 308 | } |
299 | 309 | } |
300 | 310 | for (int v=0; v<palette_size; v++) { |
301 | - s[v][v] += coarse_variables[v][i_y][i_x]*center_b; | |
311 | + s[v][v] += coarse_variables(v,i_y,i_x)*center_b; | |
302 | 312 | } |
303 | 313 | } |
304 | 314 | } |
@@ -326,11 +336,11 @@ void update_s( | ||
326 | 336 | if (i_x == j_x && i_y == j_y) continue; |
327 | 337 | Color4f* ps = s[alpha]; |
328 | 338 | for (int v=0; v <= alpha; v++) { |
329 | - float mult = coarse_variables[v][i_y][i_x]; | |
339 | + float mult = coarse_variables(v,i_y,i_x); | |
330 | 340 | ps[v] += mult * delta_b_ij; |
331 | 341 | } |
332 | 342 | for (int v=alpha; v<palette_size; v++) { |
333 | - float mult = coarse_variables[v][i_y][i_x]; | |
343 | + float mult = coarse_variables(v,i_y,i_x); | |
334 | 344 | s[v][alpha] += mult * delta_b_ij; |
335 | 345 | } |
336 | 346 | } |
@@ -359,7 +369,7 @@ void refine_palette( | ||
359 | 369 | sum.zero(); |
360 | 370 | for (int i_y=0; i_y<coarse_variables.height_; i_y++) { |
361 | 371 | for (int i_x=0; i_x<coarse_variables.width_; i_x++) { |
362 | - float cv = coarse_variables[v][i_y][i_x]; | |
372 | + float cv = coarse_variables(v,i_y,i_x); | |
363 | 373 | Color4f av = a[i_y][i_x]; |
364 | 374 | Color4f result = cv * av; |
365 | 375 | sum += result; |
@@ -371,7 +381,7 @@ void refine_palette( | ||
371 | 381 | for (unsigned int k=0; k<3; k++) { |
372 | 382 | Array2D<float> S_k = extract_vector_layer_2d(s, k); |
373 | 383 | vector<float> R_k = extract_vector_layer_1d(&r[0], num_colors, k); |
374 | - vector<float> palette_channel = -1.0f*((2.0f*S_k).matrix_inverse())*R_k; | |
384 | + vector<float> palette_channel = -1.0f * ((2.0f*S_k).matrix_inverse()) * R_k; | |
375 | 385 | for (unsigned int v=0; v<num_colors; v++) { |
376 | 386 | float val = palette_channel[v]; |
377 | 387 | if (val < 0) val = 0; |
@@ -399,7 +409,7 @@ void compute_initial_j_palette_sum( | ||
399 | 409 | Color4f palette_sum; |
400 | 410 | palette_sum.zero(); |
401 | 411 | for (size_t alpha=0; alpha<num_colors; ++alpha) { |
402 | - palette_sum += coarse_variables[alpha][j_y][j_x]*palette[alpha]; | |
412 | + palette_sum += coarse_variables(alpha,j_y,j_x)*palette[alpha]; | |
403 | 413 | } |
404 | 414 | j_palette_sum[j_y][j_x] = palette_sum; |
405 | 415 | } |
@@ -559,8 +569,8 @@ void spatial_color_quant( | ||
559 | 569 | // Prevent the matrix S from becoming singular |
560 | 570 | if (new_val <= 0) new_val = 1e-10; |
561 | 571 | if (new_val >= 1) new_val = 1 - 1e-10; |
562 | - float delta_m_iv = new_val - coarse_variables[v][i_y][i_x]; | |
563 | - coarse_variables[v][i_y][i_x] = new_val; | |
572 | + float delta_m_iv = new_val - coarse_variables(v,i_y,i_x); | |
573 | + coarse_variables(v,i_y,i_x) = new_val; | |
564 | 574 | j_pal += delta_m_iv * palette[v]; |
565 | 575 | if (abs(delta_m_iv) > 0.001 && !skip_palette_maintenance) { |
566 | 576 | update_s(s, coarse_variables, b, i_x, i_y, v, delta_m_iv); |
@@ -636,7 +646,7 @@ void spatial_color_quant( | ||
636 | 646 | temperature *= temperature_multiplier; |
637 | 647 | } |
638 | 648 | } |
639 | - | |
649 | + | |
640 | 650 | // This is normally not used, but is handy sometimes for debugging |
641 | 651 | while (coarse_level > 0) { |
642 | 652 | coarse_level--; |